fix cyfi445 labs 0-3

This commit is contained in:
Frank Xu
2025-09-15 22:47:31 -04:00
parent e7aca5a24d
commit e1a0b81c65
5 changed files with 254 additions and 278 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -56,92 +56,47 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensor x: tensor([2., 3.], requires_grad=True)\n",
"Tensor y: tensor([1., 4.], requires_grad=True)\n"
"Tensor w: tensor([2., 3.], requires_grad=True)\n",
"Tensor b: tensor([1., 4.], requires_grad=True)\n"
]
}
],
"source": [
"# Create a tensor with requires_grad=True\n",
"x = torch.tensor([2.0, 3.0], requires_grad=True)\n",
"print(\"Tensor x:\", x)\n",
"# I have two trainable parameters: w0 and w1\n",
"w = torch.tensor([2.0, 3.0], requires_grad=True)\n",
"print(\"Tensor w:\", w)\n",
"\n",
"# Create another tensor\n",
"y = torch.tensor([1.0, 4.0], requires_grad=True)\n",
"print(\"Tensor y:\", y)"
"b = torch.tensor([1.0, 4.0], requires_grad=True)\n",
"print(\"Tensor b:\", b)"
]
},
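The cell above can be checked in isolation: a leaf tensor created with `requires_grad=True` is tracked by autograd, but its `.grad` stays `None` until a backward pass runs. A minimal sketch:

```python
import torch

# Trainable parameters are leaf tensors created with requires_grad=True
w = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([1.0, 4.0], requires_grad=True)

print(w.requires_grad, w.is_leaf)  # True True
print(w.grad)                      # None -- no backward pass has run yet
```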
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Build Computation Graph\n",
"When we perform operations on tensors with `requires_grad=True`, PyTorch builds a **computation graph** to track how the output depends on the inputs. Let's compute a simple expression: `z = x^2 + y`.\n",
"\n",
"\n",
"\n",
" x [2.0, 3.0]\n",
" |\n",
" | **2 (PowBackward0)\n",
" v\n",
" temp [4.0, 9.0]\n",
" |\n",
" | + (AddBackward0)\n",
" | y [1.0, 4.0]\n",
" | /\n",
" v /\n",
" z [5.0, 13.0]\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result z: tensor([ 5., 13.], grad_fn=<AddBackward0>)\n",
"z requires_grad: True\n"
]
}
],
"source": [
"# Compute z = x^2 + y (element-wise)\n",
"z = x**2 + y\n",
"print(\"Result z:\", z)\n",
"\n",
"# z is a tensor with gradients tracked\n",
"# Means x, y, z are the components of the computatioal graph\n",
"print(\"z requires_grad:\", z.requires_grad)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Computing Gradients\n",
"## Step 3: Computing Gradients\n",
"To compute gradients, we call `.backward()` on a scalar output. If the output is a tensor (like `z`), we need to reduce it to a scalar (e.g., using `.sum()`). The gradients are stored in the `.grad` attribute of the input tensors.\n",
"\n",
" x [2.0, 3.0]\n",
" w [2.0, 3.0]\n",
" |\n",
" | **2 (PowBackward0)\n",
" v\n",
" temp [4.0, 9.0]\n",
" |\n",
" | + (AddBackward0)\n",
" | y [1.0, 4.0]\n",
" | b [1.0, 4.0]\n",
" | /\n",
" v /\n",
" z [5.0, 13.0]\n",
" z [5.0, 13.0]\n",
" |\n",
" | sum (SumBackward0)\n",
" v\n",
@@ -150,21 +105,32 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result z: tensor([ 5., 13.], grad_fn=<AddBackward0>)\n",
"z requires_grad: True\n",
"Loss (sum of z): tensor(18., grad_fn=<SumBackward0>)\n",
"Gradient of x (∂loss/∂x): tensor([4., 6.])\n",
"Gradient of y (∂loss/∂y): tensor([1., 1.])\n"
"Gradient of w (∂loss/∂w): tensor([4., 6.])\n",
"Gradient of b (∂loss/∂b): tensor([1., 1.])\n"
]
}
],
"source": [
"# Compute z = w^2 + b (element-wise)\n",
"z = w**2 + b\n",
"print(\"Result z:\", z)\n",
"\n",
"# z is a tensor with gradients tracked\n",
"# This means w, b, and z are part of the computational graph\n",
"print(\"z requires_grad:\", z.requires_grad)\n",
"\n",
"# Reduce z to a scalar by summing its elements\n",
"# both trainable parameters w and b affect the loss\n",
"loss = z.sum()\n",
"print(\"Loss (sum of z):\", loss)\n",
"\n",
@@ -172,8 +138,8 @@
"loss.backward()\n",
"\n",
"# Gradients are stored in w.grad and b.grad\n",
"print(\"Gradient of x (∂loss/∂x):\", x.grad)\n",
"print(\"Gradient of y (∂loss/∂y):\", y.grad)"
"print(\"Gradient of w (∂loss/∂w):\", w.grad)\n",
"print(\"Gradient of b (∂loss/∂b):\", b.grad)"
]
},
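The forward and backward pass in the cell above can be reproduced as a standalone script; the gradients match the hand derivation (`∂loss/∂w = 2w`, `∂loss/∂b = 1`):

```python
import torch

# Two trainable parameter tensors, as in the notebook cell
w = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([1.0, 4.0], requires_grad=True)

# Forward pass: z = w^2 + b (element-wise), reduced to a scalar loss
z = w**2 + b
loss = z.sum()

# Backward pass: populates w.grad and b.grad
loss.backward()

print(loss.item())       # 18.0
print(w.grad)            # tensor([4., 6.])  -> 2w
print(b.grad)            # tensor([1., 1.])  -> 1
```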
{
@@ -182,34 +148,34 @@
"source": [
"### Explanation\n",
"\n",
"To reduce the loss, you can modify the four numbers corresponding to the elements of x and y (i.e., x1, x2, y1, y2). Here is why:\n",
"To reduce the loss, you can modify the four numbers corresponding to the elements of w and b (i.e., w1, w2, b1, b2). Here is why:\n",
"\n",
"- For `z = x^2 + y`, the loss is `loss = z.sum()`.\n",
"- The gradient of `loss` with respect to `x` is `∂loss/∂x = 2x` (derivative of `x^2`).\n",
" - For `x = [2.0, 3.0]`, this gives `2x = [4.0, 6.0]`.\n",
"- The gradient of `loss` with respect to `y` is `∂loss/∂y = 1` (derivative of `y`).\n",
" - For `y = [1.0, 4.0]`, this gives `[1.0, 1.0]`.\n",
"- For `z = w^2 + b`, the loss is `loss = z.sum()`.\n",
"- The gradient of `loss` with respect to `w` is `∂loss/∂w = 2w` (derivative of `w^2`).\n",
" - For `w = [2.0, 3.0]`, this gives `2w = [4.0, 6.0]`.\n",
"- The gradient of `loss` with respect to `b` is `∂loss/∂b = 1` (derivative of `b`).\n",
" - For `b = [1.0, 4.0]`, this gives `[1.0, 1.0]`.\n",
"\n",
"In gradient-based optimization (e.g., gradient descent), you update each element of x and y to reduce the loss:\n",
"- Update rule: `x[i] = x[i] - learning_rate * x.grad[i]`.\n",
"- Update rule: `y[i] = y[i] - learning_rate * y.grad[i]`.\n",
"In gradient-based optimization (e.g., gradient descent), you update each element of w and b to reduce the loss:\n",
"- Update rule: `w[i] = w[i] - learning_rate * w.grad[i]`.\n",
"- Update rule: `b[i] = b[i] - learning_rate * b.grad[i]`.\n",
"\n",
"or simply\n",
"- `x = x - learning_rate * x.grad`\n",
"- `y = y - learning_rate * y.grad`"
"- `w = w - learning_rate * w.grad`\n",
"- `b = b - learning_rate * b.grad`"
]
},
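The update rules above can be applied directly; a minimal sketch (the learning rate is illustrative, not from the notebook). The updates are wrapped in `torch.no_grad()` so they are not themselves recorded in the graph:

```python
import torch

w = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([1.0, 4.0], requires_grad=True)
lr = 0.1

# One forward/backward pass to populate the gradients
loss = (w**2 + b).sum()
loss.backward()

# Apply the update rules without tracking them in the graph
with torch.no_grad():
    w -= lr * w.grad
    b -= lr * b.grad
    w.grad.zero_()   # clear gradients so the next backward() starts fresh
    b.grad.zero_()

new_loss = (w**2 + b).sum()
print(loss.item(), "->", new_loss.item())  # 18.0 -> 13.12: the step reduced the loss
```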
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5: A Simple Optimization Example (one parameter)\n",
"## Step 4: A Simple Optimization Example (one parameter)\n",
"Autograd is often used to optimize parameters. Let's minimize a simple function `f(w) = w^2` using gradient descent."
]
},
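A gradient-descent loop for `f(w) = w^2` might look like the following sketch (starting point, learning rate, and step count are assumptions for illustration):

```python
import torch

w = torch.tensor(5.0, requires_grad=True)
lr = 0.1

for step in range(50):
    loss = w**2          # forward pass: f(w) = w^2
    loss.backward()      # d(loss)/dw = 2w
    with torch.no_grad():
        w -= lr * w.grad # gradient-descent update
        w.grad.zero_()   # clear the gradient for the next iteration

print(w.item())  # close to the minimum at w = 0
```

Each step multiplies `w` by `(1 - 2 * lr)`, so the weight shrinks geometrically toward the minimizer.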
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -268,13 +234,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 6: Plot all weight changes - Gradient Descent\n",
"## Step 5: Plot all weight changes - Gradient Descent\n",
"Autograd is often used to optimize parameters. Let's minimize a simple function `f(w) = w^2` using gradient descent."
]
},
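To plot the weight trajectory, record the weight at every iteration; a sketch of the bookkeeping (the plotting call itself is left as a comment):

```python
import torch

w = torch.tensor(5.0, requires_grad=True)
lr = 0.1
history = []

for step in range(20):
    loss = w**2
    loss.backward()
    with torch.no_grad():
        history.append(w.item())  # record the weight before each update
        w -= lr * w.grad
        w.grad.zero_()

# history now holds the full trajectory, e.g. plt.plot(history)
print(history[0], history[-1])  # 5.0 -> a value near 0
```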
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -356,21 +322,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 7: Disabling Gradient Tracking\n",
"## Step 6: Disabling Gradient Tracking\n",
"- When `requires_grad=True`, the tensor is part of PyTorch's computation graph, and converting it to a NumPy array would break the graph.\n",
"- Sometimes, you need to perform operations without gradient tracking (e.g., during inference). Use `torch.no_grad()` or `.detach()`."
]
},
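The ways of bypassing gradient tracking mentioned above can be sketched side by side:

```python
import torch

w = torch.tensor([2.0, 3.0], requires_grad=True)

# 1. torch.no_grad(): no graph is built for operations inside the block
with torch.no_grad():
    result = w**2
print(result.requires_grad)  # False

# 2. .detach(): a tensor sharing the same data, cut off from the graph
detached = w.detach()
print(detached.requires_grad)  # False

# 3. .clone().detach(): an independent, untracked copy of the data
copy_w = w.clone().detach()
print(copy_w.requires_grad)  # False

# 4. .data: the underlying data as a tensor, bypassing autograd
print(w.data)  # tensor([2., 3.])
```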
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result without gradient tracking: tensor([4., 9.])\n",
"Result without gradient tracking: tensor([1.7180])\n",
"Detached tensor requires_grad: False\n",
"tensor([1., 2.], requires_grad=True)\n",
"access w without participating in a computational graph tensor([1., 2.])\n"
@@ -381,15 +347,15 @@
"\n",
"# Example with no_grad\n",
"with torch.no_grad():\n",
" result = x**2 # No computation graph is built\n",
" result = w**2 # No computation graph is built\n",
"print(\"Result without gradient tracking:\", result)\n",
"\n",
"# Detach a tensor\n",
"detached_x = x.detach()\n",
"detached_x = w.detach()\n",
"print(\"Detached tensor requires_grad:\", detached_x.requires_grad)\n",
"\n",
"# create a new tensor that is a copy of w but detached from the computation graph and independent of gradient tracking\n",
"detached_copy_x = x.clone().detach()\n",
"detached_copy_x = w.clone().detach()\n",
"\n",
"# access the tensor's underlying data as another tensor, bypassing the autograd (gradient tracking) system.\n",
"w = torch.tensor([1.0, 2.0], requires_grad=True)\n",
@@ -401,12 +367,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 8: Simple Optimization Example (two parameters)"
"## Step 7: Simple Optimization Example (two parameters)"
]
},
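The cell body for this step is suppressed in the diff, so the following is only a sketch of a two-parameter version of the same loop; the loss function `(w^2 + b^2).sum()` and all hyperparameters are assumptions, not taken from the notebook:

```python
import torch

w = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([1.0, 4.0], requires_grad=True)
lr = 0.1

for step in range(100):
    # Assumed loss: both trainable parameters affect it
    loss = (w**2 + b**2).sum()
    loss.backward()
    with torch.no_grad():
        w -= lr * w.grad
        b -= lr * b.grad
        w.grad.zero_()
        b.grad.zero_()

print(loss.item())  # shrinks toward 0 as both parameters approach the minimum
```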
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -465,7 +431,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 44,
"metadata": {},
"outputs": [
{