add lecture 9

2026-04-10 11:23:42 +00:00 · 2025-05-08 17:41:55 -04:00
parent dcc8004938
commit f5d435e80a
30 changed files with 2551 additions and 94 deletions
--- a/lectures/07_binary_classification_n_to_1/0_ML_workflow_breast_cancer.pptx
+++ b/lectures/07_binary_classification_n_to_1/0_ML_workflow_breast_cancer.pptx
--- a/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_A_steps.ipynb
+++ b/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_A_steps.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "31ee256c",
+   "metadata": {},
+   "source": [
+    "## Breast cancer prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "53af081c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import numpy as np\n",
+    "from sklearn.datasets import load_breast_cancer\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "536078f0",
+   "metadata": {},
+   "source": [
+    "### Load and preprocess breast cancer dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "06746e3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"Load and preprocess breast cancer dataset.\"\"\"\n",
+    "# Load dataset\n",
+    "data = load_breast_cancer()\n",
+    "X, y = data.data, data.target"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3477485c",
+   "metadata": {},
+   "source": [
+    "### Understand inputs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "76d4d576",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(569, 30)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "fddcc037",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,\n",
+       "       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,\n",
+       "       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,\n",
+       "       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,\n",
+       "       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X[0, :]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "070dcd69",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(569,)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "c4632c29",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "np.int64(0)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b74373cb",
+   "metadata": {},
+   "source": [
+    " ### Split dataset into training and testing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "0675a8c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=1234\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "bfe70bd9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(455, 30)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "a4df0052",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(114, 30)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_test.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d597a997",
+   "metadata": {},
+   "source": [
+    "### Scale fetures\n",
+    "Scaling features, as done in the code with StandardScaler, transforms the input data so that each feature has a mean of 0 and a standard deviation of 1. This is also known as standardization. The purpose of scaling features in this context is to:\n",
+    "\n",
+    "- Improve Model Convergence: Many machine learning algorithms, including neural networks optimized with gradient-based methods like SGD, converge faster when features are on a similar scale. Unscaled features with different ranges can cause gradients to vary widely, slowing down or destabilizing training.\n",
+    "- Ensure Fair Feature Influence: Features with larger numerical ranges could disproportionately influence the model compared to features with smaller ranges. Standardization ensures all features contribute equally to the model's predictions.\n",
+    "- Enhance Numerical Stability: Large or highly variable feature values can lead to numerical instability in computations, especially in deep learning frameworks like PyTorch. Scaling mitigates this risk."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "3aeb88da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Scale features\n",
+    "scaler = StandardScaler()\n",
+    "X_train = scaler.fit_transform(X_train)\n",
+    "X_test = scaler.transform(X_test)\n",
+    "\n",
+    "# Convert to PyTorch tensors\n",
+    "X_train = torch.from_numpy(X_train.astype(np.float32))\n",
+    "X_test = torch.from_numpy(X_test.astype(np.float32))\n",
+    "y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)\n",
+    "y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "3b10079f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([455, 30])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "13f4059c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([-0.3618, -0.2652, -0.3172, -0.4671,  1.8038,  1.1817, -0.5169,  0.1065,\n",
+       "        -0.3901,  1.3914,  0.1437, -0.1208,  0.1601, -0.1326, -0.5863, -0.1248,\n",
+       "        -0.5787,  0.1091, -0.2819, -0.1889, -0.2571, -0.2403, -0.2442, -0.3669,\n",
+       "         0.5449,  0.2481, -0.7109, -0.0797, -0.5280,  0.2506])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train[0,:]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0b15d2f",
+   "metadata": {},
+   "source": [
+    "### Binary Classifier model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e1b50a04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class BinaryClassifier(nn.Module):\n",
+    "    \"\"\"Simple neural network for binary classification.\"\"\"\n",
+    "    def __init__(self, input_features):\n",
+    "        super(BinaryClassifier, self).__init__()\n",
+    "        self.linear = nn.Linear(input_features, 1)\n",
+    "    \n",
+    "    def forward(self, x):\n",
+    "        return torch.sigmoid(self.linear(x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "49694959",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([455, 30])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14873622",
+   "metadata": {},
+   "source": [
+    "### show binary classification model \n",
+    "- the number of input features\n",
+    "- the number of output features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "466f6c41",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BinaryClassifier(\n",
+       "  (linear): Linear(in_features=30, out_features=1, bias=True)\n",
+       ")"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "n_features = X_train.shape[1]\n",
+    "model = BinaryClassifier(n_features)\n",
+    "model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c66978b5",
+   "metadata": {},
+   "source": [
+    "### Train the model with given parameters.\n",
+    "\n",
+    "- forward pass: prediction\n",
+    "- loss: error\n",
+    "- autograd: weight change direction\n",
+    "- stochastic gradient descent (optimizer): update weights\n",
+    "- optimizer.zero_grad()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "1d1d7868",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [10/100], Loss: 0.4627\n",
+      "Epoch [20/100], Loss: 0.4105\n",
+      "Epoch [30/100], Loss: 0.3721\n",
+      "Epoch [40/100], Loss: 0.3424\n",
+      "Epoch [50/100], Loss: 0.3186\n",
+      "Epoch [60/100], Loss: 0.2990\n",
+      "Epoch [70/100], Loss: 0.2825\n",
+      "Epoch [80/100], Loss: 0.2683\n",
+      "Epoch [90/100], Loss: 0.2560\n",
+      "Epoch [100/100], Loss: 0.2452\n"
+     ]
+    }
+   ],
+   "source": [
+    "num_epochs=100\n",
+    "learning_rate=0.01\n",
+    "\n",
+    "\"\"\"Train the model with given parameters.\"\"\"\n",
+    "criterion = nn.BCELoss()\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "    # Forward pass\n",
+    "    y_pred = model(X_train)\n",
+    "    loss = criterion(y_pred, y_train)\n",
+    "    \n",
+    "    # Backward pass and optimization\n",
+    "    optimizer.zero_grad()\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "    \n",
+    "    # Log progress\n",
+    "    if (epoch + 1) % 10 == 0:\n",
+    "        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a59248d",
+   "metadata": {},
+   "source": [
+    "### Evaluate model performance on test set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "eeddd812",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Test Accuracy: 0.8947\n"
+     ]
+    }
+   ],
+   "source": [
+    "with torch.no_grad():\n",
+    "    y_pred = model(X_test)\n",
+    "    y_pred_classes = y_pred.round() # Values 𝑥 ≥ 0.5 are rounded to 1, else  0\n",
+    "    accuracy = y_pred_classes.eq(y_test).sum() / float(y_test.shape[0])\n",
+    "    print(f'\\nTest Accuracy: {accuracy:.4f}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1dc4fcd3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_B_modularized.ipynb
+++ b/lectures/07_binary_classification_n_to_1/0_predict_breast_cancer_B_modularized.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "53af081c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training model...\n",
+      "Epoch [10/100], Loss: 0.6247\n",
+      "Epoch [20/100], Loss: 0.4940\n",
+      "Epoch [30/100], Loss: 0.4156\n",
+      "Epoch [40/100], Loss: 0.3641\n",
+      "Epoch [50/100], Loss: 0.3277\n",
+      "Epoch [60/100], Loss: 0.3005\n",
+      "Epoch [70/100], Loss: 0.2794\n",
+      "Epoch [80/100], Loss: 0.2624\n",
+      "Epoch [90/100], Loss: 0.2483\n",
+      "Epoch [100/100], Loss: 0.2364\n",
+      "\n",
+      "Test Accuracy: 0.9211\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import numpy as np\n",
+    "from sklearn.datasets import load_breast_cancer\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "def prepare_data():\n",
+    "    \"\"\"Load and preprocess breast cancer dataset.\"\"\"\n",
+    "    # Load dataset\n",
+    "    data = load_breast_cancer()\n",
+    "    X, y = data.data, data.target\n",
+    "    \n",
+    "    # Split dataset\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(\n",
+    "        X, y, test_size=0.2, random_state=1234\n",
+    "    )\n",
+    "    \n",
+    "    # Scale features\n",
+    "    scaler = StandardScaler()\n",
+    "    X_train = scaler.fit_transform(X_train)\n",
+    "    X_test = scaler.transform(X_test)\n",
+    "    \n",
+    "    # Convert to PyTorch tensors\n",
+    "    X_train = torch.from_numpy(X_train.astype(np.float32))\n",
+    "    X_test = torch.from_numpy(X_test.astype(np.float32))\n",
+    "    y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)\n",
+    "    y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)\n",
+    "    \n",
+    "    return X_train, X_test, y_train, y_test\n",
+    "\n",
+    "class BinaryClassifier(nn.Module):\n",
+    "    \"\"\"Simple neural network for binary classification.\"\"\"\n",
+    "    def __init__(self, input_features):\n",
+    "        super(BinaryClassifier, self).__init__()\n",
+    "        self.linear = nn.Linear(input_features, 1)\n",
+    "    \n",
+    "    def forward(self, x):\n",
+    "        return torch.sigmoid(self.linear(x))\n",
+    "\n",
+    "def train_model(model, X_train, y_train, num_epochs=100, learning_rate=0.01):\n",
+    "    \"\"\"Train the model with given parameters.\"\"\"\n",
+    "    criterion = nn.BCELoss()\n",
+    "    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
+    "    \n",
+    "    for epoch in range(num_epochs):\n",
+    "        # Forward pass\n",
+    "        y_pred = model(X_train)\n",
+    "        loss = criterion(y_pred, y_train)\n",
+    "        \n",
+    "        # Backward pass and optimization\n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        \n",
+    "        # Log progress\n",
+    "        if (epoch + 1) % 10 == 0:\n",
+    "            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n",
+    "\n",
+    "def evaluate_model(model, X_test, y_test):\n",
+    "    \"\"\"Evaluate model performance on test set.\"\"\"\n",
+    "    with torch.no_grad():\n",
+    "        y_pred = model(X_test)\n",
+    "        y_pred_classes = y_pred.round()\n",
+    "        accuracy = y_pred_classes.eq(y_test).sum() / float(y_test.shape[0])\n",
+    "        return accuracy.item()\n",
+    "\n",
+    "def main():\n",
+    "    # Prepare data\n",
+    "    X_train, X_test, y_train, y_test = prepare_data()\n",
+    "    \n",
+    "    # Initialize model\n",
+    "    n_features = X_train.shape[1]\n",
+    "    model = BinaryClassifier(n_features)\n",
+    "    \n",
+    "    # Train model\n",
+    "    print(\"Training model...\")\n",
+    "    train_model(model, X_train, y_train)\n",
+    "    \n",
+    "    # Evaluate model\n",
+    "    accuracy = evaluate_model(model, X_test, y_test)\n",
+    "    print(f'\\nTest Accuracy: {accuracy:.4f}')\n",
+    "\n",
+    "main()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76d4d576",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/lectures/07_binary_classification_n_to_1/1_breast_cancer_F1.ipynb
+++ b/lectures/07_binary_classification_n_to_1/1_breast_cancer_F1.ipynb
--- a/lectures/07_binary_classification_n_to_1/1_breast_cancer_F1.pptx
+++ b/lectures/07_binary_classification_n_to_1/1_breast_cancer_F1.pptx
--- a/lectures/07_binary_classification_n_to_1/2_DataLoader_wine.pptx
+++ b/lectures/07_binary_classification_n_to_1/2_DataLoader_wine.pptx
--- a/lectures/07_binary_classification_n_to_1/2_dataloader_wine.ipynb
+++ b/lectures/07_binary_classification_n_to_1/2_dataloader_wine.ipynb
@@ -0,0 +1,209 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "52950b67",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "First sample - Features: tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,\n",
+      "        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,\n",
+      "        1.0650e+03]), Label: tensor([1.])\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import torchvision\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "import numpy as np\n",
+    "import math\n",
+    "\n",
+    "# Custom Dataset class for Wine dataset\n",
+    "class WineDataset(Dataset):\n",
+    "    def __init__(self, data_path='data/wine.csv'):\n",
+    "        \"\"\"\n",
+    "        Initialize the dataset by loading wine data from a CSV file.\n",
+    "        \n",
+    "        Args:\n",
+    "            data_path (str): Path to the wine CSV file\n",
+    "        \"\"\"\n",
+    "        # Load data from CSV, skipping header row\n",
+    "        xy = np.loadtxt(data_path, delimiter=',', dtype=np.float32, skiprows=1)\n",
+    "        self.n_samples = xy.shape[0]\n",
+    "        \n",
+    "        # Split into features (all columns except first) and labels (first column)\n",
+    "        self.x_data = torch.from_numpy(xy[:, 1:])  # Shape: [n_samples, n_features]\n",
+    "        self.y_data = torch.from_numpy(xy[:, [0]]) # Shape: [n_samples, 1]\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        \"\"\"\n",
+    "        Enable indexing to retrieve a specific sample.\n",
+    "        \n",
+    "        Args:\n",
+    "            index (int): Index of the sample to retrieve\n",
+    "            \n",
+    "        Returns:\n",
+    "            tuple: (features, label) for the specified index\n",
+    "        \"\"\"\n",
+    "        return self.x_data[index], self.y_data[index]\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        \"\"\"\n",
+    "        Return the total number of samples in the dataset.\n",
+    "        \n",
+    "        Returns:\n",
+    "            int: Number of samples\n",
+    "        \"\"\"\n",
+    "        return self.n_samples\n",
+    "\n",
+    "# Create dataset instance\n",
+    "dataset = WineDataset()\n",
+    "\n",
+    "# Access and print first sample\n",
+    "features, labels = dataset[0]\n",
+    "print(f\"First sample - Features: {features}, Label: {labels}\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5448f749",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sample batch - Features: torch.Size([4, 13]), Labels: torch.Size([4, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Create a DataLoader for the wine dataset.\n",
+    "\n",
+    "Args:\n",
+    "    dataset (Dataset): The dataset to load\n",
+    "    batch_size (int): Number of samples per batch\n",
+    "    shuffle (bool): Whether to shuffle the data\n",
+    "    num_workers (int): Number of subprocesses for data loading\n",
+    "    \n",
+    "Returns:\n",
+    "    DataLoader: Configured DataLoader instance\n",
+    "\"\"\"\n",
+    "train_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)\n",
+    "\n",
+    "# Examine one batch\n",
+    "dataiter = iter(train_loader)\n",
+    "features, labels = next(dataiter)\n",
+    "print(f\"Sample batch - Features: {features.shape}, Labels: {labels.shape}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "0e122c46",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total samples: 178, Iterations per epoch: 45\n",
+      "Epoch: 1/2, Step 5/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 10/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 15/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 20/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 25/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 30/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 35/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 40/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 1/2, Step 45/45 | Inputs torch.Size([2, 13]) | Labels torch.Size([2, 1])\n",
+      "Epoch: 2/2, Step 5/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 10/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 15/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 20/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 25/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 30/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 35/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 40/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
+      "Epoch: 2/2, Step 45/45 | Inputs torch.Size([2, 13]) | Labels torch.Size([2, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Training loop parameters\n",
+    "num_epochs = 2\n",
+    "total_samples = len(dataset)\n",
+    "n_iterations = math.ceil(total_samples / 4)\n",
+    "print(f\"Total samples: {total_samples}, Iterations per epoch: {n_iterations}\")\n",
+    "\n",
+    "# Dummy training loop\n",
+    "for epoch in range(num_epochs):\n",
+    "    for i, (inputs, labels) in enumerate(train_loader):\n",
+    "        # Training step\n",
+    "        if (i + 1) % 5 == 0:\n",
+    "            print(f'Epoch: {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations} | '\n",
+    "                    f'Inputs {inputs.shape} | Labels {labels.shape}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "37095d28",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MNIST batch - Inputs: torch.Size([3, 1, 28, 28]), Targets: torch.Size([3])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example with MNIST dataset\n",
+    "train_dataset = torchvision.datasets.MNIST(root='./data',\n",
+    "                                            train=True,\n",
+    "                                            transform=torchvision.transforms.ToTensor(),\n",
+    "                                            download=True)\n",
+    "\n",
+    "mnist_loader = DataLoader(dataset=train_dataset,\n",
+    "                            batch_size=3,\n",
+    "                            shuffle=True)\n",
+    "\n",
+    "# Examine MNIST batch\n",
+    "dataiter = iter(mnist_loader)\n",
+    "inputs, targets = next(dataiter)\n",
+    "print(f\"MNIST batch - Inputs: {inputs.shape}, Targets: {targets.shape}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-images-idx3-ubyte
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-images-idx3-ubyte
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-images-idx3-ubyte.gz
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-images-idx3-ubyte.gz
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-labels-idx1-ubyte
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-labels-idx1-ubyte
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-images-idx3-ubyte
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-images-idx3-ubyte
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-images-idx3-ubyte.gz
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-images-idx3-ubyte.gz
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-labels-idx1-ubyte
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-labels-idx1-ubyte
--- a/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-labels-idx1-ubyte.gz
+++ b/lectures/07_binary_classification_n_to_1/data/MNIST/raw/train-labels-idx1-ubyte.gz