add lecture 9

This commit is contained in:
Frank Xu
2025-05-08 17:41:55 -04:00
parent dcc8004938
commit f5d435e80a
30 changed files with 2551 additions and 94 deletions

View File

@@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 2,
"id": "739c5173",
"metadata": {},
"outputs": [
@@ -23,7 +23,7 @@
"'2.6.0+cu126'"
]
},
"execution_count": 48,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 3,
"id": "0e82be1e",
"metadata": {},
"outputs": [
@@ -53,7 +53,7 @@
"tensor(5)"
]
},
"execution_count": 49,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 4,
"id": "7c239759",
"metadata": {},
"outputs": [
@@ -76,7 +76,7 @@
"0"
]
},
"execution_count": 50,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -87,7 +87,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 5,
"id": "d176548d",
"metadata": {},
"outputs": [
@@ -97,7 +97,7 @@
"torch.Size([])"
]
},
"execution_count": 51,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -108,7 +108,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 6,
"id": "07e03145",
"metadata": {},
"outputs": [
@@ -118,7 +118,7 @@
"5"
]
},
"execution_count": 52,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -129,7 +129,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 7,
"id": "41fcc46e",
"metadata": {},
"outputs": [
@@ -139,7 +139,7 @@
"tensor([1, 2, 3])"
]
},
"execution_count": 53,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -152,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 8,
"id": "f9894c37",
"metadata": {},
"outputs": [
@@ -162,7 +162,7 @@
"1"
]
},
"execution_count": 54,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -173,7 +173,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 9,
"id": "7dc166eb",
"metadata": {},
"outputs": [
@@ -183,7 +183,7 @@
"torch.Size([3])"
]
},
"execution_count": 55,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -195,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 10,
"id": "2581817b",
"metadata": {},
"outputs": [
@@ -206,7 +206,7 @@
" [ 9, 10]])"
]
},
"execution_count": 56,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -220,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 11,
"id": "46961042",
"metadata": {},
"outputs": [
@@ -230,7 +230,7 @@
"2"
]
},
"execution_count": 57,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -241,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 12,
"id": "9669fda8",
"metadata": {},
"outputs": [
@@ -251,7 +251,7 @@
"torch.Size([2, 2])"
]
},
"execution_count": 58,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -262,7 +262,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 13,
"id": "15297945",
"metadata": {},
"outputs": [
@@ -274,7 +274,7 @@
" [2, 4, 5]]])"
]
},
"execution_count": 59,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -289,7 +289,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 14,
"id": "5bbed071",
"metadata": {},
"outputs": [
@@ -299,7 +299,7 @@
"3"
]
},
"execution_count": 60,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -310,7 +310,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 15,
"id": "483d25c7",
"metadata": {},
"outputs": [
@@ -320,7 +320,7 @@
"torch.Size([1, 3, 3])"
]
},
"execution_count": 61,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -331,7 +331,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 16,
"id": "c4e76ef2",
"metadata": {},
"outputs": [
@@ -341,7 +341,7 @@
"torch.Size([1, 3, 3])"
]
},
"execution_count": 62,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -352,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 17,
"id": "b56abf50",
"metadata": {},
"outputs": [
@@ -364,7 +364,7 @@
" [6, 9]])"
]
},
"execution_count": 63,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -376,7 +376,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 18,
"id": "cdd39ae8",
"metadata": {},
"outputs": [
@@ -391,7 +391,7 @@
" [9]])"
]
},
"execution_count": 64,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -403,7 +403,7 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 19,
"id": "adf1ab41",
"metadata": {},
"outputs": [
@@ -415,7 +415,7 @@
" [2., 4., 5.]]])"
]
},
"execution_count": 65,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -430,7 +430,7 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 20,
"id": "a368079f",
"metadata": {},
"outputs": [
@@ -440,7 +440,7 @@
"torch.float32"
]
},
"execution_count": 66,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -451,20 +451,20 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 21,
"id": "4d00ea95",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[0.6636, 0.4190],\n",
" [0.4294, 0.9632]]),\n",
" tensor([[0.0473, 0.9045],\n",
" [0.2971, 0.3203]]))"
"(tensor([[0.9019, 0.8531],\n",
" [0.9996, 0.5826]]),\n",
" tensor([[0.0682, 0.6102],\n",
" [0.5610, 0.0305]]))"
]
},
"execution_count": 67,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -485,7 +485,7 @@
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 22,
"id": "aeed7a0a",
"metadata": {},
"outputs": [
@@ -495,7 +495,7 @@
"tensor([1, 2])"
]
},
"execution_count": 68,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -507,7 +507,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 23,
"id": "721ce7eb",
"metadata": {},
"outputs": [
@@ -517,7 +517,7 @@
"tensor([1, 2])"
]
},
"execution_count": 69,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -529,7 +529,7 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 24,
"id": "6423f4d2",
"metadata": {},
"outputs": [
@@ -539,7 +539,7 @@
"tensor(6)"
]
},
"execution_count": 70,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -551,7 +551,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 25,
"id": "0125386f",
"metadata": {},
"outputs": [
@@ -561,7 +561,7 @@
"tensor([3, 4, 5, 6])"
]
},
"execution_count": 71,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -573,7 +573,7 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 26,
"id": "97373387",
"metadata": {},
"outputs": [
@@ -584,7 +584,7 @@
" [4, 5]])"
]
},
"execution_count": 72,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -604,7 +604,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 27,
"id": "bba6b1b4",
"metadata": {},
"outputs": [
@@ -614,7 +614,7 @@
"tensor([4, 5, 6])"
]
},
"execution_count": 73,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -626,7 +626,7 @@
},
{
"cell_type": "code",
"execution_count": 74,
"execution_count": 28,
"id": "12a96c84",
"metadata": {},
"outputs": [
@@ -636,7 +636,7 @@
"tensor([3, 6, 9])"
]
},
"execution_count": 74,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -648,7 +648,7 @@
},
{
"cell_type": "code",
"execution_count": 75,
"execution_count": 29,
"id": "a0f73c88",
"metadata": {},
"outputs": [
@@ -658,7 +658,7 @@
"tensor([[5, 6]])"
]
},
"execution_count": 75,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -678,12 +678,12 @@
"id": "a3c1d8b5",
"metadata": {},
"source": [
"### sum, mean, "
"### sum, mean, max"
]
},
{
"cell_type": "code",
"execution_count": 76,
"execution_count": 30,
"id": "da5391eb",
"metadata": {},
"outputs": [
@@ -693,7 +693,7 @@
"tensor([False, False, True, True])"
]
},
"execution_count": 76,
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -706,7 +706,7 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 31,
"id": "78ed8e4b",
"metadata": {},
"outputs": [
@@ -716,7 +716,7 @@
"tensor(2)"
]
},
"execution_count": 77,
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -727,16 +727,135 @@
]
},
{
"cell_type": "markdown",
"id": "02a00747",
"cell_type": "code",
"execution_count": 32,
"id": "4698dc38",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(6)"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"### Operation"
"torch.tensor([0, 1, 2, 3]).sum()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": 54,
"id": "cfa1dcae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(6)"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y=torch.tensor([0, 1, 2, 3])\n",
"torch.sum(y)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "f27ae72f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(3)"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y=torch.tensor([0, 1, 2, 3])\n",
"torch.max(y)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "e10312d5",
"metadata": {},
"outputs": [],
"source": [
"test_outputs = torch.tensor([[2.5, 0.8, 1.3], # Sample 1\n",
" [0.4, 3.2, 1.9]]) # Sample 2\n",
"max_values, max_indices = torch.max(test_outputs,1)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "7f887d49",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([2.5000, 3.2000])"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"max_values"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "600af54b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([0, 1])"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"max_indices"
]
},
{
"cell_type": "markdown",
"id": "02a00747",
"metadata": {},
"source": [
"### Operations"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "45267f2f",
"metadata": {},
"outputs": [
@@ -749,7 +868,7 @@
" [7, 8]]))"
]
},
"execution_count": 78,
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -764,7 +883,7 @@
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": 34,
"id": "193a7828",
"metadata": {},
"outputs": [
@@ -775,7 +894,7 @@
" [10, 12]])"
]
},
"execution_count": 79,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@@ -786,7 +905,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 35,
"id": "1ce81689",
"metadata": {},
"outputs": [
@@ -797,7 +916,7 @@
" [21, 32]])"
]
},
"execution_count": 80,
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -809,7 +928,7 @@
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": 36,
"id": "62f8cde3",
"metadata": {},
"outputs": [
@@ -819,7 +938,7 @@
"tensor([11, 12, 13])"
]
},
"execution_count": 81,
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@@ -834,7 +953,7 @@
},
{
"cell_type": "code",
"execution_count": 82,
"execution_count": 37,
"id": "2098ad78",
"metadata": {},
"outputs": [
@@ -845,7 +964,7 @@
" [4, 5, 6]])"
]
},
"execution_count": 82,
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -858,7 +977,7 @@
},
{
"cell_type": "code",
"execution_count": 83,
"execution_count": 38,
"id": "883321f8",
"metadata": {},
"outputs": [
@@ -870,7 +989,7 @@
" [5, 6]])"
]
},
"execution_count": 83,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -879,6 +998,113 @@
"tensor.view(3, 2) # Reshape to 3x2"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "9ceace9b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([False, True, True, False, True, False])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.tensor([0, 1, 1, 0, 1, 0])\n",
"x == 1"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "96ea0d2f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ True, False, True, True, False, False])"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.tensor([0, 1, 0, 0, 1, 1])\n",
"y == 0"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "c1d9f060",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([False, False, True, False, False, False])"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(x == 1) & (y==0)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "796d977f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1)"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"((x == 1) & (y==0)).sum()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "60402427",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1)"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.sum(((x == 1) & (y==0)))"
]
},
{
"cell_type": "markdown",
"id": "9d716eb9",
@@ -893,7 +1119,7 @@
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": 44,
"id": "2a3fd4ae",
"metadata": {},
"outputs": [
@@ -903,7 +1129,7 @@
"tensor([1, 2, 3])"
]
},
"execution_count": 84,
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
@@ -916,7 +1142,7 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 45,
"id": "df247bd3",
"metadata": {},
"outputs": [
@@ -926,7 +1152,7 @@
"array([1, 2, 3])"
]
},
"execution_count": 85,
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
@@ -938,7 +1164,7 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 46,
"id": "9ada07ab",
"metadata": {},
"outputs": [
@@ -948,7 +1174,7 @@
"tensor([1, 2, 3])"
]
},
"execution_count": 86,
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
@@ -968,7 +1194,7 @@
},
{
"cell_type": "code",
"execution_count": 87,
"execution_count": 47,
"id": "30c9ea9f",
"metadata": {},
"outputs": [
@@ -978,7 +1204,7 @@
"True"
]
},
"execution_count": 87,
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
@@ -991,7 +1217,7 @@
},
{
"cell_type": "code",
"execution_count": 88,
"execution_count": 48,
"id": "dd523b3e",
"metadata": {},
"outputs": [
@@ -1001,7 +1227,7 @@
"'cuda'"
]
},
"execution_count": 88,
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
@@ -1014,7 +1240,7 @@
},
{
"cell_type": "code",
"execution_count": 89,
"execution_count": 49,
"id": "11d1a029",
"metadata": {},
"outputs": [
@@ -1031,7 +1257,7 @@
"tensor([1, 2, 3], device='cuda:0')"
]
},
"execution_count": 89,
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
@@ -1050,7 +1276,7 @@
},
{
"cell_type": "code",
"execution_count": 90,
"execution_count": 50,
"id": "db5249d0",
"metadata": {},
"outputs": [
@@ -1058,7 +1284,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Weife\\AppData\\Local\\Temp\\ipykernel_111340\\3540074575.py:6: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"C:\\Users\\Weife\\AppData\\Local\\Temp\\ipykernel_154616\\3540074575.py:6: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" y = torch.tensor(x, device=device) # directly create a tensor on GPU\n"
]
}

View File

@@ -0,0 +1,484 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "31ee256c",
"metadata": {},
"source": [
"## Breast cancer prediction"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "53af081c",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import numpy as np\n",
"from sklearn.datasets import load_breast_cancer\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "markdown",
"id": "536078f0",
"metadata": {},
"source": [
"### Load and preprocess breast cancer dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "06746e3c",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Load and preprocess breast cancer dataset.\"\"\"\n",
"# Load dataset\n",
"data = load_breast_cancer()\n",
"X, y = data.data, data.target"
]
},
{
"cell_type": "markdown",
"id": "3477485c",
"metadata": {},
"source": [
"### Understand inputs"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "76d4d576",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(569, 30)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "fddcc037",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,\n",
" 3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,\n",
" 8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,\n",
" 3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,\n",
" 1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[0, :]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "070dcd69",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(569,)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c4632c29",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.int64(0)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y[0]"
]
},
{
"cell_type": "markdown",
"id": "b74373cb",
"metadata": {},
"source": [
" ### Split dataset into training and testing"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0675a8c7",
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=1234\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "bfe70bd9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(455, 30)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a4df0052",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(114, 30)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test.shape"
]
},
{
"cell_type": "markdown",
"id": "d597a997",
"metadata": {},
"source": [
"### Scale fetures\n",
"Scaling features, as done in the code with StandardScaler, transforms the input data so that each feature has a mean of 0 and a standard deviation of 1. This is also known as standardization. The purpose of scaling features in this context is to:\n",
"\n",
"- Improve Model Convergence: Many machine learning algorithms, including neural networks optimized with gradient-based methods like SGD, converge faster when features are on a similar scale. Unscaled features with different ranges can cause gradients to vary widely, slowing down or destabilizing training.\n",
"- Ensure Fair Feature Influence: Features with larger numerical ranges could disproportionately influence the model compared to features with smaller ranges. Standardization ensures all features contribute equally to the model's predictions.\n",
"- Enhance Numerical Stability: Large or highly variable feature values can lead to numerical instability in computations, especially in deep learning frameworks like PyTorch. Scaling mitigates this risk."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "3aeb88da",
"metadata": {},
"outputs": [],
"source": [
"# Scale features\n",
"scaler = StandardScaler()\n",
"X_train = scaler.fit_transform(X_train)\n",
"X_test = scaler.transform(X_test)\n",
"\n",
"# Convert to PyTorch tensors\n",
"X_train = torch.from_numpy(X_train.astype(np.float32))\n",
"X_test = torch.from_numpy(X_test.astype(np.float32))\n",
"y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)\n",
"y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3b10079f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([455, 30])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "13f4059c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([-0.3618, -0.2652, -0.3172, -0.4671, 1.8038, 1.1817, -0.5169, 0.1065,\n",
" -0.3901, 1.3914, 0.1437, -0.1208, 0.1601, -0.1326, -0.5863, -0.1248,\n",
" -0.5787, 0.1091, -0.2819, -0.1889, -0.2571, -0.2403, -0.2442, -0.3669,\n",
" 0.5449, 0.2481, -0.7109, -0.0797, -0.5280, 0.2506])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train[0,:]"
]
},
{
"cell_type": "markdown",
"id": "b0b15d2f",
"metadata": {},
"source": [
"### Binary Classifier model"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e1b50a04",
"metadata": {},
"outputs": [],
"source": [
"class BinaryClassifier(nn.Module):\n",
" \"\"\"Simple neural network for binary classification.\"\"\"\n",
" def __init__(self, input_features):\n",
" super(BinaryClassifier, self).__init__()\n",
" self.linear = nn.Linear(input_features, 1)\n",
" \n",
" def forward(self, x):\n",
" return torch.sigmoid(self.linear(x))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "49694959",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([455, 30])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "markdown",
"id": "14873622",
"metadata": {},
"source": [
"### show binary classification model \n",
"- the number of input features\n",
"- the number of output features"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "466f6c41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BinaryClassifier(\n",
" (linear): Linear(in_features=30, out_features=1, bias=True)\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_features = X_train.shape[1]\n",
"model = BinaryClassifier(n_features)\n",
"model"
]
},
{
"cell_type": "markdown",
"id": "c66978b5",
"metadata": {},
"source": [
"### Train the model with given parameters.\n",
"\n",
"- forward pass: prediction\n",
"- loss: error\n",
"- autograd: weight change direction\n",
"- stochastic gradient descent (optimizer): update weights\n",
"- optimizer.zero_grad()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "1d1d7868",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch [10/100], Loss: 0.4627\n",
"Epoch [20/100], Loss: 0.4105\n",
"Epoch [30/100], Loss: 0.3721\n",
"Epoch [40/100], Loss: 0.3424\n",
"Epoch [50/100], Loss: 0.3186\n",
"Epoch [60/100], Loss: 0.2990\n",
"Epoch [70/100], Loss: 0.2825\n",
"Epoch [80/100], Loss: 0.2683\n",
"Epoch [90/100], Loss: 0.2560\n",
"Epoch [100/100], Loss: 0.2452\n"
]
}
],
"source": [
"num_epochs=100\n",
"learning_rate=0.01\n",
"\n",
"\"\"\"Train the model with given parameters.\"\"\"\n",
"criterion = nn.BCELoss()\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
"\n",
"for epoch in range(num_epochs):\n",
" # Forward pass\n",
" y_pred = model(X_train)\n",
" loss = criterion(y_pred, y_train)\n",
" \n",
" # Backward pass and optimization\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" # Log progress\n",
" if (epoch + 1) % 10 == 0:\n",
" print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n"
]
},
{
"cell_type": "markdown",
"id": "1a59248d",
"metadata": {},
"source": [
"### Evaluate model performance on test set"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "eeddd812",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Test Accuracy: 0.8947\n"
]
}
],
"source": [
"with torch.no_grad():\n",
" y_pred = model(X_test)\n",
" y_pred_classes = y_pred.round() # Values 𝑥 ≥ 0.5 are rounded to 1, else 0\n",
" accuracy = y_pred_classes.eq(y_test).sum() / float(y_test.shape[0])\n",
" print(f'\\nTest Accuracy: {accuracy:.4f}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1dc4fcd3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,146 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "53af081c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training model...\n",
"Epoch [10/100], Loss: 0.6247\n",
"Epoch [20/100], Loss: 0.4940\n",
"Epoch [30/100], Loss: 0.4156\n",
"Epoch [40/100], Loss: 0.3641\n",
"Epoch [50/100], Loss: 0.3277\n",
"Epoch [60/100], Loss: 0.3005\n",
"Epoch [70/100], Loss: 0.2794\n",
"Epoch [80/100], Loss: 0.2624\n",
"Epoch [90/100], Loss: 0.2483\n",
"Epoch [100/100], Loss: 0.2364\n",
"\n",
"Test Accuracy: 0.9211\n"
]
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import numpy as np\n",
"from sklearn.datasets import load_breast_cancer\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def prepare_data():\n",
" \"\"\"Load and preprocess breast cancer dataset.\"\"\"\n",
" # Load dataset\n",
" data = load_breast_cancer()\n",
" X, y = data.data, data.target\n",
" \n",
" # Split dataset\n",
" X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=1234\n",
" )\n",
" \n",
" # Scale features\n",
" scaler = StandardScaler()\n",
" X_train = scaler.fit_transform(X_train)\n",
" X_test = scaler.transform(X_test)\n",
" \n",
" # Convert to PyTorch tensors\n",
" X_train = torch.from_numpy(X_train.astype(np.float32))\n",
" X_test = torch.from_numpy(X_test.astype(np.float32))\n",
" y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)\n",
" y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)\n",
" \n",
" return X_train, X_test, y_train, y_test\n",
"\n",
"class BinaryClassifier(nn.Module):\n",
" \"\"\"Simple neural network for binary classification.\"\"\"\n",
" def __init__(self, input_features):\n",
" super(BinaryClassifier, self).__init__()\n",
" self.linear = nn.Linear(input_features, 1)\n",
" \n",
" def forward(self, x):\n",
" return torch.sigmoid(self.linear(x))\n",
"\n",
"def train_model(model, X_train, y_train, num_epochs=100, learning_rate=0.01):\n",
" \"\"\"Train the model with given parameters.\"\"\"\n",
" criterion = nn.BCELoss()\n",
" optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
" \n",
" for epoch in range(num_epochs):\n",
" # Forward pass\n",
" y_pred = model(X_train)\n",
" loss = criterion(y_pred, y_train)\n",
" \n",
" # Backward pass and optimization\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" # Log progress\n",
" if (epoch + 1) % 10 == 0:\n",
" print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n",
"\n",
"def evaluate_model(model, X_test, y_test):\n",
" \"\"\"Evaluate model performance on test set.\"\"\"\n",
" with torch.no_grad():\n",
" y_pred = model(X_test)\n",
" y_pred_classes = y_pred.round()\n",
" accuracy = y_pred_classes.eq(y_test).sum() / float(y_test.shape[0])\n",
" return accuracy.item()\n",
"\n",
"def main():\n",
" # Prepare data\n",
" X_train, X_test, y_train, y_test = prepare_data()\n",
" \n",
" # Initialize model\n",
" n_features = X_train.shape[1]\n",
" model = BinaryClassifier(n_features)\n",
" \n",
" # Train model\n",
" print(\"Training model...\")\n",
" train_model(model, X_train, y_train)\n",
" \n",
" # Evaluate model\n",
" accuracy = evaluate_model(model, X_test, y_test)\n",
" print(f'\\nTest Accuracy: {accuracy:.4f}')\n",
"\n",
"main()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76d4d576",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,209 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "52950b67",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"First sample - Features: tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,\n",
" 3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,\n",
" 1.0650e+03]), Label: tensor([1.])\n"
]
}
],
"source": [
"import torch\n",
"import torchvision\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import numpy as np\n",
"import math\n",
"\n",
"# Custom Dataset class for Wine dataset\n",
"class WineDataset(Dataset):\n",
" def __init__(self, data_path='data/wine.csv'):\n",
" \"\"\"\n",
" Initialize the dataset by loading wine data from a CSV file.\n",
" \n",
" Args:\n",
" data_path (str): Path to the wine CSV file\n",
" \"\"\"\n",
" # Load data from CSV, skipping header row\n",
" xy = np.loadtxt(data_path, delimiter=',', dtype=np.float32, skiprows=1)\n",
" self.n_samples = xy.shape[0]\n",
" \n",
" # Split into features (all columns except first) and labels (first column)\n",
" self.x_data = torch.from_numpy(xy[:, 1:]) # Shape: [n_samples, n_features]\n",
" self.y_data = torch.from_numpy(xy[:, [0]]) # Shape: [n_samples, 1]\n",
"\n",
" def __getitem__(self, index):\n",
" \"\"\"\n",
" Enable indexing to retrieve a specific sample.\n",
" \n",
" Args:\n",
" index (int): Index of the sample to retrieve\n",
" \n",
" Returns:\n",
" tuple: (features, label) for the specified index\n",
" \"\"\"\n",
" return self.x_data[index], self.y_data[index]\n",
"\n",
" def __len__(self):\n",
" \"\"\"\n",
" Return the total number of samples in the dataset.\n",
" \n",
" Returns:\n",
" int: Number of samples\n",
" \"\"\"\n",
" return self.n_samples\n",
"\n",
"# Create dataset instance\n",
"dataset = WineDataset()\n",
"\n",
"# Access and print first sample\n",
"features, labels = dataset[0]\n",
"print(f\"First sample - Features: {features}, Label: {labels}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5448f749",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sample batch - Features: torch.Size([4, 13]), Labels: torch.Size([4, 1])\n"
]
}
],
"source": [
"\"\"\"\n",
"Create a DataLoader for the wine dataset.\n",
"\n",
"Args:\n",
" dataset (Dataset): The dataset to load\n",
" batch_size (int): Number of samples per batch\n",
" shuffle (bool): Whether to shuffle the data\n",
" num_workers (int): Number of subprocesses for data loading\n",
" \n",
"Returns:\n",
" DataLoader: Configured DataLoader instance\n",
"\"\"\"\n",
"train_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)\n",
"\n",
"# Examine one batch\n",
"dataiter = iter(train_loader)\n",
"features, labels = next(dataiter)\n",
"print(f\"Sample batch - Features: {features.shape}, Labels: {labels.shape}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0e122c46",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total samples: 178, Iterations per epoch: 45\n",
"Epoch: 1/2, Step 5/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 10/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 15/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 20/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 25/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 30/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 35/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 40/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 1/2, Step 45/45 | Inputs torch.Size([2, 13]) | Labels torch.Size([2, 1])\n",
"Epoch: 2/2, Step 5/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 10/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 15/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 20/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 25/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 30/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 35/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 40/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])\n",
"Epoch: 2/2, Step 45/45 | Inputs torch.Size([2, 13]) | Labels torch.Size([2, 1])\n"
]
}
],
"source": [
"# Training loop parameters\n",
"num_epochs = 2\n",
"total_samples = len(dataset)\n",
"n_iterations = math.ceil(total_samples / 4)\n",
"print(f\"Total samples: {total_samples}, Iterations per epoch: {n_iterations}\")\n",
"\n",
"# Dummy training loop\n",
"for epoch in range(num_epochs):\n",
" for i, (inputs, labels) in enumerate(train_loader):\n",
" # Training step\n",
" if (i + 1) % 5 == 0:\n",
" print(f'Epoch: {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations} | '\n",
" f'Inputs {inputs.shape} | Labels {labels.shape}')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "37095d28",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MNIST batch - Inputs: torch.Size([3, 1, 28, 28]), Targets: torch.Size([3])\n"
]
}
],
"source": [
"# Example with MNIST dataset\n",
"train_dataset = torchvision.datasets.MNIST(root='./data',\n",
" train=True,\n",
" transform=torchvision.transforms.ToTensor(),\n",
" download=True)\n",
"\n",
"mnist_loader = DataLoader(dataset=train_dataset,\n",
" batch_size=3,\n",
" shuffle=True)\n",
"\n",
"# Examine MNIST batch\n",
"dataiter = iter(mnist_loader)\n",
"inputs, targets = next(dataiter)\n",
"print(f\"MNIST batch - Inputs: {inputs.shape}, Targets: {targets.shape}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,207 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "c694345f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch [20/100], Loss: 0.7266\n",
"Epoch [40/100], Loss: 0.6070\n",
"Epoch [60/100], Loss: 0.5695\n",
"Epoch [80/100], Loss: 0.5610\n",
"Epoch [100/100], Loss: 0.5574\n",
"\n",
"Model Architecture:\n",
"MultiClassModel(\n",
" (layer1): Linear(in_features=4, out_features=64, bias=True)\n",
" (relu1): ReLU()\n",
" (layer2): Linear(in_features=64, out_features=32, bias=True)\n",
" (relu2): ReLU()\n",
" (output): Linear(in_features=32, out_features=3, bias=True)\n",
" (softmax): Softmax(dim=1)\n",
")\n"
]
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"import numpy as np\n",
"\n",
"# Define the neural network model\n",
"class MultiClassModel(nn.Module):\n",
" def __init__(self, input_dim, num_classes):\n",
" super(MultiClassModel, self).__init__()\n",
" self.layer1 = nn.Linear(input_dim, 64)\n",
" self.relu1 = nn.ReLU()\n",
" self.layer2 = nn.Linear(64, 32)\n",
" self.relu2 = nn.ReLU()\n",
" self.output = nn.Linear(32, num_classes)\n",
" self.softmax = nn.Softmax(dim=1)\n",
" \n",
" def forward(self, x):\n",
" x = self.relu1(self.layer1(x))\n",
" x = self.relu2(self.layer2(x))\n",
" x = self.softmax(self.output(x))\n",
" return x\n",
"\n",
"# Load and preprocess Iris dataset\n",
"iris = load_iris()\n",
"X = iris.data\n",
"y = iris.target\n",
"\n",
"# Standardize features\n",
"scaler = StandardScaler()\n",
"X = scaler.fit_transform(X)\n",
"\n",
"# Convert to PyTorch tensors\n",
"X = torch.FloatTensor(X)\n",
"# nn.CrossEntropyLoss expects the target tensor to have the torch.int64 (long) data type\n",
"y = torch.tensor(y, dtype=torch.int64)\n",
"\n",
"# Split dataset\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"# Model parameters\n",
"input_dim = X.shape[1]\n",
"num_classes = len(np.unique(y))\n",
"\n",
"# Initialize model, loss, and optimizer\n",
"model = MultiClassModel(input_dim, num_classes)\n",
"criterion = nn.CrossEntropyLoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
"# Training loop\n",
"num_epochs = 100\n",
"batch_size = 32\n",
"n_batches = len(X_train) // batch_size\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.train()\n",
" for i in range(0, len(X_train), batch_size):\n",
" batch_X = X_train[i:i+batch_size]\n",
" batch_y = y_train[i:i+batch_size]\n",
" \n",
" # Forward pass\n",
" outputs = model(batch_X)\n",
" loss = criterion(outputs, batch_y)\n",
" \n",
" # Backward pass and optimization\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" # Print progress every 20 epochs\n",
" if (epoch + 1) % 20 == 0:\n",
" print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n",
"\n",
"# Print model architecture\n",
"print('\\nModel Architecture:')\n",
"print(model)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3d325c03",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Test Accuracy: 1.0000\n"
]
}
],
"source": [
"# Evaluate model\n",
"model.eval()\n",
"with torch.no_grad():\n",
" test_outputs = model(X_test)\n",
" _, predicted = torch.max(test_outputs, 1)\n",
" accuracy = (predicted == y_test).float().mean()\n",
" print(f'\\nTest Accuracy: {accuracy:.4f}')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c15bb757",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The first prediction [0.000702839985024184, 0.9973917007446289, 0.001905460492707789]\n"
]
}
],
"source": [
"print(\"The first prediction\", test_outputs[0].tolist())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "91588c01",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2,\n",
" 2, 2, 2, 2, 0, 0])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predicted"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3811df4a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,280 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "8ce2d850",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1, Loss: 0.1675, Accuracy: 95.05%\n",
"Epoch 2, Loss: 0.0710, Accuracy: 97.94%\n",
"Epoch 3, Loss: 0.0518, Accuracy: 98.42%\n",
"Epoch 4, Loss: 0.0462, Accuracy: 98.64%\n",
"Epoch 5, Loss: 0.0384, Accuracy: 98.87%\n",
"Epoch 6, Loss: 0.0336, Accuracy: 99.00%\n",
"Epoch 7, Loss: 0.0323, Accuracy: 99.06%\n",
"Epoch 8, Loss: 0.0260, Accuracy: 99.22%\n",
"Epoch 9, Loss: 0.0254, Accuracy: 99.19%\n",
"Epoch 10, Loss: 0.0226, Accuracy: 99.27%\n",
"\n",
"Test Accuracy: 99.34%\n",
"Training complete. Plots saved as 'mnist_training_metrics.png', 'mnist_confusion_matrix.png', and 'mnist_sample_predictions.png'\n"
]
}
],
"source": [
"import torch # PyTorch library for tensor operations and deep learning\n",
"import torch.nn as nn # Neural network modules and layers\n",
"import torch.nn.functional as F # Functional operations like activations\n",
"import torch.optim as optim # Optimization algorithms (e.g., Adam)\n",
"import torchvision # Datasets, models, and transforms for computer vision\n",
"import torchvision.transforms as transforms # Image preprocessing transformations\n",
"import matplotlib.pyplot as plt # Plotting library for visualizations\n",
"import numpy as np # Numerical computations for array operations\n",
"from sklearn.metrics import confusion_matrix # For computing confusion matrix\n",
"import seaborn as sns # Visualization library for heatmap plots\n",
"\n",
"# Set random seed for reproducibility across runs\n",
"torch.manual_seed(42)\n",
"\n",
"# Define data transformations for preprocessing MNIST dataset\n",
"# - ToTensor: Converts PIL images to PyTorch tensors (HWC to CHW format)\n",
"# - Normalize: Normalizes pixel values using MNIST dataset mean (0.1307) and std (0.3081)\n",
"transform = transforms.Compose([\n",
" transforms.ToTensor(),\n",
" transforms.Normalize((0.1307,), (0.3081,))\n",
"])\n",
"\n",
"# Load MNIST training dataset\n",
"# - root: Directory to store the dataset\n",
"# - train: True for training set\n",
"# - download: Download dataset if not already present\n",
"# - transform: Apply the defined transformations\n",
"trainset = torchvision.datasets.MNIST(root='./data', train=True,\n",
" download=True, transform=transform)\n",
"# Create DataLoader for training set\n",
"# - batch_size: 64 images per batch\n",
"# - shuffle: Randomly shuffle data for better training\n",
"# - num_workers: 2 subprocesses for faster data loading\n",
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,\n",
" shuffle=True, num_workers=2)\n",
"\n",
"# Load MNIST test dataset\n",
"# - train: False for test set\n",
"# - Same transformations as training set for consistency\n",
"testset = torchvision.datasets.MNIST(root='./data', train=False,\n",
" download=True, transform=transform)\n",
"# Create DataLoader for test set\n",
"# - shuffle: False to maintain order during evaluation\n",
"testloader = torch.utils.data.DataLoader(testset, batch_size=64,\n",
" shuffle=False, num_workers=2)\n",
"\n",
"# Define class labels for MNIST (digits 0-9)\n",
"classes = tuple(str(i) for i in range(10))\n",
"\n",
"# Define CNN architecture\n",
"class Net(nn.Module):\n",
" def __init__(self):\n",
" super(Net, self).__init__()\n",
" # First convolutional layer\n",
" # - Input: 1 channel (grayscale)\n",
" # - Output: 32 channels\n",
" # - Kernel: 5x5, padding=0 (spatial dims shrink: 28 -> 24)\n",
" self.conv1 = nn.Conv2d(1, 32, 5, padding=0)\n",
" # Second convolutional layer\n",
" # - Input: 32 channels\n",
" # - Output: 64 channels\n",
" # - Kernel: 5x5, padding=0\n",
" self.conv2 = nn.Conv2d(32, 64, 5, padding=0)\n",
" # Max pooling layer\n",
" # - Kernel: 2x2, stride=2 (reduces spatial dimensions by half)\n",
" self.pool = nn.MaxPool2d(2, 2)\n",
" # Batch normalization for first conv layer (32 channels)\n",
" self.bn1 = nn.BatchNorm2d(32)\n",
" # Batch normalization for second conv layer (64 channels)\n",
" self.bn2 = nn.BatchNorm2d(64)\n",
" # First fully connected layer\n",
" # - Input: 64*4*4 (after two pooling layers on 28x28 input)\n",
" # - Output: 128 units\n",
" self.fc1 = nn.Linear(64 * 4 * 4, 128)\n",
" # Second fully connected layer\n",
" # - Input: 128 units\n",
" # - Output: 10 units (one per class)\n",
" self.fc2 = nn.Linear(128, 10)\n",
" # Dropout layer with 50% probability to prevent overfitting\n",
" self.dropout = nn.Dropout(0.5)\n",
"\n",
" def forward(self, x):\n",
" # Forward pass through the network\n",
" # Conv1 -> BatchNorm -> ReLU -> MaxPool\n",
" x = self.pool(F.relu(self.bn1(self.conv1(x))))\n",
" # Conv2 -> BatchNorm -> ReLU -> MaxPool\n",
" x = self.pool(F.relu(self.bn2(self.conv2(x))))\n",
" # Flatten output for fully connected layers\n",
" # - Input size: 64 channels, 4x4 spatial dimensions\n",
" x = x.view(-1, 64 * 4 * 4)\n",
" # Fully connected layer 1 -> ReLU\n",
" x = F.relu(self.fc1(x))\n",
" # Apply dropout during training\n",
" x = self.dropout(x)\n",
" # Fully connected layer 2 for classification\n",
" x = self.fc2(x)\n",
" return x\n",
"\n",
"# Initialize model and move to appropriate device (GPU if available, else CPU)\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
"model = Net().to(device)\n",
"\n",
"# Define loss function\n",
"# - CrossEntropyLoss: Combines log softmax and negative log likelihood loss\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"# Define optimizer\n",
"# - Adam optimizer with learning rate 0.001\n",
"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
"# Training loop\n",
"num_epochs = 10 # Number of epochs to train\n",
"train_losses = [] # Store loss for each epoch\n",
"train_accuracies = [] # Store accuracy for each epoch\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.train() # Set model to training mode\n",
" running_loss = 0.0 # Accumulate loss for the epoch\n",
" correct = 0 # Track correct predictions\n",
" total = 0 # Track total samples\n",
" for i, data in enumerate(trainloader, 0):\n",
" # Get inputs and labels, move to device\n",
" inputs, labels = data[0].to(device), data[1].to(device)\n",
" # Zero out gradients to prevent accumulation\n",
" optimizer.zero_grad()\n",
" # Forward pass through the model\n",
" outputs = model(inputs)\n",
" # Compute loss\n",
" loss = criterion(outputs, labels)\n",
" # Backward pass to compute gradients\n",
" loss.backward()\n",
" # Update model parameters\n",
" optimizer.step()\n",
" \n",
" # Update running loss\n",
" running_loss += loss.item()\n",
" # Calculate accuracy\n",
" _, predicted = torch.max(outputs.data, 1)\n",
" total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n",
" \n",
" # Calculate and store epoch metrics\n",
" epoch_loss = running_loss / len(trainloader)\n",
" epoch_acc = 100 * correct / total\n",
" train_losses.append(epoch_loss)\n",
" train_accuracies.append(epoch_acc)\n",
" print(f\"Epoch {epoch + 1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%\")\n",
"\n",
"# Evaluate model on test set\n",
"model.eval() # Set model to evaluation mode\n",
"correct = 0\n",
"total = 0\n",
"all_preds = [] # Store predictions for confusion matrix\n",
"all_labels = [] # Store true labels\n",
"with torch.no_grad(): # Disable gradient computation for efficiency\n",
" for data in testloader:\n",
" images, labels = data[0].to(device), data[1].to(device)\n",
" outputs = model(images)\n",
" _, predicted = torch.max(outputs.data, 1)\n",
" total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n",
" all_preds.extend(predicted.cpu().numpy())\n",
" all_labels.extend(labels.cpu().numpy())\n",
"\n",
"# Calculate and print test accuracy\n",
"test_accuracy = 100 * correct / total\n",
"print(f\"\\nTest Accuracy: {test_accuracy:.2f}%\")\n",
"\n",
"# Plot training metrics (loss and accuracy)\n",
"plt.figure(figsize=(12, 4))\n",
"\n",
"# Plot training loss\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(train_losses, label='Training Loss')\n",
"plt.title('Training Loss')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"# Plot training accuracy\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(train_accuracies, label='Training Accuracy')\n",
"plt.title('Training Accuracy')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Accuracy (%)')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig('mnist_training_metrics.png') # Save the plot\n",
"plt.close()\n",
"\n",
"# Plot confusion matrix to visualize class-wise performance\n",
"cm = confusion_matrix(all_labels, all_preds)\n",
"plt.figure(figsize=(10, 8))\n",
"sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',\n",
" xticklabels=classes, yticklabels=classes)\n",
"plt.title('Confusion Matrix')\n",
"plt.xlabel('Predicted')\n",
"plt.ylabel('True')\n",
"plt.savefig('mnist_confusion_matrix.png') # Save the plot\n",
"plt.close()\n",
"\n",
"# Function to unnormalize and display MNIST images\n",
"def imshow(img):\n",
" img = img * 0.3081 + 0.1307 # Unnormalize (reverse mean/std normalization)\n",
" npimg = img.numpy()\n",
" return npimg[0] # Return single channel for grayscale image\n",
"\n",
"# Display sample test images with predictions\n",
"dataiter = iter(testloader)\n",
"images, labels = next(dataiter)\n",
"images, labels = images[:8].to(device), labels[:8]\n",
"outputs = model(images)\n",
"_, predicted = torch.max(outputs, 1)\n",
"\n",
"plt.figure(figsize=(12, 6))\n",
"for i in range(8):\n",
" plt.subplot(2, 4, i + 1)\n",
" plt.imshow(imshow(images[i].cpu()), cmap='gray') # Display grayscale image\n",
" plt.title(f'Pred: {classes[predicted[i]]}\\nTrue: {classes[labels[i]]}')\n",
" plt.axis('off')\n",
"plt.savefig('mnist_sample_predictions.png') # Save the plot\n",
"plt.close()\n",
"\n",
"# Print completion message\n",
"print(\"Training complete. Plots saved as 'mnist_training_metrics.png', 'mnist_confusion_matrix.png', and 'mnist_sample_predictions.png'\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,273 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "46ca8277",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 170M/170M [00:07<00:00, 24.0MB/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1, Loss: 1.535, Accuracy: 44.58%\n",
"Epoch 2, Loss: 1.254, Accuracy: 54.91%\n",
"Epoch 3, Loss: 1.144, Accuracy: 59.47%\n",
"Epoch 4, Loss: 1.071, Accuracy: 62.25%\n",
"Epoch 5, Loss: 1.017, Accuracy: 64.26%\n",
"Epoch 6, Loss: 0.975, Accuracy: 65.75%\n",
"Epoch 7, Loss: 0.945, Accuracy: 67.02%\n",
"Epoch 8, Loss: 0.916, Accuracy: 68.16%\n",
"Epoch 9, Loss: 0.892, Accuracy: 68.86%\n",
"Epoch 10, Loss: 0.865, Accuracy: 70.02%\n",
"\n",
"Test Accuracy: 73.62%\n",
"Training complete. Plots saved as 'training_metrics.png', 'confusion_matrix.png', and 'sample_predictions.png'\n"
]
}
],
"source": [
"import torch # PyTorch library for tensor computations and deep learning\n",
"import torch.nn as nn # Neural network modules\n",
"import torch.nn.functional as F # Functional interface for neural network operations\n",
"import torch.optim as optim # Optimization algorithms\n",
"import torchvision # Computer vision datasets and models\n",
"import torchvision.transforms as transforms # Image transformations\n",
"import matplotlib.pyplot as plt # Plotting library\n",
"import numpy as np # Numerical computations\n",
"from sklearn.metrics import confusion_matrix # For confusion matrix\n",
"import seaborn as sns # Visualization library for confusion matrix\n",
"\n",
"# Set random seed for reproducibility across runs\n",
"torch.manual_seed(42)\n",
"\n",
"# Define data transformations for preprocessing\n",
"# - RandomHorizontalFlip: Randomly flip images horizontally for data augmentation\n",
"# - RandomRotation: Randomly rotate images by up to 10 degrees for augmentation\n",
"# - ToTensor: Convert images to PyTorch tensors (HWC to CHW format)\n",
"# - Normalize: Normalize RGB channels with mean=0.5 and std=0.5\n",
"transform = transforms.Compose([\n",
" transforms.RandomHorizontalFlip(),\n",
" transforms.RandomRotation(10),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
"])\n",
"\n",
"# Load CIFAR-10 training dataset\n",
"# - root: Directory to store dataset\n",
"# - train: True for training set\n",
"# - download: Download dataset if not present\n",
"# - transform: Apply defined transformations\n",
"trainset = torchvision.datasets.CIFAR10(root='./data', train=True,\n",
" download=True, transform=transform)\n",
"# Create DataLoader for training set\n",
"# - batch_size: Number of images per batch (64)\n",
"# - shuffle: Randomly shuffle data for better training\n",
"# - num_workers: Number of subprocesses for data loading\n",
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,\n",
" shuffle=True, num_workers=2)\n",
"\n",
"# Load CIFAR-10 test dataset\n",
"testset = torchvision.datasets.CIFAR10(root='./data', train=False,\n",
" download=True, transform=transform)\n",
"# Create DataLoader for test set\n",
"# - shuffle: False to maintain order for evaluation\n",
"testloader = torch.utils.data.DataLoader(testset, batch_size=64,\n",
" shuffle=False, num_workers=2)\n",
"\n",
"# Define class labels for CIFAR-10\n",
"classes = ('airplane', 'automobile', 'bird', 'cat', 'deer',\n",
" 'dog', 'frog', 'horse', 'ship', 'truck')\n",
"\n",
"# Define CNN architecture\n",
"class Net(nn.Module):\n",
" def __init__(self):\n",
" super(Net, self).__init__()\n",
" # First convolutional layer: 3 input channels (RGB), 32 output channels, 3x3 kernel\n",
" self.conv1 = nn.Conv2d(3, 32, 3, padding=1)\n",
" # Second convolutional layer: 32 input channels, 64 output channels, 3x3 kernel\n",
" self.conv2 = nn.Conv2d(32, 64, 3, padding=1)\n",
" # Max pooling layer: 2x2 kernel, stride 2\n",
" self.pool = nn.MaxPool2d(2, 2)\n",
" # Batch normalization for first conv layer\n",
" self.bn1 = nn.BatchNorm2d(32)\n",
" # Batch normalization for second conv layer\n",
" self.bn2 = nn.BatchNorm2d(64)\n",
" # First fully connected layer: Input size calculated from conv output (64*8*8), 512 units\n",
" self.fc1 = nn.Linear(64 * 8 * 8, 512)\n",
" # Second fully connected layer: 512 units to 10 output classes\n",
" self.fc2 = nn.Linear(512, 10)\n",
" # Dropout layer with 50% probability to prevent overfitting\n",
" self.dropout = nn.Dropout(0.5)\n",
"\n",
" def forward(self, x):\n",
" # Forward pass through the network\n",
" # Conv1 -> BatchNorm -> ReLU -> MaxPool\n",
" x = self.pool(F.relu(self.bn1(self.conv1(x))))\n",
" # Conv2 -> BatchNorm -> ReLU -> MaxPool\n",
" x = self.pool(F.relu(self.bn2(self.conv2(x))))\n",
" # Flatten the output for fully connected layers\n",
" x = x.view(-1, 64 * 8 * 8)\n",
" # Fully connected layer 1 -> ReLU\n",
" x = F.relu(self.fc1(x))\n",
" # Apply dropout\n",
" x = self.dropout(x)\n",
" # Final fully connected layer for classification\n",
" x = self.fc2(x)\n",
" return x\n",
"\n",
"# Initialize model and move to appropriate device (GPU if available, else CPU)\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
"model = Net().to(device)\n",
"\n",
"# Define loss function (CrossEntropyLoss for multi-class classification)\n",
"criterion = nn.CrossEntropyLoss()\n",
"# Define optimizer (Adam with learning rate 0.001)\n",
"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
"# Training loop\n",
"num_epochs = 10 # Number of training epochs\n",
"train_losses = [] # Store loss per epoch\n",
"train_accuracies = [] # Store accuracy per epoch\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.train() # Set model to training mode\n",
" running_loss = 0.0 # Track total loss for epoch\n",
" correct = 0 # Track correct predictions\n",
" total = 0 # Track total samples\n",
" for i, data in enumerate(trainloader, 0):\n",
" # Get inputs and labels, move to device\n",
" inputs, labels = data[0].to(device), data[1].to(device)\n",
" # Zero the parameter gradients\n",
" optimizer.zero_grad()\n",
" # Forward pass\n",
" outputs = model(inputs)\n",
" # Compute loss\n",
" loss = criterion(outputs, labels)\n",
" # Backward pass and optimize\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" # Update running loss\n",
" running_loss += loss.item()\n",
" # Calculate accuracy\n",
" _, predicted = torch.max(outputs.data, 1)\n",
" total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n",
" \n",
" # Calculate and store epoch metrics\n",
" epoch_loss = running_loss / len(trainloader)\n",
" epoch_acc = 100 * correct / total\n",
" train_losses.append(epoch_loss)\n",
" train_accuracies.append(epoch_acc)\n",
" print(f\"Epoch {epoch + 1}, Loss: {epoch_loss:.3f}, Accuracy: {epoch_acc:.2f}%\")\n",
"\n",
"# Evaluate model on test set\n",
"model.eval() # Set model to evaluation mode\n",
"correct = 0\n",
"total = 0\n",
"all_preds = [] # Store predictions for confusion matrix\n",
"all_labels = [] # Store true labels\n",
"with torch.no_grad(): # Disable gradient computation for evaluation\n",
" for data in testloader:\n",
" images, labels = data[0].to(device), data[1].to(device)\n",
" outputs = model(images)\n",
" _, predicted = torch.max(outputs.data, 1)\n",
" total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n",
" all_preds.extend(predicted.cpu().numpy())\n",
" all_labels.extend(labels.cpu().numpy())\n",
"\n",
"# Calculate and print test accuracy\n",
"test_accuracy = 100 * correct / total\n",
"print(f\"\\nTest Accuracy: {test_accuracy:.2f}%\")\n",
"\n",
"# Plot training metrics (loss and accuracy)\n",
"plt.figure(figsize=(12, 4))\n",
"\n",
"# Plot training loss\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(train_losses, label='Training Loss')\n",
"plt.title('Training Loss')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"# Plot training accuracy\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(train_accuracies, label='Training Accuracy')\n",
"plt.title('Training Accuracy')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Accuracy (%)')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig('training_metrics.png') # Save plot\n",
"plt.close()\n",
"\n",
"# Plot confusion matrix\n",
"cm = confusion_matrix(all_labels, all_preds)\n",
"plt.figure(figsize=(10, 8))\n",
"sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',\n",
" xticklabels=classes, yticklabels=classes)\n",
"plt.title('Confusion Matrix')\n",
"plt.xlabel('Predicted')\n",
"plt.ylabel('True')\n",
"plt.savefig('confusion_matrix.png') # Save plot\n",
"plt.close()\n",
"\n",
"# Function to unnormalize and display images\n",
"def imshow(img):\n",
" img = img / 2 + 0.5 # Unnormalize\n",
" npimg = img.numpy()\n",
" return np.transpose(npimg, (1, 2, 0)) # Convert from CHW to HWC\n",
"\n",
"# Show sample test images with predictions\n",
"dataiter = iter(testloader)\n",
"images, labels = next(dataiter)\n",
"images, labels = images[:8].to(device), labels[:8]\n",
"outputs = model(images)\n",
"_, predicted = torch.max(outputs, 1)\n",
"\n",
"plt.figure(figsize=(12, 6))\n",
"for i in range(8):\n",
" plt.subplot(2, 4, i + 1)\n",
" plt.imshow(imshow(images[i].cpu()))\n",
" plt.title(f'Pred: {classes[predicted[i]]}\\nTrue: {classes[labels[i]]}')\n",
" plt.axis('off')\n",
"plt.savefig('sample_predictions.png') # Save plot\n",
"plt.close()\n",
"\n",
"print(\"Training complete. Plots saved as 'training_metrics.png', 'confusion_matrix.png', and 'sample_predictions.png'\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,225 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="402pt" height="263pt" viewBox="0.00 0.00 401.96 263.00">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 259)">
<title>Dropout</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-259 397.96,-259 397.96,4 -4,4"/>
<g id="clust1" class="cluster">
<title>cluster_input</title>
<polygon fill="none" stroke="lightgrey" points="0,-8 0,-247 81.69,-247 81.69,-8 0,-8"/>
<text text-anchor="middle" x="40.85" y="-230.4" font-family="Times,serif" font-size="14.00">Input Layer</text>
</g>
<g id="clust2" class="cluster">
<title>cluster_hidden</title>
<polygon fill="none" stroke="lightgrey" points="101.69,-8 101.69,-247 282.92,-247 282.92,-8 101.69,-8"/>
<text text-anchor="middle" x="192.31" y="-230.4" font-family="Times,serif" font-size="14.00">Hidden Layer (with Dropout)</text>
</g>
<g id="clust3" class="cluster">
<title>cluster_output</title>
<polygon fill="none" stroke="lightgrey" points="302.92,-62 302.92,-193 393.96,-193 393.96,-62 302.92,-62"/>
<text text-anchor="middle" x="348.44" y="-176.4" font-family="Times,serif" font-size="14.00">Output Layer</text>
</g>
<!-- I1 -->
<g id="node1" class="node">
<title>I1</title>
<ellipse fill="lightblue" stroke="black" cx="40.35" cy="-34" rx="18" ry="18"/>
<text text-anchor="middle" x="40.35" y="-29.8" font-family="Times,serif" font-size="14.00">I1</text>
</g>
<!-- H1 -->
<g id="node5" class="node">
<title>H1</title>
<ellipse fill="lightgreen" stroke="black" cx="191.81" cy="-34" rx="18" ry="18"/>
<text text-anchor="middle" x="191.81" y="-29.8" font-family="Times,serif" font-size="14.00">H1</text>
</g>
<!-- I1&#45;&gt;H1 -->
<g id="edge1" class="edge">
<title>I1-&gt;H1</title>
<path fill="none" stroke="black" d="M58.78,-34C84.21,-34 131.75,-34 162.3,-34"/>
<polygon fill="black" stroke="black" points="161.97,-37.5 171.97,-34 161.97,-30.5 161.97,-37.5"/>
</g>
<!-- H2 -->
<g id="node6" class="node">
<title>H2</title>
<ellipse fill="red" stroke="black" stroke-dasharray="5,2" cx="191.81" cy="-88" rx="18" ry="18"/>
<text text-anchor="middle" x="191.81" y="-83.8" font-family="Times,serif" font-size="14.00" fill="white">H2</text>
</g>
<!-- I1&#45;&gt;H2 -->
<g id="edge2" class="edge">
<title>I1-&gt;H2</title>
<path fill="none" stroke="black" d="M57,-41.66C69.05,-47.4 86.21,-55.22 101.69,-61 121.97,-68.57 145.41,-75.61 163.16,-80.6"/>
<polygon fill="black" stroke="black" points="162.15,-83.95 172.72,-83.24 164.01,-77.2 162.15,-83.95"/>
</g>
<!-- H3 -->
<g id="node7" class="node">
<title>H3</title>
<ellipse fill="lightgreen" stroke="black" cx="191.81" cy="-142" rx="18" ry="18"/>
<text text-anchor="middle" x="191.81" y="-137.8" font-family="Times,serif" font-size="14.00">H3</text>
</g>
<!-- I1&#45;&gt;H3 -->
<g id="edge3" class="edge">
<title>I1-&gt;H3</title>
<path fill="none" stroke="black" d="M56.9,-41.37C65.24,-45.99 75.12,-52.66 81.69,-61 97.54,-81.09 83.06,-97.46 101.69,-115 118.07,-130.42 142.97,-137.05 162.29,-139.89"/>
<polygon fill="black" stroke="black" points="161.62,-143.34 171.97,-141.04 162.45,-136.39 161.62,-143.34"/>
</g>
<!-- H4 -->
<g id="node8" class="node">
<title>H4</title>
<ellipse fill="red" stroke="black" stroke-dasharray="5,2" cx="191.81" cy="-196" rx="18" ry="18"/>
<text text-anchor="middle" x="191.81" y="-191.8" font-family="Times,serif" font-size="14.00" fill="white">H4</text>
</g>
<!-- I1&#45;&gt;H4 -->
<g id="edge4" class="edge">
<title>I1-&gt;H4</title>
<path fill="none" stroke="black" d="M57.48,-40.95C65.99,-45.45 75.86,-52.13 81.69,-61 108.53,-101.78 69.14,-132.62 101.69,-169 116.94,-186.04 142.54,-192.49 162.4,-194.86"/>
<polygon fill="black" stroke="black" points="161.82,-198.32 172.09,-195.71 162.44,-191.35 161.82,-198.32"/>
</g>
<!-- I2 -->
<g id="node2" class="node">
<title>I2</title>
<ellipse fill="lightblue" stroke="black" cx="40.35" cy="-88" rx="18" ry="18"/>
<text text-anchor="middle" x="40.35" y="-83.8" font-family="Times,serif" font-size="14.00">I2</text>
</g>
<!-- I2&#45;&gt;H1 -->
<g id="edge5" class="edge">
<title>I2-&gt;H1</title>
<path fill="none" stroke="black" d="M57,-80.34C69.05,-74.6 86.21,-66.78 101.69,-61 121.97,-53.43 145.41,-46.39 163.16,-41.4"/>
<polygon fill="black" stroke="black" points="164.01,-44.8 172.72,-38.76 162.15,-38.05 164.01,-44.8"/>
</g>
<!-- I2&#45;&gt;H2 -->
<g id="edge6" class="edge">
<title>I2-&gt;H2</title>
<path fill="none" stroke="black" d="M58.78,-88C84.21,-88 131.75,-88 162.3,-88"/>
<polygon fill="black" stroke="black" points="161.97,-91.5 171.97,-88 161.97,-84.5 161.97,-91.5"/>
</g>
<!-- I2&#45;&gt;H3 -->
<g id="edge7" class="edge">
<title>I2-&gt;H3</title>
<path fill="none" stroke="black" d="M57,-95.66C69.05,-101.4 86.21,-109.22 101.69,-115 121.97,-122.57 145.41,-129.61 163.16,-134.6"/>
<polygon fill="black" stroke="black" points="162.15,-137.95 172.72,-137.24 164.01,-131.2 162.15,-137.95"/>
</g>
<!-- I2&#45;&gt;H4 -->
<g id="edge8" class="edge">
<title>I2-&gt;H4</title>
<path fill="none" stroke="black" d="M56.9,-95.37C65.24,-99.99 75.12,-106.66 81.69,-115 97.54,-135.09 83.06,-151.46 101.69,-169 118.07,-184.42 142.97,-191.05 162.29,-193.89"/>
<polygon fill="black" stroke="black" points="161.62,-197.34 171.97,-195.04 162.45,-190.39 161.62,-197.34"/>
</g>
<!-- I3 -->
<g id="node3" class="node">
<title>I3</title>
<ellipse fill="lightblue" stroke="black" cx="40.35" cy="-142" rx="18" ry="18"/>
<text text-anchor="middle" x="40.35" y="-137.8" font-family="Times,serif" font-size="14.00">I3</text>
</g>
<!-- I3&#45;&gt;H1 -->
<g id="edge9" class="edge">
<title>I3-&gt;H1</title>
<path fill="none" stroke="black" d="M56.9,-134.63C65.24,-130.01 75.12,-123.34 81.69,-115 97.54,-94.91 83.06,-78.54 101.69,-61 118.07,-45.58 142.97,-38.95 162.29,-36.11"/>
<polygon fill="black" stroke="black" points="162.45,-39.61 171.97,-34.96 161.62,-32.66 162.45,-39.61"/>
</g>
<!-- I3&#45;&gt;H2 -->
<g id="edge10" class="edge">
<title>I3-&gt;H2</title>
<path fill="none" stroke="black" d="M57,-134.34C69.05,-128.6 86.21,-120.78 101.69,-115 121.97,-107.43 145.41,-100.39 163.16,-95.4"/>
<polygon fill="black" stroke="black" points="164.01,-98.8 172.72,-92.76 162.15,-92.05 164.01,-98.8"/>
</g>
<!-- I3&#45;&gt;H3 -->
<g id="edge11" class="edge">
<title>I3-&gt;H3</title>
<path fill="none" stroke="black" d="M58.78,-142C84.21,-142 131.75,-142 162.3,-142"/>
<polygon fill="black" stroke="black" points="161.97,-145.5 171.97,-142 161.97,-138.5 161.97,-145.5"/>
</g>
<!-- I3&#45;&gt;H4 -->
<g id="edge12" class="edge">
<title>I3-&gt;H4</title>
<path fill="none" stroke="black" d="M57,-149.66C69.05,-155.4 86.21,-163.22 101.69,-169 121.97,-176.57 145.41,-183.61 163.16,-188.6"/>
<polygon fill="black" stroke="black" points="162.15,-191.95 172.72,-191.24 164.01,-185.2 162.15,-191.95"/>
</g>
<!-- I4 -->
<g id="node4" class="node">
<title>I4</title>
<ellipse fill="lightblue" stroke="black" cx="40.35" cy="-196" rx="18" ry="18"/>
<text text-anchor="middle" x="40.35" y="-191.8" font-family="Times,serif" font-size="14.00">I4</text>
</g>
<!-- I4&#45;&gt;H1 -->
<g id="edge13" class="edge">
<title>I4-&gt;H1</title>
<path fill="none" stroke="black" d="M57.48,-189.05C65.99,-184.55 75.86,-177.87 81.69,-169 108.53,-128.22 69.14,-97.38 101.69,-61 116.94,-43.96 142.54,-37.51 162.4,-35.14"/>
<polygon fill="black" stroke="black" points="162.44,-38.65 172.09,-34.29 161.82,-31.68 162.44,-38.65"/>
</g>
<!-- I4&#45;&gt;H2 -->
<g id="edge14" class="edge">
<title>I4-&gt;H2</title>
<path fill="none" stroke="black" d="M56.9,-188.63C65.24,-184.01 75.12,-177.34 81.69,-169 97.54,-148.91 83.06,-132.54 101.69,-115 118.07,-99.58 142.97,-92.95 162.29,-90.11"/>
<polygon fill="black" stroke="black" points="162.45,-93.61 171.97,-88.96 161.62,-86.66 162.45,-93.61"/>
</g>
<!-- I4&#45;&gt;H3 -->
<g id="edge15" class="edge">
<title>I4-&gt;H3</title>
<path fill="none" stroke="black" d="M57,-188.34C69.05,-182.6 86.21,-174.78 101.69,-169 121.97,-161.43 145.41,-154.39 163.16,-149.4"/>
<polygon fill="black" stroke="black" points="164.01,-152.8 172.72,-146.76 162.15,-146.05 164.01,-152.8"/>
</g>
<!-- I4&#45;&gt;H4 -->
<g id="edge16" class="edge">
<title>I4-&gt;H4</title>
<path fill="none" stroke="black" d="M58.78,-196C84.21,-196 131.75,-196 162.3,-196"/>
<polygon fill="black" stroke="black" points="161.97,-199.5 171.97,-196 161.97,-192.5 161.97,-199.5"/>
</g>
<!-- O1 -->
<g id="node9" class="node">
<title>O1</title>
<ellipse fill="gold" stroke="black" cx="347.94" cy="-88" rx="18" ry="18"/>
<text text-anchor="middle" x="347.94" y="-83.8" font-family="Times,serif" font-size="14.00">O1</text>
</g>
<!-- H1&#45;&gt;O1 -->
<g id="edge17" class="edge">
<title>H1-&gt;O1</title>
<path fill="none" stroke="black" d="M209.59,-38.44C227.82,-43.41 257.72,-51.94 282.92,-61 295.57,-65.55 309.33,-71.25 320.84,-76.24"/>
<polygon fill="black" stroke="black" points="319.13,-79.31 329.7,-80.14 321.95,-72.9 319.13,-79.31"/>
</g>
<!-- O2 -->
<g id="node10" class="node">
<title>O2</title>
<ellipse fill="gold" stroke="black" cx="347.94" cy="-142" rx="18" ry="18"/>
<text text-anchor="middle" x="347.94" y="-137.8" font-family="Times,serif" font-size="14.00">O2</text>
</g>
<!-- H1&#45;&gt;O2 -->
<g id="edge18" class="edge">
<title>H1-&gt;O2</title>
<path fill="none" stroke="black" d="M209.98,-34.69C229.97,-36.4 262.8,-42.11 282.92,-61 301.59,-78.51 286.68,-95.22 302.92,-115 307.78,-120.91 314.29,-125.88 320.81,-129.88"/>
<polygon fill="black" stroke="black" points="319.04,-132.9 329.5,-134.63 322.4,-126.76 319.04,-132.9"/>
</g>
<!-- H2&#45;&gt;O1 -->
<g id="edge19" class="edge">
<title>H2-&gt;O1</title>
<path fill="none" stroke="gray" stroke-dasharray="5,2" d="M210.19,-88C236.58,-88 287.02,-88 318.72,-88"/>
<polygon fill="gray" stroke="gray" points="318.34,-91.5 328.34,-88 318.34,-84.5 318.34,-91.5"/>
</g>
<!-- H2&#45;&gt;O2 -->
<g id="edge20" class="edge">
<title>H2-&gt;O2</title>
<path fill="none" stroke="gray" stroke-dasharray="5,2" d="M209.59,-92.44C227.82,-97.41 257.72,-105.94 282.92,-115 295.57,-119.55 309.33,-125.25 320.84,-130.24"/>
<polygon fill="gray" stroke="gray" points="319.13,-133.31 329.7,-134.14 321.95,-126.9 319.13,-133.31"/>
</g>
<!-- H3&#45;&gt;O1 -->
<g id="edge21" class="edge">
<title>H3-&gt;O1</title>
<path fill="none" stroke="black" d="M209.59,-137.56C227.82,-132.59 257.72,-124.06 282.92,-115 295.57,-110.45 309.33,-104.75 320.84,-99.76"/>
<polygon fill="black" stroke="black" points="321.95,-103.1 329.7,-95.86 319.13,-96.69 321.95,-103.1"/>
</g>
<!-- H3&#45;&gt;O2 -->
<g id="edge22" class="edge">
<title>H3-&gt;O2</title>
<path fill="none" stroke="black" d="M210.19,-142C236.58,-142 287.02,-142 318.72,-142"/>
<polygon fill="black" stroke="black" points="318.34,-145.5 328.34,-142 318.34,-138.5 318.34,-145.5"/>
</g>
<!-- H4&#45;&gt;O1 -->
<g id="edge23" class="edge">
<title>H4-&gt;O1</title>
<path fill="none" stroke="gray" stroke-dasharray="5,2" d="M209.98,-195.31C229.97,-193.6 262.8,-187.89 282.92,-169 301.59,-151.49 286.68,-134.78 302.92,-115 307.78,-109.09 314.29,-104.12 320.81,-100.12"/>
<polygon fill="gray" stroke="gray" points="322.4,-103.24 329.5,-95.37 319.04,-97.1 322.4,-103.24"/>
</g>
<!-- H4&#45;&gt;O2 -->
<g id="edge24" class="edge">
<title>H4-&gt;O2</title>
<path fill="none" stroke="gray" stroke-dasharray="5,2" d="M209.59,-191.56C227.82,-186.59 257.72,-178.06 282.92,-169 295.57,-164.45 309.33,-158.75 320.84,-153.76"/>
<polygon fill="gray" stroke="gray" points="321.95,-157.1 329.7,-149.86 319.13,-150.69 321.95,-157.1"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 11 KiB