mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
use block size variable in positional embedding layer
This commit is contained in:
@@ -158,7 +158,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "6fb5b2f8-dd2c-4a6d-94ef-a0e9ad163951",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -190,7 +190,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "9842f39b-1654-410e-88bf-d1b899bf0241",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -224,7 +224,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 4,
|
||||
"id": "e3ccc99c-33ce-4f11-b7f2-353cf1cbdaba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -255,7 +255,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 5,
|
||||
"id": "07b2e58d-a6ed-49f0-a1cd-2463e8d53a20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -289,7 +289,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 6,
|
||||
"id": "2d99cac4-45ea-46b3-b3c1-e000ad16e158",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -319,7 +319,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 7,
|
||||
"id": "8fcb96f0-14e5-4973-a50e-79ea7c6af99f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -378,7 +378,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 8,
|
||||
"id": "04004be8-07a1-468b-ab33-32e16a551b45",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -415,7 +415,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 9,
|
||||
"id": "2cea69d0-9a47-45da-8d5a-47ceef2df673",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -447,7 +447,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 10,
|
||||
"id": "fa4ef062-de81-47ee-8415-bfe1708c81b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -479,7 +479,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 11,
|
||||
"id": "112b492c-fb6f-4e6d-8df5-518ae83363d5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -509,7 +509,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 12,
|
||||
"id": "ba8eafcf-f7f7-4989-b8dc-61b50c4f81dc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -541,7 +541,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 13,
|
||||
"id": "2570eb7d-aee1-457a-a61e-7544478219fa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -620,7 +620,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 14,
|
||||
"id": "8250fdc6-6cd6-4c5b-b9c0-8c643aadb7db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -640,7 +640,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 15,
|
||||
"id": "bfd7259a-f26c-4cea-b8fc-282b5cae1e00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -662,7 +662,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 16,
|
||||
"id": "73cedd62-01e1-4196-a575-baecc6095601",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -692,7 +692,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 17,
|
||||
"id": "8c1c3949-fc08-4d19-a41e-1c235b4e631b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -731,7 +731,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 18,
|
||||
"id": "64cbc253-a182-4490-a765-246979ea0a28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -759,7 +759,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 19,
|
||||
"id": "b14e44b5-d170-40f9-8847-8990804af26d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -795,7 +795,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 20,
|
||||
"id": "146f5587-c845-4e30-9894-c7ed3a248153",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -830,7 +830,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 21,
|
||||
"id": "e138f033-fa7e-4e3a-8764-b53a96b26397",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -865,7 +865,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 22,
|
||||
"id": "51590326-cdbe-4e62-93b1-17df71c11ee4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -921,7 +921,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 23,
|
||||
"id": "73f411e3-e231-464a-89fe-0a9035e5f839",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1017,7 +1017,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 24,
|
||||
"id": "1933940d-0fa5-4b17-a3ce-388e5314a1bb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1049,7 +1049,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 25,
|
||||
"id": "43f3d2e3-185b-4184-9f98-edde5e6df746",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1082,7 +1082,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 26,
|
||||
"id": "9f531e2e-f4d2-4fea-a87f-4c132e48b9e7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1122,7 +1122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 27,
|
||||
"id": "a2be2f43-9cf0-44f6-8d8b-68ef2fb3cc39",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1155,7 +1155,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 28,
|
||||
"id": "b1cd6d7f-16f2-43c1-915e-0824f1a4bc52",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1217,7 +1217,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 29,
|
||||
"id": "0de578db-8289-41d6-b377-ef645751e33f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1244,7 +1244,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 30,
|
||||
"id": "b16c5edb-942b-458c-8e95-25e4e355381e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1278,7 +1278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 31,
|
||||
"id": "977a5fa7-a9d5-4e2e-8a32-8e0331ccfe28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1305,7 +1305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 32,
|
||||
"id": "60d8c2eb-2d8e-4d2c-99bc-9eef8cc53ca0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1313,19 +1313,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([[[-0.0827, 0.0405],\n",
|
||||
" [-0.2249, -0.0036],\n",
|
||||
" [-0.4126, -0.0555],\n",
|
||||
" [-0.5054, -0.1016],\n",
|
||||
" [-0.7738, -0.1860],\n",
|
||||
" [-1.1677, -0.3309]],\n",
|
||||
"tensor([[[-0.0844, 0.0414],\n",
|
||||
" [-0.2264, -0.0039],\n",
|
||||
" [-0.4163, -0.0564],\n",
|
||||
" [-0.5014, -0.1011],\n",
|
||||
" [-0.7754, -0.1867],\n",
|
||||
" [-1.1632, -0.3303]],\n",
|
||||
"\n",
|
||||
" [[-0.0827, 0.0405],\n",
|
||||
" [-0.2249, -0.0036],\n",
|
||||
" [-0.4126, -0.0555],\n",
|
||||
" [-0.5054, -0.1016],\n",
|
||||
" [-0.7738, -0.1860],\n",
|
||||
" [-1.1677, -0.3309]]], grad_fn=<UnsafeViewBackward0>)\n",
|
||||
" [[-0.0844, 0.0414],\n",
|
||||
" [-0.2264, -0.0039],\n",
|
||||
" [-0.4163, -0.0564],\n",
|
||||
" [-0.5014, -0.1011],\n",
|
||||
" [-0.7754, -0.1867],\n",
|
||||
" [-1.1632, -0.3303]]], grad_fn=<UnsafeViewBackward0>)\n",
|
||||
"context_vecs.shape: torch.Size([2, 6, 2])\n"
|
||||
]
|
||||
}
|
||||
@@ -1412,7 +1412,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"execution_count": 33,
|
||||
"id": "b9a66e11-7105-4bb4-be84-041f1a1f3bd2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1420,19 +1420,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([[[-0.0827, 0.0405, 0.0771, 0.0172],\n",
|
||||
" [-0.2249, -0.0036, 0.2144, 0.1183],\n",
|
||||
" [-0.4126, -0.0555, 0.3871, 0.2445],\n",
|
||||
" [-0.5054, -0.1016, 0.4995, 0.3406],\n",
|
||||
" [-0.7738, -0.1860, 0.7385, 0.4875],\n",
|
||||
" [-1.1677, -0.3309, 1.1223, 0.8457]],\n",
|
||||
"tensor([[[-0.0844, 0.0414, 0.0766, 0.0171],\n",
|
||||
" [-0.2264, -0.0039, 0.2143, 0.1185],\n",
|
||||
" [-0.4163, -0.0564, 0.3878, 0.2453],\n",
|
||||
" [-0.5014, -0.1011, 0.4992, 0.3401],\n",
|
||||
" [-0.7754, -0.1867, 0.7387, 0.4868],\n",
|
||||
" [-1.1632, -0.3303, 1.1224, 0.8460]],\n",
|
||||
"\n",
|
||||
" [[-0.0827, 0.0405, 0.0771, 0.0172],\n",
|
||||
" [-0.2249, -0.0036, 0.2144, 0.1183],\n",
|
||||
" [-0.4126, -0.0555, 0.3871, 0.2445],\n",
|
||||
" [-0.5054, -0.1016, 0.4995, 0.3406],\n",
|
||||
" [-0.7738, -0.1860, 0.7385, 0.4875],\n",
|
||||
" [-1.1677, -0.3309, 1.1223, 0.8457]]], grad_fn=<CatBackward0>)\n",
|
||||
" [[-0.0844, 0.0414, 0.0766, 0.0171],\n",
|
||||
" [-0.2264, -0.0039, 0.2143, 0.1185],\n",
|
||||
" [-0.4163, -0.0564, 0.3878, 0.2453],\n",
|
||||
" [-0.5014, -0.1011, 0.4992, 0.3401],\n",
|
||||
" [-0.7754, -0.1867, 0.7387, 0.4868],\n",
|
||||
" [-1.1632, -0.3303, 1.1224, 0.8460]]], grad_fn=<CatBackward0>)\n",
|
||||
"context_vecs.shape: torch.Size([2, 6, 4])\n"
|
||||
]
|
||||
}
|
||||
@@ -1474,7 +1474,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"execution_count": 34,
|
||||
"id": "dc9a4375-068b-4b2a-aabb-a29347ca5ecd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1482,19 +1482,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor([[[-0.0932, 0.0353],\n",
|
||||
" [-0.2688, -0.0017],\n",
|
||||
" [-0.4848, -0.0500],\n",
|
||||
" [-0.6469, -0.1051],\n",
|
||||
" [-0.8832, -0.1764],\n",
|
||||
" [-1.4730, -0.3391]],\n",
|
||||
"tensor([[[-9.1476e-02, 3.4164e-02],\n",
|
||||
" [-2.6796e-01, -1.3427e-03],\n",
|
||||
" [-4.8421e-01, -4.8909e-02],\n",
|
||||
" [-6.4808e-01, -1.0625e-01],\n",
|
||||
" [-8.8380e-01, -1.7140e-01],\n",
|
||||
" [-1.4744e+00, -3.4327e-01]],\n",
|
||||
"\n",
|
||||
" [[-0.0932, 0.0353],\n",
|
||||
" [-0.2688, -0.0017],\n",
|
||||
" [-0.4848, -0.0500],\n",
|
||||
" [-0.6469, -0.1051],\n",
|
||||
" [-0.8832, -0.1764],\n",
|
||||
" [-1.4730, -0.3391]]], grad_fn=<CatBackward0>)\n",
|
||||
" [[-9.1476e-02, 3.4164e-02],\n",
|
||||
" [-2.6796e-01, -1.3427e-03],\n",
|
||||
" [-4.8421e-01, -4.8909e-02],\n",
|
||||
" [-6.4808e-01, -1.0625e-01],\n",
|
||||
" [-8.8380e-01, -1.7140e-01],\n",
|
||||
" [-1.4744e+00, -3.4327e-01]]], grad_fn=<CatBackward0>)\n",
|
||||
"context_vecs.shape: torch.Size([2, 6, 2])\n"
|
||||
]
|
||||
}
|
||||
@@ -1531,7 +1531,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"execution_count": 35,
|
||||
"id": "110b0188-6e9e-4e56-a988-10523c6c8538",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1646,7 +1646,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": 36,
|
||||
"id": "e8cfc1ae-78ab-4faa-bc73-98bd054806c9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1689,7 +1689,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"execution_count": 37,
|
||||
"id": "053760f1-1a02-42f0-b3bf-3d939e407039",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1760,7 +1760,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user