mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Remove reundant dropout in MLP module (#105)
This commit is contained in:
committed by
GitHub
parent
dd115c1374
commit
3829ccdb34
@@ -222,7 +222,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"id": "a61a4034-797a-4635-bf42-ddfff1b07125",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -253,7 +253,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "ee95a272-b852-43b4-9827-ea7e1dbd5724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -264,7 +264,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"id": "4ab43658-3240-484a-9072-a40a0ed85be6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -273,11 +273,7 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Output text:\n",
|
||||
" Every effort moves you?\"\n",
|
||||
"\n",
|
||||
"\"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
" Every effort moves you know,\" was one of the axioms he laid down across the Sevres and silver of an exquisitely appointed lun\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -298,7 +294,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "ebb22d06-393a-42d3-ab64-66646d33b39b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -307,11 +303,7 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Output text:\n",
|
||||
" Every effort moves you?\"\n",
|
||||
"\n",
|
||||
"\"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
" Every effort moves you know,\" was one of the axioms he laid down across the Sevres and silver of an exquisitely appointed lun\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -332,7 +324,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "75469f24-47cc-458d-a200-fe64c648131d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -341,11 +333,7 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Output text:\n",
|
||||
" Every effort moves you?\"\n",
|
||||
"\n",
|
||||
"\"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
" Every effort moves you know,\" was one of the axioms he laid down across the Sevres and silver of an exquisitely appointed lun\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -412,7 +400,7 @@
|
||||
"model = GPTModel(GPT_CONFIG_124M)\n",
|
||||
"model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
|
||||
"\n",
|
||||
"optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=0.1)\n",
|
||||
"optimizer = torch.optim.AdamW(model.parameters(), lr=0.0004, weight_decay=0.1)\n",
|
||||
"optimizer.load_state_dict(checkpoint[\"optimizer_state_dict\"])\n",
|
||||
"model.to(device)\n",
|
||||
"model.train();"
|
||||
@@ -497,9 +485,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Ep 1 (Step 000000): Train loss 0.523, Val loss 6.445\n",
|
||||
"Ep 1 (Step 000005): Train loss 0.422, Val loss 6.541\n",
|
||||
"Every effort moves you?\" \"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\" \"Oh, and I remember getting off a prodigious phrase about the sketch of the donkey. \"There were days when I\n"
|
||||
"Ep 1 (Step 000000): Train loss 0.271, Val loss 6.545\n",
|
||||
"Ep 1 (Step 000005): Train loss 0.244, Val loss 6.614\n",
|
||||
"Every effort moves you?\" \"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\" He laughed again, and threw back his head to look up at the sketch of the donkey. \"There were days when I\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -558,7 +546,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 12,
|
||||
"id": "68d162d6-bbb9-4d6d-82ee-1c410694f872",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -586,7 +574,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 13,
|
||||
"id": "d8373461-7dad-47da-a489-3e23f0799b23",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -612,7 +600,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 14,
|
||||
"id": "cdd44873-d6c2-4471-a20f-f639b09fdcd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -637,7 +625,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 15,
|
||||
"id": "c7d562e4-33f6-4611-9b75-6ad1cb441d3b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -652,7 +640,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 16,
|
||||
"id": "46eda9ea-ccb0-46ee-931b-3c07502b2544",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -705,7 +693,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 17,
|
||||
"id": "4e3574a2-687d-47a2-a2f6-457fe9d595f1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -713,8 +701,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Training loss: 3.754748503367106\n",
|
||||
"Validation loss: 3.559617757797241\n"
|
||||
"Training loss: 3.7547483444213867\n",
|
||||
"Validation loss: 3.5596189498901367\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -739,7 +727,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 18,
|
||||
"id": "1a79a4b6-fe8f-40c2-a018-e731dcf391b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -754,8 +742,8 @@
|
||||
"File already exists and is up-to-date: gpt2/1558M/model.ckpt.index\n",
|
||||
"File already exists and is up-to-date: gpt2/1558M/model.ckpt.meta\n",
|
||||
"File already exists and is up-to-date: gpt2/1558M/vocab.bpe\n",
|
||||
"Training loss: 3.3046312597062855\n",
|
||||
"Validation loss: 3.119514226913452\n"
|
||||
"Training loss: 3.3046313656700983\n",
|
||||
"Validation loss: 3.1195149421691895\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -812,7 +800,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 19,
|
||||
"id": "31e0972b-e85e-4904-a0f5-24c3eacd5fa2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -838,7 +826,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 20,
|
||||
"id": "b641ee88-f9d4-43ec-a787-e34199eed356",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -882,7 +870,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 21,
|
||||
"id": "c98f56f4-98fc-43b4-9ee5-726e9d17c73f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -893,7 +881,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 22,
|
||||
"id": "b1f7853c-6e81-4f1f-a1d0-61e2c7d33a20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
||||
Reference in New Issue
Block a user