mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Remove redundant dropout in MLP module (#105)
This commit is contained in:
committed by
GitHub
parent
edcae09884
commit
5beff4e25a
File diff suppressed because one or more lines are too long
@@ -253,7 +253,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 1,
|
||||
"id": "5fee2cf5-61c3-4167-81b5-44ea155bbaf2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -265,7 +265,6 @@
|
||||
" \"n_heads\": 12,\n",
|
||||
" \"n_layers\": 12,\n",
|
||||
" \"drop_rate_emb\": 0.1, # NEW: dropout for embedding layers\n",
|
||||
" \"drop_rate_ffn\": 0.1, # NEW: dropout for feed forward module\n",
|
||||
" \"drop_rate_attn\": 0.1, # NEW: dropout for multi-head attention \n",
|
||||
" \"drop_rate_resid\": 0.1, # NEW: dropout for residual connections \n",
|
||||
" \"qkv_bias\": False\n",
|
||||
@@ -274,26 +273,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 2,
|
||||
"id": "5aa1b0c1-d78a-48fc-ad08-4802458b43f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch.nn as nn\n",
|
||||
"from gpt import MultiHeadAttention, LayerNorm, GELU\n",
|
||||
"\n",
|
||||
"class FeedForward(nn.Module):\n",
|
||||
" def __init__(self, cfg):\n",
|
||||
" super().__init__()\n",
|
||||
" self.layers = nn.Sequential(\n",
|
||||
" nn.Linear(cfg[\"emb_dim\"], 4 * cfg[\"emb_dim\"]),\n",
|
||||
" GELU(),\n",
|
||||
" nn.Linear(4 * cfg[\"emb_dim\"], cfg[\"emb_dim\"]),\n",
|
||||
" nn.Dropout(cfg[\"drop_rate_ffn\"]) # NEW: dropout for feed forward module\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def forward(self, x):\n",
|
||||
" return self.layers(x)\n",
|
||||
"from gpt import MultiHeadAttention, LayerNorm, GELU, FeedForward\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class TransformerBlock(nn.Module):\n",
|
||||
@@ -356,7 +342,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 3,
|
||||
"id": "1d013d32-c275-4f42-be21-9010f1537227",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -384,7 +370,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -144,7 +144,6 @@ class FeedForward(nn.Module):
|
||||
nn.Linear(cfg["emb_dim"], 4 * cfg["emb_dim"]),
|
||||
GELU(),
|
||||
nn.Linear(4 * cfg["emb_dim"], cfg["emb_dim"]),
|
||||
nn.Dropout(cfg["drop_rate"])
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
Reference in New Issue
Block a user