Remove redundant dropout in MLP module (#105)

This commit is contained in:
Sebastian Raschka
2024-04-03 20:19:08 -05:00
committed by GitHub
parent edcae09884
commit 5beff4e25a
11 changed files with 202 additions and 266 deletions

File diff suppressed because one or more lines are too long

View File

@@ -253,7 +253,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"id": "5fee2cf5-61c3-4167-81b5-44ea155bbaf2",
"metadata": {},
"outputs": [],
@@ -265,7 +265,6 @@
" \"n_heads\": 12,\n",
" \"n_layers\": 12,\n",
" \"drop_rate_emb\": 0.1, # NEW: dropout for embedding layers\n",
" \"drop_rate_ffn\": 0.1, # NEW: dropout for feed forward module\n",
" \"drop_rate_attn\": 0.1, # NEW: dropout for multi-head attention \n",
" \"drop_rate_resid\": 0.1, # NEW: dropout for residual connections \n",
" \"qkv_bias\": False\n",
@@ -274,26 +273,13 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"id": "5aa1b0c1-d78a-48fc-ad08-4802458b43f7",
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"from gpt import MultiHeadAttention, LayerNorm, GELU\n",
"\n",
"class FeedForward(nn.Module):\n",
" def __init__(self, cfg):\n",
" super().__init__()\n",
" self.layers = nn.Sequential(\n",
" nn.Linear(cfg[\"emb_dim\"], 4 * cfg[\"emb_dim\"]),\n",
" GELU(),\n",
" nn.Linear(4 * cfg[\"emb_dim\"], cfg[\"emb_dim\"]),\n",
" nn.Dropout(cfg[\"drop_rate_ffn\"]) # NEW: dropout for feed forward module\n",
" )\n",
"\n",
" def forward(self, x):\n",
" return self.layers(x)\n",
"from gpt import MultiHeadAttention, LayerNorm, GELU, FeedForward\n",
"\n",
"\n",
"class TransformerBlock(nn.Module):\n",
@@ -356,7 +342,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"id": "1d013d32-c275-4f42-be21-9010f1537227",
"metadata": {},
"outputs": [],
@@ -384,7 +370,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.11.4"
}
},
"nbformat": 4,

View File

@@ -144,7 +144,6 @@ class FeedForward(nn.Module):
nn.Linear(cfg["emb_dim"], 4 * cfg["emb_dim"]),
GELU(),
nn.Linear(4 * cfg["emb_dim"], cfg["emb_dim"]),
nn.Dropout(cfg["drop_rate"])
)
def forward(self, x):