ch4 exercise solutions

2026-04-10 12:33:42 +00:00 · 2024-02-11 11:51:39 -06:00
parent 103f7826ad
commit fe332006de
4 changed files with 394 additions and 16 deletions
--- a/ch04/01_main-chapter-code/ch04.ipynb
+++ b/ch04/01_main-chapter-code/ch04.ipynb
@@ -942,12 +942,11 @@
    "        super().__init__()\n",
    "        self.tok_emb = nn.Embedding(cfg[\"vocab_size\"], cfg[\"emb_dim\"])\n",
    "        self.pos_emb = nn.Embedding(cfg[\"ctx_len\"], cfg[\"emb_dim\"])\n",
+    "        self.drop_emb = nn.Dropout(cfg[\"drop_rate\"])\n",
    "        \n",
-    "        # Use a placeholder for TransformerBlock\n",
    "        self.trf_blocks = nn.Sequential(\n",
    "            *[TransformerBlock(cfg) for _ in range(cfg[\"n_layers\"])])\n",
    "        \n",
-    "        # Use a placeholder for LayerNorm\n",
    "        self.final_norm = LayerNorm(cfg[\"emb_dim\"])\n",
    "        self.out_head = nn.Linear(\n",
    "            cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
@@ -1210,7 +1209,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 26,
   "id": "c9b428a9-8764-4b36-80cd-7d4e00595ba6",
   "metadata": {},
   "outputs": [],
@@ -1264,7 +1263,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 27,
   "id": "bb3ffc8e-f95f-4a24-a978-939b8953ea3e",
   "metadata": {},
   "outputs": [
@@ -1282,7 +1281,7 @@
       "            0.0000], grad_fn=<SoftmaxBackward0>)"
      ]
     },
-     "execution_count": 54,
+     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -1299,7 +1298,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 28,
   "id": "3d7e3e94-df0f-4c0f-a6a1-423f500ac1d3",
   "metadata": {},
   "outputs": [
@@ -1324,7 +1323,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 29,
   "id": "a72a9b60-de66-44cf-b2f9-1e638934ada4",
   "metadata": {},
   "outputs": [
@@ -1332,9 +1331,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Output: tensor([[15496,    11,   314,   716, 27018, 24086, 47843, 30961, 42348,  7267,\n",
-      "         49706, 43231, 47062, 34657]])\n",
-      "Output length: 14\n"
+      "Output: tensor([[15496,    11,   314,   716, 27018, 24086, 47843, 30961, 42348,  7267]])\n",
+      "Output length: 10\n"
     ]
    }
   ],
@@ -1344,7 +1342,7 @@
    "out = generate_text_simple(\n",
    "    model=model,\n",
    "    idx=encoded_tensor, \n",
-    "    max_new_tokens=10, \n",
+    "    max_new_tokens=6, \n",
    "    context_size=GPT_CONFIG_124M[\"ctx_len\"]\n",
    ")\n",
    "\n",
@@ -1362,7 +1360,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 30,
   "id": "053d99f6-5710-4446-8d52-117fb34ea9f6",
   "metadata": {},
   "outputs": [
@@ -1370,7 +1368,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Hello, I am Featureiman Byeswickattribute argue logger Normandy Compton analogous\n"
+      "Hello, I am Featureiman Byeswickattribute argue\n"
     ]
    }
   ],