mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
ch4 exercise solutions
This commit is contained in:
@@ -942,12 +942,11 @@
|
||||
" super().__init__()\n",
|
||||
" self.tok_emb = nn.Embedding(cfg[\"vocab_size\"], cfg[\"emb_dim\"])\n",
|
||||
" self.pos_emb = nn.Embedding(cfg[\"ctx_len\"], cfg[\"emb_dim\"])\n",
|
||||
" self.drop_emb = nn.Dropout(cfg[\"drop_rate\"])\n",
|
||||
" \n",
|
||||
" # Use a placeholder for TransformerBlock\n",
|
||||
" self.trf_blocks = nn.Sequential(\n",
|
||||
" *[TransformerBlock(cfg) for _ in range(cfg[\"n_layers\"])])\n",
|
||||
" \n",
|
||||
" # Use a placeholder for LayerNorm\n",
|
||||
" self.final_norm = LayerNorm(cfg[\"emb_dim\"])\n",
|
||||
" self.out_head = nn.Linear(\n",
|
||||
" cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
|
||||
@@ -1210,7 +1209,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 26,
|
||||
"id": "c9b428a9-8764-4b36-80cd-7d4e00595ba6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -1264,7 +1263,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 27,
|
||||
"id": "bb3ffc8e-f95f-4a24-a978-939b8953ea3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1282,7 +1281,7 @@
|
||||
" 0.0000], grad_fn=<SoftmaxBackward0>)"
|
||||
]
|
||||
},
|
||||
"execution_count": 54,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1299,7 +1298,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 28,
|
||||
"id": "3d7e3e94-df0f-4c0f-a6a1-423f500ac1d3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1324,7 +1323,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 29,
|
||||
"id": "a72a9b60-de66-44cf-b2f9-1e638934ada4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1332,9 +1331,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Output: tensor([[15496, 11, 314, 716, 27018, 24086, 47843, 30961, 42348, 7267,\n",
|
||||
" 49706, 43231, 47062, 34657]])\n",
|
||||
"Output length: 14\n"
|
||||
"Output: tensor([[15496, 11, 314, 716, 27018, 24086, 47843, 30961, 42348, 7267]])\n",
|
||||
"Output length: 10\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1344,7 +1342,7 @@
|
||||
"out = generate_text_simple(\n",
|
||||
" model=model,\n",
|
||||
" idx=encoded_tensor, \n",
|
||||
" max_new_tokens=10, \n",
|
||||
" max_new_tokens=6, \n",
|
||||
" context_size=GPT_CONFIG_124M[\"ctx_len\"]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -1362,7 +1360,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 30,
|
||||
"id": "053d99f6-5710-4446-8d52-117fb34ea9f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1370,7 +1368,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hello, I am Featureiman Byeswickattribute argue logger Normandy Compton analogous\n"
|
||||
"Hello, I am Featureiman Byeswickattribute argue\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user