Rename variable to context_length to make it easier on readers (#106)

* rename to context length * fix spacing
2026-04-10 12:33:42 +00:00 · 2024-04-04 07:27:41 -05:00
parent a940373a14
commit 2de60d1bfb
25 changed files with 242 additions and 242 deletions
--- a/ch04/01_main-chapter-code/ch04.ipynb
+++ b/ch04/01_main-chapter-code/ch04.ipynb
@@ -117,13 +117,13 @@
   "outputs": [],
   "source": [
    "GPT_CONFIG_124M = {\n",
-    "    \"vocab_size\": 50257,  # Vocabulary size\n",
-    "    \"ctx_len\": 1024,      # Context length\n",
-    "    \"emb_dim\": 768,       # Embedding dimension\n",
-    "    \"n_heads\": 12,        # Number of attention heads\n",
-    "    \"n_layers\": 12,       # Number of layers\n",
-    "    \"drop_rate\": 0.1,     # Dropout rate\n",
-    "    \"qkv_bias\": False     # Query-Key-Value bias\n",
+    "    \"vocab_size\": 50257,    # Vocabulary size\n",
+    "    \"context_length\": 1024, # Context length\n",
+    "    \"emb_dim\": 768,         # Embedding dimension\n",
+    "    \"n_heads\": 12,          # Number of attention heads\n",
+    "    \"n_layers\": 12,         # Number of layers\n",
+    "    \"drop_rate\": 0.1,       # Dropout rate\n",
+    "    \"qkv_bias\": False       # Query-Key-Value bias\n",
    "}"
   ]
  },
@@ -134,7 +134,7 @@
   "source": [
    "- We use short variable names to avoid long lines of code later\n",
    "- `\"vocab_size\"` indicates a vocabulary size of 50,257 words, supported by the BPE tokenizer discussed in Chapter 2\n",
-    "- `\"ctx_len\"` represents the model's maximum input token count, as enabled by positional embeddings covered in Chapter 2\n",
+    "- `\"context_length\"` represents the model's maximum input token count, as enabled by positional embeddings covered in Chapter 2\n",
    "- `\"emb_dim\"` is the embedding size for token inputs, converting each input token into a 768-dimensional vector\n",
    "- `\"n_heads\"` is the number of attention heads in the multi-head attention mechanism implemented in Chapter 3\n",
    "- `\"n_layers\"` is the number of transformer blocks within the model, which we'll implement in upcoming sections\n",
@@ -943,7 +943,7 @@
    "        self.att = MultiHeadAttention(\n",
    "            d_in=cfg[\"emb_dim\"],\n",
    "            d_out=cfg[\"emb_dim\"],\n",
-    "            block_size=cfg[\"ctx_len\"],\n",
+    "            context_length=cfg[\"context_length\"],\n",
    "            num_heads=cfg[\"n_heads\"], \n",
    "            dropout=cfg[\"drop_rate\"],\n",
    "            qkv_bias=cfg[\"qkv_bias\"])\n",
@@ -1489,7 +1489,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.6"
  }
 },
 "nbformat": 4,