mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Rename variable to context_length to make it easier on readers (#106)
* rename to context length * fix spacing
This commit is contained in:
committed by
GitHub
parent
a940373a14
commit
2de60d1bfb
@@ -117,13 +117,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"GPT_CONFIG_124M = {\n",
|
||||
" \"vocab_size\": 50257, # Vocabulary size\n",
|
||||
" \"ctx_len\": 1024, # Context length\n",
|
||||
" \"emb_dim\": 768, # Embedding dimension\n",
|
||||
" \"n_heads\": 12, # Number of attention heads\n",
|
||||
" \"n_layers\": 12, # Number of layers\n",
|
||||
" \"drop_rate\": 0.1, # Dropout rate\n",
|
||||
" \"qkv_bias\": False # Query-Key-Value bias\n",
|
||||
" \"vocab_size\": 50257, # Vocabulary size\n",
|
||||
" \"context_length\": 1024, # Context length\n",
|
||||
" \"emb_dim\": 768, # Embedding dimension\n",
|
||||
" \"n_heads\": 12, # Number of attention heads\n",
|
||||
" \"n_layers\": 12, # Number of layers\n",
|
||||
" \"drop_rate\": 0.1, # Dropout rate\n",
|
||||
" \"qkv_bias\": False # Query-Key-Value bias\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
@@ -134,7 +134,7 @@
|
||||
"source": [
|
||||
"- We use short variable names to avoid long lines of code later\n",
|
||||
"- `\"vocab_size\"` indicates a vocabulary size of 50,257 words, supported by the BPE tokenizer discussed in Chapter 2\n",
|
||||
"- `\"ctx_len\"` represents the model's maximum input token count, as enabled by positional embeddings covered in Chapter 2\n",
|
||||
"- `\"context_length\"` represents the model's maximum input token count, as enabled by positional embeddings covered in Chapter 2\n",
|
||||
"- `\"emb_dim\"` is the embedding size for token inputs, converting each input token into a 768-dimensional vector\n",
|
||||
"- `\"n_heads\"` is the number of attention heads in the multi-head attention mechanism implemented in Chapter 3\n",
|
||||
"- `\"n_layers\"` is the number of transformer blocks within the model, which we'll implement in upcoming sections\n",
|
||||
@@ -943,7 +943,7 @@
|
||||
" self.att = MultiHeadAttention(\n",
|
||||
" d_in=cfg[\"emb_dim\"],\n",
|
||||
" d_out=cfg[\"emb_dim\"],\n",
|
||||
" block_size=cfg[\"ctx_len\"],\n",
|
||||
" context_length=cfg[\"ctx_len\"],\n",
|
||||
" num_heads=cfg[\"n_heads\"], \n",
|
||||
" dropout=cfg[\"drop_rate\"],\n",
|
||||
" qkv_bias=cfg[\"qkv_bias\"])\n",
|
||||
@@ -1489,7 +1489,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user