mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Rename variable to context_length to make it easier on readers (#106)
* rename to context length * fix spacing
This commit is contained in:
committed by
GitHub
parent
a940373a14
commit
2de60d1bfb
@@ -140,13 +140,13 @@
|
||||
"from previous_chapters import GPTModel\n",
|
||||
"\n",
|
||||
"GPT_CONFIG_124M = {\n",
|
||||
" \"vocab_size\": 50257, # Vocabulary size\n",
|
||||
" \"ctx_len\": 256, # Shortened context length (orig: 1024)\n",
|
||||
" \"emb_dim\": 768, # Embedding dimension\n",
|
||||
" \"n_heads\": 12, # Number of attention heads\n",
|
||||
" \"n_layers\": 12, # Number of layers\n",
|
||||
" \"drop_rate\": 0.1, # Dropout rate\n",
|
||||
" \"qkv_bias\": False # Query-key-value bias\n",
|
||||
" \"vocab_size\": 50257, # Vocabulary size\n",
|
||||
" \"context_length\": 256, # Shortened context length (orig: 1024)\n",
|
||||
" \"emb_dim\": 768, # Embedding dimension\n",
|
||||
" \"n_heads\": 12, # Number of attention heads\n",
|
||||
" \"n_layers\": 12, # Number of layers\n",
|
||||
" \"drop_rate\": 0.1, # Dropout rate\n",
|
||||
" \"qkv_bias\": False # Query-key-value bias\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"torch.manual_seed(123)\n",
|
||||
@@ -161,10 +161,10 @@
|
||||
"source": [
|
||||
"- We use dropout of 0.1 above, but it's relatively common to train LLMs without dropout nowadays\n",
|
||||
"- Modern LLMs also don't use bias vectors in the `nn.Linear` layers for the query, key, and value matrices (unlike earlier GPT models), which is achieved by setting `\"qkv_bias\": False`\n",
|
||||
"- We reduce the context length (`ctx_len`) of only 256 tokens to reduce the computational resource requirements for training the model, whereas the original 124 million parameter GPT-2 model used 1024 characters\n",
|
||||
"- We use a reduced context length (`context_length`) of only 256 tokens to lower the computational resource requirements for training the model, whereas the original 124 million parameter GPT-2 model used 1024 tokens\n",
|
||||
" - This is so that more readers will be able to follow and execute the code examples on their laptop computer\n",
|
||||
" - However, please feel free to increase the `ctx_len` to 1024 tokens (this would not require any code changes)\n",
|
||||
" - We will also load a model with a 1024 `ctx_len` later from pretrained weights"
|
||||
" - However, please feel free to increase the `context_length` to 1024 tokens (this would not require any code changes)\n",
|
||||
" - We will also load a model with a `context_length` of 1024 later from pretrained weights"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -219,7 +219,7 @@
|
||||
" model=model,\n",
|
||||
" idx=text_to_token_ids(start_context, tokenizer),\n",
|
||||
" max_new_tokens=10,\n",
|
||||
" context_size=GPT_CONFIG_124M[\"ctx_len\"]\n",
|
||||
" context_size=GPT_CONFIG_124M[\"context_length\"]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Output text:\\n\", token_ids_to_text(token_ids, tokenizer))"
|
||||
@@ -928,8 +928,8 @@
|
||||
"train_loader = create_dataloader_v1(\n",
|
||||
" train_data,\n",
|
||||
" batch_size=2,\n",
|
||||
" max_length=GPT_CONFIG_124M[\"ctx_len\"],\n",
|
||||
" stride=GPT_CONFIG_124M[\"ctx_len\"],\n",
|
||||
" max_length=GPT_CONFIG_124M[\"context_length\"],\n",
|
||||
" stride=GPT_CONFIG_124M[\"context_length\"],\n",
|
||||
" drop_last=True,\n",
|
||||
" shuffle=True\n",
|
||||
")\n",
|
||||
@@ -937,8 +937,8 @@
|
||||
"val_loader = create_dataloader_v1(\n",
|
||||
" val_data,\n",
|
||||
" batch_size=2,\n",
|
||||
" max_length=GPT_CONFIG_124M[\"ctx_len\"],\n",
|
||||
" stride=GPT_CONFIG_124M[\"ctx_len\"],\n",
|
||||
" max_length=GPT_CONFIG_124M[\"context_length\"],\n",
|
||||
" stride=GPT_CONFIG_124M[\"context_length\"],\n",
|
||||
" drop_last=False,\n",
|
||||
" shuffle=False\n",
|
||||
")"
|
||||
@@ -953,14 +953,14 @@
|
||||
"source": [
|
||||
"# Sanity check\n",
|
||||
"\n",
|
||||
"if total_tokens * (train_ratio) < GPT_CONFIG_124M[\"ctx_len\"]:\n",
|
||||
"if total_tokens * (train_ratio) < GPT_CONFIG_124M[\"context_length\"]:\n",
|
||||
" print(\"Not enough tokens for the training loader. \"\n",
|
||||
" \"Try to lower the `GPT_CONFIG_124M['ctx_len']` or \"\n",
|
||||
" \"Try to lower the `GPT_CONFIG_124M['context_length']` or \"\n",
|
||||
" \"increase the `training_ratio`\")\n",
|
||||
"\n",
|
||||
"if total_tokens * (1-train_ratio) < GPT_CONFIG_124M[\"ctx_len\"]:\n",
|
||||
"if total_tokens * (1-train_ratio) < GPT_CONFIG_124M[\"context_length\"]:\n",
|
||||
" print(\"Not enough tokens for the validation loader. \"\n",
|
||||
" \"Try to lower the `GPT_CONFIG_124M['ctx_len']` or \"\n",
|
||||
" \"Try to lower the `GPT_CONFIG_124M['context_length']` or \"\n",
|
||||
" \"decrease the `training_ratio`\")"
|
||||
]
|
||||
},
|
||||
@@ -1441,7 +1441,7 @@
|
||||
" model=model,\n",
|
||||
" idx=text_to_token_ids(\"Every effort moves you\", tokenizer),\n",
|
||||
" max_new_tokens=25,\n",
|
||||
" context_size=GPT_CONFIG_124M[\"ctx_len\"]\n",
|
||||
" context_size=GPT_CONFIG_124M[\"context_length\"]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Output text:\\n\", token_ids_to_text(token_ids, tokenizer))"
|
||||
@@ -1906,7 +1906,7 @@
|
||||
" model=model,\n",
|
||||
" idx=text_to_token_ids(\"Every effort moves you\", tokenizer),\n",
|
||||
" max_new_tokens=15,\n",
|
||||
" context_size=GPT_CONFIG_124M[\"ctx_len\"],\n",
|
||||
" context_size=GPT_CONFIG_124M[\"context_length\"],\n",
|
||||
" top_k=25,\n",
|
||||
" temperature=1.4\n",
|
||||
")\n",
|
||||
@@ -2203,7 +2203,7 @@
|
||||
"model_name = \"gpt2-small (124M)\" # Example model name\n",
|
||||
"NEW_CONFIG = GPT_CONFIG_124M.copy()\n",
|
||||
"NEW_CONFIG.update(model_configs[model_name])\n",
|
||||
"NEW_CONFIG.update({\"ctx_len\": 1024, \"qkv_bias\": True})\n",
|
||||
"NEW_CONFIG.update({\"context_length\": 1024, \"qkv_bias\": True})\n",
|
||||
"\n",
|
||||
"gpt = GPTModel(NEW_CONFIG)\n",
|
||||
"gpt.eval();"
|
||||
@@ -2338,7 +2338,7 @@
|
||||
" model=gpt,\n",
|
||||
" idx=text_to_token_ids(\"Every effort moves you\", tokenizer),\n",
|
||||
" max_new_tokens=25,\n",
|
||||
" context_size=NEW_CONFIG[\"ctx_len\"],\n",
|
||||
" context_size=NEW_CONFIG[\"context_length\"],\n",
|
||||
" top_k=50,\n",
|
||||
" temperature=1.5\n",
|
||||
")\n",
|
||||
@@ -2403,7 +2403,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user