mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
add and update readme files
This commit is contained in:
6
ch04/01_main-chapter-code/README.md
Normal file
6
ch04/01_main-chapter-code/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# Chapter 4: Implementing a GPT Model from Scratch to Generate Text
|
||||
|
||||
- [ch04.ipynb](ch04.ipynb) contains all the code as it appears in the chapter
|
||||
- [previous_chapters.py](previous_chapters.py) is a Python module that contains the `MultiHeadAttention` module from the previous chapter, which we import in [ch04.ipynb](ch04.ipynb) to create the GPT model
|
||||
- [gpt.py](gpt.py) is a standalone Python script file with the code that we implemented thus far, including the GPT model we coded in this chapter
|
||||
|
||||
@@ -134,7 +134,9 @@
|
||||
" \n",
|
||||
" # Use a placeholder for LayerNorm\n",
|
||||
" self.final_norm = DummyLayerNorm(cfg[\"emb_dim\"])\n",
|
||||
" self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
|
||||
" self.out_head = nn.Linear(\n",
|
||||
" cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def forward(self, in_idx):\n",
|
||||
" batch_size, seq_len = in_idx.shape\n",
|
||||
@@ -208,7 +210,7 @@
|
||||
"batch.append(torch.tensor(tokenizer.encode(txt1)))\n",
|
||||
"batch.append(torch.tensor(tokenizer.encode(txt2)))\n",
|
||||
"batch = torch.stack(batch, dim=0)\n",
|
||||
"batch"
|
||||
"print(batch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -772,7 +774,7 @@
|
||||
"torch.manual_seed(123)\n",
|
||||
"ex_short = ExampleWithShortcut()\n",
|
||||
"inputs = torch.tensor([[-1., 1., 2.]])\n",
|
||||
"ex_short(inputs)"
|
||||
"print(ex_short(inputs))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -947,7 +949,9 @@
|
||||
" \n",
|
||||
" # Use a placeholder for LayerNorm\n",
|
||||
" self.final_norm = LayerNorm(cfg[\"emb_dim\"])\n",
|
||||
" self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
|
||||
" self.out_head = nn.Linear(\n",
|
||||
" cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def forward(self, in_idx):\n",
|
||||
" batch_size, seq_len = in_idx.shape\n",
|
||||
|
||||
Reference in New Issue
Block a user