mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Readability and code quality improvements (#959)
* Consistent dataset naming * consistent section headers
This commit is contained in:
committed by
GitHub
parent
7b1f740f74
commit
be5e2a3331
@@ -79,6 +79,7 @@
|
||||
"id": "2417139b-2357-44d2-bd67-23f5d7f52ae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.1 Understanding word embeddings"
|
||||
]
|
||||
},
|
||||
@@ -128,6 +129,7 @@
|
||||
"id": "eddbb984-8d23-40c5-bbfa-c3c379e7eec3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.2 Tokenizing text"
|
||||
]
|
||||
},
|
||||
@@ -445,6 +447,7 @@
|
||||
"id": "0b5ce8fe-3a07-4f2a-90f1-a0321ce3a231",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.3 Converting tokens into token IDs"
|
||||
]
|
||||
},
|
||||
@@ -738,6 +741,7 @@
|
||||
"id": "4b821ef8-4d53-43b6-a2b2-aef808c343c7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.4 Adding special context tokens"
|
||||
]
|
||||
},
|
||||
@@ -1013,6 +1017,7 @@
|
||||
"id": "5c4ba34b-170f-4e71-939b-77aabb776f14",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.5 BytePair encoding"
|
||||
]
|
||||
},
|
||||
@@ -1528,6 +1533,7 @@
|
||||
"id": "2cd2fcda-2fda-4aa8-8bc8-de1e496f9db1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.7 Creating token embeddings"
|
||||
]
|
||||
},
|
||||
@@ -1715,6 +1721,7 @@
|
||||
"id": "c393d270-b950-4bc8-99ea-97d74f2ea0f6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" \n",
|
||||
"## 2.8 Encoding word positions"
|
||||
]
|
||||
},
|
||||
@@ -1945,7 +1952,8 @@
|
||||
"id": "63230f2e-258f-4497-9e2e-8deee4530364",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Summary and takeaways"
|
||||
" \n",
|
||||
"## Summary and takeaways"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1977,7 +1985,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.16"
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -63,7 +63,8 @@
|
||||
"id": "6f678e62-7bcb-4405-86ae-dce94f494303",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise 2.1"
|
||||
" \n",
|
||||
"## Exercise 2.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -273,7 +274,8 @@
|
||||
"id": "29e5034a-95ed-46d8-9972-589354dc9fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise 2.2"
|
||||
" \n",
|
||||
"## Exercise 2.2"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -407,7 +409,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.16"
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
"<br>\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## Using BPE from `tiktoken`"
|
||||
"## 1. Using BPE from `tiktoken`"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -157,7 +157,7 @@
|
||||
"<br>\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## Using the original BPE implementation used in GPT-2"
|
||||
"## 2. Using the original BPE implementation used in GPT-2"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -247,7 +247,7 @@
|
||||
"<br>\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## Using the BPE via Hugging Face transformers"
|
||||
"## 3. Using the BPE via Hugging Face transformers"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -355,7 +355,7 @@
|
||||
"<br>\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## Using my own from-scratch BPE tokenizer"
|
||||
"## 4. Using my own from-scratch BPE tokenizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -449,7 +449,7 @@
|
||||
"<br>\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## A quick performance benchmark"
|
||||
"## 5. A quick performance benchmark"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -468,7 +468,8 @@
|
||||
"id": "9c0ae9f0-47a1-4e7f-a210-e1d2721f4d1e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Original OpenAI GPT-2 tokenizer"
|
||||
" \n",
|
||||
"### 5.1 Original OpenAI GPT-2 tokenizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -494,7 +495,8 @@
|
||||
"id": "ef2ce3f3-1f81-47ce-b563-99fe2c7a1e90",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Tiktoken OpenAI GPT-2 tokenizer"
|
||||
" \n",
|
||||
"### 5.2 Tiktoken OpenAI GPT-2 tokenizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -520,7 +522,8 @@
|
||||
"id": "0c748de8-273e-42df-b078-3a510106da60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Hugging Face OpenAI GPT-2 tokenizer"
|
||||
" \n",
|
||||
"### 5.3 Hugging Face OpenAI GPT-2 tokenizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -614,7 +617,8 @@
|
||||
"id": "91ac2876-f36e-498c-bd75-8597a39f2d4b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### My own GPT-2 tokenizer (for educational purposes)"
|
||||
" \n",
|
||||
"### 5.4 My own GPT-2 tokenizer (for educational purposes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -652,7 +656,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.16"
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user