Fix BPE bonus materials (#561)

* Fix BPE bonus materials

* fix bpe implementation

* update

* Add 'Hello, world. Is this-- a test?' test case

* update link to test file

* update path handling

* update path handling

* fix pytest paths
This commit is contained in:
Sebastian Raschka
2025-03-08 17:21:30 -06:00
committed by GitHub
parent 96ca2fcb2f
commit f63f04d8d5
5 changed files with 307 additions and 87 deletions

View File

@@ -67,7 +67,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"tiktoken version: 0.7.0\n"
"tiktoken version: 0.9.0\n"
]
}
],
@@ -180,8 +180,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching encoder.json: 1.04Mit [00:00, 4.13Mit/s] \n",
"Fetching vocab.bpe: 457kit [00:00, 2.56Mit/s] \n"
"Fetching encoder.json: 1.04Mit [00:00, 3.69Mit/s] \n",
"Fetching vocab.bpe: 457kit [00:00, 2.53Mit/s] \n"
]
}
],
@@ -256,10 +256,18 @@
"id": "e9077bf4-f91f-42ad-ab76-f3d89128510e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sebastian/Developer/LLMs-from-scratch/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"data": {
"text/plain": [
"'4.48.0'"
"'4.49.0'"
]
},
"execution_count": 12,
@@ -423,7 +431,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[1544, 18798, 11, 995, 13, 1148, 256, 5303, 82, 438, 257, 1332, 30]\n"
"[15496, 11, 995, 13, 1148, 428, 438, 257, 1332, 30]\n"
]
}
],
@@ -451,7 +459,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open('../01_main-chapter-code/the-verdict.txt', 'r', encoding='utf-8') as f:\n",
"with open(\"../01_main-chapter-code/the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
" raw_text = f.read()"
]
},
@@ -473,7 +481,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"3.39 ms ± 21.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"3.84 ms ± 9.83 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -499,7 +507,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.08 ms ± 5.99 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
"901 μs ± 6.27 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
@@ -532,7 +540,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"10.2 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"11 ms ± 94.4 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -550,7 +558,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"10 ms ± 36.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"10.8 ms ± 180 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -575,7 +583,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"3.79 ms ± 48.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"3.66 ms ± 3.67 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -593,7 +601,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"3.83 ms ± 58.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"3.77 ms ± 49.3 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -619,7 +627,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.59 ms ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
"9.37 ms ± 50.3 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -644,7 +652,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.16"
}
},
"nbformat": 4,