Use instance tokenizer (#116)

* Use instance tokenizer

* consistency updates

---------

Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com>
This commit is contained in:
James Holcombe
2024-04-10 21:16:19 -04:00
committed by GitHub
parent 028a346498
commit 05718c6b94
11 changed files with 14 additions and 14 deletions

View File

@@ -25,7 +25,7 @@ class GPTDatasetV1(Dataset):
self.target_ids = []
# Tokenize the entire text
-        token_ids = tokenizer.encode(txt)
+        token_ids = self.tokenizer.encode(txt)
# Use a sliding window to chunk the book into overlapping sequences of max_length
for i in range(0, len(token_ids) - max_length, stride):