mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Use instance tokenizer (#116)
* Use instance tokenizer
* consistency updates

---------

Co-authored-by: Sebastian Raschka <mail@sebastianraschka.com>
This commit is contained in:
@@ -78,7 +78,7 @@
|
||||
" self.target_ids = []\n",
|
||||
"\n",
|
||||
" # Tokenize the entire text\n",
|
||||
" token_ids = tokenizer.encode(txt, allowed_special={'<|endoftext|>'})\n",
|
||||
" token_ids = self.tokenizer.encode(txt, allowed_special={'<|endoftext|>'})\n",
|
||||
"\n",
|
||||
" # Use a sliding window to chunk the book into overlapping sequences of max_length\n",
|
||||
" for i in range(0, len(token_ids) - max_length, stride):\n",
|
||||
@@ -374,7 +374,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user