mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
small cosmetic fixes and improvements
This commit is contained in:
@@ -580,9 +580,8 @@
|
||||
"preprocessed = re.split(r'([,.?_!\"()\\']|--|\\s)', raw_text)\n",
|
||||
"preprocessed = [item.strip() for item in preprocessed if item.strip()]\n",
|
||||
"\n",
|
||||
-"all_words = sorted(list(set(preprocessed)))\n",
|
||||
-"all_tokens = all_words\n",
|
||||
-"all_words.extend([\"<|endoftext|>\", \"<|unk|>\"])\n",
|
||||
+"all_tokens = sorted(list(set(preprocessed)))\n",
|
||||
+"all_tokens.extend([\"<|endoftext|>\", \"<|unk|>\"])\n",
|
||||
"\n",
|
||||
"vocab = {token:integer for integer,token in enumerate(all_tokens)}"
|
||||
]
|
||||
@@ -1626,7 +1625,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
-"version": "3.11.4"
|
||||
+"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user