small cosmetic fixes and improvements

This commit is contained in:
rasbt
2024-01-10 08:01:19 -06:00
parent c93f434f52
commit f279134492
3 changed files with 12 additions and 13 deletions

View File

@@ -580,9 +580,8 @@
"preprocessed = re.split(r'([,.?_!\"()\\']|--|\\s)', raw_text)\n",
"preprocessed = [item.strip() for item in preprocessed if item.strip()]\n",
"\n",
"all_words = sorted(list(set(preprocessed)))\n",
"all_tokens = all_words\n",
"all_words.extend([\"<|endoftext|>\", \"<|unk|>\"])\n",
"all_tokens = sorted(list(set(preprocessed)))\n",
"all_tokens.extend([\"<|endoftext|>\", \"<|unk|>\"])\n",
"\n",
"vocab = {token:integer for integer,token in enumerate(all_tokens)}"
]
@@ -1626,7 +1625,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,