mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Add more sophisticated Qwen3 tokenizer (#729)
This commit is contained in:
@@ -487,21 +487,6 @@
|
||||
" \"dtype\": torch.bfloat16,\n",
|
||||
" } \n",
|
||||
"\n",
|
||||
"elif CHOOSE_MODEL == \"8B\":\n",
|
||||
" QWEN3_CONFIG = {\n",
|
||||
" \"vocab_size\": 151_936,\n",
|
||||
" \"context_length\": 40_960,\n",
|
||||
" \"emb_dim\": 4096, # 60% larger than above\n",
|
||||
" \"n_heads\": 32,\n",
|
||||
" \"n_layers\": 36, # 26% larger than above\n",
|
||||
" \"hidden_dim\": 12288,\n",
|
||||
" \"head_dim\": 128,\n",
|
||||
" \"qk_norm\": True,\n",
|
||||
" \"n_kv_groups\": 8,\n",
|
||||
" \"rope_base\": 1_000_000.0,\n",
|
||||
" \"dtype\": torch.bfloat16,\n",
|
||||
" } \n",
|
||||
"\n",
|
||||
"elif CHOOSE_MODEL == \"14B\":\n",
|
||||
" QWEN3_CONFIG = {\n",
|
||||
" \"vocab_size\": 151_936,\n",
|
||||
|
||||
Reference in New Issue
Block a user