Add more sophisticated Qwen3 tokenizer (#729)

This commit is contained in:
Sebastian Raschka
2025-07-09 13:16:26 -05:00
committed by rasbt
parent f596aab0cb
commit 14fa50dfc8
4 changed files with 142 additions and 55 deletions

View File

@@ -487,21 +487,6 @@
" \"dtype\": torch.bfloat16,\n",
" } \n",
"\n",
"elif CHOOSE_MODEL == \"8B\":\n",
" QWEN3_CONFIG = {\n",
" \"vocab_size\": 151_936,\n",
" \"context_length\": 40_960,\n",
" \"emb_dim\": 4096, # 60% larger than above\n",
" \"n_heads\": 32,\n",
" \"n_layers\": 36, # 26% larger than above\n",
" \"hidden_dim\": 12288,\n",
" \"head_dim\": 128,\n",
" \"qk_norm\": True,\n",
" \"n_kv_groups\": 8,\n",
" \"rope_base\": 1_000_000.0,\n",
" \"dtype\": torch.bfloat16,\n",
" } \n",
"\n",
"elif CHOOSE_MODEL == \"14B\":\n",
" QWEN3_CONFIG = {\n",
" \"vocab_size\": 151_936,\n",