mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Qwen3Tokenizer fix for Qwen3 Base models and generation mismatch with HF (#828)
* prevent `self.apply_chat_template` being applied for base Qwen models * - added no chat template comparison in `test_chat_wrap_and_equivalence` - removed duplicate comparison * Revert "- added no chat template comparison in `test_chat_wrap_and_equivalence`" This reverts commit 3a5ee8cfa1. * Revert "prevent `self.apply_chat_template` being applied for base Qwen models" This reverts commit df504397a8. * copied `download_file` in `utils` from https://github.com/rasbt/reasoning-from-scratch/blob/main/reasoning_from_scratch/utils.py * added copy of test `def test_tokenizer_equivalence()` from `reasoning-from-scratch` in `test_qwen3.py` * removed duplicate code fragment in `test_chat_wrap_and_equivalence` * use apply_chat_template * add toggle for instruct model * Update tokenizer usage --------- Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
@@ -45,8 +45,14 @@ pip install llms_from_scratch tokenizers
|
||||
Specify which model to use:
|
||||
|
||||
```python
|
||||
USE_REASONING_MODEL = False # The base model
|
||||
USE_REASONING_MODEL = True # The "thinking" model
|
||||
USE_REASONING_MODEL = True
|
||||
# Uses the base model if USE_REASONING_MODEL = False
|
||||
|
||||
USE_INSTRUCT_MODEL = False
|
||||
# Uses the instruct model (without reasoning) if
|
||||
# USE_REASONING_MODEL = True
|
||||
# USE_INSTRUCT_MODEL = False
|
||||
# This setting has no effect if USE_REASONING_MODEL = False
|
||||
|
||||
|
||||
# Use
|
||||
@@ -187,10 +193,11 @@ else:
|
||||
tok_filename = "tokenizer-base.json"
|
||||
|
||||
tokenizer = Qwen3Tokenizer(
|
||||
tokenizer_file_path=tok_filename,
|
||||
tokenizer_file_path=tokenizer_file_path,
|
||||
repo_id=repo_id,
|
||||
apply_chat_template=USE_REASONING_MODEL,
|
||||
add_generation_prompt=USE_REASONING_MODEL,
|
||||
add_thinking=USE_REASONING_MODEL
|
||||
add_thinking=not USE_INSTRUCT_MODEL
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@@ -1064,6 +1064,7 @@
|
||||
"tokenizer = Qwen3Tokenizer(\n",
|
||||
" tokenizer_file_path=tokenizer_file_path,\n",
|
||||
" repo_id=repo_id,\n",
|
||||
" apply_chat_template=True,\n",
|
||||
" add_generation_prompt=True,\n",
|
||||
" add_thinking=True\n",
|
||||
")"
|
||||
|
||||
@@ -1006,6 +1006,7 @@
|
||||
"tokenizer = Qwen3Tokenizer(\n",
|
||||
" tokenizer_file_path=tokenizer_file_path,\n",
|
||||
" repo_id=repo_id,\n",
|
||||
" apply_chat_template=True,\n",
|
||||
" add_generation_prompt=True,\n",
|
||||
" add_thinking=True\n",
|
||||
")"
|
||||
|
||||
@@ -115,7 +115,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"USE_REASONING_MODEL = True"
|
||||
"USE_REASONING_MODEL = True\n",
|
||||
"# Uses the base model if USE_REASONING_MODEL = False\n",
|
||||
"\n",
|
||||
"USE_INSTRUCT_MODEL = False\n",
|
||||
"# Uses the instruct model (without reasoning) if \n",
|
||||
"# USE_REASONING_MODEL = True\n",
|
||||
"# USE_INSTRUCT_MODEL = False\n",
|
||||
"# This setting has no effect if USE_REASONING_MODEL = False"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1060,8 +1067,9 @@
|
||||
"tokenizer = Qwen3Tokenizer(\n",
|
||||
" tokenizer_file_path=tokenizer_file_path,\n",
|
||||
" repo_id=repo_id,\n",
|
||||
" apply_chat_template=USE_REASONING_MODEL,\n",
|
||||
" add_generation_prompt=USE_REASONING_MODEL,\n",
|
||||
" add_thinking=USE_REASONING_MODEL\n",
|
||||
" add_thinking=not USE_INSTRUCT_MODEL\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -113,7 +113,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"USE_REASONING_MODEL = True"
|
||||
"USE_REASONING_MODEL = True\n",
|
||||
"# Uses the base model if USE_REASONING_MODEL = False\n",
|
||||
"\n",
|
||||
"USE_INSTRUCT_MODEL = False\n",
|
||||
"# Uses the instruct model (without reasoning) if \n",
|
||||
"# USE_REASONING_MODEL = True\n",
|
||||
"# USE_INSTRUCT_MODEL = False\n",
|
||||
"# This setting has no effect if USE_REASONING_MODEL = False"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1002,8 +1009,9 @@
|
||||
"tokenizer = Qwen3Tokenizer(\n",
|
||||
" tokenizer_file_path=tokenizer_file_path,\n",
|
||||
" repo_id=repo_id,\n",
|
||||
" apply_chat_template=USE_REASONING_MODEL,\n",
|
||||
" add_generation_prompt=USE_REASONING_MODEL,\n",
|
||||
" add_thinking=USE_REASONING_MODEL\n",
|
||||
" add_thinking=not USE_INSTRUCT_MODEL\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user