diff --git a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb
index fac9a55..b79af45 100644
--- a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb
+++ b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb
@@ -82,8 +82,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "huggingface_hub version: 0.34.4\n",
- "tokenizers version: 0.21.4\n",
+ "huggingface_hub version: 0.35.3\n",
+ "tokenizers version: 0.22.1\n",
"torch version: 2.8.0\n"
]
}
@@ -105,7 +105,12 @@
"id": "07e96fbb-8e16-4f6d-835f-c6159321280b",
"metadata": {},
"source": [
- "- This notebook supports both the base model and the reasoning (\"thinking\") model; which model to use can be controlled via the following flag:"
+ "- Note that there are two models, the \"base\" and the \"hybrid\" model, and the hybrid model can be used as either a reasoning or a regular instruction-following model:\n",
+ "- In short, the model types are as follows:\n",
+ " - `base`: the pretrained base model; note that the Qwen3 pretraining contained some reasoning data (chain-of-thought data), so the model sometimes emits reasoning traces even though it didn't undergo the reasoning training (reinforcement learning) stages\n",
+ " - `hybrid` \n",
+    "    - `reasoning`: emits long reasoning traces inside `<think>` tags\n",
+    "    - `instruct`: the same as above, but long reasoning traces can be suppressed by manually adding an empty `<think></think>` block (this is done by the tokenizer); this way, the model acts like a regular instruction-following model"
]
},
{
@@ -115,14 +120,15 @@
"metadata": {},
"outputs": [],
"source": [
- "USE_REASONING_MODEL = True\n",
- "# Uses the base model if USE_REASONING_MODEL = False\n",
+    "# Select which model to use via the following flags; only one can be True\n",
"\n",
+ "USE_BASE_MODEL = False\n",
+    "USE_REASONING_MODEL = True\n",
"USE_INSTRUCT_MODEL = False\n",
- "# Uses the instruct mode (without reasoning) if \n",
- "# USE_REASONING_MODEL = True\n",
- "# USE_INSTRUCT_MODEL = True\n",
- "# This setting does have no effect if USE_REASONING_MODEL = False"
+ "\n",
+ "if (USE_BASE_MODEL + USE_REASONING_MODEL\n",
+ " + USE_INSTRUCT_MODEL) != 1:\n",
+    "    raise ValueError(\"Exactly one of the options above must be True.\")"
]
},
{
@@ -916,7 +922,16 @@
"id": "699cb1b8-a67d-49fb-80a6-0dad9d81f392",
"outputId": "55b2f28c-142f-4698-9d23-d27456d3ed6d"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/sebastian/Developer/LLMs-from-scratch/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
"source": [
"import json\n",
"import os\n",
@@ -925,7 +940,7 @@
"from huggingface_hub import hf_hub_download, snapshot_download\n",
"\n",
"\n",
- "if USE_REASONING_MODEL:\n",
+ "if USE_REASONING_MODEL or USE_INSTRUCT_MODEL:\n",
" repo_id = f\"Qwen/Qwen3-{CHOOSE_MODEL}\"\n",
"else:\n",
" repo_id = f\"Qwen/Qwen3-{CHOOSE_MODEL}-Base\"\n",
@@ -1064,13 +1079,23 @@
" local_dir=local_dir,\n",
")\n",
"\n",
- "tokenizer = Qwen3Tokenizer(\n",
- " tokenizer_file_path=tokenizer_file_path,\n",
- " repo_id=repo_id,\n",
- " apply_chat_template=USE_REASONING_MODEL,\n",
- " add_generation_prompt=USE_REASONING_MODEL,\n",
- " add_thinking=not USE_INSTRUCT_MODEL\n",
- ")"
+ "if USE_REASONING_MODEL or USE_INSTRUCT_MODEL:\n",
+ " tokenizer = Qwen3Tokenizer(\n",
+ " tokenizer_file_path=tokenizer_file_path,\n",
+ " repo_id=repo_id,\n",
+ " apply_chat_template=True,\n",
+ " add_generation_prompt=True,\n",
+ " add_thinking=USE_REASONING_MODEL\n",
+ " )\n",
+ "\n",
+ "else:\n",
+ " tokenizer = Qwen3Tokenizer(\n",
+ " tokenizer_file_path=tokenizer_file_path,\n",
+ " repo_id=repo_id,\n",
+ " apply_chat_template=False,\n",
+ " add_generation_prompt=False,\n",
+ " add_thinking=False\n",
+ " )"
]
},
{
@@ -1228,7 +1253,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.16"
+ "version": "3.13.5"
}
},
"nbformat": 4,
diff --git a/ch05/11_qwen3/standalone-qwen3.ipynb b/ch05/11_qwen3/standalone-qwen3.ipynb
index 911eccb..156854f 100644
--- a/ch05/11_qwen3/standalone-qwen3.ipynb
+++ b/ch05/11_qwen3/standalone-qwen3.ipynb
@@ -103,7 +103,12 @@
"id": "07e96fbb-8e16-4f6d-835f-c6159321280b",
"metadata": {},
"source": [
- "- This notebook supports both the base model and the reasoning (\"thinking\") model; which model to use can be controlled via the following flag:"
+ "- Note that there are two models, the \"base\" and the \"hybrid\" model, and the hybrid model can be used as either a reasoning or a regular instruction-following model:\n",
+ "- In short, the model types are as follows:\n",
+ " - `base`: the pretrained base model; note that the Qwen3 pretraining contained some reasoning data (chain-of-thought data), so the model sometimes emits reasoning traces even though it didn't undergo the reasoning training (reinforcement learning) stages\n",
+ " - `hybrid` \n",
+    "    - `reasoning`: emits long reasoning traces inside `<think>` tags\n",
+    "    - `instruct`: the same as above, but long reasoning traces can be suppressed by manually adding an empty `<think></think>` block (this is done by the tokenizer); this way, the model acts like a regular instruction-following model"
]
},
{
@@ -113,14 +118,15 @@
"metadata": {},
"outputs": [],
"source": [
- "USE_REASONING_MODEL = True\n",
- "# Uses the base model if USE_REASONING_MODEL = False\n",
+    "# Select which model to use via the following flags; only one can be True\n",
"\n",
+ "USE_BASE_MODEL = False\n",
+    "USE_REASONING_MODEL = True\n",
"USE_INSTRUCT_MODEL = False\n",
- "# Uses the instruct mode (without reasoning) if \n",
- "# USE_REASONING_MODEL = True\n",
- "# USE_INSTRUCT_MODEL = True\n",
- "# This setting does have no effect if USE_REASONING_MODEL = False"
+ "\n",
+ "if (USE_BASE_MODEL + USE_REASONING_MODEL\n",
+ " + USE_INSTRUCT_MODEL) != 1:\n",
+    "    raise ValueError(\"Exactly one of the options above must be True.\")"
]
},
{
@@ -867,7 +873,7 @@
"from huggingface_hub import hf_hub_download, snapshot_download\n",
"\n",
"\n",
- "if USE_REASONING_MODEL:\n",
+ "if USE_REASONING_MODEL or USE_INSTRUCT_MODEL:\n",
" repo_id = f\"Qwen/Qwen3-{CHOOSE_MODEL}\"\n",
"else:\n",
" repo_id = f\"Qwen/Qwen3-{CHOOSE_MODEL}-Base\"\n",
@@ -1006,13 +1012,23 @@
" local_dir=local_dir,\n",
")\n",
"\n",
- "tokenizer = Qwen3Tokenizer(\n",
- " tokenizer_file_path=tokenizer_file_path,\n",
- " repo_id=repo_id,\n",
- " apply_chat_template=USE_REASONING_MODEL,\n",
- " add_generation_prompt=USE_REASONING_MODEL,\n",
- " add_thinking=not USE_INSTRUCT_MODEL\n",
- ")"
+ "if USE_REASONING_MODEL or USE_INSTRUCT_MODEL:\n",
+ " tokenizer = Qwen3Tokenizer(\n",
+ " tokenizer_file_path=tokenizer_file_path,\n",
+ " repo_id=repo_id,\n",
+ " apply_chat_template=True,\n",
+ " add_generation_prompt=True,\n",
+ " add_thinking=USE_REASONING_MODEL\n",
+ " )\n",
+ "\n",
+ "else:\n",
+ " tokenizer = Qwen3Tokenizer(\n",
+ " tokenizer_file_path=tokenizer_file_path,\n",
+ " repo_id=repo_id,\n",
+ " apply_chat_template=False,\n",
+ " add_generation_prompt=False,\n",
+ " add_thinking=False\n",
+ " )"
]
},
{
@@ -1163,7 +1179,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.16"
+ "version": "3.13.5"
}
},
"nbformat": 4,