From df504397a8957886c6d6d808615545e37ceffcad Mon Sep 17 00:00:00 2001 From: casinca <47400729+casinca@users.noreply.github.com> Date: Mon, 15 Sep 2025 16:26:17 +0200 Subject: [PATCH] prevent `self.apply_chat_template` being applied for base Qwen models --- ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb | 11 ++++++----- ch05/11_qwen3/standalone-qwen3-moe.ipynb | 11 ++++++----- ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb | 11 ++++++----- ch05/11_qwen3/standalone-qwen3.ipynb | 11 ++++++----- pkg/llms_from_scratch/qwen3.py | 5 +++-- 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb index 3ce53ad..6295469 100644 --- a/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb @@ -973,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -1000,6 +1000,7 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", + " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -1012,7 +1013,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if repo_id and \"Base\" not in repo_id:\n", + " if self.repo_id and \"Base\" not in self.repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -1020,7 +1021,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None:\n", + " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1239,7 +1240,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -1253,7 +1254,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/ch05/11_qwen3/standalone-qwen3-moe.ipynb b/ch05/11_qwen3/standalone-qwen3-moe.ipynb index a979538..c160694 100644 --- a/ch05/11_qwen3/standalone-qwen3-moe.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-moe.ipynb @@ -915,7 +915,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -942,6 +942,7 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", + " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -954,7 +955,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if repo_id and \"Base\" not in repo_id:\n", + " if self.repo_id and \"Base\" not in self.repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -962,7 +963,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None:\n", + " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1221,7 +1222,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -1235,7 +1236,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb index 2753be4..380c0f3 100644 --- a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb @@ -960,7 +960,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -987,6 +987,7 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", + " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -999,7 +1000,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if repo_id and \"Base\" not in repo_id:\n", + " if self.repo_id and \"Base\" not in self.repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -1007,7 +1008,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None:\n", + " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1206,7 +1207,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -1220,7 +1221,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/ch05/11_qwen3/standalone-qwen3.ipynb b/ch05/11_qwen3/standalone-qwen3.ipynb index 6bd38d3..0cf6507 100644 --- a/ch05/11_qwen3/standalone-qwen3.ipynb +++ b/ch05/11_qwen3/standalone-qwen3.ipynb @@ -902,7 +902,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -929,6 +929,7 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", + " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -941,7 +942,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if repo_id and \"Base\" not in repo_id:\n", + " if self.repo_id and \"Base\" not in self.repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -949,7 +950,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None:\n", + " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1141,7 +1142,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -1155,7 +1156,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/pkg/llms_from_scratch/qwen3.py b/pkg/llms_from_scratch/qwen3.py index a68b324..51737db 100644 --- a/pkg/llms_from_scratch/qwen3.py +++ b/pkg/llms_from_scratch/qwen3.py @@ -531,6 +531,7 @@ class Qwen3Tokenizer: self.apply_chat_template = apply_chat_template self.add_generation_prompt = add_generation_prompt self.add_thinking = add_thinking + self.repo_id=repo_id tok_file = Path(tokenizer_file_path) if not tok_file.is_file() and repo_id: @@ -549,7 +550,7 @@ class Qwen3Tokenizer: self.pad_token_id = self._special_to_id["<|endoftext|>"] self.eos_token_id = self.pad_token_id - if repo_id and "Base" not in repo_id: + if self.repo_id and "Base" not in self.repo_id: eos_token = "<|im_end|>" else: eos_token = "<|endoftext|>" @@ -557,7 +558,7 @@ class Qwen3Tokenizer: self.eos_token_id = self._special_to_id[eos_token] def encode(self, text, chat_wrapped=None): - if chat_wrapped is None: + if chat_wrapped is None and self.repo_id and "Base" not in self.repo_id: chat_wrapped = self.apply_chat_template stripped = text.strip()