From 186e83c579b87c0ce559857b8774ec02020d70c1 Mon Sep 17 00:00:00 2001 From: casinca <47400729+casinca@users.noreply.github.com> Date: Tue, 16 Sep 2025 09:43:01 +0200 Subject: [PATCH] Revert "prevent `self.apply_chat_template` being applied for base Qwen models" This reverts commit df504397a8957886c6d6d808615545e37ceffcad. --- ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb | 11 +++++------ ch05/11_qwen3/standalone-qwen3-moe.ipynb | 11 +++++------ ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb | 11 +++++------ ch05/11_qwen3/standalone-qwen3.ipynb | 11 +++++------ pkg/llms_from_scratch/qwen3.py | 5 ++--- 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb index 78fdcd9..36f8f9d 100644 --- a/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb @@ -973,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -1000,7 +1000,6 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", - " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -1013,7 +1012,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if self.repo_id and \"Base\" not in self.repo_id:\n", + " if repo_id and \"Base\" not in repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -1021,7 +1020,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", + " if chat_wrapped is None:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1240,7 +1239,7 @@ "provenance": [] }, "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1254,7 +1253,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.6" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/ch05/11_qwen3/standalone-qwen3-moe.ipynb b/ch05/11_qwen3/standalone-qwen3-moe.ipynb index 5d36d36..5c1a402 100644 --- a/ch05/11_qwen3/standalone-qwen3-moe.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-moe.ipynb @@ -915,7 +915,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -942,7 +942,6 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", - " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -955,7 +954,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if self.repo_id and \"Base\" not in self.repo_id:\n", + " if repo_id and \"Base\" not in repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -963,7 +962,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", + " if chat_wrapped is None:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1222,7 +1221,7 @@ "provenance": [] }, "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1236,7 +1235,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.6" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb index f975125..ca9e15e 100644 --- a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb @@ -960,7 +960,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -987,7 +987,6 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", - " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -1000,7 +999,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if self.repo_id and \"Base\" not in self.repo_id:\n", + " if repo_id and \"Base\" not in repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -1008,7 +1007,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", + " if chat_wrapped is None:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1207,7 +1206,7 @@ "provenance": [] }, "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1221,7 +1220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.6" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/ch05/11_qwen3/standalone-qwen3.ipynb b/ch05/11_qwen3/standalone-qwen3.ipynb index 8169b11..0302990 100644 --- a/ch05/11_qwen3/standalone-qwen3.ipynb +++ b/ch05/11_qwen3/standalone-qwen3.ipynb @@ -902,7 +902,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "b68ab489-48e5-471e-a814-56cda2d60f81", "metadata": {}, "outputs": [], @@ -929,7 +929,6 @@ " self.apply_chat_template = apply_chat_template\n", " self.add_generation_prompt = add_generation_prompt\n", " self.add_thinking = add_thinking\n", - " self.repo_id=repo_id\n", "\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", @@ -942,7 +941,7 @@ " self.pad_token_id = self._special_to_id[\"<|endoftext|>\"]\n", " self.eos_token_id = self.pad_token_id\n", "\n", - " if self.repo_id and \"Base\" not in self.repo_id:\n", + " if repo_id and \"Base\" not in repo_id:\n", " eos_token = \"<|im_end|>\"\n", " else:\n", " eos_token = \"<|endoftext|>\"\n", @@ -950,7 +949,7 @@ " self.eos_token_id = self._special_to_id[eos_token]\n", "\n", " def encode(self, text, chat_wrapped=None):\n", - " if chat_wrapped is None and self.repo_id and \"Base\" not in self.repo_id:\n", + " if chat_wrapped is None:\n", " chat_wrapped = self.apply_chat_template\n", "\n", " stripped = text.strip()\n", @@ -1142,7 +1141,7 @@ "provenance": [] }, "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1156,7 +1155,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.6" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/pkg/llms_from_scratch/qwen3.py b/pkg/llms_from_scratch/qwen3.py index e9a54ab..8ae2a02 100644 --- a/pkg/llms_from_scratch/qwen3.py +++ b/pkg/llms_from_scratch/qwen3.py @@ -531,7 +531,6 @@ class Qwen3Tokenizer: self.apply_chat_template = apply_chat_template self.add_generation_prompt = add_generation_prompt self.add_thinking = add_thinking - self.repo_id=repo_id tok_file = Path(tokenizer_file_path) if not tok_file.is_file() and repo_id: @@ -550,7 +549,7 @@ class Qwen3Tokenizer: self.pad_token_id = self._special_to_id["<|endoftext|>"] self.eos_token_id = self.pad_token_id - if self.repo_id and "Base" not in self.repo_id: + if repo_id and "Base" not in repo_id: eos_token = "<|im_end|>" else: eos_token = "<|endoftext|>" @@ -558,7 +557,7 @@ class Qwen3Tokenizer: self.eos_token_id = self._special_to_id[eos_token] def encode(self, text, chat_wrapped=None): - if chat_wrapped is None and self.repo_id and "Base" not in self.repo_id: + if chat_wrapped is None: chat_wrapped = self.apply_chat_template stripped = text.strip()