From 8447d70b185dfd34ad60a1cc9e6d10e92314e907 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Sun, 5 Apr 2026 22:05:05 -0400 Subject: [PATCH] Some gemma 3 improvements (#1000) * some gemma 3 improvements * update url --- .../standalone-gemma3-plus-kvcache.ipynb | 60 +++- ch05/12_gemma3/standalone-gemma3.ipynb | 96 ++++-- .../tests/gemma3-transformers-ref.ipynb | 161 +++++++++ ch05/12_gemma3/tests/gemma3_layer_debugger.py | 232 +++++++++++++ .../tests/gemma3_layer_debugger_detailed.py | 322 ++++++++++++++++++ ch05/12_gemma3/tests/test_gemma3_kv_nb.py | 1 + ch05/12_gemma3/tests/test_gemma3_nb.py | 1 + 7 files changed, 825 insertions(+), 48 deletions(-) create mode 100644 ch05/12_gemma3/tests/gemma3-transformers-ref.ipynb create mode 100644 ch05/12_gemma3/tests/gemma3_layer_debugger.py create mode 100644 ch05/12_gemma3/tests/gemma3_layer_debugger_detailed.py diff --git a/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb b/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb index 583d6ee..1195816 100644 --- a/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb +++ b/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb @@ -1137,20 +1137,54 @@ " def __init__(self, tokenizer_file_path: str):\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", - " # Attempt to identify EOS and padding tokens\n", - " eos_token = \"\"\n", - " self.pad_token_id = eos_token\n", - " self.eos_token_id = eos_token\n", "\n", - " def encode(self, text: str) -> list[int]:\n", - " return self._tok.encode(text).ids\n", + " self.bos_token = \"\"\n", + " self.eos_token = \"\"\n", + " self.pad_token = \"\"\n", + " self.start_of_turn_token = \"\"\n", + " self.end_of_turn_token = \"\"\n", "\n", - " def decode(self, ids: list[int]) -> str:\n", - " return self._tok.decode(ids, skip_special_tokens=False)\n", + " self.bos_token_id = self._tok.token_to_id(self.bos_token)\n", + " self.eos_token_id = self._tok.token_to_id(self.eos_token)\n", + " self.pad_token_id = self._tok.token_to_id(self.pad_token)\n", + " self.start_of_turn_token_id = self._tok.token_to_id(self.start_of_turn_token)\n", + " self.end_of_turn_token_id = self._tok.token_to_id(self.end_of_turn_token)\n", + "\n", + " self.add_bos_token = True\n", + " self.add_eos_token = False\n", + " self.clean_up_tokenization_spaces = False\n", + "\n", + " def encode(self, text: str, add_special_tokens: bool = True) -> list[int]:\n", + " return self._tok.encode(text, add_special_tokens=add_special_tokens).ids\n", + "\n", + " def decode(self, ids: list[int] | int, skip_special_tokens: bool = False) -> str:\n", + " if isinstance(ids, int):\n", + " ids = [ids]\n", + " return self._tok.decode(ids, skip_special_tokens=skip_special_tokens)\n", + "\n", + " def apply_chat_template(self, messages, tokenize=False, add_generation_prompt=False):\n", + " text = \"\"\n", + " for message in messages:\n", + " role = message[\"role\"]\n", + " if role == \"assistant\":\n", + " role = \"model\"\n", + " content = message[\"content\"]\n", + " text += f\"{self.start_of_turn_token}{role}\\n{content}{self.end_of_turn_token}\\n\"\n", + "\n", + " if add_generation_prompt:\n", + " text += f\"{self.start_of_turn_token}model\\n\"\n", + "\n", + " if tokenize:\n", + " return self.encode(text)\n", + " return text\n", "\n", "\n", "def apply_chat_template(user_text):\n", - " return f\"user\\n{user_text}\\nmodel\\n\"" + " return tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": user_text}],\n", + " tokenize=False,\n", + " add_generation_prompt=True,\n", + " )" ] }, { @@ -1205,7 +1239,11 @@ ], "source": [ "prompt = \"Give me a short introduction to large language models.\"\n", - "prompt = apply_chat_template(\"Give me a short introduction to large language models.\")\n", + "prompt = tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": prompt}],\n", + " tokenize=False,\n", + " add_generation_prompt=True,\n", + ")\n", "\n", "\n", "input_token_ids = tokenizer.encode(prompt)\n", @@ -1297,7 +1335,7 @@ " model=model,\n", " token_ids=input_token_ids_tensor,\n", " max_new_tokens=500,\n", - " eos_token_id=tokenizer.encode(\"\")[-1]\n", + " eos_token_id=tokenizer.end_of_turn_token_id\n", "):\n", " token_id = token.squeeze(0).tolist()\n", " print(\n", diff --git a/ch05/12_gemma3/standalone-gemma3.ipynb b/ch05/12_gemma3/standalone-gemma3.ipynb index 16a1088..2b3bc88 100644 --- a/ch05/12_gemma3/standalone-gemma3.ipynb +++ b/ch05/12_gemma3/standalone-gemma3.ipynb @@ -77,9 +77,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "huggingface_hub version: 0.35.0\n", - "tokenizers version: 0.22.1\n", - "torch version: 2.9.0+cu130\n" + "huggingface_hub version: 1.3.4\n", + "tokenizers version: 0.22.2\n", + "torch version: 2.10.0\n" ] } ], @@ -627,9 +627,9 @@ { "data": { "text/plain": [ - "tensor([[[ 0.7500, 0.1011, 0.4863, ..., 0.9414, 0.3984, -0.2285],\n", - " [-0.3398, -0.0564, 0.9023, ..., -0.2480, 0.4551, 0.8203],\n", - " [-0.2695, -0.3242, 0.4121, ..., 0.8672, -0.9688, 0.9844]]],\n", + "tensor([[[ 0.7500, 0.1060, 0.4844, ..., 0.9414, 0.3984, -0.2324],\n", + " [-0.3438, -0.0549, 0.8984, ..., -0.2402, 0.4570, 0.8242],\n", + " [-0.2676, -0.3281, 0.4121, ..., 0.8711, -0.9648, 0.9844]]],\n", " dtype=torch.bfloat16, grad_fn=)" ] }, @@ -730,20 +730,7 @@ "metadata": { "id": "31f12baf-f79b-499f-85c0-51328a6a20f5" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/rasbt/jupyterlab/reasoning/.venv/lib/python3.12/site-packages/torch/cuda/__init__.py:283: UserWarning: \n", - " Found GPU0 NVIDIA GB10 which is of cuda capability 12.1.\n", - " Minimum and Maximum cuda capability supported by this version of PyTorch is\n", - " (8.0) - (12.0)\n", - " \n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "if torch.cuda.is_available():\n", " device = torch.device(\"cuda\")\n", @@ -1022,20 +1009,54 @@ " def __init__(self, tokenizer_file_path: str):\n", " tok_file = Path(tokenizer_file_path)\n", " self._tok = Tokenizer.from_file(str(tok_file))\n", - " # Attempt to identify EOS and padding tokens\n", - " eos_token = \"\"\n", - " self.pad_token_id = eos_token\n", - " self.eos_token_id = eos_token\n", "\n", - " def encode(self, text: str) -> list[int]:\n", - " return self._tok.encode(text).ids\n", + " self.bos_token = \"\"\n", + " self.eos_token = \"\"\n", + " self.pad_token = \"\"\n", + " self.start_of_turn_token = \"\"\n", + " self.end_of_turn_token = \"\"\n", "\n", - " def decode(self, ids: list[int]) -> str:\n", - " return self._tok.decode(ids, skip_special_tokens=False)\n", + " self.bos_token_id = self._tok.token_to_id(self.bos_token)\n", + " self.eos_token_id = self._tok.token_to_id(self.eos_token)\n", + " self.pad_token_id = self._tok.token_to_id(self.pad_token)\n", + " self.start_of_turn_token_id = self._tok.token_to_id(self.start_of_turn_token)\n", + " self.end_of_turn_token_id = self._tok.token_to_id(self.end_of_turn_token)\n", + "\n", + " self.add_bos_token = True\n", + " self.add_eos_token = False\n", + " self.clean_up_tokenization_spaces = False\n", + "\n", + " def encode(self, text: str, add_special_tokens: bool = True) -> list[int]:\n", + " return self._tok.encode(text, add_special_tokens=add_special_tokens).ids\n", + "\n", + " def decode(self, ids: list[int] | int, skip_special_tokens: bool = False) -> str:\n", + " if isinstance(ids, int):\n", + " ids = [ids]\n", + " return self._tok.decode(ids, skip_special_tokens=skip_special_tokens)\n", + "\n", + " def apply_chat_template(self, messages, tokenize=False, add_generation_prompt=False):\n", + " text = \"\"\n", + " for message in messages:\n", + " role = message[\"role\"]\n", + " if role == \"assistant\":\n", + " role = \"model\"\n", + " content = message[\"content\"]\n", + " text += f\"{self.start_of_turn_token}{role}\\n{content}{self.end_of_turn_token}\\n\"\n", + "\n", + " if add_generation_prompt:\n", + " text += f\"{self.start_of_turn_token}model\\n\"\n", + "\n", + " if tokenize:\n", + " return self.encode(text)\n", + " return text\n", "\n", "\n", "def apply_chat_template(user_text):\n", - " return f\"user\\n{user_text}\\nmodel\\n\"" + " return tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": user_text}],\n", + " tokenize=False,\n", + " add_generation_prompt=True,\n", + " )" ] }, { @@ -1075,7 +1096,11 @@ ], "source": [ "prompt = \"Give me a short introduction to large language models.\"\n", - "prompt = apply_chat_template(\"Give me a short introduction to large language models.\")\n", + "prompt = tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": prompt}],\n", + " tokenize=False,\n", + " add_generation_prompt=True,\n", + ")\n", "\n", "\n", "input_token_ids = tokenizer.encode(prompt)\n", @@ -1107,7 +1132,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "7b8401c6-e244-4cb7-9849-2ba71ce758d5", "metadata": { "id": "7b8401c6-e244-4cb7-9849-2ba71ce758d5" @@ -1133,7 +1158,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 25, "id": "1c7a04fa-6aac-416b-8f63-f1e19227633d", "metadata": { "id": "1c7a04fa-6aac-416b-8f63-f1e19227633d" @@ -1143,10 +1168,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Large language models (LLMs) are sophisticated artificial intelligence systems that can understand, generate, and manipulate human language. They are trained on massive amounts of text data to learn patterns and relationships within that data, enabling them to perform a wide range of tasks, from writing articles and answering questions to translating languages and summarizing information.\n", - "\n", - "\n", - "GPU memory used: 1.04 GB\n" + "Large language models (LLMs) are sophisticated artificial intelligence systems that can understand, generate, and manipulate human language. They are trained on massive amounts of text data to learn patterns and relationships within language, enabling them to perform a wide range of tasks, from writing articles and answering questions to translating languages and summarizing information.\n" ] } ], @@ -1162,7 +1184,7 @@ " model=model,\n", " token_ids=input_token_ids_tensor,\n", " max_new_tokens=500,\n", - " eos_token_id=tokenizer.encode(\"\")[-1]\n", + " eos_token_id=tokenizer.end_of_turn_token_id\n", "):\n", " token_id = token.squeeze(0).tolist()\n", " print(\n", diff --git a/ch05/12_gemma3/tests/gemma3-transformers-ref.ipynb b/ch05/12_gemma3/tests/gemma3-transformers-ref.ipynb new file mode 100644 index 0000000..f2d8e4d --- /dev/null +++ b/ch05/12_gemma3/tests/gemma3-transformers-ref.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "book-header", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "Supplementary code for the Build a Large Language Model From Scratch book by Sebastian Raschka
\n", + "
Code repository: https://github.com/rasbt/LLMs-from-scratch\n", + "
\n", + "
\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "title-cell", + "metadata": {}, + "source": [ + "# Gemma 3 270M With Hugging Face Transformers" + ] + }, + { + "cell_type": "markdown", + "id": "intro-cell", + "metadata": {}, + "source": [ + "- This notebook uses the minimal `AutoTokenizer` / `AutoModelForCausalLM` workflow from the Transformers tutorials.\n", + "- It uses the same user prompt as [standalone-gemma3.ipynb](../standalone-gemma3.ipynb): `Give me a short introduction to large language models.`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "install-cell", + "metadata": {}, + "outputs": [], + "source": [ + "# pip install transformers sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "login-cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment and run the following code if you are executing the notebook for the first time\n", + "\n", + "# from huggingface_hub import login\n", + "# login()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "load-cell", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c3b335b4a1da4658b90e1ef960de8b49", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading weights: 0%| | 0/236 [00:00