From 670f7a4dd0b541acca9262e2b1447ce3bbab7c0d Mon Sep 17 00:00:00 2001 From: casinca <47400729+casinca@users.noreply.github.com> Date: Fri, 22 Aug 2025 22:03:47 +0200 Subject: [PATCH] - added (missing) Gemma3 bullet point in parent folder's `readme.md` (#788) - typo in nbs --- ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb | 4 ++-- ch05/12_gemma3/standalone-gemma3.ipynb | 8 ++++---- ch05/README.md | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb b/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb index 16c5879..20d2bb0 100644 --- a/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb +++ b/ch05/12_gemma3/standalone-gemma3-plus-kvcache.ipynb @@ -427,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "e88de3e3-9f07-42cc-816b-28dbd46e96c4", "metadata": { "id": "e88de3e3-9f07-42cc-816b-28dbd46e96c4" @@ -451,7 +451,7 @@ " self.cfg = cfg\n", " self.current_pos = 0 # Track current position in KV cache\n", "\n", - " # Reusuable utilities\n", + " # Reusable utilities\n", " cos_local, sin_local = compute_rope_params(\n", " head_dim=cfg[\"head_dim\"],\n", " theta_base=cfg[\"rope_local_base\"],\n", diff --git a/ch05/12_gemma3/standalone-gemma3.ipynb b/ch05/12_gemma3/standalone-gemma3.ipynb index 9fa65ab..4e5bc34 100644 --- a/ch05/12_gemma3/standalone-gemma3.ipynb +++ b/ch05/12_gemma3/standalone-gemma3.ipynb @@ -386,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "e88de3e3-9f07-42cc-816b-28dbd46e96c4", "metadata": { "id": "e88de3e3-9f07-42cc-816b-28dbd46e96c4" @@ -409,7 +409,7 @@ " self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False, dtype=cfg[\"dtype\"])\n", " self.cfg = cfg\n", "\n", - " # Reusuable utilities \n", + " # Reusable utilities \n", " cos_local, sin_local = compute_rope_params(\n", " head_dim=cfg[\"head_dim\"],\n", " theta_base=cfg[\"rope_local_base\"],\n", @@ -1176,7 +1176,7 
@@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -1190,7 +1190,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/ch05/README.md b/ch05/README.md index c315ffc..a6cefbc 100644 --- a/ch05/README.md +++ b/ch05/README.md @@ -18,6 +18,7 @@ - [09_extending-tokenizers](09_extending-tokenizers) contains a from-scratch implementation of the GPT-2 BPE tokenizer - [10_llm-training-speed](10_llm-training-speed) shows PyTorch performance tips to improve the LLM training speed - [11_qwen3](11_qwen3) A from-scratch implementation of Qwen3 0.6B and Qwen3 30B-A3B (Mixture-of-Experts) including code to load the pretrained weights of the base, reasoning, and coding model variants +- [12_gemma3](12_gemma3) A from-scratch implementation of Gemma 3 270M and an alternative with KV cache, including code to load the pretrained weights