Qwen3 KV cache (#688)

2026-04-10 12:33:42 +00:00 · 2025-06-21 17:34:39 -05:00
parent 2a530b49fe
commit 0b15a00574
8 changed files with 370 additions and 11 deletions
--- a/pkg/llms_from_scratch/README.md
+++ b/pkg/llms_from_scratch/README.md
@@ -109,7 +109,6 @@ from llms_from_scratch.ch07 import (
 from llms_from_scratch.appendix_a import NeuralNetwork, ToyDataset

 from llms_from_scratch.appendix_d import find_highest_gradient, train_model
-
 ```


@@ -140,11 +139,14 @@ from llms_from_scratch.llama3 import (
    clean_text
 )

+# KV cache drop-in replacements
 from llms_from_scratch.kv_cache.llama3 import Llama3Model
 from llms_from_scratch.kv_cache.generate import generate_text_simple
 ```

-For the `llms_from_scratch.llama3` usage information, please see [this bonus section](../../ch05/07_gpt_to_llama/README.md).
+For the `llms_from_scratch.llama3` usage information, please see [this bonus section](../../ch05/07_gpt_to_llama/README.md).
+
+For more information about KV caching, please see the [KV cache README](../../ch04/03_kv-cache).


 &nbsp;
@@ -155,7 +157,12 @@ from llms_from_scratch.qwen3 import (
    Qwen3Model,
    Qwen3Tokenizer,
 )
+
+# KV cache drop-in replacements
+from llms_from_scratch.kv_cache.qwen3 import Qwen3Model
+from llms_from_scratch.kv_cache.generate import generate_text_simple
 ```

-
 For the `llms_from_scratch.qwen3` usage information, please see [this bonus section](../../ch05/11_qwen3/README.md).
+
+For more information about KV caching, please see the [KV cache README](../../ch04/03_kv-cache).