Qwen3 KV cache (#688)

This commit is contained in:
Sebastian Raschka
2025-06-21 17:34:39 -05:00
committed by GitHub
parent 2a530b49fe
commit 0b15a00574
8 changed files with 370 additions and 11 deletions

View File

@@ -109,7 +109,6 @@ from llms_from_scratch.ch07 import (
from llms_from_scratch.appendix_a import NeuralNetwork, ToyDataset
from llms_from_scratch.appendix_d import find_highest_gradient, train_model
```
@@ -140,11 +139,14 @@ from llms_from_scratch.llama3 import (
clean_text
)
# KV cache drop-in replacements
from llms_from_scratch.kv_cache.llama3 import Llama3Model
from llms_from_scratch.kv_cache.generate import generate_text_simple
```
For the `llms_from_scratch.llama3` usage information, please see [this bonus section](../../ch05/07_gpt_to_llama/README.md).
For more information about KV caching, please see the [KV cache README](../../ch04/03_kv-cache).
 
@@ -155,7 +157,12 @@ from llms_from_scratch.qwen3 import (
Qwen3Model,
Qwen3Tokenizer,
)
# KV cache drop-in replacements
# (note: this Qwen3Model import rebinds the name imported from llms_from_scratch.qwen3 above)
from llms_from_scratch.kv_cache.qwen3 import Qwen3Model
from llms_from_scratch.kv_cache.generate import generate_text_simple
```
For the `llms_from_scratch.qwen3` usage information, please see [this bonus section](../../ch05/11_qwen3/README.md).
For more information about KV caching, please see the [KV cache README](../../ch04/03_kv-cache).