Batched KV Cache Inference for Qwen3 (#735)

This commit is contained in:
Sebastian Raschka
2025-07-10 08:09:35 -05:00
committed by GitHub
parent 7dc1dcbe27
commit a200698698
8 changed files with 506 additions and 6 deletions

View File

@@ -161,6 +161,10 @@ from llms_from_scratch.qwen3 import (
# KV cache drop-in replacements
from llms_from_scratch.kv_cache.qwen3 import Qwen3Model
from llms_from_scratch.kv_cache.generate import generate_text_simple
# KV cache drop-in replacements with batched inference support
# (alternative to the imports above — if both are used in the same file,
# these batched versions shadow the non-batched ones)
from llms_from_scratch.kv_cache_batched.generate import generate_text_simple
from llms_from_scratch.kv_cache_batched.qwen3 import Qwen3Model
```
For the `llms_from_scratch.qwen3` usage information, please see [this bonus section](../../ch05/11_qwen3/README.md).