mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Batched KV Cache Inference for Qwen3 (#735)
This commit is contained in:
committed by
GitHub
parent
7dc1dcbe27
commit
a200698698
@@ -161,6 +161,10 @@ from llms_from_scratch.qwen3 import (
# KV cache drop-in replacements
from llms_from_scratch.kv_cache.qwen3 import Qwen3Model
from llms_from_scratch.kv_cache.generate import generate_text_simple

# KV cache drop-in replacements with batched inference support
from llms_from_scratch.kv_cache_batched.generate import generate_text_simple
from llms_from_scratch.kv_cache_batched.qwen3 import Qwen3Model
```

For the `llms_from_scratch.qwen3` usage information, please see [this bonus section](../../ch05/11_qwen3/README.md).

Reference in New Issue
Block a user