mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Batched KV Cache Inference for Qwen3 (#735)
This commit is contained in:
committed by
GitHub
parent
b8c8237251
commit
a354555049
24
pkg/llms_from_scratch/kv_cache_batched/utils.py
Normal file
24
pkg/llms_from_scratch/kv_cache_batched/utils.py
Normal file
@@ -0,0 +1,24 @@
|
||||
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
|
||||
# Source for "Build a Large Language Model From Scratch"
|
||||
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
|
||||
# Code: https://github.com/rasbt/LLMs-from-scratch
|
||||
|
||||
class KVCache:
    """Per-layer, per-sequence key/value cache for batched inference.

    Maintains an ``n_layers x batch_size`` grid of slots, each holding the
    cached KV entry for one (layer, sequence) pair, or ``None`` when empty.
    """

    def __init__(self, n_layers, batch_size):
        # One row per transformer layer, one slot per batch element.
        # None * batch_size is safe here: slots are overwritten wholesale
        # via update(), never mutated through a shared object.
        self.cache = [[None] * batch_size for _ in range(n_layers)]

    def get(self, layer_idx, batch_idx):
        """Return the cached entry for one (layer, sequence) slot."""
        return self.cache[layer_idx][batch_idx]

    def update(self, layer_idx, batch_idx, value):
        """Store *value* in the slot for the given layer and sequence."""
        self.cache[layer_idx][batch_idx] = value

    def get_layer(self, layer_idx):
        """Return the live list of slots for one layer (not a copy)."""
        return self.cache[layer_idx]

    def reset(self):
        """Clear every slot back to None, keeping the row lists alive.

        Slice assignment mutates each row in place, so references handed
        out by get_layer() remain valid after a reset.
        """
        for row in self.cache:
            row[:] = [None] * len(row)
|
||||
Reference in New Issue
Block a user