Batched KV Cache Inference for Qwen3 (#735)

This commit is contained in:
Sebastian Raschka
2025-07-10 08:09:35 -05:00
committed by GitHub
parent b8c8237251
commit a354555049
8 changed files with 506 additions and 6 deletions

View File

@@ -0,0 +1,24 @@
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
class KVCache:
    """Per-layer, per-sequence key/value cache for batched LLM inference.

    Holds one cache slot for every (layer, batch-element) pair. Slots start
    out as ``None`` and are filled by callers via :meth:`update`.
    """

    def __init__(self, n_layers, batch_size):
        # One row per transformer layer, one slot per batch element.
        # None multiplication is safe here: None is immutable, so the
        # shared references in each row can never alias mutable state.
        self.cache = [[None] * batch_size for _ in range(n_layers)]

    def get(self, layer_idx, batch_idx):
        """Return the cached value for one (layer, batch) slot, or None if unset."""
        return self.cache[layer_idx][batch_idx]

    def update(self, layer_idx, batch_idx, value):
        """Store *value* in the given (layer, batch) slot, replacing any old entry."""
        self.cache[layer_idx][batch_idx] = value

    def get_layer(self, layer_idx):
        """Return the live list of per-batch entries for one layer (not a copy)."""
        return self.cache[layer_idx]

    def reset(self):
        """Clear every slot to None in place.

        Mutates the existing inner lists rather than rebuilding them, so any
        references previously handed out by :meth:`get_layer` stay valid and
        observe the cleared state.
        """
        for row in self.cache:
            for slot in range(len(row)):
                row[slot] = None