Batched KV Cache Inference for Qwen3 (#735)

This commit is contained in:
Sebastian Raschka
2025-07-10 08:09:35 -05:00
committed by GitHub
parent b8c8237251
commit a354555049
8 changed files with 506 additions and 6 deletions

View File

@@ -0,0 +1,24 @@
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
class KVCache:
    """Per-layer, per-sequence key/value cache for batched LLM inference.

    Holds one cache slot for every (layer, batch-element) pair. Slots start
    out as ``None`` and are filled by callers via :meth:`update`.
    """

    def __init__(self, n_layers, batch_size):
        # One row per transformer layer, one slot per batch element.
        # None multiplication is safe here: None is immutable, so the
        # shared references in each row can never alias mutable state.
        self.cache = [[None] * batch_size for _ in range(n_layers)]

    def get(self, layer_idx, batch_idx):
        """Return the cached value for one (layer, batch) slot, or None if unset."""
        return self.cache[layer_idx][batch_idx]

    def update(self, layer_idx, batch_idx, value):
        """Store *value* in the given (layer, batch) slot, replacing any old entry."""
        self.cache[layer_idx][batch_idx] = value

    def get_layer(self, layer_idx):
        """Return the live list of per-batch entries for one layer (not a copy)."""
        return self.cache[layer_idx]

    def reset(self):
        """Clear every slot to None in place.

        Mutates the existing inner lists rather than rebuilding them, so any
        references previously handed out by :meth:`get_layer` stay valid and
        observe the cleared state.
        """
        for row in self.cache:
            for slot in range(len(row)):
                row[slot] = None