Add GPT-2 KV cache to pkg (#687)

Author: Sebastian Raschka
Date: 2025-06-21 12:29:04 -05:00
Committed by: GitHub
Parent: 3be0f3202a
Commit: fdc3e1b701
4 changed files with 315 additions and 5 deletions


@@ -80,8 +80,6 @@ class MultiHeadAttention(nn.Module):
            keys, values = keys_new, values_new
            self.ptr_cur = 0  # keep pointer sane if you interleave modes
        ####################################################
        # Compute scaled dot-product attention (aka self-attention) with a causal mask
        attn_scores = queries @ keys.transpose(2, 3)  # Dot product for each head
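
For orientation, here is a minimal self-contained sketch of how a KV-cached forward pass along these lines might fit together. The names `ptr_cur`, `keys_new`/`values_new`, and `attn_scores` come from the diff above; everything else (the `cache_k`/`cache_v` buffers, the `use_cache` flag, `max_seq_len`, and the constructor signature) is a hypothetical reconstruction for illustration, not the code actually added in this commit.

    import torch
    import torch.nn as nn

    class MultiHeadAttention(nn.Module):
        def __init__(self, d_in, d_out, num_heads, max_seq_len=1024):
            super().__init__()
            assert d_out % num_heads == 0, "d_out must be divisible by num_heads"
            self.num_heads = num_heads
            self.head_dim = d_out // num_heads
            self.W_query = nn.Linear(d_in, d_out, bias=False)
            self.W_key = nn.Linear(d_in, d_out, bias=False)
            self.W_value = nn.Linear(d_in, d_out, bias=False)
            self.out_proj = nn.Linear(d_out, d_out)
            self.max_seq_len = max_seq_len  # assumed cache capacity
            self.cache_k = None  # hypothetical preallocated key cache
            self.cache_v = None  # hypothetical preallocated value cache
            self.ptr_cur = 0     # next write position in the cache

        def forward(self, x, use_cache=False):
            b, num_tokens, _ = x.shape
            # Project and split into heads: (b, num_heads, num_tokens, head_dim)
            queries = self.W_query(x).view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)
            keys_new = self.W_key(x).view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)
            values_new = self.W_value(x).view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)

            if use_cache:
                if self.cache_k is None:  # lazily allocate on first use (assumes a fixed batch size)
                    shape = (b, self.num_heads, self.max_seq_len, self.head_dim)
                    self.cache_k = torch.zeros(shape, device=x.device, dtype=keys_new.dtype)
                    self.cache_v = torch.zeros_like(self.cache_k)
                    self.ptr_cur = 0
                # Write the new keys/values at the current pointer, then advance it
                self.cache_k[:, :, self.ptr_cur:self.ptr_cur + num_tokens] = keys_new
                self.cache_v[:, :, self.ptr_cur:self.ptr_cur + num_tokens] = values_new
                self.ptr_cur += num_tokens
                keys = self.cache_k[:, :, :self.ptr_cur]
                values = self.cache_v[:, :, :self.ptr_cur]
            else:
                keys, values = keys_new, values_new
                self.ptr_cur = 0  # keep pointer sane if you interleave modes

            # Compute scaled dot-product attention with a causal mask
            attn_scores = queries @ keys.transpose(2, 3)  # dot product for each head
            total_len = keys.shape[2]
            # Each query may attend only to key positions at or before its own
            # absolute position (queries are offset by the cached prefix length)
            q_pos = torch.arange(num_tokens, device=x.device).unsqueeze(1) + (total_len - num_tokens)
            k_pos = torch.arange(total_len, device=x.device).unsqueeze(0)
            attn_scores.masked_fill_(q_pos < k_pos, -torch.inf)
            attn_weights = torch.softmax(attn_scores / self.head_dim**0.5, dim=-1)
            context = (attn_weights @ values).transpose(1, 2).reshape(b, num_tokens, -1)
            return self.out_proj(context)

Usage under these assumptions would be one prefill call for the prompt followed by one call per generated token, e.g. `mha(prompt_emb, use_cache=True)` and then `mha(next_token_emb, use_cache=True)`: each decode step projects only the new token while attending over all cached keys and values, which is what makes cached generation cheaper than recomputing the full sequence.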