Add GPT-2 KV cache to pkg (#687)

Author: Sebastian Raschka
Date: 2025-06-21 12:29:04 -05:00
Committed by: GitHub
Parent: 3be0f3202a
Commit: fdc3e1b701
4 changed files with 315 additions and 5 deletions


@@ -80,8 +80,6 @@ class MultiHeadAttention(nn.Module):
            keys, values = keys_new, values_new
            self.ptr_cur = 0  # keep pointer sane if you interleave modes
        ####################################################
        # Compute scaled dot-product attention (aka self-attention) with a causal mask
        attn_scores = queries @ keys.transpose(2, 3)  # Dot product for each head
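
For orientation, here is a minimal self-contained sketch of how a KV-cached forward pass along these lines might fit together. The names `ptr_cur`, `keys_new`/`values_new`, and `attn_scores` come from the diff above; everything else (the `cache_k`/`cache_v` buffers, the `use_cache` flag, `max_seq_len`, and the constructor signature) is a hypothetical reconstruction for illustration, not the code actually added in this commit.

    import torch
    import torch.nn as nn

    class MultiHeadAttention(nn.Module):
        def __init__(self, d_in, d_out, num_heads, max_seq_len=1024):
            super().__init__()
            assert d_out % num_heads == 0, "d_out must be divisible by num_heads"
            self.num_heads = num_heads
            self.head_dim = d_out // num_heads
            self.W_query = nn.Linear(d_in, d_out, bias=False)
            self.W_key = nn.Linear(d_in, d_out, bias=False)
            self.W_value = nn.Linear(d_in, d_out, bias=False)
            self.out_proj = nn.Linear(d_out, d_out)
            self.max_seq_len = max_seq_len  # assumed cache capacity
            self.cache_k = None  # hypothetical preallocated key cache
            self.cache_v = None  # hypothetical preallocated value cache
            self.ptr_cur = 0     # next write position in the cache

        def forward(self, x, use_cache=False):
            b, num_tokens, _ = x.shape
            # Project and split into heads: (b, num_heads, num_tokens, head_dim)
            queries = self.W_query(x).view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)
            keys_new = self.W_key(x).view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)
            values_new = self.W_value(x).view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)

            if use_cache:
                if self.cache_k is None:  # lazily allocate on first use (assumes a fixed batch size)
                    shape = (b, self.num_heads, self.max_seq_len, self.head_dim)
                    self.cache_k = torch.zeros(shape, device=x.device, dtype=keys_new.dtype)
                    self.cache_v = torch.zeros_like(self.cache_k)
                    self.ptr_cur = 0
                # Write the new keys/values at the current pointer, then advance it
                self.cache_k[:, :, self.ptr_cur:self.ptr_cur + num_tokens] = keys_new
                self.cache_v[:, :, self.ptr_cur:self.ptr_cur + num_tokens] = values_new
                self.ptr_cur += num_tokens
                keys = self.cache_k[:, :, :self.ptr_cur]
                values = self.cache_v[:, :, :self.ptr_cur]
            else:
                keys, values = keys_new, values_new
                self.ptr_cur = 0  # keep pointer sane if you interleave modes

            # Compute scaled dot-product attention with a causal mask
            attn_scores = queries @ keys.transpose(2, 3)  # dot product for each head
            total_len = keys.shape[2]
            # Each query may attend only to key positions at or before its own
            # absolute position (queries are offset by the cached prefix length)
            q_pos = torch.arange(num_tokens, device=x.device).unsqueeze(1) + (total_len - num_tokens)
            k_pos = torch.arange(total_len, device=x.device).unsqueeze(0)
            attn_scores.masked_fill_(q_pos < k_pos, -torch.inf)
            attn_weights = torch.softmax(attn_scores / self.head_dim**0.5, dim=-1)
            context = (attn_weights @ values).transpose(1, 2).reshape(b, num_tokens, -1)
            return self.out_proj(context)

Usage under these assumptions would be one prefill call for the prompt followed by one call per generated token, e.g. `mha(prompt_emb, use_cache=True)` and then `mha(next_token_emb, use_cache=True)`: each decode step projects only the new token while attending over all cached keys and values, which is what makes cached generation cheaper than recomputing the full sequence.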