mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Add GPT-2 KV cache to pkg (#687)
This commit is contained in:
committed by
GitHub
parent
9d62ca0598
commit
bb57756444
@@ -80,8 +80,6 @@ class MultiHeadAttention(nn.Module):
|
||||
keys, values = keys_new, values_new
|
||||
self.ptr_cur = 0 # keep pointer sane if you interleave modes
|
||||
####################################################
|
||||
|
||||
|
||||
# Compute scaled dot-product attention (aka self-attention) with a causal mask
|
||||
attn_scores = queries @ keys.transpose(2, 3) # Dot product for each head
|
||||
|
||||
|
||||
Reference in New Issue
Block a user