mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Qwen3 KV cache (#688)
This commit is contained in:
committed by
GitHub
parent
2a530b49fe
commit
0b15a00574
@@ -87,7 +87,7 @@ class TransformerBlock(nn.Module):
|
||||
# Shortcut connection for attention block
|
||||
shortcut = x
|
||||
x = self.norm1(x)
|
||||
x = self.att(x, mask, cos, sin) # Shape [batch_size, num_tokens, emb_size]
|
||||
x = self.att(x, mask, cos, sin,) # Shape [batch_size, num_tokens, emb_size]
|
||||
x = x + shortcut # Add the original input back
|
||||
|
||||
# Shortcut connection for feed-forward block
|
||||
|
||||
Reference in New Issue
Block a user