Qwen3 KV cache (#688)

This commit is contained in:
Sebastian Raschka
2025-06-21 17:34:39 -05:00
committed by GitHub
parent 2a530b49fe
commit 0b15a00574
8 changed files with 370 additions and 11 deletions

View File

@@ -87,7 +87,7 @@ class TransformerBlock(nn.Module):
# Shortcut connection for attention block
shortcut = x
x = self.norm1(x)
-x = self.att(x, mask, cos, sin) # Shape [batch_size, num_tokens, emb_size]
+x = self.att(x, mask, cos, sin,) # Shape [batch_size, num_tokens, emb_size]
x = x + shortcut # Add the original input back
# Shortcut connection for feed-forward block