Qwen3 KV cache (#688)

2026-04-10 12:33:42 +00:00 · 2025-06-21 17:34:39 -05:00
parent 2a530b49fe
commit 0b15a00574
8 changed files with 370 additions and 11 deletions
--- a/pkg/llms_from_scratch/qwen3.py
+++ b/pkg/llms_from_scratch/qwen3.py
@@ -87,7 +87,7 @@ class TransformerBlock(nn.Module):
        # Shortcut connection for attention block
        shortcut = x
        x = self.norm1(x)
-        x = self.att(x, mask, cos, sin)  # Shape [batch_size, num_tokens, emb_size]
+        x = self.att(x, mask, cos, sin,)  # Shape [batch_size, num_tokens, emb_size]
        x = x + shortcut  # Add the original input back

        # Shortcut connection for feed-forward block