Fix qk_norm comment (#769)

Sebastian Raschka
2025-08-15 08:38:48 -05:00
committed by GitHub
parent b14325e56d
commit e9c1c1da38
2 changed files with 2 additions and 2 deletions


@@ -22,7 +22,7 @@ QWEN_CONFIG_06_B = {
"n_layers": 28, # Number of layers
"hidden_dim": 3072, # Size of the intermediate dimension in FeedForward
"head_dim": 128, # Size of the heads in GQA
"qk_norm": True, # Whether to normalize queries and values in GQA
"qk_norm": True, # Whether to normalize queries and keys in GQA
"n_kv_groups": 8, # Key-Value groups for grouped-query attention
"rope_base": 1_000_000.0, # The base in RoPE's "theta"
"dtype": torch.bfloat16, # Lower-precision dtype to reduce memory usage