diff --git a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb index 3ce0f54..14a8727 100644 --- a/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb +++ b/ch03/02_bonus_efficient-multihead-attention/mha-implementations.ipynb @@ -334,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "9a6bd0a2-f27c-4602-afa0-c96cd295c1a6", "metadata": { "colab": { @@ -395,7 +395,7 @@ " self.mask.bool()[:num_tokens, :num_tokens], -torch.inf\n", " )\n", "\n", - " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**-0.5, dim=-1)\n", + " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n", " attn_weights = self.dropout(attn_weights)\n", "\n", " # (b, num_heads, num_tokens, num_tokens) --> (b, num_heads, num_tokens, head_dim)\n", diff --git a/ch05/11_qwen3/README.md b/ch05/11_qwen3/README.md index 08da9b7..77544a5 100644 --- a/ch05/11_qwen3/README.md +++ b/ch05/11_qwen3/README.md @@ -51,7 +51,7 @@ USE_REASONING_MODEL = True USE_INSTRUCT_MODEL = False # Uses the instruct mode (without reasoning) if # USE_REASONING_MODEL = True -# USE_INSTRUCT_MODEL = False +# USE_INSTRUCT_MODEL = True # This setting does have no effect if USE_REASONING_MODEL = False diff --git a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb index df216ff..fac9a55 100644 --- a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb +++ b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb @@ -121,7 +121,7 @@ "USE_INSTRUCT_MODEL = False\n", "# Uses the instruct mode (without reasoning) if \n", "# USE_REASONING_MODEL = True\n", - "# USE_INSTRUCT_MODEL = False\n", + "# USE_INSTRUCT_MODEL = True\n", "# This setting does have no effect if USE_REASONING_MODEL = False" ] }, diff --git a/ch05/11_qwen3/standalone-qwen3.ipynb b/ch05/11_qwen3/standalone-qwen3.ipynb index fd9f588..911eccb 100644 --- a/ch05/11_qwen3/standalone-qwen3.ipynb +++ b/ch05/11_qwen3/standalone-qwen3.ipynb @@ -119,7 +119,7 @@ "USE_INSTRUCT_MODEL = False\n", "# Uses the instruct mode (without reasoning) if \n", "# USE_REASONING_MODEL = True\n", - "# USE_INSTRUCT_MODEL = False\n", + "# USE_INSTRUCT_MODEL = True\n", "# This setting does have no effect if USE_REASONING_MODEL = False" ] },