remove redundant unsqueeze in mask

2026-04-10 12:33:42 +00:00 · 2024-03-09 17:42:25 -06:00
parent 6ba97adaee
commit da33ce8054
7 changed files with 45 additions and 37 deletions
--- a/ch03/01_main-chapter-code/ch03.ipynb
+++ b/ch03/01_main-chapter-code/ch03.ipynb
@@ -1608,7 +1608,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 42,
   "id": "110b0188-6e9e-4e56-a988-10523c6c8538",
   "metadata": {},
   "outputs": [
@@ -1670,12 +1670,12 @@
    "\n",
    "        # Compute scaled dot-product attention (aka self-attention) with a causal mask\n",
    "        attn_scores = queries @ keys.transpose(2, 3)  # Dot product for each head\n",
+    "\n",
    "        # Original mask truncated to the number of tokens and converted to boolean\n",
    "        mask_bool = self.mask.bool()[:num_tokens, :num_tokens]\n",
-    "        # Unsqueeze the mask to match dimensions\n",
-    "        mask_unsqueezed = mask_bool.unsqueeze(0)\n",
-    "        # Use the unsqueezed mask to fill attention scores\n",
-    "        attn_scores.masked_fill_(mask_unsqueezed, -torch.inf)\n",
+    "\n",
+    "        # Fill masked positions with -inf (the mask broadcasts against attn_scores, so no unsqueeze is needed)\n",
+    "        attn_scores.masked_fill_(mask_bool, -torch.inf)\n",
    "        \n",
    "        attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n",
    "        attn_weights = self.dropout(attn_weights)\n",
@@ -1865,7 +1865,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.4"
  }
 },
 "nbformat": 4,