mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
remove redundant unsqueeze in mask
This commit is contained in:
@@ -1608,7 +1608,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 42,
|
||||
"id": "110b0188-6e9e-4e56-a988-10523c6c8538",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1670,12 +1670,12 @@
|
||||
"\n",
|
||||
" # Compute scaled dot-product attention (aka self-attention) with a causal mask\n",
|
||||
" attn_scores = queries @ keys.transpose(2, 3) # Dot product for each head\n",
|
||||
"\n",
|
||||
" # Original mask truncated to the number of tokens and converted to boolean\n",
|
||||
" mask_bool = self.mask.bool()[:num_tokens, :num_tokens]\n",
|
||||
" # Unsqueeze the mask to match dimensions\n",
|
||||
" mask_unsqueezed = mask_bool.unsqueeze(0)\n",
|
||||
" # Use the unsqueezed mask to fill attention scores\n",
|
||||
" attn_scores.masked_fill_(mask_unsqueezed, -torch.inf)\n",
|
||||
"\n",
|
||||
" # Use the mask to fill attention scores\n",
|
||||
" attn_scores.masked_fill_(mask_bool, -torch.inf)\n",
|
||||
" \n",
|
||||
" attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n",
|
||||
" attn_weights = self.dropout(attn_weights)\n",
|
||||
@@ -1865,7 +1865,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user