mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Rename variable to context_length to make it easier on readers (#106)
* rename to context length * fix spacing
This commit is contained in:
committed by
GitHub
parent
a940373a14
commit
2de60d1bfb
@@ -61,7 +61,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
|
||||
#####################################
|
||||
|
||||
class MultiHeadAttention(nn.Module):
|
||||
def __init__(self, d_in, d_out, block_size, dropout, num_heads, qkv_bias=False):
|
||||
def __init__(self, d_in, d_out, context_length, dropout, num_heads, qkv_bias=False):
|
||||
super().__init__()
|
||||
assert d_out % num_heads == 0, "d_out must be divisible by n_heads"
|
||||
|
||||
@@ -74,7 +74,7 @@ class MultiHeadAttention(nn.Module):
|
||||
self.W_value = nn.Linear(d_in, d_out, bias=qkv_bias)
|
||||
self.out_proj = nn.Linear(d_out, d_out) # Linear layer to combine head outputs
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
self.register_buffer('mask', torch.triu(torch.ones(block_size, block_size), diagonal=1))
|
||||
self.register_buffer('mask', torch.triu(torch.ones(context_length, context_length), diagonal=1))
|
||||
|
||||
def forward(self, x):
|
||||
b, num_tokens, d_in = x.shape
|
||||
@@ -164,7 +164,7 @@ class TransformerBlock(nn.Module):
|
||||
self.att = MultiHeadAttention(
|
||||
d_in=cfg["emb_dim"],
|
||||
d_out=cfg["emb_dim"],
|
||||
block_size=cfg["ctx_len"],
|
||||
context_length=cfg["ctx_len"],
|
||||
num_heads=cfg["n_heads"],
|
||||
dropout=cfg["drop_rate"],
|
||||
qkv_bias=cfg["qkv_bias"])
|
||||
|
||||
Reference in New Issue
Block a user