mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Merge branch 'main' into qwen-tokenizer-fix
This commit is contained in:
@@ -435,7 +435,7 @@
|
||||
" positions = torch.arange(context_length)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -310,7 +310,7 @@
|
||||
" positions = torch.arange(context_length)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -180,7 +180,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -275,7 +275,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -275,7 +275,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -206,7 +206,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -204,7 +204,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -200,7 +200,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -200,7 +200,7 @@
|
||||
" positions = torch.arange(context_length, dtype=dtype)\n",
|
||||
"\n",
|
||||
" # Compute the angles\n",
|
||||
-" angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)\n",
|
||||
+" angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)\n",
|
||||
"\n",
|
||||
" # Expand angles to match the head_dim\n",
|
||||
" angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)\n",
|
||||
|
||||
@@ -19,7 +19,7 @@ def load_dataframes():
|
||||
return df_train, df_val, df_test
|
||||
|
||||
|
||||
-def eval(model, X_train, y_train, X_val, y_val, X_test, y_test):
|
||||
+def eval_model(model, X_train, y_train, X_val, y_val, X_test, y_test):
|
||||
# Making predictions
|
||||
y_pred_train = model.predict(X_train)
|
||||
y_pred_val = model.predict(X_val)
|
||||
@@ -67,9 +67,9 @@ if __name__ == "__main__":
|
||||
dummy_clf.fit(X_train, y_train)
|
||||
|
||||
print("Dummy classifier:")
|
||||
-eval(dummy_clf, X_train, y_train, X_val, y_val, X_test, y_test)
|
||||
+eval_model(dummy_clf, X_train, y_train, X_val, y_val, X_test, y_test)
|
||||
|
||||
print("\n\nLogistic regression classifier:")
|
||||
model = LogisticRegression(max_iter=1000)
|
||||
model.fit(X_train, y_train)
|
||||
-eval(model, X_train, y_train, X_val, y_val, X_test, y_test)
|
||||
+eval_model(model, X_train, y_train, X_val, y_val, X_test, y_test)
|
||||
|
||||
@@ -238,7 +238,7 @@ def compute_rope_params(head_dim, theta_base=10_000, context_length=4096, freq_c
|
||||
positions = torch.arange(context_length, dtype=dtype)
|
||||
|
||||
# Compute the angles
|
||||
-angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)
|
||||
+angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)
|
||||
|
||||
# Expand angles to match the head_dim
|
||||
angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)
|
||||
|
||||
@@ -326,7 +326,7 @@ def compute_rope_params(head_dim, theta_base=10_000, context_length=4096, dtype=
|
||||
positions = torch.arange(context_length, dtype=dtype)
|
||||
|
||||
# Compute the angles
|
||||
-angles = positions[:, None] * inv_freq[None, :] # Shape: (context_length, head_dim // 2)
|
||||
+angles = positions.unsqueeze(1) * inv_freq.unsqueeze(0) # Shape: (context_length, head_dim // 2)
|
||||
|
||||
# Expand angles to match the head_dim
|
||||
angles = torch.cat([angles, angles], dim=1) # Shape: (context_length, head_dim)
|
||||
|
||||
Reference in New Issue
Block a user