mirror of https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
fixes for code (#206)
* updated .gitignore
* removed unused GELU import
* fixed model_configs, fixed all tensors on same device
* removed unused tiktoken
* update
* update hparam search
* remove redundant tokenizer argument

---------

Co-authored-by: rasbt <mail@sebastianraschka.com>
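Of the items above, "fixed all tensors on same device" is the usual PyTorch device-mismatch fix: move the model to one device once, and move every batch tensor to that same device before the forward pass. The snippet below is a minimal stand-in illustration; the Linear model and the random batch are placeholders, not the repository's GPT classifier or data loaders.

import torch
import torch.nn as nn

# Minimal sketch of the "all tensors on the same device" pattern.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = nn.Linear(8, 2).to(device)            # stand-in for the GPT classifier
input_batch = torch.randn(4, 8)               # stand-in for a data-loader batch
target_batch = torch.randint(0, 2, (4,))

logits = model(input_batch.to(device))        # inputs follow the model's device
loss = nn.functional.cross_entropy(logits, target_batch.to(device))
loss.backward()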
@@ -1861,7 +1861,7 @@
 "source": [
 "# Overall the same as `train_model_simple` in chapter 5\n",
 "def train_classifier_simple(model, train_loader, val_loader, optimizer, device, num_epochs,\n",
-"                            eval_freq, eval_iter, tokenizer):\n",
+"                            eval_freq, eval_iter):\n",
 "    # Initialize lists to track losses and examples seen\n",
 "    train_losses, val_losses, train_accs, val_accs = [], [], [], []\n",
 "    examples_seen, global_step = 0, -1\n",
@@ -1982,7 +1982,6 @@
 "train_losses, val_losses, train_accs, val_accs, examples_seen = train_classifier_simple(\n",
 "    model, train_loader, val_loader, optimizer, device,\n",
 "    num_epochs=num_epochs, eval_freq=50, eval_iter=5,\n",
-"    tokenizer=tokenizer\n",
 ")\n",
 "\n",
 "end_time = time.time()\n",
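Because the notebook's call site above passes arguments by keyword, dropping the parameter from the function and dropping `tokenizer=tokenizer` from the call have to land in the same commit; a caller that still passes the removed keyword fails immediately. A stripped-down stand-in (not the notebook's actual training loop) showing that failure mode:

# Stand-in with the reduced signature; the real training-loop body is omitted.
def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
                            num_epochs, eval_freq, eval_iter):
    return [], [], [], [], 0

# Passing the removed keyword now raises a TypeError.
try:
    train_classifier_simple(None, None, None, None, "cpu",
                            num_epochs=5, eval_freq=50, eval_iter=5,
                            tokenizer="gpt2")
except TypeError as err:
    print(err)   # ... got an unexpected keyword argument 'tokenizer'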
@@ -2371,7 +2370,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.6"
+"version": "3.11.4"
 }
 },
 "nbformat": 4,
@@ -235,7 +235,7 @@ def evaluate_model(model, train_loader, val_loader, device,


 def train_classifier_simple(model, train_loader, val_loader, optimizer, device, num_epochs,
-                            eval_freq, eval_iter, tokenizer, max_steps=None, trainable_token_pos=-1,
+                            eval_freq, eval_iter, max_steps=None, trainable_token_pos=-1,
                             accumulation_steps=1, ignore_index=-100):
     # Initialize lists to track losses and tokens seen
     train_losses, val_losses, train_accs, val_accs = [], [], [], []
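This variant of the function also takes ignore_index=-100, which matches the built-in default ignore_index of PyTorch's cross_entropy, so target positions set to -100 (typically padding) drop out of the loss. A small, self-contained illustration with made-up values:

import torch

# Targets set to -100 are excluded from the loss; -100 is also
# torch.nn.functional.cross_entropy's default for ignore_index.
logits = torch.randn(4, 3)                   # 4 positions, 3 classes
targets = torch.tensor([0, 2, -100, 1])      # third position is masked out

loss = torch.nn.functional.cross_entropy(logits, targets, ignore_index=-100)
print(loss)                                  # averaged over the 3 unmasked positions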
@@ -565,7 +565,7 @@ if __name__ == "__main__":
     train_losses, val_losses, train_accs, val_accs, examples_seen = train_classifier_simple(
         model, train_loader, val_loader, optimizer, device,
         num_epochs=args.num_epochs, eval_freq=50, eval_iter=5,
-        tokenizer=tokenizer, max_steps=None, trainable_token_pos=args.trainable_token_pos,
+        max_steps=None, trainable_token_pos=args.trainable_token_pos,
         accumulation_steps=args.accumulation_steps
     )

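The same file's call site forwards accumulation_steps=args.accumulation_steps. Assuming the parameter follows the conventional gradient-accumulation pattern (an assumption, not a quote of the file), the idea is to scale each mini-batch loss and step the optimizer only every N batches:

import torch
import torch.nn as nn

# Generic gradient-accumulation sketch; model, data, and learning rate are
# placeholders, and this is not the repository's exact implementation.
model = nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
accumulation_steps = 4

batches = [(torch.randn(4, 8), torch.randint(0, 2, (4,))) for _ in range(8)]

optimizer.zero_grad()
for step, (inputs, targets) in enumerate(batches):
    loss = nn.functional.cross_entropy(model(inputs), targets)
    (loss / accumulation_steps).backward()   # scale so accumulated grads average
    if (step + 1) % accumulation_steps == 0:
        optimizer.step()                     # one update per 4 mini-batches
        optimizer.zero_grad()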
@@ -110,7 +110,7 @@ def evaluate_model(model, train_loader, val_loader, device, eval_iter):


 def train_classifier_simple(model, train_loader, val_loader, optimizer, device, num_epochs,
-                            eval_freq, eval_iter, tokenizer, max_steps=None):
+                            eval_freq, eval_iter, max_steps=None):
     # Initialize lists to track losses and tokens seen
     train_losses, val_losses, train_accs, val_accs = [], [], [], []
     examples_seen, global_step = 0, -1
@@ -279,7 +279,7 @@ if __name__ == "__main__":
     train_losses, val_losses, train_accs, val_accs, examples_seen = train_classifier_simple(
         model, train_loader, val_loader, optimizer, device,
         num_epochs=num_epochs, eval_freq=50, eval_iter=20,
-        tokenizer=tokenizer, max_steps=None
+        max_steps=None
     )

     end_time = time.time()
@@ -139,7 +139,7 @@ def evaluate_model(model, train_loader, val_loader, device, eval_iter, trainable


 def train_classifier_simple(model, train_loader, val_loader, optimizer, device, num_epochs,
-                            eval_freq, eval_iter, tokenizer, max_steps=None, trainable_token=-1):
+                            eval_freq, eval_iter, max_steps=None, trainable_token=-1):
     # Initialize lists to track losses and tokens seen
     train_losses, val_losses, train_accs, val_accs = [], [], [], []
     examples_seen, global_step = 0, -1
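Here the default trainable_token=-1 presumably selects which token position's output feeds the classification head, with -1 meaning the last token, as in the chapter 6 classifier. The indexing itself is simple; the hidden states below are random stand-ins for a model's output, not values from the repository:

import torch

# Selecting one token position's hidden state for the classifier head;
# the random tensor stands in for model output of shape (batch, tokens, emb_dim).
batch_size, num_tokens, emb_dim, num_classes = 2, 6, 8, 2
hidden = torch.randn(batch_size, num_tokens, emb_dim)
classifier_head = torch.nn.Linear(emb_dim, num_classes)

trainable_token = -1                                 # last position, per the default above
logits = classifier_head(hidden[:, trainable_token, :])
print(logits.shape)                                  # torch.Size([2, 2])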
@@ -344,7 +344,7 @@ if __name__ == "__main__":
     train_losses, val_losses, train_accs, val_accs, examples_seen = train_classifier_simple(
         model, train_loader, val_loader, optimizer, device,
         num_epochs=num_epochs, eval_freq=50, eval_iter=20,
-        tokenizer=tokenizer, max_steps=None, trainable_token=args.trainable_token
+        max_steps=None, trainable_token=args.trainable_token
     )

     end_time = time.time()