mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
improve gradient accumulation (#300)
This commit is contained in:
committed by
GitHub
parent
36fbc7aa74
commit
70e5714556
@@ -259,7 +259,8 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
|
|||||||
loss.backward() # Calculate loss gradients
|
loss.backward() # Calculate loss gradients
|
||||||
|
|
||||||
# Use gradient accumulation if accumulation_steps > 1
|
# Use gradient accumulation if accumulation_steps > 1
|
||||||
if batch_idx % accumulation_steps == 0:
|
is_update_step = ((batch_idx + 1) % accumulation_steps == 0) or ((batch_idx + 1) == len(train_loader))
|
||||||
|
if is_update_step:
|
||||||
optimizer.step() # Update model weights using loss gradients
|
optimizer.step() # Update model weights using loss gradients
|
||||||
optimizer.zero_grad() # Reset loss gradients from previous batch iteration
|
optimizer.zero_grad() # Reset loss gradients from previous batch iteration
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user