From cfdf22330b0223d0fd922d1ab68bbdc88ce99fd9 Mon Sep 17 00:00:00 2001 From: Sajjad Baloch Date: Sun, 22 Jun 2025 10:15:12 -0700 Subject: [PATCH] Fix: Typo in `appendix_d.py` comments. (#682) * Fix: pkg/llms_from_scratch/appendix_d.py * minor language typo fix * fix 691 --------- Co-authored-by: PrinceSajjadHussain Co-authored-by: rasbt --- appendix-D/01_main-chapter-code/appendix-D.ipynb | 2 +- pkg/llms_from_scratch/appendix_d.py | 2 +- pkg/llms_from_scratch/ch03.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/appendix-D/01_main-chapter-code/appendix-D.ipynb b/appendix-D/01_main-chapter-code/appendix-D.ipynb index 64c6d67..f8a5aca 100644 --- a/appendix-D/01_main-chapter-code/appendix-D.ipynb +++ b/appendix-D/01_main-chapter-code/appendix-D.ipynb @@ -615,7 +615,7 @@ " if global_step > warmup_steps:\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) \n", " else:\n", - " if global_step >= warmup_steps: # the book originally used global_step > warmup_steps, which lead to a skipped clipping step after warmup\n", + " if global_step >= warmup_steps: # the book originally used global_step > warmup_steps, which led to a skipped clipping step after warmup\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n", " \n", " optimizer.step()\n", diff --git a/pkg/llms_from_scratch/appendix_d.py b/pkg/llms_from_scratch/appendix_d.py index aa3cda4..e6b9923 100644 --- a/pkg/llms_from_scratch/appendix_d.py +++ b/pkg/llms_from_scratch/appendix_d.py @@ -66,7 +66,7 @@ def train_model(model, train_loader, val_loader, optimizer, device, if global_step > warmup_steps: torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) else: - if global_step >= warmup_steps: # the book originally used global_step > warmup_steps, which lead to a skipped clipping step after warmup + if global_step >= warmup_steps: # the book originally used global_step > warmup_steps, which led to a skipped clipping step after warmup torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) optimizer.step() diff --git a/pkg/llms_from_scratch/ch03.py b/pkg/llms_from_scratch/ch03.py index 99ff4a7..e6ca561 100644 --- a/pkg/llms_from_scratch/ch03.py +++ b/pkg/llms_from_scratch/ch03.py @@ -160,7 +160,7 @@ class PyTorchMultiHeadAttention(nn.Module): def __init__(self, d_in, d_out, num_heads, dropout=0.0, qkv_bias=False): super().__init__() - assert d_out % num_heads == 0, "embed_dim is indivisible by num_heads" + assert d_out % num_heads == 0, "d_out is indivisible by num_heads" self.num_heads = num_heads self.head_dim = d_out // num_heads