mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Add LoRA scaling (#823)
This commit is contained in:
committed by
GitHub
parent
fc101b710e
commit
8f3e5b024d
@@ -14,9 +14,11 @@ class LoRALayer(torch.nn.Module):
|
||||
torch.nn.init.kaiming_uniform_(self.A, a=math.sqrt(5)) # similar to standard weight initialization
|
||||
self.B = torch.nn.Parameter(torch.zeros(rank, out_dim))
|
||||
self.alpha = alpha
|
||||
self.rank = rank
|
||||
|
||||
def forward(self, x):
    """Apply the low-rank LoRA update to the input.

    Computes ``(alpha / rank) * (x @ A @ B)``: the rank-``r`` update
    ``A @ B`` is scaled by ``alpha`` and normalized by ``rank`` so the
    update magnitude stays comparable across different rank choices
    (the standard LoRA scaling factor alpha / r).

    Parameters
    ----------
    x : torch.Tensor
        Input activations; last dimension must match ``self.A``'s
        first dimension (the layer's ``in_dim``).

    Returns
    -------
    torch.Tensor
        The scaled low-rank update, shaped ``(..., out_dim)``.
    """
    # NOTE(review): an earlier revision used the unscaled form
    # ``self.alpha * (x @ self.A @ self.B)``; dividing by ``self.rank``
    # is the committed LoRA-scaling behavior kept here.
    x = (self.alpha / self.rank) * (x @ self.A @ self.B)
    return x
|
||||
|
||||
|
||||
Reference in New Issue
Block a user