mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
Readability and code quality improvements (#959)
* Consistent dataset naming * consistent section headers
This commit is contained in:
committed by
GitHub
parent
7b1f740f74
commit
be5e2a3331
@@ -904,7 +904,7 @@
|
||||
},
|
||||
"source": [
|
||||
" \n",
|
||||
"# 4. Load pretrained weights"
|
||||
"# 3. Load pretrained weights"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1269,10 +1269,10 @@
|
||||
" )\n",
|
||||
"\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" def gpu_gb(x):\n",
|
||||
" def calc_gpu_gb(x):\n",
|
||||
" return f\"{x / 1024 / 1024 / 1024:.2f} GB\"\n",
|
||||
" \n",
|
||||
" print(f\"\\n\\nGPU memory used: {gpu_gb(torch.cuda.max_memory_allocated())}\")"
|
||||
" print(f\"\\n\\nGPU memory used: {calc_gpu_gb(torch.cuda.max_memory_allocated())}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1320,7 +1320,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -801,7 +801,7 @@
|
||||
},
|
||||
"source": [
|
||||
" \n",
|
||||
"# 4. Load pretrained weights"
|
||||
"# 3. Load pretrained weights"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1160,10 +1160,10 @@
|
||||
" )\n",
|
||||
"\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" def gpu_gb(x):\n",
|
||||
" def calc_gpu_gb(x):\n",
|
||||
" return f\"{x / 1024 / 1024 / 1024:.2f} GB\"\n",
|
||||
" \n",
|
||||
" print(f\"\\n\\nGPU memory used: {gpu_gb(torch.cuda.max_memory_allocated())}\")"
|
||||
" print(f\"\\n\\nGPU memory used: {calc_gpu_gb(torch.cuda.max_memory_allocated())}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1211,7 +1211,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -116,11 +116,11 @@ def load_notebook_defs(nb_name="standalone-olmo3.ipynb"):
|
||||
return import_definitions_from_notebook(nb_dir, nb_name)
|
||||
|
||||
|
||||
def build_olmo3_pair(nb_imports, cfg, hf_checkpoint=None):
|
||||
def build_olmo3_pair(import_notebook_defs, cfg, hf_checkpoint=None):
|
||||
if Olmo3ForCausalLM is None:
|
||||
raise ImportError("transformers is required for the Olmo-3 debugger.")
|
||||
|
||||
ours = nb_imports.Olmo3Model(cfg)
|
||||
ours = import_notebook_defs.Olmo3Model(cfg)
|
||||
hf_cfg = _hf_config_from_dict(cfg)
|
||||
|
||||
if hf_checkpoint:
|
||||
@@ -133,7 +133,7 @@ def build_olmo3_pair(nb_imports, cfg, hf_checkpoint=None):
|
||||
hf_model = Olmo3ForCausalLM(hf_cfg)
|
||||
|
||||
param_config = {"n_layers": cfg["n_layers"], "hidden_dim": cfg["hidden_dim"]}
|
||||
nb_imports.load_weights_into_olmo(ours, param_config, hf_model.state_dict())
|
||||
import_notebook_defs.load_weights_into_olmo(ours, param_config, hf_model.state_dict())
|
||||
|
||||
ours.eval()
|
||||
hf_model.eval()
|
||||
@@ -271,10 +271,10 @@ if __name__ == "__main__":
|
||||
if not transformers_available:
|
||||
raise SystemExit("transformers is not installed; install it to run the debugger.")
|
||||
|
||||
nb_imports = load_notebook_defs()
|
||||
import_notebook_defs = load_notebook_defs()
|
||||
cfg = yarn_debug_config()
|
||||
|
||||
ours_model, hf_model = build_olmo3_pair(nb_imports, cfg)
|
||||
ours_model, hf_model = build_olmo3_pair(import_notebook_defs, cfg)
|
||||
torch.manual_seed(0)
|
||||
input_ids = torch.randint(0, cfg["vocab_size"], (1, cfg["context_length"]), dtype=torch.long)
|
||||
diffs = layerwise_differences(ours_model, hf_model, input_ids)
|
||||
|
||||
@@ -16,7 +16,7 @@ transformers_installed = importlib.util.find_spec("transformers") is not None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def nb_imports():
|
||||
def import_notebook_defs():
|
||||
nb_dir = Path(__file__).resolve().parents[1]
|
||||
mod = import_definitions_from_notebook(nb_dir, "standalone-olmo3-plus-kv-cache.ipynb")
|
||||
return mod
|
||||
@@ -55,9 +55,9 @@ def dummy_cfg_base():
|
||||
}
|
||||
|
||||
@torch.inference_mode()
|
||||
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
|
||||
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, import_notebook_defs):
|
||||
torch.manual_seed(123)
|
||||
model = nb_imports.Olmo3Model(dummy_cfg_base)
|
||||
model = import_notebook_defs.Olmo3Model(dummy_cfg_base)
|
||||
out = model(dummy_input)
|
||||
assert out.shape == (1, dummy_input.size(1), dummy_cfg_base["vocab_size"]), \
|
||||
f"Expected shape (1, seq_len, vocab_size), got {out.shape}"
|
||||
@@ -65,7 +65,7 @@ def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
|
||||
|
||||
@torch.inference_mode()
|
||||
@pytest.mark.skipif(not transformers_installed, reason="transformers not installed")
|
||||
def test_olmo3_base_equivalence_with_transformers(nb_imports):
|
||||
def test_olmo3_base_equivalence_with_transformers(import_notebook_defs):
|
||||
from transformers import Olmo3Config, Olmo3ForCausalLM
|
||||
|
||||
# Tiny config so the test is fast
|
||||
@@ -99,7 +99,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
|
||||
"rope_local_base": 10_000.0,
|
||||
}
|
||||
|
||||
model = nb_imports.Olmo3Model(cfg)
|
||||
model = import_notebook_defs.Olmo3Model(cfg)
|
||||
|
||||
hf_cfg = Olmo3Config(
|
||||
vocab_size=cfg["vocab_size"],
|
||||
@@ -129,7 +129,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
|
||||
"n_layers": cfg["n_layers"],
|
||||
"hidden_dim": cfg["hidden_dim"],
|
||||
}
|
||||
nb_imports.load_weights_into_olmo(model, param_config, hf_state)
|
||||
import_notebook_defs.load_weights_into_olmo(model, param_config, hf_state)
|
||||
|
||||
x = torch.randint(
|
||||
0,
|
||||
|
||||
@@ -16,7 +16,7 @@ transformers_installed = importlib.util.find_spec("transformers") is not None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def nb_imports():
|
||||
def import_notebook_defs():
|
||||
nb_dir = Path(__file__).resolve().parents[1]
|
||||
mod = import_definitions_from_notebook(nb_dir, "standalone-olmo3.ipynb")
|
||||
return mod
|
||||
@@ -55,9 +55,9 @@ def dummy_cfg_base():
|
||||
}
|
||||
|
||||
@torch.inference_mode()
|
||||
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
|
||||
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, import_notebook_defs):
|
||||
torch.manual_seed(123)
|
||||
model = nb_imports.Olmo3Model(dummy_cfg_base)
|
||||
model = import_notebook_defs.Olmo3Model(dummy_cfg_base)
|
||||
out = model(dummy_input)
|
||||
assert out.shape == (1, dummy_input.size(1), dummy_cfg_base["vocab_size"]), \
|
||||
f"Expected shape (1, seq_len, vocab_size), got {out.shape}"
|
||||
@@ -65,7 +65,7 @@ def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
|
||||
|
||||
@torch.inference_mode()
|
||||
@pytest.mark.skipif(not transformers_installed, reason="transformers not installed")
|
||||
def test_olmo3_base_equivalence_with_transformers(nb_imports):
|
||||
def test_olmo3_base_equivalence_with_transformers(import_notebook_defs):
|
||||
from transformers import Olmo3Config, Olmo3ForCausalLM
|
||||
|
||||
# Tiny config so the test is fast
|
||||
@@ -99,7 +99,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
|
||||
"rope_local_base": 10_000.0,
|
||||
}
|
||||
|
||||
model = nb_imports.Olmo3Model(cfg)
|
||||
model = import_notebook_defs.Olmo3Model(cfg)
|
||||
|
||||
hf_cfg = Olmo3Config(
|
||||
vocab_size=cfg["vocab_size"],
|
||||
@@ -129,7 +129,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
|
||||
"n_layers": cfg["n_layers"],
|
||||
"hidden_dim": cfg["hidden_dim"],
|
||||
}
|
||||
nb_imports.load_weights_into_olmo(model, param_config, hf_state)
|
||||
import_notebook_defs.load_weights_into_olmo(model, param_config, hf_state)
|
||||
|
||||
x = torch.randint(
|
||||
0,
|
||||
|
||||
Reference in New Issue
Block a user