Readability and code quality improvements (#959)

* Consistent dataset naming

* Consistent section headers
This commit is contained in:
Sebastian Raschka
2026-02-17 19:44:56 -05:00
committed by GitHub
parent 7b1f740f74
commit be5e2a3331
48 changed files with 419 additions and 297 deletions

View File

@@ -904,7 +904,7 @@
},
"source": [
" \n",
"# 4. Load pretrained weights"
"# 3. Load pretrained weights"
]
},
{
@@ -1269,10 +1269,10 @@
" )\n",
"\n",
"if torch.cuda.is_available():\n",
" def gpu_gb(x):\n",
" def calc_gpu_gb(x):\n",
" return f\"{x / 1024 / 1024 / 1024:.2f} GB\"\n",
" \n",
" print(f\"\\n\\nGPU memory used: {gpu_gb(torch.cuda.max_memory_allocated())}\")"
" print(f\"\\n\\nGPU memory used: {calc_gpu_gb(torch.cuda.max_memory_allocated())}\")"
]
},
{
@@ -1320,7 +1320,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -801,7 +801,7 @@
},
"source": [
" \n",
"# 4. Load pretrained weights"
"# 3. Load pretrained weights"
]
},
{
@@ -1160,10 +1160,10 @@
" )\n",
"\n",
"if torch.cuda.is_available():\n",
" def gpu_gb(x):\n",
" def calc_gpu_gb(x):\n",
" return f\"{x / 1024 / 1024 / 1024:.2f} GB\"\n",
" \n",
" print(f\"\\n\\nGPU memory used: {gpu_gb(torch.cuda.max_memory_allocated())}\")"
" print(f\"\\n\\nGPU memory used: {calc_gpu_gb(torch.cuda.max_memory_allocated())}\")"
]
},
{
@@ -1211,7 +1211,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -116,11 +116,11 @@ def load_notebook_defs(nb_name="standalone-olmo3.ipynb"):
return import_definitions_from_notebook(nb_dir, nb_name)
def build_olmo3_pair(nb_imports, cfg, hf_checkpoint=None):
def build_olmo3_pair(import_notebook_defs, cfg, hf_checkpoint=None):
if Olmo3ForCausalLM is None:
raise ImportError("transformers is required for the Olmo-3 debugger.")
ours = nb_imports.Olmo3Model(cfg)
ours = import_notebook_defs.Olmo3Model(cfg)
hf_cfg = _hf_config_from_dict(cfg)
if hf_checkpoint:
@@ -133,7 +133,7 @@ def build_olmo3_pair(nb_imports, cfg, hf_checkpoint=None):
hf_model = Olmo3ForCausalLM(hf_cfg)
param_config = {"n_layers": cfg["n_layers"], "hidden_dim": cfg["hidden_dim"]}
nb_imports.load_weights_into_olmo(ours, param_config, hf_model.state_dict())
import_notebook_defs.load_weights_into_olmo(ours, param_config, hf_model.state_dict())
ours.eval()
hf_model.eval()
@@ -271,10 +271,10 @@ if __name__ == "__main__":
if not transformers_available:
raise SystemExit("transformers is not installed; install it to run the debugger.")
nb_imports = load_notebook_defs()
import_notebook_defs = load_notebook_defs()
cfg = yarn_debug_config()
ours_model, hf_model = build_olmo3_pair(nb_imports, cfg)
ours_model, hf_model = build_olmo3_pair(import_notebook_defs, cfg)
torch.manual_seed(0)
input_ids = torch.randint(0, cfg["vocab_size"], (1, cfg["context_length"]), dtype=torch.long)
diffs = layerwise_differences(ours_model, hf_model, input_ids)

View File

@@ -16,7 +16,7 @@ transformers_installed = importlib.util.find_spec("transformers") is not None
@pytest.fixture
def nb_imports():
def import_notebook_defs():
nb_dir = Path(__file__).resolve().parents[1]
mod = import_definitions_from_notebook(nb_dir, "standalone-olmo3-plus-kv-cache.ipynb")
return mod
@@ -55,9 +55,9 @@ def dummy_cfg_base():
}
@torch.inference_mode()
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, import_notebook_defs):
torch.manual_seed(123)
model = nb_imports.Olmo3Model(dummy_cfg_base)
model = import_notebook_defs.Olmo3Model(dummy_cfg_base)
out = model(dummy_input)
assert out.shape == (1, dummy_input.size(1), dummy_cfg_base["vocab_size"]), \
f"Expected shape (1, seq_len, vocab_size), got {out.shape}"
@@ -65,7 +65,7 @@ def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
@torch.inference_mode()
@pytest.mark.skipif(not transformers_installed, reason="transformers not installed")
def test_olmo3_base_equivalence_with_transformers(nb_imports):
def test_olmo3_base_equivalence_with_transformers(import_notebook_defs):
from transformers import Olmo3Config, Olmo3ForCausalLM
# Tiny config so the test is fast
@@ -99,7 +99,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
"rope_local_base": 10_000.0,
}
model = nb_imports.Olmo3Model(cfg)
model = import_notebook_defs.Olmo3Model(cfg)
hf_cfg = Olmo3Config(
vocab_size=cfg["vocab_size"],
@@ -129,7 +129,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
"n_layers": cfg["n_layers"],
"hidden_dim": cfg["hidden_dim"],
}
nb_imports.load_weights_into_olmo(model, param_config, hf_state)
import_notebook_defs.load_weights_into_olmo(model, param_config, hf_state)
x = torch.randint(
0,

View File

@@ -16,7 +16,7 @@ transformers_installed = importlib.util.find_spec("transformers") is not None
@pytest.fixture
def nb_imports():
def import_notebook_defs():
nb_dir = Path(__file__).resolve().parents[1]
mod = import_definitions_from_notebook(nb_dir, "standalone-olmo3.ipynb")
return mod
@@ -55,9 +55,9 @@ def dummy_cfg_base():
}
@torch.inference_mode()
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, import_notebook_defs):
torch.manual_seed(123)
model = nb_imports.Olmo3Model(dummy_cfg_base)
model = import_notebook_defs.Olmo3Model(dummy_cfg_base)
out = model(dummy_input)
assert out.shape == (1, dummy_input.size(1), dummy_cfg_base["vocab_size"]), \
f"Expected shape (1, seq_len, vocab_size), got {out.shape}"
@@ -65,7 +65,7 @@ def test_dummy_olmo3_forward(dummy_cfg_base, dummy_input, nb_imports):
@torch.inference_mode()
@pytest.mark.skipif(not transformers_installed, reason="transformers not installed")
def test_olmo3_base_equivalence_with_transformers(nb_imports):
def test_olmo3_base_equivalence_with_transformers(import_notebook_defs):
from transformers import Olmo3Config, Olmo3ForCausalLM
# Tiny config so the test is fast
@@ -99,7 +99,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
"rope_local_base": 10_000.0,
}
model = nb_imports.Olmo3Model(cfg)
model = import_notebook_defs.Olmo3Model(cfg)
hf_cfg = Olmo3Config(
vocab_size=cfg["vocab_size"],
@@ -129,7 +129,7 @@ def test_olmo3_base_equivalence_with_transformers(nb_imports):
"n_layers": cfg["n_layers"],
"hidden_dim": cfg["hidden_dim"],
}
nb_imports.load_weights_into_olmo(model, param_config, hf_state)
import_notebook_defs.load_weights_into_olmo(model, param_config, hf_state)
x = torch.randint(
0,