Uv workflow improvements (#531)

* Uv workflow improvements * Uv workflow improvements * linter improvements * pytproject.toml fixes * pytproject.toml fixes * pytproject.toml fixes * pytproject.toml fixes * pytproject.toml fixes * pytproject.toml fixes * windows fixes * windows fixes * windows fixes * windows fixes * windows fixes * windows fixes * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix * win32 fix
2026-04-10 12:33:42 +00:00 · 2025-02-16 13:16:51 -06:00
parent 2f0afedaa7
commit 5016499d1d
29 changed files with 382 additions and 321 deletions
--- a/ch05/02_alternative_weight_loading/weight-loading-hf-safetensors.ipynb
+++ b/ch05/02_alternative_weight_loading/weight-loading-hf-safetensors.ipynb
@@ -95,7 +95,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from previous_chapters import GPTModel, generate_text_simple"
+    "from previous_chapters import GPTModel"
   ]
  },
  {
@@ -242,7 +242,6 @@
   "outputs": [],
   "source": [
    "import torch\n",
-    "from previous_chapters import GPTModel\n",
    "\n",
    "\n",
    "gpt = GPTModel(BASE_CONFIG)\n",
@@ -306,7 +305,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/ch05/02_alternative_weight_loading/weight-loading-hf-transformers.ipynb
+++ b/ch05/02_alternative_weight_loading/weight-loading-hf-transformers.ipynb
@@ -217,8 +217,8 @@
    "        gpt.trf_blocks[b].norm2.scale = assign_check(gpt.trf_blocks[b].norm2.scale, d[f\"h.{b}.ln_2.weight\"])\n",
    "        gpt.trf_blocks[b].norm2.shift = assign_check(gpt.trf_blocks[b].norm2.shift, d[f\"h.{b}.ln_2.bias\"])\n",
    "    \n",
-    "        gpt.final_norm.scale = assign_check(gpt.final_norm.scale, d[f\"ln_f.weight\"])\n",
-    "        gpt.final_norm.shift = assign_check(gpt.final_norm.shift, d[f\"ln_f.bias\"])\n",
+    "        gpt.final_norm.scale = assign_check(gpt.final_norm.scale, d[\"ln_f.weight\"])\n",
+    "        gpt.final_norm.shift = assign_check(gpt.final_norm.shift, d[\"ln_f.bias\"])\n",
    "        gpt.out_head.weight = assign_check(gpt.out_head.weight, d[\"wte.weight\"])"
   ]
  },
@@ -293,7 +293,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
+++ b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
@@ -1114,7 +1114,6 @@
   },
   "outputs": [],
   "source": [
-    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import tiktoken\n",
@@ -2633,7 +2632,7 @@
   "source": [
    "weights_file = hf_hub_download(\n",
    "    repo_id=\"meta-llama/Llama-3.2-1B\",\n",
-    "    filename=f\"model.safetensors\",\n",
+    "    filename=\"model.safetensors\",\n",
    "    local_dir=\"Llama-3.2-1B\"\n",
    ")\n",
    "current_weights = load_file(weights_file)\n",
@@ -2747,7 +2746,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.16"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
--- a/ch05/07_gpt_to_llama/standalone-llama32.ipynb
+++ b/ch05/07_gpt_to_llama/standalone-llama32.ipynb
@@ -993,7 +993,7 @@
    "if LLAMA_SIZE_STR == \"1B\":\n",
    "    weights_file = hf_hub_download(\n",
    "        repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
-    "        filename=f\"model.safetensors\",\n",
+    "        filename=\"model.safetensors\",\n",
    "        local_dir=f\"Llama-3.2-{LLAMA_SIZE_STR}-Instruct\"\n",
    "    )\n",
    "    combined_weights = load_file(weights_file)\n",
@@ -1213,7 +1213,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.16"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {