Switch from urllib to requests to improve reliability (#867)

* Switch from urllib to requests to improve reliability
* Keep ruff linter-specific
* update
* update
* update
2026-04-10 12:33:42 +00:00 · 2025-10-07 15:22:59 -05:00
parent 8552565bda
commit 7bd263144e
47 changed files with 592 additions and 436 deletions
--- a/.github/workflows/basic-tests-latest-python.yml
+++ b/.github/workflows/basic-tests-latest-python.yml
@@ -38,14 +38,14 @@ jobs:
    - name: Test Selected Python Scripts
      run: |
        source .venv/bin/activate
-        pytest --ruff setup/02_installing-python-libraries/tests.py
+        pytest setup/02_installing-python-libraries/tests.py
-        pytest --ruff ch04/01_main-chapter-code/tests.py
+        pytest ch04/01_main-chapter-code/tests.py
-        pytest --ruff ch05/01_main-chapter-code/tests.py
+        pytest ch05/01_main-chapter-code/tests.py
-        pytest --ruff ch06/01_main-chapter-code/tests.py
+        pytest ch06/01_main-chapter-code/tests.py
    - name: Validate Selected Jupyter Notebooks
      run: |
        source .venv/bin/activate
-        pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+        pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-        pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+        pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-        pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+        pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-linux-uv.yml
+++ b/.github/workflows/basic-tests-linux-uv.yml
@@ -47,24 +47,24 @@ jobs:
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch04/03_kv-cache/tests.py
+          pytest ch04/03_kv-cache/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
+          pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
+          pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
-          pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
+          pytest ch05/11_qwen3/tests/test_qwen3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_kv_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_kv_nb.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py
      - name: Validate Selected Jupyter Notebooks (uv)
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
      - name: Test Selected Bonus Materials
        shell: bash
--- a/.github/workflows/basic-tests-macos-uv.yml
+++ b/.github/workflows/basic-tests-macos-uv.yml
@@ -47,20 +47,20 @@ jobs:
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
+          pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
+          pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
-          pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
+          pytest ch05/11_qwen3/tests/test_qwen3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_kv_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_kv_nb.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py
      - name: Validate Selected Jupyter Notebooks (uv)
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-old-pytorch.yml
+++ b/.github/workflows/basic-tests-old-pytorch.yml
@@ -43,14 +43,14 @@ jobs:
    - name: Test Selected Python Scripts
      run: |
        source .venv/bin/activate
-        pytest --ruff setup/02_installing-python-libraries/tests.py
+        pytest setup/02_installing-python-libraries/tests.py
-        pytest --ruff ch04/01_main-chapter-code/tests.py
+        pytest ch04/01_main-chapter-code/tests.py
-        pytest --ruff ch05/01_main-chapter-code/tests.py
+        pytest ch05/01_main-chapter-code/tests.py
-        pytest --ruff ch06/01_main-chapter-code/tests.py
+        pytest ch06/01_main-chapter-code/tests.py
    - name: Validate Selected Jupyter Notebooks
      run: |
        source .venv/bin/activate
-        pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+        pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-        pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+        pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-        pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+        pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-pip.yml
+++ b/.github/workflows/basic-tests-pip.yml
@@ -46,14 +46,14 @@ jobs:
      - name: Test Selected Python Scripts
        run: |
          source .venv/bin/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py
      - name: Validate Selected Jupyter Notebooks
        run: |
          source .venv/bin/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-pixi.yml
+++ b/.github/workflows/basic-tests-pixi.yml
@@ -47,14 +47,14 @@ jobs:
      - name: Test Selected Python Scripts
        shell: pixi run --environment tests bash -e {0}
        run: |
-          pytest --ruff setup/02_installing-python-libraries/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py
      - name: Validate Selected Jupyter Notebooks
        shell: pixi run --environment tests bash -e {0}
        run: |
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-pytorch-rc.yml
+++ b/.github/workflows/basic-tests-pytorch-rc.yml
@@ -39,14 +39,14 @@ jobs:
    - name: Test Selected Python Scripts
      run: |
        source .venv/bin/activate
-        pytest --ruff setup/02_installing-python-libraries/tests.py
+        pytest setup/02_installing-python-libraries/tests.py
-        pytest --ruff ch04/01_main-chapter-code/tests.py
+        pytest ch04/01_main-chapter-code/tests.py
-        pytest --ruff ch05/01_main-chapter-code/tests.py
+        pytest ch05/01_main-chapter-code/tests.py
-        pytest --ruff ch06/01_main-chapter-code/tests.py
+        pytest ch06/01_main-chapter-code/tests.py
    - name: Validate Selected Jupyter Notebooks
      run: |
        source .venv/bin/activate
-        pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+        pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-        pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+        pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-        pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+        pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-windows-uv-pip.yml
+++ b/.github/workflows/basic-tests-windows-uv-pip.yml
@@ -49,18 +49,18 @@ jobs:
        shell: bash
        run: |
          source .venv/Scripts/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
+          pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
+          pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
-          pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
+          pytest ch05/11_qwen3/tests/test_qwen3_nb.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py
      - name: Run Jupyter Notebook Tests
        shell: bash
        run: |
          source .venv/Scripts/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/appendix-D/01_main-chapter-code/appendix-D.ipynb
+++ b/appendix-D/01_main-chapter-code/appendix-D.ipynb
@@ -121,19 +121,40 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "file_path = \"the-verdict.txt\"\n",
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "if not os.path.exists(file_path):\n",
    "    response = requests.get(url, timeout=30)\n",
    "    response.raise_for_status()\n",
    "    text_data = response.text\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "        file.write(text_data)\n",
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        text_data = file.read()\n",
    "\n",
    "# The book originally used the following code below\n",
    "# However, urllib uses older protocol settings that\n",
    "# can cause problems for some readers using a VPN.\n",
    "# The `requests` version above is more robust\n",
    "# in that regard.\n",
    "\n",
    "\"\"\"\n",
    "import os\n",
    "import urllib.request\n",
    "\n",
    "if not os.path.exists(file_path):\n",
    "    with urllib.request.urlopen(url) as response:\n",
    "        text_data = response.read().decode('utf-8')\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "        file.write(text_data)\n",
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "        text_data = file.read()"
+    "        text_data = file.read()\n",
    "\"\"\""
   ]
  },
  {
--- a/appendix-E/01_main-chapter-code/appendix-E.ipynb
+++ b/appendix-E/01_main-chapter-code/appendix-E.ipynb
@@ -190,7 +190,8 @@
    }
   ],
   "source": [
-    "import urllib\n",
+    "# import urllib\n",
    "import requests\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "from previous_chapters import (\n",
@@ -215,13 +216,20 @@
    "extracted_path = \"sms_spam_collection\"\n",
    "data_file_path = Path(extracted_path) / \"SMSSpamCollection.tsv\"\n",
    "\n",
    "\n",
    "try:\n",
    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
-    "except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+    "except (requests.exceptions.RequestException, TimeoutError) as e:\n",
    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
    "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
    "\n",
    "# The book originally used\n",
    "# except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
    "# in the code above.\n",
    "# However, some VPN users reported issues with `urllib`, so the code was updated\n",
    "# to use `requests` instead\n",
    "\n",
    "df = pd.read_csv(data_file_path, sep=\"\\t\", header=None, names=[\"Label\", \"Text\"])\n",
    "balanced_df = create_balanced_dataset(df)\n",
    "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",
--- a/appendix-E/01_main-chapter-code/previous_chapters.py
+++ b/appendix-E/01_main-chapter-code/previous_chapters.py
@@ -9,12 +9,12 @@
 import os
 from pathlib import Path
 import urllib
 import zipfile
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import requests
 import tiktoken
 import torch
 import torch.nn as nn
@@ -367,9 +367,12 @@ def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):
        return
    # Downloading the file
-    with urllib.request.urlopen(url) as response:
+    response = requests.get(url, stream=True, timeout=60)
-        with open(zip_path, "wb") as out_file:
+    response.raise_for_status()
-            out_file.write(response.read())
+    with open(zip_path, "wb") as out_file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                out_file.write(chunk)
    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
--- a/ch02/01_main-chapter-code/ch02.ipynb
+++ b/ch02/01_main-chapter-code/ch02.ipynb
@@ -163,6 +163,30 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "\n",
    "if not os.path.exists(\"the-verdict.txt\"):\n",
    "    url = (\n",
    "        \"https://raw.githubusercontent.com/rasbt/\"\n",
    "        \"LLMs-from-scratch/main/ch02/01_main-chapter-code/\"\n",
    "        \"the-verdict.txt\"\n",
    "    )\n",
    "    file_path = \"the-verdict.txt\"\n",
    "\n",
    "    response = requests.get(url, timeout=30)\n",
    "    response.raise_for_status()\n",
    "    with open(file_path, \"wb\") as f:\n",
    "        f.write(response.content)\n",
    "\n",
    "\n",
    "# The book originally used the following code below\n",
    "# However, urllib uses older protocol settings that\n",
    "# can cause problems for some readers using a VPN.\n",
    "# The `requests` version above is more robust\n",
    "# in that regard.\n",
    "\n",
    "\"\"\"\n",
    "import os\n",
    "import urllib.request\n",
    "\n",
@@ -171,7 +195,8 @@
    "           \"LLMs-from-scratch/main/ch02/01_main-chapter-code/\"\n",
    "           \"the-verdict.txt\")\n",
    "    file_path = \"the-verdict.txt\"\n",
-    "    urllib.request.urlretrieve(url, file_path)"
+    "    urllib.request.urlretrieve(url, file_path)\n",
    "\"\"\""
   ]
  },
  {
--- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
+++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
@@ -823,7 +823,7 @@
   ],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "def download_file_if_absent(url, filename, search_dirs):\n",
    "    for directory in search_dirs:\n",
@@ -834,13 +834,19 @@
    "\n",
    "    target_path = os.path.join(search_dirs[0], filename)\n",
    "    try:\n",
-    "        with urllib.request.urlopen(url) as response, open(target_path, \"wb\") as out_file:\n",
+    "        response = requests.get(url, stream=True, timeout=60)\n",
-    "            out_file.write(response.read())\n",
+    "        response.raise_for_status()\n",
    "        with open(target_path, \"wb\") as out_file:\n",
    "            for chunk in response.iter_content(chunk_size=8192):\n",
    "                if chunk:\n",
    "                    out_file.write(chunk)\n",
    "        print(f\"Downloaded {filename} to {target_path}\")\n",
    "    except Exception as e:\n",
    "        print(f\"Failed to download {filename}. Error: {e}\")\n",
    "\n",
    "    return target_path\n",
    "\n",
    "\n",
    "verdict_path = download_file_if_absent(\n",
    "    url=(\n",
    "         \"https://raw.githubusercontent.com/rasbt/\"\n",
--- a/ch05/01_main-chapter-code/ch05.ipynb
+++ b/ch05/01_main-chapter-code/ch05.ipynb
@@ -793,19 +793,43 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "file_path = \"the-verdict.txt\"\n",
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "if not os.path.exists(file_path):\n",
-    "    with urllib.request.urlopen(url) as response:\n",
+    "    response = requests.get(url, timeout=30)\n",
-    "        text_data = response.read().decode('utf-8')\n",
+    "    response.raise_for_status()\n",
    "    text_data = response.text\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "        file.write(text_data)\n",
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "        text_data = file.read()"
+    "        text_data = file.read()\n",
    "\n",
    "\n",
    "# The book originally used the following code below\n",
    "# However, urllib uses older protocol settings that\n",
    "# can cause problems for some readers using a VPN.\n",
    "# The `requests` version above is more robust\n",
    "# in that regard.\n",
    "\n",
    "        \n",
    "# import os\n",
    "# import urllib.request\n",
    "\n",
    "# file_path = \"the-verdict.txt\"\n",
    "# url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "# if not os.path.exists(file_path):\n",
    "#     with urllib.request.urlopen(url) as response:\n",
    "#         text_data = response.read().decode('utf-8')\n",
    "#     with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "#         file.write(text_data)\n",
    "# else:\n",
    "#     with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "#         text_data = file.read()"
   ]
  },
  {
--- a/ch05/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch05/01_main-chapter-code/exercise-solutions.ipynb
@@ -491,7 +491,7 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "from previous_chapters import create_dataloader_v1\n",
    "\n",
    "\n",
@@ -499,6 +499,25 @@
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "if not os.path.exists(file_path):\n",
    "    response = requests.get(url, timeout=30)\n",
    "    response.raise_for_status()\n",
    "    text_data = response.text\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "        file.write(text_data)\n",
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        text_data = file.read()\n",
    "\n",
    "# The book originally used the following code below\n",
    "# However, urllib uses older protocol settings that\n",
    "# can cause problems for some readers using a VPN.\n",
    "# The `requests` version above is more robust\n",
    "# in that regard.\n",
    "\n",
    "\"\"\"\n",
    "import urllib.request\n",
    "\n",
    "if not os.path.exists(file_path):\n",
    "    with urllib.request.urlopen(url) as response:\n",
    "        text_data = response.read().decode('utf-8')\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
@@ -506,6 +525,7 @@
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        text_data = file.read()\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "# Train/validation ratio\n",
--- a/ch05/01_main-chapter-code/gpt_download.py
+++ b/ch05/01_main-chapter-code/gpt_download.py
@@ -5,9 +5,8 @@
 import os
 import urllib.request
-# import requests
+import requests
 import json
 import numpy as np
 import tensorflow as tf
@@ -48,44 +47,40 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
    def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
+        response = requests.get(download_url, stream=True, timeout=60)
-            # Get the total file size from headers, defaulting to 0 if not present
+        response.raise_for_status()
            file_size = int(response.headers.get("Content-Length", 0))
-            # Check if file exists and has the same size
+        file_size = int(response.headers.get("Content-Length", 0))
            if os.path.exists(destination):
                file_size_local = os.path.getsize(destination)
                if file_size == file_size_local:
                    print(f"File already exists and is up-to-date: {destination}")
                    return True  # Indicate success without re-downloading
-            block_size = 1024  # 1 Kilobyte
+        # Check if file exists and has same size
        if os.path.exists(destination):
            file_size_local = os.path.getsize(destination)
            if file_size and file_size == file_size_local:
                print(f"File already exists and is up-to-date: {destination}")
                return True
-            # Initialize the progress bar with total file size
+        block_size = 1024  # 1 KB
-            progress_bar_description = os.path.basename(download_url)
+        desc = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
-                with open(destination, "wb") as file:
+            with open(destination, "wb") as file:
-                    while True:
+                for chunk in response.iter_content(chunk_size=block_size):
-                        chunk = response.read(block_size)
+                    if chunk:
                        if not chunk:
                            break
                        file.write(chunk)
                        progress_bar.update(len(chunk))
-            return True
+        return True
    try:
        if _attempt_download(url):
            return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
            print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
            try:
                if _attempt_download(backup_url):
                    return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                pass
        # If we reach here, both attempts have failed
        error_message = (
            f"Failed to download from both primary URL ({url})"
            f"{' and backup URL (' + backup_url + ')' if backup_url else ''}."
--- a/ch05/01_main-chapter-code/gpt_generate.py
+++ b/ch05/01_main-chapter-code/gpt_generate.py
@@ -7,9 +7,8 @@ import argparse
 import json
 import numpy as np
 import os
 import urllib.request
-# import requests
+import requests
 import tensorflow as tf
 import tiktoken
 import torch
@@ -60,18 +59,18 @@ def download_and_load_gpt2(model_size, models_dir):
    return settings, params
 """
 def download_file(url, destination):
-    # Send a GET request to download the file in streaming mode
+    # Send a GET request to download the file
-    response = requests.get(url, stream=True)
+    response = requests.get(url, stream=True, timeout=60)
    response.raise_for_status()
    # Get the total file size from headers, defaulting to 0 if not present
-    file_size = int(response.headers.get("content-length", 0))
+    file_size = int(response.headers.get("Content-Length", 0))
    # Check if file exists and has the same size
    if os.path.exists(destination):
        file_size_local = os.path.getsize(destination)
-        if file_size == file_size_local:
+        if file_size and file_size == file_size_local:
            print(f"File already exists and is up-to-date: {destination}")
            return
@@ -79,43 +78,12 @@ def download_file(url, destination):
    block_size = 1024  # 1 Kilobyte
    # Initialize the progress bar with total file size
-    progress_bar_description = url.split("/")[-1]  # Extract filename from URL
+    progress_bar_description = os.path.basename(url)
    with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
        # Open the destination file in binary write mode
        with open(destination, "wb") as file:
-            # Iterate over the file data in chunks
+            for chunk in response.iter_content(chunk_size=block_size):
-            for chunk in response.iter_content(block_size):
+                if chunk:
                progress_bar.update(len(chunk))  # Update progress bar
                file.write(chunk)  # Write the chunk to the file
 """
 def download_file(url, destination):
    # Send a GET request to download the file
    with urllib.request.urlopen(url) as response:
        # Get the total file size from headers, defaulting to 0 if not present
        file_size = int(response.headers.get("Content-Length", 0))
        # Check if file exists and has the same size
        if os.path.exists(destination):
            file_size_local = os.path.getsize(destination)
            if file_size == file_size_local:
                print(f"File already exists and is up-to-date: {destination}")
                return
        # Define the block size for reading the file
        block_size = 1024  # 1 Kilobyte
        # Initialize the progress bar with total file size
        progress_bar_description = os.path.basename(url)  # Extract filename from URL
        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
            # Open the destination file in binary write mode
            with open(destination, "wb") as file:
                # Read the file in chunks and write to destination
                while True:
                    chunk = response.read(block_size)
                    if not chunk:
                        break
                    file.write(chunk)
                    progress_bar.update(len(chunk))  # Update progress bar
--- a/ch05/01_main-chapter-code/gpt_train.py
+++ b/ch05/01_main-chapter-code/gpt_train.py
@@ -5,8 +5,8 @@
 import matplotlib.pyplot as plt
 import os
 import requests
 import torch
 import urllib.request
 import tiktoken
@@ -141,14 +141,14 @@ def main(gpt_config, settings):
    url = "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt"
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, timeout=30)
-            text_data = response.read().decode('utf-8')
+        response.raise_for_status()
        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
        with open(file_path, "r", encoding="utf-8") as file:
            text_data = file.read()
    ##############################
    # Initialize model
    ##############################
--- a/ch05/01_main-chapter-code/tests.py
+++ b/ch05/01_main-chapter-code/tests.py
@@ -7,9 +7,7 @@
 import pytest
 from gpt_train import main
-import http.client
+import requests
 from urllib.parse import urlparse
@pytest.fixture
 def gpt_config():
@@ -43,23 +41,23 @@ def test_main(gpt_config, other_settings):
 def check_file_size(url, expected_size):
-    parsed_url = urlparse(url)
+    try:
-    if parsed_url.scheme == "https":
+        response = requests.head(url, allow_redirects=True, timeout=30)
-        conn = http.client.HTTPSConnection(parsed_url.netloc)
+        if response.status_code != 200:
-    else:
+            return False, f"{url} not accessible"
        conn = http.client.HTTPConnection(parsed_url.netloc)
-    conn.request("HEAD", parsed_url.path)
+        size = response.headers.get("Content-Length")
-    response = conn.getresponse()
+        if size is None:
-    if response.status != 200:
+            return False, "Content-Length header is missing"
-        return False, f"{url} not accessible"
+
-    size = response.getheader("Content-Length")
+        size = int(size)
-    if size is None:
+        if size != expected_size:
-        return False, "Content-Length header is missing"
+            return False, f"{url} file has expected size {expected_size}, but got {size}"
-    size = int(size)
+
-    if size != expected_size:
+        return True, f"{url} file size is correct"
-        return False, f"{url} file has expected size {expected_size}, but got {size}"
+
-    return True, f"{url} file size is correct"
+    except requests.exceptions.RequestException as e:
        return False, f"Failed to access {url}: {e}"
 def test_model_files():
--- a/ch05/02_alternative_weight_loading/weight-loading-hf-safetensors.ipynb
+++ b/ch05/02_alternative_weight_loading/weight-loading-hf-safetensors.ipynb
@@ -134,7 +134,7 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "from safetensors.torch import load_file\n",
    "\n",
    "URL_DIR = {\n",
@@ -149,7 +149,10 @@
    "\n",
    "# Download file\n",
    "if not os.path.exists(output_file):\n",
-    "    urllib.request.urlretrieve(url, output_file)\n",
+    "    response = requests.get(url, timeout=30)\n",
    "    response.raise_for_status()\n",
    "    with open(output_file, \"wb\") as f:\n",
    "        f.write(response.content)\n",
    "\n",
    "# Load file\n",
    "state_dict = load_file(output_file)"
--- a/ch05/02_alternative_weight_loading/weight-loading-pytorch.ipynb
+++ b/ch05/02_alternative_weight_loading/weight-loading-pytorch.ipynb
@@ -144,12 +144,15 @@
   ],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "url = f\"https://huggingface.co/rasbt/gpt2-from-scratch-pytorch/resolve/main/{file_name}\"\n",
    "\n",
    "if not os.path.exists(file_name):\n",
-    "    urllib.request.urlretrieve(url, file_name)\n",
+    "    response = requests.get(url, timeout=60)\n",
    "    response.raise_for_status()\n",
    "    with open(file_name, \"wb\") as f:\n",
    "        f.write(response.content)\n",
    "    print(f\"Downloaded to {file_name}\")"
   ]
  },
@@ -276,12 +279,15 @@
   ],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "url = f\"https://huggingface.co/rasbt/gpt2-from-scratch-pytorch/resolve/main/{file_name}\"\n",
    "\n",
    "if not os.path.exists(file_name):\n",
-    "    urllib.request.urlretrieve(url, file_name)\n",
+    "    response = requests.get(url, timeout=60)\n",
    "    response.raise_for_status()\n",
    "    with open(file_name, \"wb\") as f:\n",
    "        f.write(response.content)\n",
    "    print(f\"Downloaded to {file_name}\")"
   ]
  },
--- a/ch05/07_gpt_to_llama/README.md
+++ b/ch05/07_gpt_to_llama/README.md
@@ -58,12 +58,17 @@ This automatically downloads the weight file based on the model choice above:
 ```python
 import os
-import urllib.request
+import requests
 url = f"https://huggingface.co/rasbt/llama-3.2-from-scratch/resolve/main/{MODEL_FILE}"
 if not os.path.exists(MODEL_FILE):
-    urllib.request.urlretrieve(url, MODEL_FILE)
+    response = requests.get(url, stream=True, timeout=60)
    response.raise_for_status()
    with open(MODEL_FILE, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
    print(f"Downloaded to {MODEL_FILE}")
 ```
--- a/ch05/10_llm-training-speed/01_opt_single_gpu.py
+++ b/ch05/10_llm-training-speed/01_opt_single_gpu.py
@@ -6,9 +6,9 @@
 import os
 import time
 import urllib.request
 import matplotlib.pyplot as plt
 import requests
 import torch
 import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
@@ -397,8 +397,9 @@ def main(gpt_config, settings):
    url = "https://www.gutenberg.org/cache/epub/145/pg145.txt"
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, timeout=30)
-            text_data = response.read().decode('utf-8')
+        response.raise_for_status()
        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
--- a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
+++ b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
@@ -6,9 +6,9 @@
 import os
 import time
 import urllib.request
 import matplotlib.pyplot as plt
 import requests
 import torch
 import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
@@ -468,11 +468,11 @@ def main(gpt_config, settings, rank, world_size):
    # NEW: Only download 1 time
    if rank == 0:
        if not os.path.exists(file_path):
-            with urllib.request.urlopen(url) as response:
+            response = requests.get(url, timeout=30)
-                text_data = response.read().decode('utf-8')
+            response.raise_for_status()
            text_data = response.text
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(text_data)
    # NEW: All processes wait until rank 0 is done, using the GPU index.
    torch.distributed.barrier(device_ids=[device.index])
--- a/ch06/01_main-chapter-code/ch06.ipynb
+++ b/ch06/01_main-chapter-code/ch06.ipynb
@@ -186,6 +186,56 @@
    }
   ],
   "source": [
    "import requests\n",
    "import zipfile\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "url = \"https://archive.ics.uci.edu/static/public/228/sms+spam+collection.zip\"\n",
    "zip_path = \"sms_spam_collection.zip\"\n",
    "extracted_path = \"sms_spam_collection\"\n",
    "data_file_path = Path(extracted_path) / \"SMSSpamCollection.tsv\"\n",
    "\n",
    "\n",
    "def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):\n",
    "    if data_file_path.exists():\n",
    "        print(f\"{data_file_path} already exists. Skipping download and extraction.\")\n",
    "        return\n",
    "\n",
    "    # Downloading the file\n",
    "    response = requests.get(url, stream=True, timeout=60)\n",
    "    response.raise_for_status()\n",
    "    with open(zip_path, \"wb\") as out_file:\n",
    "        for chunk in response.iter_content(chunk_size=8192):\n",
    "            if chunk:\n",
    "                out_file.write(chunk)\n",
    "\n",
    "    # Unzipping the file\n",
    "    with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n",
    "        zip_ref.extractall(extracted_path)\n",
    "\n",
    "    # Add .tsv file extension\n",
    "    original_file_path = Path(extracted_path) / \"SMSSpamCollection\"\n",
    "    os.rename(original_file_path, data_file_path)\n",
    "    print(f\"File downloaded and saved as {data_file_path}\")\n",
    "\n",
    "\n",
    "try:\n",
    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
    "except (requests.exceptions.RequestException, TimeoutError) as e:\n",
    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
    "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
    "\n",
    "\n",
    "\n",
    "# The book originally used the code below.\n",
    "# However, urllib uses older protocol settings that\n",
    "# can cause problems for some readers using a VPN.\n",
    "# The `requests` version above is more robust\n",
    "# in that regard.\n",
    "\n",
    "\"\"\"\n",
    "import urllib.request\n",
    "import zipfile\n",
    "import os\n",
@@ -220,7 +270,8 @@
    "except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
    "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
-    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path) "
+    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
    "\"\"\""
   ]
  },
  {
--- a/ch06/01_main-chapter-code/gpt_class_finetune.py
+++ b/ch06/01_main-chapter-code/gpt_class_finetune.py
@@ -5,7 +5,7 @@
 # This is a summary file containing the main takeaways from chapter 6.
-import urllib.request
+import requests
 import zipfile
 import os
 from pathlib import Path
@@ -27,9 +27,12 @@ def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):
        return
    # Downloading the file
-    with urllib.request.urlopen(url) as response:
+    response = requests.get(url, stream=True, timeout=60)
-        with open(zip_path, "wb") as out_file:
+    response.raise_for_status()
-            out_file.write(response.read())
+    with open(zip_path, "wb") as out_file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                out_file.write(chunk)
    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -259,7 +262,7 @@ if __name__ == "__main__":
    try:
        download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)
-    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+    except (requests.exceptions.RequestException, TimeoutError) as e:
        print(f"Primary URL failed: {e}. Trying backup URL...")
        url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
        download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)
--- a/ch06/02_bonus_additional-experiments/additional_experiments.py
+++ b/ch06/02_bonus_additional-experiments/additional_experiments.py
@@ -8,10 +8,10 @@ import math
 import os
 from pathlib import Path
 import time
 import urllib.request
 import zipfile
 import pandas as pd
 import requests
 import tiktoken
 import torch
 from torch.utils.data import DataLoader
@@ -113,9 +113,12 @@ def download_and_unzip(url, zip_path, extract_to, new_file_path):
        return
    # Downloading the file
-    with urllib.request.urlopen(url) as response:
+    response = requests.get(url, stream=True, timeout=60)
-        with open(zip_path, "wb") as out_file:
+    response.raise_for_status()
-            out_file.write(response.read())
+    with open(zip_path, "wb") as out_file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                out_file.write(chunk)
    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -608,11 +611,11 @@ if __name__ == "__main__":
    base_path = Path(".")
    file_names = ["train.csv", "validation.csv", "test.csv"]
    all_exist = all((base_path / file_name).exists() for file_name in file_names)
-
+    
    if not all_exist:
        try:
            download_and_unzip(url, zip_path, extract_to, new_file_path)
-        except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+        except (requests.exceptions.RequestException, TimeoutError) as e:
            print(f"Primary URL failed: {e}. Trying backup URL...")
            backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
            download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
--- a/ch06/03_bonus_imdb-classification/download_prepare_dataset.py
+++ b/ch06/03_bonus_imdb-classification/download_prepare_dataset.py
@@ -7,7 +7,7 @@ import os
 import sys
 import tarfile
 import time
-import urllib.request
+import requests
 import pandas as pd
@@ -32,7 +32,15 @@ def download_and_extract_dataset(dataset_url, target_file, directory):
    if not os.path.exists(directory):
        if os.path.exists(target_file):
            os.remove(target_file)
-        urllib.request.urlretrieve(dataset_url, target_file, reporthook)
+
        response = requests.get(dataset_url, stream=True, timeout=60)
        response.raise_for_status()
        with open(target_file, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print("\nExtracting dataset ...")
        with tarfile.open(target_file, "r:gz") as tar:
            tar.extractall()
--- a/ch06/03_bonus_imdb-classification/train_bert_hf_spam.py
+++ b/ch06/03_bonus_imdb-classification/train_bert_hf_spam.py
@@ -7,7 +7,7 @@ import argparse
 import os
 from pathlib import Path
 import time
-import urllib
+import requests
 import zipfile
 import pandas as pd
@@ -62,9 +62,12 @@ def download_and_unzip(url, zip_path, extract_to, new_file_path):
        return
    # Downloading the file
-    with urllib.request.urlopen(url) as response:
+    response = requests.get(url, stream=True, timeout=60)
-        with open(zip_path, "wb") as out_file:
+    response.raise_for_status()
-            out_file.write(response.read())
+    with open(zip_path, "wb") as out_file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                out_file.write(chunk)
    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -412,7 +415,7 @@ if __name__ == "__main__":
    if not all_exist:
        try:
            download_and_unzip(url, zip_path, extract_to, new_file_path)
-        except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+        except (requests.exceptions.RequestException, TimeoutError) as e:
            print(f"Primary URL failed: {e}. Trying backup URL...")
            backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
            download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
--- a/ch07/01_main-chapter-code/ch07.ipynb
+++ b/ch07/01_main-chapter-code/ch07.ipynb
@@ -169,10 +169,33 @@
   "source": [
    "import json\n",
    "import os\n",
-    "import urllib\n",
+    "import requests\n",
    "\n",
    "\n",
    "def download_and_load_file(file_path, url):\n",
    "    if not os.path.exists(file_path):\n",
    "        response = requests.get(url, timeout=30)\n",
    "        response.raise_for_status()\n",
    "        text_data = response.text\n",
    "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "            file.write(text_data)\n",
    "\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        data = json.load(file)\n",
    "\n",
    "    return data\n",
    "\n",
    "\n",
    "# The book originally used the code below.\n",
    "# However, urllib uses older protocol settings that\n",
    "# can cause problems for some readers using a VPN.\n",
    "# The `requests` version above is more robust\n",
    "# in that regard.\n",
    "\n",
    "\"\"\"\n",
    "import urllib\n",
    "\n",
    "def download_and_load_file(file_path, url):\n",
    "\n",
    "    if not os.path.exists(file_path):\n",
    "        with urllib.request.urlopen(url) as response:\n",
@@ -180,15 +203,15 @@
    "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "            file.write(text_data)\n",
    "\n",
-    "    # The book originally contained this unnecessary \"else\" clause:\n",
+    "    else:\n",
-    "    #else:\n",
+    "        with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "    #    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "            text_data = file.read()\n",
    "    #        text_data = file.read()\n",
    "\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        data = json.load(file)\n",
    "\n",
    "    return data\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "file_path = \"instruction-data.json\"\n",
@@ -2490,7 +2513,8 @@
    }
   ],
   "source": [
-    "import urllib.request\n",
+    "import requests  # noqa: F811\n",
    "# import urllib.request\n",
    "\n",
    "def query_model(\n",
    "    prompt,\n",
@@ -2512,7 +2536,8 @@
    "        }\n",
    "    }\n",
    "\n",
-    "\n",
+    "    \n",
    "    \"\"\"\n",
    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
    "    payload = json.dumps(data).encode(\"utf-8\")\n",
    "\n",
@@ -2536,6 +2561,26 @@
    "            response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
    "    \"\"\"\n",
    "\n",
    "    # The book originally used the commented-out code above, which is based\n",
    "    # on urllib. It generally works fine, but some readers reported\n",
    "    # issues with using urllib when using a (company) VPN.\n",
    "    # The code below uses the requests library, which doesn't seem\n",
    "    # to have these issues.\n",
    "\n",
    "    # Send the POST request\n",
    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
    "        r.raise_for_status()\n",
    "        response_data = \"\"\n",
    "        for line in r.iter_lines(decode_unicode=True):\n",
    "            if not line:\n",
    "                continue\n",
    "            response_json = json.loads(line)\n",
    "            if \"message\" in response_json:\n",
    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
    "\n",
    "\n",
    "model = \"llama3\"\n",
--- a/ch07/01_main-chapter-code/exercise_experiments.py
+++ b/ch07/01_main-chapter-code/exercise_experiments.py
@@ -12,10 +12,10 @@ import math
 import os
 import re
 import time
 import urllib
 import matplotlib.pyplot as plt
 from matplotlib.ticker import MaxNLocator
 import requests
 import tiktoken
 import torch
 from torch.utils.data import Dataset, DataLoader
@@ -234,17 +234,17 @@ def custom_collate_with_masking_fn(
 def download_and_load_file(file_path, url):
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, timeout=30)
-            text_data = response.read().decode("utf-8")
+        response.raise_for_status()
        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
        with open(file_path, "r", encoding="utf-8") as file:
            text_data = file.read()
-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data
--- a/ch07/01_main-chapter-code/gpt_download.py
+++ b/ch07/01_main-chapter-code/gpt_download.py
@@ -5,11 +5,10 @@
 import os
 import urllib.request
 # import requests
 import json
 import numpy as np
 import requests
 import tensorflow as tf
 from tqdm import tqdm
@@ -48,44 +47,40 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
    def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
+        response = requests.get(download_url, stream=True, timeout=60)
-            # Get the total file size from headers, defaulting to 0 if not present
+        response.raise_for_status()
            file_size = int(response.headers.get("Content-Length", 0))
-            # Check if file exists and has the same size
+        file_size = int(response.headers.get("Content-Length", 0))
            if os.path.exists(destination):
                file_size_local = os.path.getsize(destination)
                if file_size == file_size_local:
                    print(f"File already exists and is up-to-date: {destination}")
                    return True  # Indicate success without re-downloading
-            block_size = 1024  # 1 Kilobyte
+        # Check if file exists and has same size
        if os.path.exists(destination):
            file_size_local = os.path.getsize(destination)
            if file_size and file_size == file_size_local:
                print(f"File already exists and is up-to-date: {destination}")
                return True
-            # Initialize the progress bar with total file size
+        block_size = 1024  # 1 KB
-            progress_bar_description = os.path.basename(download_url)
+        desc = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
-                with open(destination, "wb") as file:
+            with open(destination, "wb") as file:
-                    while True:
+                for chunk in response.iter_content(chunk_size=block_size):
-                        chunk = response.read(block_size)
+                    if chunk:
                        if not chunk:
                            break
                        file.write(chunk)
                        progress_bar.update(len(chunk))
-            return True
+        return True
    try:
        if _attempt_download(url):
            return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
            print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
            try:
                if _attempt_download(backup_url):
                    return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                pass
        # If we reach here, both attempts have failed
        error_message = (
            f"Failed to download from both primary URL ({url})"
            f"{' and backup URL (' + backup_url + ')' if backup_url else ''}."
@@ -97,37 +92,6 @@ def download_file(url, destination, backup_url=None):
        print(f"An unexpected error occurred: {e}")
 # Alternative way using `requests`
 """
 def download_file(url, destination):
    # Send a GET request to download the file in streaming mode
    response = requests.get(url, stream=True)
    # Get the total file size from headers, defaulting to 0 if not present
    file_size = int(response.headers.get("content-length", 0))
    # Check if file exists and has the same size
    if os.path.exists(destination):
        file_size_local = os.path.getsize(destination)
        if file_size == file_size_local:
            print(f"File already exists and is up-to-date: {destination}")
            return
    # Define the block size for reading the file
    block_size = 1024  # 1 Kilobyte
    # Initialize the progress bar with total file size
    progress_bar_description = url.split("/")[-1]  # Extract filename from URL
    with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
        # Open the destination file in binary write mode
        with open(destination, "wb") as file:
            # Iterate over the file data in chunks
            for chunk in response.iter_content(block_size):
                progress_bar.update(len(chunk))  # Update progress bar
                file.write(chunk)  # Write the chunk to the file
 """
 def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
    # Initialize parameters dictionary with empty blocks for each layer
    params = {"blocks": [{} for _ in range(settings["n_layer"])]}
--- a/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
+++ b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
@@ -11,9 +11,9 @@ import json
 import os
 import re
 import time
 import urllib
 import matplotlib.pyplot as plt
 import requests
 import tiktoken
 import torch
 from torch.utils.data import Dataset, DataLoader
@@ -97,14 +97,14 @@ def custom_collate_fn(
 def download_and_load_file(file_path, url):
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, timeout=30)
-            text_data = response.read().decode("utf-8")
+        response.raise_for_status()
        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data
--- a/ch07/01_main-chapter-code/ollama_evaluate.py
+++ b/ch07/01_main-chapter-code/ollama_evaluate.py
@@ -8,7 +8,7 @@
 import json
 import psutil
 from tqdm import tqdm
-import urllib.request
+import requests
 def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
@@ -25,23 +25,16 @@ def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
        }
    }
-    # Convert the dictionary to a JSON formatted string and encode it to bytes
+    # Send the POST request
-    payload = json.dumps(data).encode("utf-8")
+    with requests.post(url, json=data, stream=True, timeout=30) as r:
-
+        r.raise_for_status()
-    # Create a request object, setting the method to POST and adding necessary headers
+        response_data = ""
-    request = urllib.request.Request(url, data=payload, method="POST")
+        for line in r.iter_lines(decode_unicode=True):
    request.add_header("Content-Type", "application/json")
    # Send the request and capture the response
    response_data = ""
    with urllib.request.urlopen(request) as response:
        # Read and decode the response
        while True:
            line = response.readline().decode("utf-8")
            if not line:
-                break
+                continue
            response_json = json.loads(line)
-            response_data += response_json["message"]["content"]
+            if "message" in response_json:
                response_data += response_json["message"]["content"]
    return response_data
--- a/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb
+++ b/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb
@@ -215,8 +215,8 @@
    }
   ],
   "source": [
    "import urllib.request\n",
    "import json\n",
    "import requests\n",
    "\n",
    "\n",
    "def query_model(prompt, model=\"llama3\", url=\"http://localhost:11434/api/chat\"):\n",
@@ -236,27 +236,19 @@
    "        }\n",
    "    }\n",
    "\n",
-    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
+    "    # Send the POST request\n",
-    "    payload = json.dumps(data).encode(\"utf-8\")\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
-    "\n",
+    "        r.raise_for_status()\n",
-    "    # Create a request object, setting the method to POST and adding necessary headers\n",
+    "        response_data = \"\"\n",
-    "    request = urllib.request.Request(url, data=payload, method=\"POST\")\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
    "    request.add_header(\"Content-Type\", \"application/json\")\n",
    "\n",
    "    # Send the request and capture the response\n",
    "    response_data = \"\"\n",
    "    with urllib.request.urlopen(request) as response:\n",
    "        # Read and decode the response\n",
    "        while True:\n",
    "            line = response.readline().decode(\"utf-8\")\n",
    "            if not line:\n",
-    "                break\n",
+    "                continue\n",
    "            response_json = json.loads(line)\n",
-    "            response_data += response_json[\"message\"][\"content\"]\n",
+    "            if \"message\" in response_json:\n",
    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
    "\n",
    "\n",
    "result = query_model(\"What do Llamas eat?\")\n",
    "print(result)"
   ]
@@ -640,7 +632,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
@@ -274,8 +274,8 @@
    }
   ],
   "source": [
    "import urllib.request\n",
    "import json\n",
    "import requests\n",
    "\n",
    "\n",
    "def query_model(prompt, model=\"llama3.1:70b\", url=\"http://localhost:11434/api/chat\"):\n",
@@ -294,23 +294,16 @@
    "        }\n",
    "    }\n",
    "\n",
-    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
+    "    # Send the POST request\n",
-    "    payload = json.dumps(data).encode(\"utf-8\")\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
-    "\n",
+    "        r.raise_for_status()\n",
-    "    # Create a request object, setting the method to POST and adding necessary headers\n",
+    "        response_data = \"\"\n",
-    "    request = urllib.request.Request(url, data=payload, method=\"POST\")\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
    "    request.add_header(\"Content-Type\", \"application/json\")\n",
    "\n",
    "    # Send the request and capture the response\n",
    "    response_data = \"\"\n",
    "    with urllib.request.urlopen(request) as response:\n",
    "        # Read and decode the response\n",
    "        while True:\n",
    "            line = response.readline().decode(\"utf-8\")\n",
    "            if not line:\n",
-    "                break\n",
+    "                continue\n",
    "            response_json = json.loads(line)\n",
-    "            response_data += response_json[\"message\"][\"content\"]\n",
+    "            if \"message\" in response_json:\n",
    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
    "\n",
@@ -587,7 +580,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
@@ -231,23 +231,21 @@
   "source": [
    "import json\n",
    "import os\n",
-    "import urllib\n",
+    "import requests\n",
    "\n",
    "\n",
    "def download_and_load_file(file_path, url):\n",
    "\n",
    "    if not os.path.exists(file_path):\n",
-    "        with urllib.request.urlopen(url) as response:\n",
+    "        response = requests.get(url, timeout=30)\n",
-    "            text_data = response.read().decode(\"utf-8\")\n",
+    "        response.raise_for_status()\n",
    "        text_data = response.text\n",
    "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "            file.write(text_data)\n",
    "    else:\n",
    "        with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "            text_data = file.read()\n",
    "\n",
-    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "    data = json.loads(text_data)\n",
    "        data = json.load(file)\n",
    "\n",
    "    return data\n",
    "\n",
    "\n",
--- a/ch07/05_dataset-generation/llama3-ollama.ipynb
+++ b/ch07/05_dataset-generation/llama3-ollama.ipynb
@@ -194,8 +194,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "import urllib.request\n",
    "import json\n",
    "import requests\n",
    "\n",
    "def query_model(prompt, model=\"llama3\", url=\"http://localhost:11434/api/chat\", role=\"user\"):\n",
    "    # Create the data payload as a dictionary\n",
@@ -209,25 +209,21 @@
    "        ]\n",
    "    }\n",
    "\n",
-    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
+    "    # Send the POST request\n",
-    "    payload = json.dumps(data).encode(\"utf-8\")\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
-    "\n",
+    "        r.raise_for_status()\n",
-    "    # Create a request object, setting the method to POST and adding necessary headers\n",
+    "        response_data = \"\"\n",
-    "    request = urllib.request.Request(url, data=payload, method=\"POST\")\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
    "    request.add_header(\"Content-Type\", \"application/json\")\n",
    "\n",
    "    # Send the request and capture the response\n",
    "    response_data = \"\"\n",
    "    with urllib.request.urlopen(request) as response:\n",
    "        # Read and decode the response\n",
    "        while True:\n",
    "            line = response.readline().decode(\"utf-8\")\n",
    "            if not line:\n",
-    "                break\n",
+    "                continue\n",
    "            response_json = json.loads(line)\n",
-    "            response_data += response_json[\"message\"][\"content\"]\n",
+    "            if \"message\" in response_json:\n",
    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
-    "    return response_data"
+    "    return response_data\n",
    "\n",
    "result = query_model(\"What do Llamas eat?\")\n",
    "print(result)"
   ]
  },
  {
@@ -498,7 +494,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/pkg/llms_from_scratch/ch05.py
+++ b/pkg/llms_from_scratch/ch05.py
@@ -7,11 +7,11 @@ from .ch04 import generate_text_simple
 import json
 import os
 import urllib.request
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.ticker import MaxNLocator
 import requests
 import torch
 from tqdm import tqdm
@@ -279,44 +279,40 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
    def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
+        response = requests.get(download_url, stream=True, timeout=60)
-            # Get the total file size from headers, defaulting to 0 if not present
+        response.raise_for_status()
            file_size = int(response.headers.get("Content-Length", 0))
-            # Check if file exists and has the same size
+        file_size = int(response.headers.get("Content-Length", 0))
            if os.path.exists(destination):
                file_size_local = os.path.getsize(destination)
                if file_size == file_size_local:
                    print(f"File already exists and is up-to-date: {destination}")
                    return True  # Indicate success without re-downloading
-            block_size = 1024  # 1 Kilobyte
+        # Check if file exists and has same size
        if os.path.exists(destination):
            file_size_local = os.path.getsize(destination)
            if file_size and file_size == file_size_local:
                print(f"File already exists and is up-to-date: {destination}")
                return True
-            # Initialize the progress bar with total file size
+        block_size = 1024  # 1 KB
-            progress_bar_description = os.path.basename(download_url)
+        desc = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
-                with open(destination, "wb") as file:
+            with open(destination, "wb") as file:
-                    while True:
+                for chunk in response.iter_content(chunk_size=block_size):
-                        chunk = response.read(block_size)
+                    if chunk:
                        if not chunk:
                            break
                        file.write(chunk)
                        progress_bar.update(len(chunk))
-            return True
+        return True
    try:
        if _attempt_download(url):
            return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
            print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
            try:
                if _attempt_download(backup_url):
                    return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                pass
        # If we reach here, both attempts have failed
        error_message = (
            f"Failed to download from both primary URL ({url})"
            f"{' and backup URL (' + backup_url + ')' if backup_url else ''}."
--- a/pkg/llms_from_scratch/ch06.py
+++ b/pkg/llms_from_scratch/ch06.py
@@ -4,11 +4,11 @@
 # Code: https://github.com/rasbt/LLMs-from-scratch
 import urllib.request
 import zipfile
 import os
 from pathlib import Path
 import requests
 import matplotlib.pyplot as plt
 from torch.utils.data import Dataset
 import torch
@@ -21,9 +21,12 @@ def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):
        return
    # Downloading the file
-    with urllib.request.urlopen(url) as response:
+    response = requests.get(url, stream=True, timeout=60)
-        with open(zip_path, "wb") as out_file:
+    response.raise_for_status()
-            out_file.write(response.read())
+    with open(zip_path, "wb") as out_file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                out_file.write(chunk)
    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
--- a/pkg/llms_from_scratch/ch07.py
+++ b/pkg/llms_from_scratch/ch07.py
@@ -6,7 +6,7 @@
 import json
 import os
 import psutil
-import urllib
+import requests
 import torch
 from tqdm import tqdm
@@ -14,24 +14,46 @@ from torch.utils.data import Dataset
 def download_and_load_file(file_path, url):
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, timeout=30)
-            text_data = response.read().decode("utf-8")
+        response.raise_for_status()
        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    # The book originally contained this unnecessary "else" clause:
    # else:
    #     with open(file_path, "r", encoding="utf-8") as file:
    #         text_data = file.read()
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data
 # The book originally used the following code below
 # However, urllib uses older protocol settings that
 # can cause problems for some readers using a VPN.
 # The `requests` version above is more robust
 # in that regard.
 # import urllib
 # def download_and_load_file(file_path, url):
 #     if not os.path.exists(file_path):
 #         with urllib.request.urlopen(url) as response:
 #             text_data = response.read().decode("utf-8")
 #         with open(file_path, "w", encoding="utf-8") as file:
 #             file.write(text_data)
 #     else:
 #         with open(file_path, "r", encoding="utf-8") as file:
 #             text_data = file.read()
 #     with open(file_path, "r", encoding="utf-8") as file:
 #         data = json.load(file)
 #     return data
 def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task. "
@@ -202,27 +224,16 @@ def query_model(
        }
    }
-    # Convert the dictionary to a JSON formatted string and encode it to bytes
+    # Send the POST request
-    payload = json.dumps(data).encode("utf-8")
+    with requests.post(url, json=data, stream=True, timeout=30) as r:
-
+        r.raise_for_status()
-    # Create a request object, setting the method to POST and adding necessary headers
+        response_data = ""
-    request = urllib.request.Request(
+        for line in r.iter_lines(decode_unicode=True):
        url,
        data=payload,
        method="POST"
    )
    request.add_header("Content-Type", "application/json")
    # Send the request and capture the response
    response_data = ""
    with urllib.request.urlopen(request) as response:
        # Read and decode the response
        while True:
            line = response.readline().decode("utf-8")
            if not line:
-                break
+                continue
            response_json = json.loads(line)
-            response_data += response_json["message"]["content"]
+            if "message" in response_json:
                response_data += response_json["message"]["content"]
    return response_data
--- a/pkg/llms_from_scratch/qwen3.py
+++ b/pkg/llms_from_scratch/qwen3.py
@@ -6,9 +6,9 @@
 import os
 import json
 import re
 import urllib.request
 from pathlib import Path
 import requests
 import torch
 import torch.nn as nn
@@ -660,7 +660,12 @@ def download_from_huggingface(repo_id, filename, local_dir, revision="main"):
        print(f"File already exists: {dest_path}")
    else:
        print(f"Downloading {url} to {dest_path}...")
-        urllib.request.urlretrieve(url, dest_path)
+        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        with open(dest_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    return dest_path
--- a/pkg/llms_from_scratch/tests/test_appendix_e.py
+++ b/pkg/llms_from_scratch/tests/test_appendix_e.py
@@ -12,9 +12,9 @@ from llms_from_scratch.ch06 import (
 from llms_from_scratch.appendix_e import replace_linear_with_lora
 from pathlib import Path
 import urllib
 import pandas as pd
 import requests
 import tiktoken
 import torch
 from torch.utils.data import DataLoader, Subset
@@ -35,7 +35,7 @@ def test_train_classifier_lora(tmp_path):
        download_and_unzip_spam_data(
            url, zip_path, extracted_path, data_file_path
        )
-    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+    except (requests.exceptions.RequestException, TimeoutError) as e:
        print(f"Primary URL failed: {e}. Trying backup URL...")
        backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
        download_and_unzip_spam_data(
--- a/pkg/llms_from_scratch/tests/test_ch02.py
+++ b/pkg/llms_from_scratch/tests/test_ch02.py
@@ -6,8 +6,8 @@
 from llms_from_scratch.ch02 import create_dataloader_v1
 import os
 import urllib.request
 import requests
 import pytest
 import torch
@@ -16,11 +16,17 @@ import torch
 def test_dataloader(tmp_path, file_name):
    if not os.path.exists("the-verdict.txt"):
-        url = ("https://raw.githubusercontent.com/rasbt/"
+        url = (
-               "LLMs-from-scratch/main/ch02/01_main-chapter-code/"
+            "https://raw.githubusercontent.com/rasbt/"
-               "the-verdict.txt")
+            "LLMs-from-scratch/main/ch02/01_main-chapter-code/"
            "the-verdict.txt"
        )
        file_path = "the-verdict.txt"
-        urllib.request.urlretrieve(url, file_path)
+
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(response.content)
    with open("the-verdict.txt", "r", encoding="utf-8") as f:
        raw_text = f.read()
--- a/pkg/llms_from_scratch/tests/test_ch05.py
+++ b/pkg/llms_from_scratch/tests/test_ch05.py
@@ -8,8 +8,8 @@ from llms_from_scratch.ch04 import GPTModel, GPTModelFast
 from llms_from_scratch.ch05 import train_model_simple
 import os
 import urllib
 import requests
 import pytest
 import tiktoken
 import torch
@@ -46,8 +46,9 @@ def test_train_simple(tmp_path, ModelClass):
    url = "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt"
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, timeout=30)
-            text_data = response.read().decode("utf-8")
+        response.raise_for_status()
        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text_data)
    else:
--- a/pkg/llms_from_scratch/tests/test_ch06.py
+++ b/pkg/llms_from_scratch/tests/test_ch06.py
@@ -11,8 +11,8 @@ from llms_from_scratch.ch06 import (
 )
 from pathlib import Path
 import urllib
 import requests
 import pandas as pd
 import tiktoken
 import torch
@@ -34,7 +34,7 @@ def test_train_classifier(tmp_path):
        download_and_unzip_spam_data(
            url, zip_path, extracted_path, data_file_path
        )
-    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+    except (requests.exceptions.RequestException, TimeoutError) as e:
        print(f"Primary URL failed: {e}. Trying backup URL...")
        backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
        download_and_unzip_spam_data(
--- a/pkg/llms_from_scratch/utils.py
+++ b/pkg/llms_from_scratch/utils.py
@@ -9,10 +9,9 @@ import ast
 import re
 import types
 from pathlib import Path
 import urllib.request
 import urllib.parse
 import nbformat
 import requests
 def _extract_imports(src: str):
@@ -125,21 +124,24 @@ def import_definitions_from_notebook(nb_dir_or_path, notebook_name=None, *, extr
    exec(src, mod.__dict__)
    return mod
 def download_file(url, out_dir="."):
    """Simple file download utility for tests."""
    from pathlib import Path
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
-    filename = Path(urllib.parse.urlparse(url).path).name
+    filename = Path(url).name
    dest = out_dir / filename
-    
+
    if dest.exists():
        return dest
-        
+
    try:
-        with urllib.request.urlopen(url) as response:
+        response = requests.get(url, stream=True, timeout=30)
-            with open(dest, 'wb') as f:
+        response.raise_for_status()
-                f.write(response.read())
+        with open(dest, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        return dest
    except Exception as e:
        raise RuntimeError(f"Failed to download {url}: {e}")