Switch from urllib to requests to improve reliability (#867)

* Switch from urllib to requests to improve reliability
* Keep ruff linter-specific
* update
* update
* update
2026-04-10 12:33:42 +00:00 · 2025-10-07 15:22:59 -05:00
parent 8552565bda
commit 7bd263144e
47 changed files with 592 additions and 436 deletions
--- a/.github/workflows/basic-tests-latest-python.yml
+++ b/.github/workflows/basic-tests-latest-python.yml
@@ -38,14 +38,14 @@ jobs:
    - name: Test Selected Python Scripts
      run: |
        source .venv/bin/activate
-        pytest --ruff setup/02_installing-python-libraries/tests.py
-        pytest --ruff ch04/01_main-chapter-code/tests.py
-        pytest --ruff ch05/01_main-chapter-code/tests.py
-        pytest --ruff ch06/01_main-chapter-code/tests.py
+        pytest setup/02_installing-python-libraries/tests.py
+        pytest ch04/01_main-chapter-code/tests.py
+        pytest ch05/01_main-chapter-code/tests.py
+        pytest ch06/01_main-chapter-code/tests.py

    - name: Validate Selected Jupyter Notebooks
      run: |
        source .venv/bin/activate
-        pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-        pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-        pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+        pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+        pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+        pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-linux-uv.yml
+++ b/.github/workflows/basic-tests-linux-uv.yml
@@ -47,24 +47,24 @@ jobs:
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch04/03_kv-cache/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
-          pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_kv_nb.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
+          pytest ch04/03_kv-cache/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
+          pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
+          pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
+          pytest ch05/11_qwen3/tests/test_qwen3_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_kv_nb.py
+          pytest ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks (uv)
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

      - name: Test Selected Bonus Materials
        shell: bash
--- a/.github/workflows/basic-tests-macos-uv.yml
+++ b/.github/workflows/basic-tests-macos-uv.yml
@@ -47,20 +47,20 @@ jobs:
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
-          pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_nb.py
-          pytest --ruff ch05/12_gemma3/tests/test_gemma3_kv_nb.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
+          pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
+          pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
+          pytest ch05/11_qwen3/tests/test_qwen3_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_nb.py
+          pytest ch05/12_gemma3/tests/test_gemma3_kv_nb.py
+          pytest ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks (uv)
        shell: bash
        run: |
          source .venv/bin/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-old-pytorch.yml
+++ b/.github/workflows/basic-tests-old-pytorch.yml
@@ -43,14 +43,14 @@ jobs:
    - name: Test Selected Python Scripts
      run: |
        source .venv/bin/activate
-        pytest --ruff setup/02_installing-python-libraries/tests.py
-        pytest --ruff ch04/01_main-chapter-code/tests.py
-        pytest --ruff ch05/01_main-chapter-code/tests.py
-        pytest --ruff ch06/01_main-chapter-code/tests.py
+        pytest setup/02_installing-python-libraries/tests.py
+        pytest ch04/01_main-chapter-code/tests.py
+        pytest ch05/01_main-chapter-code/tests.py
+        pytest ch06/01_main-chapter-code/tests.py

    - name: Validate Selected Jupyter Notebooks
      run: |
        source .venv/bin/activate
-        pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-        pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-        pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+        pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+        pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+        pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-pip.yml
+++ b/.github/workflows/basic-tests-pip.yml
@@ -46,14 +46,14 @@ jobs:
      - name: Test Selected Python Scripts
        run: |
          source .venv/bin/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks
        run: |
          source .venv/bin/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-pixi.yml
+++ b/.github/workflows/basic-tests-pixi.yml
@@ -47,14 +47,14 @@ jobs:
      - name: Test Selected Python Scripts
        shell: pixi run --environment tests bash -e {0}
        run: |
-          pytest --ruff setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
+          pytest ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks
        shell: pixi run --environment tests bash -e {0}
        run: |
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-pytorch-rc.yml
+++ b/.github/workflows/basic-tests-pytorch-rc.yml
@@ -39,14 +39,14 @@ jobs:
    - name: Test Selected Python Scripts
      run: |
        source .venv/bin/activate
-        pytest --ruff setup/02_installing-python-libraries/tests.py
-        pytest --ruff ch04/01_main-chapter-code/tests.py
-        pytest --ruff ch05/01_main-chapter-code/tests.py
-        pytest --ruff ch06/01_main-chapter-code/tests.py
+        pytest setup/02_installing-python-libraries/tests.py
+        pytest ch04/01_main-chapter-code/tests.py
+        pytest ch05/01_main-chapter-code/tests.py
+        pytest ch06/01_main-chapter-code/tests.py

    - name: Validate Selected Jupyter Notebooks
      run: |
        source .venv/bin/activate
-        pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-        pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-        pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+        pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+        pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+        pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/.github/workflows/basic-tests-windows-uv-pip.yml
+++ b/.github/workflows/basic-tests-windows-uv-pip.yml
@@ -49,18 +49,18 @@ jobs:
        shell: bash
        run: |
          source .venv/Scripts/activate
-          pytest --ruff setup/02_installing-python-libraries/tests.py
-          pytest --ruff ch04/01_main-chapter-code/tests.py
-          pytest --ruff ch05/01_main-chapter-code/tests.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
-          pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
-          pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
-          pytest --ruff ch06/01_main-chapter-code/tests.py
+          pytest setup/02_installing-python-libraries/tests.py
+          pytest ch04/01_main-chapter-code/tests.py
+          pytest ch05/01_main-chapter-code/tests.py
+          pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
+          pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
+          pytest ch05/11_qwen3/tests/test_qwen3_nb.py
+          pytest ch06/01_main-chapter-code/tests.py

      - name: Run Jupyter Notebook Tests
        shell: bash
        run: |
          source .venv/Scripts/activate
-          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
-          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
-          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
+          pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
+          pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
+          pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
--- a/appendix-D/01_main-chapter-code/appendix-D.ipynb
+++ b/appendix-D/01_main-chapter-code/appendix-D.ipynb
@@ -121,19 +121,40 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "file_path = \"the-verdict.txt\"\n",
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "if not os.path.exists(file_path):\n",
+    "    response = requests.get(url, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    text_data = response.text\n",
+    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "        file.write(text_data)\n",
+    "else:\n",
+    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "        text_data = file.read()\n",
+    "\n",
+    "# The book originally used the code below.\n",
+    "# However, urllib uses older protocol settings that\n",
+    "# can cause problems for some readers using a VPN.\n",
+    "# The `requests` version above is more robust\n",
+    "# in that regard.\n",
+    "\n",
+    "\"\"\"\n",
+    "import os\n",
+    "import urllib.request\n",
+    "\n",
+    "if not os.path.exists(file_path):\n",
    "    with urllib.request.urlopen(url) as response:\n",
    "        text_data = response.read().decode('utf-8')\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "        file.write(text_data)\n",
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "        text_data = file.read()"
+    "        text_data = file.read()\n",
+    "\"\"\""
   ]
  },
  {
--- a/appendix-E/01_main-chapter-code/appendix-E.ipynb
+++ b/appendix-E/01_main-chapter-code/appendix-E.ipynb
@@ -190,7 +190,8 @@
    }
   ],
   "source": [
-    "import urllib\n",
+    "# import urllib\n",
+    "import requests\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "from previous_chapters import (\n",
@@ -215,13 +216,20 @@
    "extracted_path = \"sms_spam_collection\"\n",
    "data_file_path = Path(extracted_path) / \"SMSSpamCollection.tsv\"\n",
    "\n",
+    "\n",
    "try:\n",
    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
-    "except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+    "except (requests.exceptions.RequestException, TimeoutError) as e:\n",
    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
    "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
    "\n",
+    "# The book originally used\n",
+    "# except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+    "# in the code above.\n",
+    "# However, some VPN users reported issues with `urllib`, so the code was updated\n",
+    "# to use `requests` instead.\n",
+    "\n",
    "df = pd.read_csv(data_file_path, sep=\"\\t\", header=None, names=[\"Label\", \"Text\"])\n",
    "balanced_df = create_balanced_dataset(df)\n",
    "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",
--- a/appendix-E/01_main-chapter-code/previous_chapters.py
+++ b/appendix-E/01_main-chapter-code/previous_chapters.py
@@ -9,12 +9,12 @@

 import os
 from pathlib import Path
-import urllib
 import zipfile

 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import requests
 import tiktoken
 import torch
 import torch.nn as nn
@@ -367,9 +367,12 @@ def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):
        return

    # Downloading the file
-    with urllib.request.urlopen(url) as response:
-        with open(zip_path, "wb") as out_file:
-            out_file.write(response.read())
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    with open(zip_path, "wb") as out_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                out_file.write(chunk)

    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
--- a/ch02/01_main-chapter-code/ch02.ipynb
+++ b/ch02/01_main-chapter-code/ch02.ipynb
@@ -163,6 +163,30 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "import os\n",
+    "import requests\n",
+    "\n",
+    "if not os.path.exists(\"the-verdict.txt\"):\n",
+    "    url = (\n",
+    "        \"https://raw.githubusercontent.com/rasbt/\"\n",
+    "        \"LLMs-from-scratch/main/ch02/01_main-chapter-code/\"\n",
+    "        \"the-verdict.txt\"\n",
+    "    )\n",
+    "    file_path = \"the-verdict.txt\"\n",
+    "\n",
+    "    response = requests.get(url, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    with open(file_path, \"wb\") as f:\n",
+    "        f.write(response.content)\n",
+    "\n",
+    "\n",
+    "# The book originally used the code below.\n",
+    "# However, urllib uses older protocol settings that\n",
+    "# can cause problems for some readers using a VPN.\n",
+    "# The `requests` version above is more robust\n",
+    "# in that regard.\n",
+    "\n",
+    "\"\"\"\n",
    "import os\n",
    "import urllib.request\n",
    "\n",
@@ -171,7 +195,8 @@
    "           \"LLMs-from-scratch/main/ch02/01_main-chapter-code/\"\n",
    "           \"the-verdict.txt\")\n",
    "    file_path = \"the-verdict.txt\"\n",
-    "    urllib.request.urlretrieve(url, file_path)"
+    "    urllib.request.urlretrieve(url, file_path)\n",
+    "\"\"\""
   ]
  },
  {
--- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
+++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
@@ -823,7 +823,7 @@
   ],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "def download_file_if_absent(url, filename, search_dirs):\n",
    "    for directory in search_dirs:\n",
@@ -834,13 +834,19 @@
    "\n",
    "    target_path = os.path.join(search_dirs[0], filename)\n",
    "    try:\n",
-    "        with urllib.request.urlopen(url) as response, open(target_path, \"wb\") as out_file:\n",
-    "            out_file.write(response.read())\n",
+    "        response = requests.get(url, stream=True, timeout=60)\n",
+    "        response.raise_for_status()\n",
+    "        with open(target_path, \"wb\") as out_file:\n",
+    "            for chunk in response.iter_content(chunk_size=8192):\n",
+    "                if chunk:\n",
+    "                    out_file.write(chunk)\n",
    "        print(f\"Downloaded {filename} to {target_path}\")\n",
    "    except Exception as e:\n",
    "        print(f\"Failed to download {filename}. Error: {e}\")\n",
+    "\n",
    "    return target_path\n",
    "\n",
+    "\n",
    "verdict_path = download_file_if_absent(\n",
    "    url=(\n",
    "         \"https://raw.githubusercontent.com/rasbt/\"\n",
--- a/ch05/01_main-chapter-code/ch05.ipynb
+++ b/ch05/01_main-chapter-code/ch05.ipynb
@@ -793,19 +793,43 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "file_path = \"the-verdict.txt\"\n",
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "if not os.path.exists(file_path):\n",
-    "    with urllib.request.urlopen(url) as response:\n",
-    "        text_data = response.read().decode('utf-8')\n",
+    "    response = requests.get(url, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    text_data = response.text\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "        file.write(text_data)\n",
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "        text_data = file.read()"
+    "        text_data = file.read()\n",
+    "\n",
+    "\n",
+    "# The book originally used the code below.\n",
+    "# However, urllib uses older protocol settings that\n",
+    "# can cause problems for some readers using a VPN.\n",
+    "# The `requests` version above is more robust\n",
+    "# in that regard.\n",
+    "\n",
+    "        \n",
+    "# import os\n",
+    "# import urllib.request\n",
+    "\n",
+    "# file_path = \"the-verdict.txt\"\n",
+    "# url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
+    "\n",
+    "# if not os.path.exists(file_path):\n",
+    "#     with urllib.request.urlopen(url) as response:\n",
+    "#         text_data = response.read().decode('utf-8')\n",
+    "#     with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "#         file.write(text_data)\n",
+    "# else:\n",
+    "#     with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "#         text_data = file.read()"
   ]
  },
  {
--- a/ch05/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch05/01_main-chapter-code/exercise-solutions.ipynb
@@ -491,7 +491,7 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "from previous_chapters import create_dataloader_v1\n",
    "\n",
    "\n",
@@ -499,6 +499,25 @@
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "if not os.path.exists(file_path):\n",
+    "    response = requests.get(url, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    text_data = response.text\n",
+    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "        file.write(text_data)\n",
+    "else:\n",
+    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "        text_data = file.read()\n",
+    "\n",
+    "# The book originally used the code below.\n",
+    "# However, urllib uses older protocol settings that\n",
+    "# can cause problems for some readers using a VPN.\n",
+    "# The `requests` version above is more robust\n",
+    "# in that regard.\n",
+    "\n",
+    "\"\"\"\n",
+    "import urllib.request\n",
+    "\n",
+    "if not os.path.exists(file_path):\n",
    "    with urllib.request.urlopen(url) as response:\n",
    "        text_data = response.read().decode('utf-8')\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
@@ -506,6 +525,7 @@
    "else:\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        text_data = file.read()\n",
+    "\"\"\"\n",
    "\n",
    "\n",
    "# Train/validation ratio\n",
--- a/ch05/01_main-chapter-code/gpt_download.py
+++ b/ch05/01_main-chapter-code/gpt_download.py
@@ -5,9 +5,8 @@


 import os
-import urllib.request

-# import requests
+import requests
 import json
 import numpy as np
 import tensorflow as tf
@@ -48,44 +47,40 @@ def download_and_load_gpt2(model_size, models_dir):

 def download_file(url, destination, backup_url=None):
    def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()

-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
+        file_size = int(response.headers.get("Content-Length", 0))

-            block_size = 1024  # 1 Kilobyte
+        # Check if file exists and has the same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True

-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
                        file.write(chunk)
                        progress_bar.update(len(chunk))
-            return True
+        return True

    try:
        if _attempt_download(url):
            return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
            print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
            try:
                if _attempt_download(backup_url):
                    return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                pass

-        # If we reach here, both attempts have failed
        error_message = (
            f"Failed to download from both primary URL ({url})"
            f"{' and backup URL (' + backup_url + ')' if backup_url else ''}."
--- a/ch05/01_main-chapter-code/gpt_generate.py
+++ b/ch05/01_main-chapter-code/gpt_generate.py
@@ -7,9 +7,8 @@ import argparse
 import json
 import numpy as np
 import os
-import urllib.request

-# import requests
+import requests
 import tensorflow as tf
 import tiktoken
 import torch
@@ -60,18 +59,18 @@ def download_and_load_gpt2(model_size, models_dir):
    return settings, params


-"""
 def download_file(url, destination):
-    # Send a GET request to download the file in streaming mode
-    response = requests.get(url, stream=True)
+    # Send a GET request to download the file
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()

    # Get the total file size from headers, defaulting to 0 if not present
-    file_size = int(response.headers.get("content-length", 0))
+    file_size = int(response.headers.get("Content-Length", 0))

    # Check if file exists and has the same size
    if os.path.exists(destination):
        file_size_local = os.path.getsize(destination)
-        if file_size == file_size_local:
+        if file_size and file_size == file_size_local:
            print(f"File already exists and is up-to-date: {destination}")
            return

@@ -79,43 +78,12 @@ def download_file(url, destination):
    block_size = 1024  # 1 Kilobyte

    # Initialize the progress bar with total file size
-    progress_bar_description = url.split("/")[-1]  # Extract filename from URL
+    progress_bar_description = os.path.basename(url)
    with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
        # Open the destination file in binary write mode
        with open(destination, "wb") as file:
-            # Iterate over the file data in chunks
-            for chunk in response.iter_content(block_size):
-                progress_bar.update(len(chunk))  # Update progress bar
-                file.write(chunk)  # Write the chunk to the file
-"""
-
-
-def download_file(url, destination):
-    # Send a GET request to download the file
-    with urllib.request.urlopen(url) as response:
-        # Get the total file size from headers, defaulting to 0 if not present
-        file_size = int(response.headers.get("Content-Length", 0))
-
-        # Check if file exists and has the same size
-        if os.path.exists(destination):
-            file_size_local = os.path.getsize(destination)
-            if file_size == file_size_local:
-                print(f"File already exists and is up-to-date: {destination}")
-                return
-
-        # Define the block size for reading the file
-        block_size = 1024  # 1 Kilobyte
-
-        # Initialize the progress bar with total file size
-        progress_bar_description = os.path.basename(url)  # Extract filename from URL
-        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-            # Open the destination file in binary write mode
-            with open(destination, "wb") as file:
-                # Read the file in chunks and write to destination
-                while True:
-                    chunk = response.read(block_size)
-                    if not chunk:
-                        break
+            for chunk in response.iter_content(chunk_size=block_size):
+                if chunk:
                    file.write(chunk)
                    progress_bar.update(len(chunk))  # Update progress bar

--- a/ch05/01_main-chapter-code/gpt_train.py
+++ b/ch05/01_main-chapter-code/gpt_train.py
@@ -5,8 +5,8 @@

 import matplotlib.pyplot as plt
 import os
+import requests
 import torch
-import urllib.request
 import tiktoken


@@ -141,14 +141,14 @@ def main(gpt_config, settings):
    url = "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt"

    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
-            text_data = response.read().decode('utf-8')
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
        with open(file_path, "r", encoding="utf-8") as file:
            text_data = file.read()
-
    ##############################
    # Initialize model
    ##############################
--- a/ch05/01_main-chapter-code/tests.py
+++ b/ch05/01_main-chapter-code/tests.py
@@ -7,9 +7,7 @@

 import pytest
 from gpt_train import main
-import http.client
-from urllib.parse import urlparse
-
+import requests

@pytest.fixture
 def gpt_config():
@@ -43,23 +41,23 @@ def test_main(gpt_config, other_settings):


 def check_file_size(url, expected_size):
-    parsed_url = urlparse(url)
-    if parsed_url.scheme == "https":
-        conn = http.client.HTTPSConnection(parsed_url.netloc)
-    else:
-        conn = http.client.HTTPConnection(parsed_url.netloc)
+    try:
+        response = requests.head(url, allow_redirects=True, timeout=30)
+        if response.status_code != 200:
+            return False, f"{url} not accessible"

-    conn.request("HEAD", parsed_url.path)
-    response = conn.getresponse()
-    if response.status != 200:
-        return False, f"{url} not accessible"
-    size = response.getheader("Content-Length")
-    if size is None:
-        return False, "Content-Length header is missing"
-    size = int(size)
-    if size != expected_size:
-        return False, f"{url} file has expected size {expected_size}, but got {size}"
-    return True, f"{url} file size is correct"
+        size = response.headers.get("Content-Length")
+        if size is None:
+            return False, "Content-Length header is missing"
+
+        size = int(size)
+        if size != expected_size:
+            return False, f"{url} file has expected size {expected_size}, but got {size}"
+
+        return True, f"{url} file size is correct"
+
+    except requests.exceptions.RequestException as e:
+        return False, f"Failed to access {url}: {e}"


 def test_model_files():
--- a/ch05/02_alternative_weight_loading/weight-loading-hf-safetensors.ipynb
+++ b/ch05/02_alternative_weight_loading/weight-loading-hf-safetensors.ipynb
@@ -134,7 +134,7 @@
   "outputs": [],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "from safetensors.torch import load_file\n",
    "\n",
    "URL_DIR = {\n",
@@ -149,7 +149,10 @@
    "\n",
    "# Download file\n",
    "if not os.path.exists(output_file):\n",
-    "    urllib.request.urlretrieve(url, output_file)\n",
+    "    response = requests.get(url, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    with open(output_file, \"wb\") as f:\n",
+    "        f.write(response.content)\n",
    "\n",
    "# Load file\n",
    "state_dict = load_file(output_file)"
--- a/ch05/02_alternative_weight_loading/weight-loading-pytorch.ipynb
+++ b/ch05/02_alternative_weight_loading/weight-loading-pytorch.ipynb
@@ -144,12 +144,15 @@
   ],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "url = f\"https://huggingface.co/rasbt/gpt2-from-scratch-pytorch/resolve/main/{file_name}\"\n",
    "\n",
    "if not os.path.exists(file_name):\n",
-    "    urllib.request.urlretrieve(url, file_name)\n",
+    "    response = requests.get(url, timeout=60)\n",
+    "    response.raise_for_status()\n",
+    "    with open(file_name, \"wb\") as f:\n",
+    "        f.write(response.content)\n",
    "    print(f\"Downloaded to {file_name}\")"
   ]
  },
@@ -276,12 +279,15 @@
   ],
   "source": [
    "import os\n",
-    "import urllib.request\n",
+    "import requests\n",
    "\n",
    "url = f\"https://huggingface.co/rasbt/gpt2-from-scratch-pytorch/resolve/main/{file_name}\"\n",
    "\n",
    "if not os.path.exists(file_name):\n",
-    "    urllib.request.urlretrieve(url, file_name)\n",
+    "    response = requests.get(url, timeout=60)\n",
+    "    response.raise_for_status()\n",
+    "    with open(file_name, \"wb\") as f:\n",
+    "        f.write(response.content)\n",
    "    print(f\"Downloaded to {file_name}\")"
   ]
  },
--- a/ch05/07_gpt_to_llama/README.md
+++ b/ch05/07_gpt_to_llama/README.md
@@ -58,12 +58,17 @@ This automatically downloads the weight file based on the model choice above:

 ```python
 import os
-import urllib.request
+import requests

 url = f"https://huggingface.co/rasbt/llama-3.2-from-scratch/resolve/main/{MODEL_FILE}"

 if not os.path.exists(MODEL_FILE):
-    urllib.request.urlretrieve(url, MODEL_FILE)
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    with open(MODEL_FILE, "wb") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                f.write(chunk)
    print(f"Downloaded to {MODEL_FILE}")
 ```

--- a/ch05/10_llm-training-speed/01_opt_single_gpu.py
+++ b/ch05/10_llm-training-speed/01_opt_single_gpu.py
@@ -6,9 +6,9 @@

 import os
 import time
-import urllib.request

 import matplotlib.pyplot as plt
+import requests
 import torch
 import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
@@ -397,8 +397,9 @@ def main(gpt_config, settings):
    url = "https://www.gutenberg.org/cache/epub/145/pg145.txt"

    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
-            text_data = response.read().decode('utf-8')
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
--- a/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
+++ b/ch05/10_llm-training-speed/02_opt_multi_gpu_ddp.py
@@ -6,9 +6,9 @@

 import os
 import time
-import urllib.request

 import matplotlib.pyplot as plt
+import requests
 import torch
 import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
@@ -468,11 +468,11 @@ def main(gpt_config, settings, rank, world_size):
    # NEW: Only download 1 time
    if rank == 0:
        if not os.path.exists(file_path):
-            with urllib.request.urlopen(url) as response:
-                text_data = response.read().decode('utf-8')
+            response = requests.get(url, timeout=30)
+            response.raise_for_status()
+            text_data = response.text
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(text_data)
-
    # NEW: All processes wait until rank 0 is done, using the GPU index.
    torch.distributed.barrier(device_ids=[device.index])

--- a/ch06/01_main-chapter-code/ch06.ipynb
+++ b/ch06/01_main-chapter-code/ch06.ipynb
@@ -186,6 +186,56 @@
    }
   ],
   "source": [
+    "import requests\n",
+    "import zipfile\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "url = \"https://archive.ics.uci.edu/static/public/228/sms+spam+collection.zip\"\n",
+    "zip_path = \"sms_spam_collection.zip\"\n",
+    "extracted_path = \"sms_spam_collection\"\n",
+    "data_file_path = Path(extracted_path) / \"SMSSpamCollection.tsv\"\n",
+    "\n",
+    "\n",
+    "def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):\n",
+    "    if data_file_path.exists():\n",
+    "        print(f\"{data_file_path} already exists. Skipping download and extraction.\")\n",
+    "        return\n",
+    "\n",
+    "    # Downloading the file\n",
+    "    response = requests.get(url, stream=True, timeout=60)\n",
+    "    response.raise_for_status()\n",
+    "    with open(zip_path, \"wb\") as out_file:\n",
+    "        for chunk in response.iter_content(chunk_size=8192):\n",
+    "            if chunk:\n",
+    "                out_file.write(chunk)\n",
+    "\n",
+    "    # Unzipping the file\n",
+    "    with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n",
+    "        zip_ref.extractall(extracted_path)\n",
+    "\n",
+    "    # Add .tsv file extension\n",
+    "    original_file_path = Path(extracted_path) / \"SMSSpamCollection\"\n",
+    "    os.rename(original_file_path, data_file_path)\n",
+    "    print(f\"File downloaded and saved as {data_file_path}\")\n",
+    "\n",
+    "\n",
+    "try:\n",
+    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
+    "except (requests.exceptions.RequestException, TimeoutError) as e:\n",
+    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
+    "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
+    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
+    "\n",
+    "\n",
+    "\n",
+    "# The book originally used the code below.\n",
+    "# However, urllib uses older protocol settings that\n",
+    "# can cause problems for some readers using a VPN.\n",
+    "# The `requests` version above is more robust\n",
+    "# in that regard.\n",
+    "\n",
+    "\"\"\"\n",
    "import urllib.request\n",
    "import zipfile\n",
    "import os\n",
@@ -220,7 +270,8 @@
    "except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
    "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
-    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path) "
+    "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
+    "\"\"\""
   ]
  },
  {
--- a/ch06/01_main-chapter-code/gpt_class_finetune.py
+++ b/ch06/01_main-chapter-code/gpt_class_finetune.py
@@ -5,7 +5,7 @@

 # This is a summary file containing the main takeaways from chapter 6.

-import urllib.request
+import requests
 import zipfile
 import os
 from pathlib import Path
@@ -27,9 +27,12 @@ def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):
        return

    # Downloading the file
-    with urllib.request.urlopen(url) as response:
-        with open(zip_path, "wb") as out_file:
-            out_file.write(response.read())
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    with open(zip_path, "wb") as out_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                out_file.write(chunk)

    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -259,7 +262,7 @@ if __name__ == "__main__":

    try:
        download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)
-    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+    except (requests.exceptions.RequestException, TimeoutError) as e:
        print(f"Primary URL failed: {e}. Trying backup URL...")
        url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
        download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)
--- a/ch06/02_bonus_additional-experiments/additional_experiments.py
+++ b/ch06/02_bonus_additional-experiments/additional_experiments.py
@@ -8,10 +8,10 @@ import math
 import os
 from pathlib import Path
 import time
-import urllib.request
 import zipfile

 import pandas as pd
+import requests
 import tiktoken
 import torch
 from torch.utils.data import DataLoader
@@ -113,9 +113,12 @@ def download_and_unzip(url, zip_path, extract_to, new_file_path):
        return

    # Downloading the file
-    with urllib.request.urlopen(url) as response:
-        with open(zip_path, "wb") as out_file:
-            out_file.write(response.read())
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    with open(zip_path, "wb") as out_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                out_file.write(chunk)

    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -608,11 +611,11 @@ if __name__ == "__main__":
    base_path = Path(".")
    file_names = ["train.csv", "validation.csv", "test.csv"]
    all_exist = all((base_path / file_name).exists() for file_name in file_names)
-
+    
    if not all_exist:
        try:
            download_and_unzip(url, zip_path, extract_to, new_file_path)
-        except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+        except (requests.exceptions.RequestException, TimeoutError) as e:
            print(f"Primary URL failed: {e}. Trying backup URL...")
            backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
            download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
--- a/ch06/03_bonus_imdb-classification/download_prepare_dataset.py
+++ b/ch06/03_bonus_imdb-classification/download_prepare_dataset.py
@@ -7,7 +7,7 @@ import os
 import sys
 import tarfile
 import time
-import urllib.request
+import requests
 import pandas as pd


@@ -32,7 +32,15 @@ def download_and_extract_dataset(dataset_url, target_file, directory):
    if not os.path.exists(directory):
        if os.path.exists(target_file):
            os.remove(target_file)
-        urllib.request.urlretrieve(dataset_url, target_file, reporthook)
+
+        response = requests.get(dataset_url, stream=True, timeout=60)
+        response.raise_for_status()
+
+        with open(target_file, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    f.write(chunk)
+
        print("\nExtracting dataset ...")
        with tarfile.open(target_file, "r:gz") as tar:
            tar.extractall()
--- a/ch06/03_bonus_imdb-classification/train_bert_hf_spam.py
+++ b/ch06/03_bonus_imdb-classification/train_bert_hf_spam.py
@@ -7,7 +7,7 @@ import argparse
 import os
 from pathlib import Path
 import time
-import urllib
+import requests
 import zipfile

 import pandas as pd
@@ -62,9 +62,12 @@ def download_and_unzip(url, zip_path, extract_to, new_file_path):
        return

    # Downloading the file
-    with urllib.request.urlopen(url) as response:
-        with open(zip_path, "wb") as out_file:
-            out_file.write(response.read())
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    with open(zip_path, "wb") as out_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                out_file.write(chunk)

    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -412,7 +415,7 @@ if __name__ == "__main__":
    if not all_exist:
        try:
            download_and_unzip(url, zip_path, extract_to, new_file_path)
-        except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+        except (requests.exceptions.RequestException, TimeoutError) as e:
            print(f"Primary URL failed: {e}. Trying backup URL...")
            backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
            download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
--- a/ch07/01_main-chapter-code/ch07.ipynb
+++ b/ch07/01_main-chapter-code/ch07.ipynb
@@ -169,10 +169,33 @@
   "source": [
    "import json\n",
    "import os\n",
-    "import urllib\n",
+    "import requests\n",
    "\n",
    "\n",
    "def download_and_load_file(file_path, url):\n",
+    "    if not os.path.exists(file_path):\n",
+    "        response = requests.get(url, timeout=30)\n",
+    "        response.raise_for_status()\n",
+    "        text_data = response.text\n",
+    "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "            file.write(text_data)\n",
+    "\n",
+    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "        data = json.load(file)\n",
+    "\n",
+    "    return data\n",
+    "\n",
+    "\n",
+    "# The book originally used the code below.\n",
+    "# However, urllib uses older protocol settings that\n",
+    "# can cause problems for some readers using a VPN.\n",
+    "# The `requests` version above is more robust\n",
+    "# in that regard.\n",
+    "\n",
+    "\"\"\"\n",
+    "import urllib\n",
+    "\n",
+    "def download_and_load_file(file_path, url):\n",
    "\n",
    "    if not os.path.exists(file_path):\n",
    "        with urllib.request.urlopen(url) as response:\n",
@@ -180,15 +203,15 @@
    "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "            file.write(text_data)\n",
    "\n",
-    "    # The book originally contained this unnecessary \"else\" clause:\n",
-    "    #else:\n",
-    "    #    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "    #        text_data = file.read()\n",
+    "    else:\n",
+    "        with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "            text_data = file.read()\n",
    "\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        data = json.load(file)\n",
    "\n",
    "    return data\n",
+    "\"\"\"\n",
    "\n",
    "\n",
    "file_path = \"instruction-data.json\"\n",
@@ -2490,7 +2513,8 @@
    }
   ],
   "source": [
-    "import urllib.request\n",
+    "import requests  # noqa: F811\n",
+    "# import urllib.request\n",
    "\n",
    "def query_model(\n",
    "    prompt,\n",
@@ -2512,7 +2536,8 @@
    "        }\n",
    "    }\n",
    "\n",
-    "\n",
+    "    \n",
+    "    \"\"\"\n",
    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
    "    payload = json.dumps(data).encode(\"utf-8\")\n",
    "\n",
@@ -2536,6 +2561,26 @@
    "            response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # The book originally used the commented-out code above, which is based\n",
+    "    # on urllib. It generally works fine, but some readers reported\n",
+    "    # issues with using urllib when using a (company) VPN.\n",
+    "    # The code below uses the requests library, which doesn't seem\n",
+    "    # to have these issues.\n",
+    "\n",
+    "    # Send the POST request\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
+    "        r.raise_for_status()\n",
+    "        response_data = \"\"\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
+    "            if not line:\n",
+    "                continue\n",
+    "            response_json = json.loads(line)\n",
+    "            if \"message\" in response_json:\n",
+    "                response_data += response_json[\"message\"][\"content\"]\n",
+    "\n",
+    "    return response_data\n",
    "\n",
    "\n",
    "model = \"llama3\"\n",
--- a/ch07/01_main-chapter-code/exercise_experiments.py
+++ b/ch07/01_main-chapter-code/exercise_experiments.py
@@ -12,10 +12,10 @@ import math
 import os
 import re
 import time
-import urllib

 import matplotlib.pyplot as plt
 from matplotlib.ticker import MaxNLocator
+import requests
 import tiktoken
 import torch
 from torch.utils.data import Dataset, DataLoader
@@ -234,17 +234,17 @@ def custom_collate_with_masking_fn(


 def download_and_load_file(file_path, url):
-
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
-            text_data = response.read().decode("utf-8")
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)
    else:
        with open(file_path, "r", encoding="utf-8") as file:
            text_data = file.read()

-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    return data
--- a/ch07/01_main-chapter-code/gpt_download.py
+++ b/ch07/01_main-chapter-code/gpt_download.py
@@ -5,11 +5,10 @@


 import os
-import urllib.request
-
-# import requests
 import json
+
 import numpy as np
+import requests
 import tensorflow as tf
 from tqdm import tqdm

@@ -48,44 +47,40 @@ def download_and_load_gpt2(model_size, models_dir):

 def download_file(url, destination, backup_url=None):
    def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()

-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
+        file_size = int(response.headers.get("Content-Length", 0))

-            block_size = 1024  # 1 Kilobyte
+        # Check if file exists and has same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True

-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
                        file.write(chunk)
                        progress_bar.update(len(chunk))
-            return True
+        return True

    try:
        if _attempt_download(url):
            return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
            print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
            try:
                if _attempt_download(backup_url):
                    return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                pass

-        # If we reach here, both attempts have failed
        error_message = (
            f"Failed to download from both primary URL ({url})"
            f"{' and backup URL (' + backup_url + ')' if backup_url else ''}."
@@ -97,37 +92,6 @@ def download_file(url, destination, backup_url=None):
        print(f"An unexpected error occurred: {e}")


-# Alternative way using `requests`
-"""
-def download_file(url, destination):
-    # Send a GET request to download the file in streaming mode
-    response = requests.get(url, stream=True)
-
-    # Get the total file size from headers, defaulting to 0 if not present
-    file_size = int(response.headers.get("content-length", 0))
-
-    # Check if file exists and has the same size
-    if os.path.exists(destination):
-        file_size_local = os.path.getsize(destination)
-        if file_size == file_size_local:
-            print(f"File already exists and is up-to-date: {destination}")
-            return
-
-    # Define the block size for reading the file
-    block_size = 1024  # 1 Kilobyte
-
-    # Initialize the progress bar with total file size
-    progress_bar_description = url.split("/")[-1]  # Extract filename from URL
-    with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-        # Open the destination file in binary write mode
-        with open(destination, "wb") as file:
-            # Iterate over the file data in chunks
-            for chunk in response.iter_content(block_size):
-                progress_bar.update(len(chunk))  # Update progress bar
-                file.write(chunk)  # Write the chunk to the file
-"""
-
-
 def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
    # Initialize parameters dictionary with empty blocks for each layer
    params = {"blocks": [{} for _ in range(settings["n_layer"])]}
--- a/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
+++ b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
@@ -11,9 +11,9 @@ import json
 import os
 import re
 import time
-import urllib

 import matplotlib.pyplot as plt
+import requests
 import tiktoken
 import torch
 from torch.utils.data import Dataset, DataLoader
@@ -97,14 +97,14 @@ def custom_collate_fn(


 def download_and_load_file(file_path, url):
-
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
-            text_data = response.read().decode("utf-8")
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)

-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    return data
--- a/ch07/01_main-chapter-code/ollama_evaluate.py
+++ b/ch07/01_main-chapter-code/ollama_evaluate.py
@@ -8,7 +8,7 @@
 import json
 import psutil
 from tqdm import tqdm
-import urllib.request
+import requests


 def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
@@ -25,23 +25,16 @@ def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
        }
    }

-    # Convert the dictionary to a JSON formatted string and encode it to bytes
-    payload = json.dumps(data).encode("utf-8")
-
-    # Create a request object, setting the method to POST and adding necessary headers
-    request = urllib.request.Request(url, data=payload, method="POST")
-    request.add_header("Content-Type", "application/json")
-
-    # Send the request and capture the response
-    response_data = ""
-    with urllib.request.urlopen(request) as response:
-        # Read and decode the response
-        while True:
-            line = response.readline().decode("utf-8")
+    # Send the POST request
+    with requests.post(url, json=data, stream=True, timeout=30) as r:
+        r.raise_for_status()
+        response_data = ""
+        for line in r.iter_lines(decode_unicode=True):
            if not line:
-                break
+                continue
            response_json = json.loads(line)
-            response_data += response_json["message"]["content"]
+            if "message" in response_json:
+                response_data += response_json["message"]["content"]

    return response_data

--- a/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb
+++ b/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb
@@ -215,8 +215,8 @@
    }
   ],
   "source": [
-    "import urllib.request\n",
    "import json\n",
+    "import requests\n",
    "\n",
    "\n",
    "def query_model(prompt, model=\"llama3\", url=\"http://localhost:11434/api/chat\"):\n",
@@ -236,27 +236,19 @@
    "        }\n",
    "    }\n",
    "\n",
-    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
-    "    payload = json.dumps(data).encode(\"utf-8\")\n",
-    "\n",
-    "    # Create a request object, setting the method to POST and adding necessary headers\n",
-    "    request = urllib.request.Request(url, data=payload, method=\"POST\")\n",
-    "    request.add_header(\"Content-Type\", \"application/json\")\n",
-    "\n",
-    "    # Send the request and capture the response\n",
-    "    response_data = \"\"\n",
-    "    with urllib.request.urlopen(request) as response:\n",
-    "        # Read and decode the response\n",
-    "        while True:\n",
-    "            line = response.readline().decode(\"utf-8\")\n",
+    "    # Send the POST request\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
+    "        r.raise_for_status()\n",
+    "        response_data = \"\"\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
    "            if not line:\n",
-    "                break\n",
+    "                continue\n",
    "            response_json = json.loads(line)\n",
-    "            response_data += response_json[\"message\"][\"content\"]\n",
+    "            if \"message\" in response_json:\n",
+    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
    "\n",
-    "\n",
    "result = query_model(\"What do Llamas eat?\")\n",
    "print(result)"
   ]
@@ -640,7 +632,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
@@ -274,8 +274,8 @@
    }
   ],
   "source": [
-    "import urllib.request\n",
    "import json\n",
+    "import requests\n",
    "\n",
    "\n",
    "def query_model(prompt, model=\"llama3.1:70b\", url=\"http://localhost:11434/api/chat\"):\n",
@@ -294,23 +294,16 @@
    "        }\n",
    "    }\n",
    "\n",
-    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
-    "    payload = json.dumps(data).encode(\"utf-8\")\n",
-    "\n",
-    "    # Create a request object, setting the method to POST and adding necessary headers\n",
-    "    request = urllib.request.Request(url, data=payload, method=\"POST\")\n",
-    "    request.add_header(\"Content-Type\", \"application/json\")\n",
-    "\n",
-    "    # Send the request and capture the response\n",
-    "    response_data = \"\"\n",
-    "    with urllib.request.urlopen(request) as response:\n",
-    "        # Read and decode the response\n",
-    "        while True:\n",
-    "            line = response.readline().decode(\"utf-8\")\n",
+    "    # Send the POST request\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
+    "        r.raise_for_status()\n",
+    "        response_data = \"\"\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
    "            if not line:\n",
-    "                break\n",
+    "                continue\n",
    "            response_json = json.loads(line)\n",
-    "            response_data += response_json[\"message\"][\"content\"]\n",
+    "            if \"message\" in response_json:\n",
+    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
    "    return response_data\n",
    "\n",
@@ -587,7 +580,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
@@ -231,23 +231,21 @@
   "source": [
    "import json\n",
    "import os\n",
-    "import urllib\n",
+    "import requests\n",
    "\n",
    "\n",
    "def download_and_load_file(file_path, url):\n",
-    "\n",
    "    if not os.path.exists(file_path):\n",
-    "        with urllib.request.urlopen(url) as response:\n",
-    "            text_data = response.read().decode(\"utf-8\")\n",
+    "        response = requests.get(url, timeout=30)\n",
+    "        response.raise_for_status()\n",
+    "        text_data = response.text\n",
    "        with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
    "            file.write(text_data)\n",
    "    else:\n",
    "        with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "            text_data = file.read()\n",
    "\n",
-    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
-    "        data = json.load(file)\n",
-    "\n",
+    "    data = json.loads(text_data)\n",
    "    return data\n",
    "\n",
    "\n",
--- a/ch07/05_dataset-generation/llama3-ollama.ipynb
+++ b/ch07/05_dataset-generation/llama3-ollama.ipynb
@@ -194,8 +194,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "import urllib.request\n",
    "import json\n",
+    "import requests\n",
    "\n",
    "def query_model(prompt, model=\"llama3\", url=\"http://localhost:11434/api/chat\", role=\"user\"):\n",
    "    # Create the data payload as a dictionary\n",
@@ -209,25 +209,21 @@
    "        ]\n",
    "    }\n",
    "\n",
-    "    # Convert the dictionary to a JSON formatted string and encode it to bytes\n",
-    "    payload = json.dumps(data).encode(\"utf-8\")\n",
-    "\n",
-    "    # Create a request object, setting the method to POST and adding necessary headers\n",
-    "    request = urllib.request.Request(url, data=payload, method=\"POST\")\n",
-    "    request.add_header(\"Content-Type\", \"application/json\")\n",
-    "\n",
-    "    # Send the request and capture the response\n",
-    "    response_data = \"\"\n",
-    "    with urllib.request.urlopen(request) as response:\n",
-    "        # Read and decode the response\n",
-    "        while True:\n",
-    "            line = response.readline().decode(\"utf-8\")\n",
+    "    # Send the POST request\n",
+    "    with requests.post(url, json=data, stream=True, timeout=30) as r:\n",
+    "        r.raise_for_status()\n",
+    "        response_data = \"\"\n",
+    "        for line in r.iter_lines(decode_unicode=True):\n",
    "            if not line:\n",
-    "                break\n",
+    "                continue\n",
    "            response_json = json.loads(line)\n",
-    "            response_data += response_json[\"message\"][\"content\"]\n",
+    "            if \"message\" in response_json:\n",
+    "                response_data += response_json[\"message\"][\"content\"]\n",
    "\n",
-    "    return response_data"
+    "    return response_data\n",
+    "\n",
+    "result = query_model(\"What do Llamas eat?\")\n",
+    "print(result)"
   ]
  },
  {
@@ -498,7 +494,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.16"
  }
 },
 "nbformat": 4,
--- a/pkg/llms_from_scratch/ch05.py
+++ b/pkg/llms_from_scratch/ch05.py
@@ -7,11 +7,11 @@ from .ch04 import generate_text_simple

 import json
 import os
-import urllib.request

 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.ticker import MaxNLocator
+import requests
 import torch
 from tqdm import tqdm

@@ -279,44 +279,40 @@ def download_and_load_gpt2(model_size, models_dir):

 def download_file(url, destination, backup_url=None):
    def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()

-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
+        file_size = int(response.headers.get("Content-Length", 0))

-            block_size = 1024  # 1 Kilobyte
+        # Check if file exists and has the same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True

-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
                        file.write(chunk)
                        progress_bar.update(len(chunk))
-            return True
+        return True

    try:
        if _attempt_download(url):
            return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
            print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
            try:
                if _attempt_download(backup_url):
                    return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                pass

-        # If we reach here, both attempts have failed
        error_message = (
            f"Failed to download from both primary URL ({url})"
            f"{' and backup URL (' + backup_url + ')' if backup_url else ''}."
--- a/pkg/llms_from_scratch/ch06.py
+++ b/pkg/llms_from_scratch/ch06.py
@@ -4,11 +4,11 @@
 # Code: https://github.com/rasbt/LLMs-from-scratch


-import urllib.request
 import zipfile
 import os
 from pathlib import Path

+import requests
 import matplotlib.pyplot as plt
 from torch.utils.data import Dataset
 import torch
@@ -21,9 +21,12 @@ def download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path):
        return

    # Downloading the file
-    with urllib.request.urlopen(url) as response:
-        with open(zip_path, "wb") as out_file:
-            out_file.write(response.read())
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    with open(zip_path, "wb") as out_file:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                out_file.write(chunk)

    # Unzipping the file
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
--- a/pkg/llms_from_scratch/ch07.py
+++ b/pkg/llms_from_scratch/ch07.py
@@ -6,7 +6,7 @@
 import json
 import os
 import psutil
-import urllib
+import requests

 import torch
 from tqdm import tqdm
@@ -14,24 +14,46 @@ from torch.utils.data import Dataset


 def download_and_load_file(file_path, url):
-
    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
-            text_data = response.read().decode("utf-8")
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text_data)

-    # The book originally contained this unnecessary "else" clause:
-    # else:
-    #     with open(file_path, "r", encoding="utf-8") as file:
-    #         text_data = file.read()
-
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    return data


+# The book originally used the code shown below.
+# However, urllib uses older protocol settings that
+# can cause problems for some readers using a VPN.
+# The `requests` version above is more robust
+# in that regard.
+
+
+# import urllib
+
+# def download_and_load_file(file_path, url):
+
+#     if not os.path.exists(file_path):
+#         with urllib.request.urlopen(url) as response:
+#             text_data = response.read().decode("utf-8")
+#         with open(file_path, "w", encoding="utf-8") as file:
+#             file.write(text_data)
+
+#     else:
+#         with open(file_path, "r", encoding="utf-8") as file:
+#             text_data = file.read()
+
+#     with open(file_path, "r", encoding="utf-8") as file:
+#         data = json.load(file)
+
+#     return data
+
+
 def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task. "
@@ -202,27 +224,16 @@ def query_model(
        }
    }

-    # Convert the dictionary to a JSON formatted string and encode it to bytes
-    payload = json.dumps(data).encode("utf-8")
-
-    # Create a request object, setting the method to POST and adding necessary headers
-    request = urllib.request.Request(
-        url,
-        data=payload,
-        method="POST"
-    )
-    request.add_header("Content-Type", "application/json")
-
-    # Send the request and capture the response
-    response_data = ""
-    with urllib.request.urlopen(request) as response:
-        # Read and decode the response
-        while True:
-            line = response.readline().decode("utf-8")
+    # Send the POST request
+    with requests.post(url, json=data, stream=True, timeout=30) as r:
+        r.raise_for_status()
+        response_data = ""
+        for line in r.iter_lines(decode_unicode=True):
            if not line:
-                break
+                continue
            response_json = json.loads(line)
-            response_data += response_json["message"]["content"]
+            if "message" in response_json:
+                response_data += response_json["message"]["content"]

    return response_data

--- a/pkg/llms_from_scratch/qwen3.py
+++ b/pkg/llms_from_scratch/qwen3.py
@@ -6,9 +6,9 @@
 import os
 import json
 import re
-import urllib.request
 from pathlib import Path

+import requests
 import torch
 import torch.nn as nn

@@ -660,7 +660,12 @@ def download_from_huggingface(repo_id, filename, local_dir, revision="main"):
        print(f"File already exists: {dest_path}")
    else:
        print(f"Downloading {url} to {dest_path}...")
-        urllib.request.urlretrieve(url, dest_path)
+        response = requests.get(url, stream=True, timeout=60)
+        response.raise_for_status()
+        with open(dest_path, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    f.write(chunk)

    return dest_path

--- a/pkg/llms_from_scratch/tests/test_appendix_e.py
+++ b/pkg/llms_from_scratch/tests/test_appendix_e.py
@@ -12,9 +12,9 @@ from llms_from_scratch.ch06 import (
 from llms_from_scratch.appendix_e import replace_linear_with_lora

 from pathlib import Path
-import urllib

 import pandas as pd
+import requests
 import tiktoken
 import torch
 from torch.utils.data import DataLoader, Subset
@@ -35,7 +35,7 @@ def test_train_classifier_lora(tmp_path):
        download_and_unzip_spam_data(
            url, zip_path, extracted_path, data_file_path
        )
-    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+    except (requests.exceptions.RequestException, TimeoutError) as e:
        print(f"Primary URL failed: {e}. Trying backup URL...")
        backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
        download_and_unzip_spam_data(
--- a/pkg/llms_from_scratch/tests/test_ch02.py
+++ b/pkg/llms_from_scratch/tests/test_ch02.py
@@ -6,8 +6,8 @@
 from llms_from_scratch.ch02 import create_dataloader_v1

 import os
-import urllib.request

+import requests
 import pytest
 import torch

@@ -16,11 +16,17 @@ import torch
 def test_dataloader(tmp_path, file_name):

    if not os.path.exists("the-verdict.txt"):
-        url = ("https://raw.githubusercontent.com/rasbt/"
-               "LLMs-from-scratch/main/ch02/01_main-chapter-code/"
-               "the-verdict.txt")
+        url = (
+            "https://raw.githubusercontent.com/rasbt/"
+            "LLMs-from-scratch/main/ch02/01_main-chapter-code/"
+            "the-verdict.txt"
+        )
        file_path = "the-verdict.txt"
-        urllib.request.urlretrieve(url, file_path)
+
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        with open(file_path, "wb") as f:
+            f.write(response.content)

    with open("the-verdict.txt", "r", encoding="utf-8") as f:
        raw_text = f.read()
--- a/pkg/llms_from_scratch/tests/test_ch05.py
+++ b/pkg/llms_from_scratch/tests/test_ch05.py
@@ -8,8 +8,8 @@ from llms_from_scratch.ch04 import GPTModel, GPTModelFast
 from llms_from_scratch.ch05 import train_model_simple

 import os
-import urllib

+import requests
 import pytest
 import tiktoken
 import torch
@@ -46,8 +46,9 @@ def test_train_simple(tmp_path, ModelClass):
    url = "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt"

    if not os.path.exists(file_path):
-        with urllib.request.urlopen(url) as response:
-            text_data = response.read().decode("utf-8")
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        text_data = response.text
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text_data)
    else:
--- a/pkg/llms_from_scratch/tests/test_ch06.py
+++ b/pkg/llms_from_scratch/tests/test_ch06.py
@@ -11,8 +11,8 @@ from llms_from_scratch.ch06 import (
 )

 from pathlib import Path
-import urllib

+import requests
 import pandas as pd
 import tiktoken
 import torch
@@ -34,7 +34,7 @@ def test_train_classifier(tmp_path):
        download_and_unzip_spam_data(
            url, zip_path, extracted_path, data_file_path
        )
-    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+    except (requests.exceptions.RequestException, TimeoutError) as e:
        print(f"Primary URL failed: {e}. Trying backup URL...")
        backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
        download_and_unzip_spam_data(
--- a/pkg/llms_from_scratch/utils.py
+++ b/pkg/llms_from_scratch/utils.py
@@ -9,10 +9,9 @@ import ast
 import re
 import types
 from pathlib import Path
-import urllib.request
-import urllib.parse

 import nbformat
+import requests


 def _extract_imports(src: str):
@@ -125,21 +124,24 @@ def import_definitions_from_notebook(nb_dir_or_path, notebook_name=None, *, extr
    exec(src, mod.__dict__)
    return mod

+
 def download_file(url, out_dir="."):
    """Simple file download utility for tests."""
-    from pathlib import Path
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
-    filename = Path(urllib.parse.urlparse(url).path).name
+    filename = Path(url).name
    dest = out_dir / filename
-    
+
    if dest.exists():
        return dest
-        
+
    try:
-        with urllib.request.urlopen(url) as response:
-            with open(dest, 'wb') as f:
-                f.write(response.read())
+        response = requests.get(url, stream=True, timeout=30)
+        response.raise_for_status()
+        with open(dest, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    f.write(chunk)
        return dest
    except Exception as e:
        raise RuntimeError(f"Failed to download {url}: {e}")