Switch from urllib to requests to improve reliability (#867)

* Switch from urllib to requests to improve reliability * Keep ruff linter-specific * update * update * update
2026-04-10 12:33:42 +00:00 · 2025-10-07 15:22:59 -05:00
parent 8552565bda
commit 7bd263144e
47 changed files with 592 additions and 436 deletions
--- a/ch05/01_main-chapter-code/gpt_generate.py
+++ b/ch05/01_main-chapter-code/gpt_generate.py
@@ -7,9 +7,8 @@ import argparse
 import json
 import numpy as np
 import os
-import urllib.request

-# import requests
+import requests
 import tensorflow as tf
 import tiktoken
 import torch
@@ -60,18 +59,18 @@ def download_and_load_gpt2(model_size, models_dir):
    return settings, params


-"""
 def download_file(url, destination):
-    # Send a GET request to download the file in streaming mode
-    response = requests.get(url, stream=True)
+    # Send a GET request to download the file
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()

    # Get the total file size from headers, defaulting to 0 if not present
-    file_size = int(response.headers.get("content-length", 0))
+    file_size = int(response.headers.get("Content-Length", 0))

    # Check if file exists and has the same size
    if os.path.exists(destination):
        file_size_local = os.path.getsize(destination)
-        if file_size == file_size_local:
+        if file_size and file_size == file_size_local:
            print(f"File already exists and is up-to-date: {destination}")
            return

@@ -79,43 +78,12 @@ def download_file(url, destination):
    block_size = 1024  # 1 Kilobyte

    # Initialize the progress bar with total file size
-    progress_bar_description = url.split("/")[-1]  # Extract filename from URL
+    progress_bar_description = os.path.basename(url)
    with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
        # Open the destination file in binary write mode
        with open(destination, "wb") as file:
-            # Iterate over the file data in chunks
-            for chunk in response.iter_content(block_size):
-                progress_bar.update(len(chunk))  # Update progress bar
-                file.write(chunk)  # Write the chunk to the file
-"""
-
-
-def download_file(url, destination):
-    # Send a GET request to download the file
-    with urllib.request.urlopen(url) as response:
-        # Get the total file size from headers, defaulting to 0 if not present
-        file_size = int(response.headers.get("Content-Length", 0))
-
-        # Check if file exists and has the same size
-        if os.path.exists(destination):
-            file_size_local = os.path.getsize(destination)
-            if file_size == file_size_local:
-                print(f"File already exists and is up-to-date: {destination}")
-                return
-
-        # Define the block size for reading the file
-        block_size = 1024  # 1 Kilobyte
-
-        # Initialize the progress bar with total file size
-        progress_bar_description = os.path.basename(url)  # Extract filename from URL
-        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-            # Open the destination file in binary write mode
-            with open(destination, "wb") as file:
-                # Read the file in chunks and write to destination
-                while True:
-                    chunk = response.read(block_size)
-                    if not chunk:
-                        break
+            for chunk in response.iter_content(chunk_size=block_size):
+                if chunk:
                    file.write(chunk)
                    progress_bar.update(len(chunk))  # Update progress bar