Switch from urllib to requests to improve reliability (#867)

* Switch from urllib to requests to improve reliability

* Keep ruff linter-specific

* update

* update

* update
This commit is contained in:
Sebastian Raschka
2025-10-07 15:22:59 -05:00
committed by GitHub
parent 8552565bda
commit 7bd263144e
47 changed files with 592 additions and 436 deletions

View File

@@ -12,9 +12,9 @@ from llms_from_scratch.ch06 import (
from llms_from_scratch.appendix_e import replace_linear_with_lora
from pathlib import Path
-import urllib
import pandas as pd
+import requests
import tiktoken
import torch
from torch.utils.data import DataLoader, Subset
@@ -35,7 +35,7 @@ def test_train_classifier_lora(tmp_path):
download_and_unzip_spam_data(
url, zip_path, extracted_path, data_file_path
)
-except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+except (requests.exceptions.RequestException, TimeoutError) as e:
print(f"Primary URL failed: {e}. Trying backup URL...")
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
download_and_unzip_spam_data(

View File

@@ -6,8 +6,8 @@
from llms_from_scratch.ch02 import create_dataloader_v1
import os
-import urllib.request
+import requests
import pytest
import torch
@@ -16,11 +16,17 @@ import torch
def test_dataloader(tmp_path, file_name):
if not os.path.exists("the-verdict.txt"):
-url = ("https://raw.githubusercontent.com/rasbt/"
-"LLMs-from-scratch/main/ch02/01_main-chapter-code/"
-"the-verdict.txt")
+url = (
+"https://raw.githubusercontent.com/rasbt/"
+"LLMs-from-scratch/main/ch02/01_main-chapter-code/"
+"the-verdict.txt"
+)
file_path = "the-verdict.txt"
-urllib.request.urlretrieve(url, file_path)
+response = requests.get(url, timeout=30)
+response.raise_for_status()
+with open(file_path, "wb") as f:
+f.write(response.content)
with open("the-verdict.txt", "r", encoding="utf-8") as f:
raw_text = f.read()

View File

@@ -8,8 +8,8 @@ from llms_from_scratch.ch04 import GPTModel, GPTModelFast
from llms_from_scratch.ch05 import train_model_simple
import os
-import urllib
+import requests
import pytest
import tiktoken
import torch
@@ -46,8 +46,9 @@ def test_train_simple(tmp_path, ModelClass):
url = "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt"
if not os.path.exists(file_path):
-with urllib.request.urlopen(url) as response:
-text_data = response.read().decode("utf-8")
+response = requests.get(url, timeout=30)
+response.raise_for_status()
+text_data = response.text
with open(file_path, "w", encoding="utf-8") as f:
f.write(text_data)
else:

View File

@@ -11,8 +11,8 @@ from llms_from_scratch.ch06 import (
)
from pathlib import Path
-import urllib
+import requests
import pandas as pd
import tiktoken
import torch
@@ -34,7 +34,7 @@ def test_train_classifier(tmp_path):
download_and_unzip_spam_data(
url, zip_path, extracted_path, data_file_path
)
-except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+except (requests.exceptions.RequestException, TimeoutError) as e:
print(f"Primary URL failed: {e}. Trying backup URL...")
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
download_and_unzip_spam_data(