Mirror of https://github.com/rasbt/LLMs-from-scratch.git (synced 2026-04-10 12:33:42 +00:00)
Switch from urllib to requests to improve reliability (#867)
* Switch from urllib to requests to improve reliability
* Keep ruff linter-specific
* update
* update
* update
This commit is contained in:
committed by
GitHub
parent
8552565bda
commit
7bd263144e
@@ -12,9 +12,9 @@ from llms_from_scratch.ch06 import (
|
||||
from llms_from_scratch.appendix_e import replace_linear_with_lora
|
||||
|
||||
from pathlib import Path
|
||||
import urllib
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
import tiktoken
|
||||
import torch
|
||||
from torch.utils.data import DataLoader, Subset
|
||||
@@ -35,7 +35,7 @@ def test_train_classifier_lora(tmp_path):
|
||||
download_and_unzip_spam_data(
|
||||
url, zip_path, extracted_path, data_file_path
|
||||
)
|
||||
except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
|
||||
except (requests.exceptions.RequestException, TimeoutError) as e:
|
||||
print(f"Primary URL failed: {e}. Trying backup URL...")
|
||||
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
|
||||
download_and_unzip_spam_data(
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
from llms_from_scratch.ch02 import create_dataloader_v1
|
||||
|
||||
import os
|
||||
import urllib.request
|
||||
|
||||
import requests
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
@@ -16,11 +16,17 @@ import torch
|
||||
def test_dataloader(tmp_path, file_name):
|
||||
|
||||
if not os.path.exists("the-verdict.txt"):
|
||||
url = ("https://raw.githubusercontent.com/rasbt/"
|
||||
"LLMs-from-scratch/main/ch02/01_main-chapter-code/"
|
||||
"the-verdict.txt")
|
||||
url = (
|
||||
"https://raw.githubusercontent.com/rasbt/"
|
||||
"LLMs-from-scratch/main/ch02/01_main-chapter-code/"
|
||||
"the-verdict.txt"
|
||||
)
|
||||
file_path = "the-verdict.txt"
|
||||
urllib.request.urlretrieve(url, file_path)
|
||||
|
||||
response = requests.get(url, timeout=30)
|
||||
response.raise_for_status()
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
|
||||
with open("the-verdict.txt", "r", encoding="utf-8") as f:
|
||||
raw_text = f.read()
|
||||
|
||||
@@ -8,8 +8,8 @@ from llms_from_scratch.ch04 import GPTModel, GPTModelFast
|
||||
from llms_from_scratch.ch05 import train_model_simple
|
||||
|
||||
import os
|
||||
import urllib
|
||||
|
||||
import requests
|
||||
import pytest
|
||||
import tiktoken
|
||||
import torch
|
||||
@@ -46,8 +46,9 @@ def test_train_simple(tmp_path, ModelClass):
|
||||
url = "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt"
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
with urllib.request.urlopen(url) as response:
|
||||
text_data = response.read().decode("utf-8")
|
||||
response = requests.get(url, timeout=30)
|
||||
response.raise_for_status()
|
||||
text_data = response.text
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(text_data)
|
||||
else:
|
||||
|
||||
@@ -11,8 +11,8 @@ from llms_from_scratch.ch06 import (
|
||||
)
|
||||
|
||||
from pathlib import Path
|
||||
import urllib
|
||||
|
||||
import requests
|
||||
import pandas as pd
|
||||
import tiktoken
|
||||
import torch
|
||||
@@ -34,7 +34,7 @@ def test_train_classifier(tmp_path):
|
||||
download_and_unzip_spam_data(
|
||||
url, zip_path, extracted_path, data_file_path
|
||||
)
|
||||
except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
|
||||
except (requests.exceptions.RequestException, TimeoutError) as e:
|
||||
print(f"Primary URL failed: {e}. Trying backup URL...")
|
||||
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
|
||||
download_and_unzip_spam_data(
|
||||
|
||||
Reference in New Issue
Block a user