Switch from urllib to requests to improve reliability (#867)

* Switch from urllib to requests to improve reliability

* Keep ruff linter-specific

* update

* update

* update
This commit is contained in:
Sebastian Raschka
2025-10-07 15:22:59 -05:00
committed by GitHub
parent 8552565bda
commit 7bd263144e
47 changed files with 592 additions and 436 deletions

View File

@@ -163,6 +163,30 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"\n",
"if not os.path.exists(\"the-verdict.txt\"):\n",
" url = (\n",
" \"https://raw.githubusercontent.com/rasbt/\"\n",
" \"LLMs-from-scratch/main/ch02/01_main-chapter-code/\"\n",
" \"the-verdict.txt\"\n",
" )\n",
" file_path = \"the-verdict.txt\"\n",
"\n",
" response = requests.get(url, timeout=30)\n",
" response.raise_for_status()\n",
" with open(file_path, \"wb\") as f:\n",
" f.write(response.content)\n",
"\n",
"\n",
"# The book originally used the urllib-based code below.\n",
"# However, urllib uses older protocol settings that\n",
"# can cause problems for some readers using a VPN.\n",
"# The `requests` version above is more robust\n",
"# in that regard.\n",
"\n",
"\"\"\"\n",
"import os\n",
"import urllib.request\n",
"\n",
@@ -171,7 +195,8 @@
" \"LLMs-from-scratch/main/ch02/01_main-chapter-code/\"\n",
" \"the-verdict.txt\")\n",
" file_path = \"the-verdict.txt\"\n",
" urllib.request.urlretrieve(url, file_path)"
" urllib.request.urlretrieve(url, file_path)\n",
"\"\"\""
]
},
{

View File

@@ -823,7 +823,7 @@
],
"source": [
"import os\n",
"import urllib.request\n",
"import requests\n",
"\n",
"def download_file_if_absent(url, filename, search_dirs):\n",
" for directory in search_dirs:\n",
@@ -834,13 +834,19 @@
"\n",
" target_path = os.path.join(search_dirs[0], filename)\n",
" try:\n",
" with urllib.request.urlopen(url) as response, open(target_path, \"wb\") as out_file:\n",
" out_file.write(response.read())\n",
" response = requests.get(url, stream=True, timeout=60)\n",
" response.raise_for_status()\n",
" with open(target_path, \"wb\") as out_file:\n",
" for chunk in response.iter_content(chunk_size=8192):\n",
" if chunk:\n",
" out_file.write(chunk)\n",
" print(f\"Downloaded {filename} to {target_path}\")\n",
" except Exception as e:\n",
" print(f\"Failed to download {filename}. Error: {e}\")\n",
"\n",
" return target_path\n",
"\n",
"\n",
"verdict_path = download_file_if_absent(\n",
" url=(\n",
" \"https://raw.githubusercontent.com/rasbt/\"\n",