Swap urllib.request with requests (#993)

This commit is contained in:
Sebastian Raschka
2026-03-30 22:03:13 -04:00
committed by GitHub
parent 6b9502056f
commit d977841fad
4 changed files with 76 additions and 96 deletions

View File

@@ -5,11 +5,9 @@
 import os
-import urllib.request
-# import requests
 import json
 import numpy as np
+import requests
 import tensorflow as tf
 from tqdm import tqdm
@@ -48,41 +46,38 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
     def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
-
-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
-
-            block_size = 1024  # 1 Kilobyte
-
-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
-                        file.write(chunk)
-                        progress_bar.update(len(chunk))
-            return True
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()
+
+        file_size = int(response.headers.get("Content-Length", 0))
+
+        # Check if file exists and has same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True
+
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
+                        file.write(chunk)
+                        progress_bar.update(len(chunk))
+        return True
 
     try:
         if _attempt_download(url):
             return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
         if backup_url is not None:
             print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
             try:
                 if _attempt_download(backup_url):
                     return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                 pass
 
         # If we reach here, both attempts have failed

View File

@@ -5,11 +5,9 @@
 import os
-import urllib.request
-# import requests
 import json
 import numpy as np
+import requests
 import tensorflow as tf
 from tqdm import tqdm
@@ -48,41 +46,38 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
     def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
-
-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
-
-            block_size = 1024  # 1 Kilobyte
-
-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
-                        file.write(chunk)
-                        progress_bar.update(len(chunk))
-            return True
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()
+
+        file_size = int(response.headers.get("Content-Length", 0))
+
+        # Check if file exists and has same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True
+
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
+                        file.write(chunk)
+                        progress_bar.update(len(chunk))
+        return True
 
     try:
         if _attempt_download(url):
             return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
         if backup_url is not None:
             print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
             try:
                 if _attempt_download(backup_url):
                     return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                 pass
 
         # If we reach here, both attempts have failed

View File

@@ -5,11 +5,9 @@
 import os
-import urllib.request
-# import requests
 import json
 import numpy as np
+import requests
 import tensorflow as tf
 from tqdm import tqdm
@@ -48,41 +46,38 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
     def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
-
-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
-
-            block_size = 1024  # 1 Kilobyte
-
-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
-                        file.write(chunk)
-                        progress_bar.update(len(chunk))
-            return True
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()
+
+        file_size = int(response.headers.get("Content-Length", 0))
+
+        # Check if file exists and has same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True
+
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
+                        file.write(chunk)
+                        progress_bar.update(len(chunk))
+        return True
 
     try:
         if _attempt_download(url):
             return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
         if backup_url is not None:
             print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
             try:
                 if _attempt_download(backup_url):
                     return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                 pass
 
         # If we reach here, both attempts have failed

View File

@@ -5,11 +5,9 @@
 import os
-import urllib.request
-# import requests
 import json
 import numpy as np
+import requests
 import tensorflow as tf
 from tqdm import tqdm
@@ -48,41 +46,38 @@ def download_and_load_gpt2(model_size, models_dir):
 def download_file(url, destination, backup_url=None):
     def _attempt_download(download_url):
-        with urllib.request.urlopen(download_url) as response:
-            # Get the total file size from headers, defaulting to 0 if not present
-            file_size = int(response.headers.get("Content-Length", 0))
-
-            # Check if file exists and has the same size
-            if os.path.exists(destination):
-                file_size_local = os.path.getsize(destination)
-                if file_size == file_size_local:
-                    print(f"File already exists and is up-to-date: {destination}")
-                    return True  # Indicate success without re-downloading
-
-            block_size = 1024  # 1 Kilobyte
-
-            # Initialize the progress bar with total file size
-            progress_bar_description = os.path.basename(download_url)
-            with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
-                with open(destination, "wb") as file:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
-                        file.write(chunk)
-                        progress_bar.update(len(chunk))
-            return True
+        response = requests.get(download_url, stream=True, timeout=60)
+        response.raise_for_status()
+
+        file_size = int(response.headers.get("Content-Length", 0))
+
+        # Check if file exists and has same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size and file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return True
+
+        block_size = 1024  # 1 KB
+        desc = os.path.basename(download_url)
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=desc) as progress_bar:
+            with open(destination, "wb") as file:
+                for chunk in response.iter_content(chunk_size=block_size):
+                    if chunk:
+                        file.write(chunk)
+                        progress_bar.update(len(chunk))
+        return True
 
     try:
         if _attempt_download(url):
             return
-    except (urllib.error.HTTPError, urllib.error.URLError):
+    except requests.exceptions.RequestException:
        if backup_url is not None:
             print(f"Primary URL ({url}) failed. Attempting backup URL: {backup_url}")
             try:
                 if _attempt_download(backup_url):
                     return
-            except urllib.error.HTTPError:
+            except requests.exceptions.RequestException:
                 pass
 
         # If we reach here, both attempts have failed