Fix timeout issue related to spam data backup url (#544)

* Add backup url for Spam Dataset

* import urllib

* fix url

* fix timeout issue
This commit is contained in:
Sebastian Raschka
2025-02-20 09:26:23 -06:00
committed by GitHub
parent c39aa32ef5
commit d1e99f6092
5 changed files with 10 additions and 9 deletions

View File

@@ -605,7 +605,8 @@ if __name__ == "__main__":
if not all_exist:
try:
download_and_unzip(url, zip_path, extract_to, new_file_path)
- except urllib.error.HTTPError:
+ except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+ print(f"Primary URL failed: {e}. Trying backup URL...")
backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
create_dataset_csvs(new_file_path)