From 14c7afaa58b12d0d0f3b011a7acde9b77bd227c7 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka <mail@sebastianraschka.com>
Date: Fri, 2 Jan 2026 14:34:31 -0600
Subject: [PATCH] Fix GitHub CI timeout issue for link checker (#937)

* Fix GitHub CI timeout issue for link checker

* update problematic links
---
 .github/workflows/check-links.yml               |  6 ++++++
 .../bpe-from-scratch-simple.ipynb               |  4 ++--
 ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb |  4 ++--
 conftest.py                                     | 17 +++++++++++++++++
 4 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 conftest.py

diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml
index 7ceb28f..a16c69f 100644
--- a/.github/workflows/check-links.yml
+++ b/.github/workflows/check-links.yml
@@ -27,12 +27,18 @@ jobs:
         uv add pytest-check-links
 
     - name: Check links
+      env:
+        CHECK_LINKS_TIMEOUT: "10"
       run: |
         source .venv/bin/activate
         pytest --check-links ./ \
           --check-links-ignore "https://platform.openai.com/*" \
           --check-links-ignore "https://openai.com/*" \
           --check-links-ignore "https://arena.lmsys.org" \
+          --check-links-ignore "https?://localhost(:\\d+)?/.*" \
+          --check-links-ignore "https?://127[.]0[.]0[.]1(:\\d+)?/.*" \
+          --check-links-ignore "https://mng\\.bz/.*" \
+          --check-links-ignore "https://github\\.com/.*" \
           --check-links-ignore "https://unsloth.ai/blog/gradient" \
           --check-links-ignore "https://www.reddit.com/r/*" \
           --check-links-ignore "https://code.visualstudio.com/*" \
diff --git a/ch02/05_bpe-from-scratch/bpe-from-scratch-simple.ipynb b/ch02/05_bpe-from-scratch/bpe-from-scratch-simple.ipynb
index c007d85..dd795f6 100644
--- a/ch02/05_bpe-from-scratch/bpe-from-scratch-simple.ipynb
+++ b/ch02/05_bpe-from-scratch/bpe-from-scratch-simple.ipynb
@@ -36,7 +36,7 @@
     "- This is a standalone notebook implementing the popular byte pair encoding (BPE) tokenization algorithm, which is used in models like GPT-2 to GPT-4, Llama 3, etc., from scratch for educational purposes\n",
     "- For more details about the purpose of tokenization, please refer to [Chapter 2](https://github.com/rasbt/LLMs-from-scratch/blob/main/ch02/01_main-chapter-code/ch02.ipynb); this code here is bonus material explaining the BPE algorithm\n",
     "- The original BPE tokenizer that OpenAI implemented for training the original GPT models can be found [here](https://github.com/openai/gpt-2/blob/master/src/encoder.py)\n",
-    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](http://www.pennelynn.com/Documents/CUJ/HTML/94HTML/19940045.HTM)\" by Philip Gage\n",
+    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](https://github.com/tpn/pdfs/blob/master/A%20New%20Algorithm%20for%20Data%20Compression%20(1994).pdf)\" by Philip Gage\n",
     "- Most projects, including Llama 3, nowadays use OpenAI's open-source [tiktoken library](https://github.com/openai/tiktoken) due to its computational performance; it allows loading pretrained GPT-2 and GPT-4 tokenizers, for example (the Llama 3 models were trained using the GPT-4 tokenizer as well)\n",
     "- The difference between the implementations above and my implementation in this notebook, besides it being is that it also includes a function for training the tokenizer (for educational purposes)\n",
     "- There's also an implementation called [minBPE](https://github.com/karpathy/minbpe) with training support, which is maybe more performant (my implementation here is focused on educational purposes); in contrast to `minbpe` my implementation additionally allows loading the original OpenAI tokenizer vocabulary and merges"
@@ -253,7 +253,7 @@
    "id": "8c0d4420-a4c7-4813-916a-06f4f46bc3f0",
    "metadata": {},
    "source": [
-    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](http://www.pennelynn.com/Documents/CUJ/HTML/94HTML/19940045.HTM)\" by Philip Gage\n",
+    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](https://github.com/tpn/pdfs/blob/master/A%20New%20Algorithm%20for%20Data%20Compression%20(1994).pdf)\" by Philip Gage\n",
     "- Before we get to the actual code implementation, the form that is used for LLM tokenizers today can be summarized as follows:"
    ]
   },
diff --git a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
index e8bce18..cad47a8 100644
--- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
+++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb
@@ -36,7 +36,7 @@
     "- This is a standalone notebook implementing the popular byte pair encoding (BPE) tokenization algorithm, which is used in models like GPT-2 to GPT-4, Llama 3, etc., from scratch for educational purposes\n",
     "- For more details about the purpose of tokenization, please refer to [Chapter 2](https://github.com/rasbt/LLMs-from-scratch/blob/main/ch02/01_main-chapter-code/ch02.ipynb); this code here is bonus material explaining the BPE algorithm\n",
     "- The original BPE tokenizer that OpenAI implemented for training the original GPT models can be found [here](https://github.com/openai/gpt-2/blob/master/src/encoder.py)\n",
-    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](http://www.pennelynn.com/Documents/CUJ/HTML/94HTML/19940045.HTM)\" by Philip Gage\n",
+    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](https://github.com/tpn/pdfs/blob/master/A%20New%20Algorithm%20for%20Data%20Compression%20(1994).pdf)\" by Philip Gage\n",
     "- Most projects, including Llama 3, nowadays use OpenAI's open-source [tiktoken library](https://github.com/openai/tiktoken) due to its computational performance; it allows loading pretrained GPT-2 and GPT-4 tokenizers, for example (the Llama 3 models were trained using the GPT-4 tokenizer as well)\n",
     "- The difference between the implementations above and my implementation in this notebook, besides it being is that it also includes a function for training the tokenizer (for educational purposes)\n",
     "- There's also an implementation called [minBPE](https://github.com/karpathy/minbpe) with training support, which is maybe more performant (my implementation here is focused on educational purposes); in contrast to `minbpe` my implementation additionally allows loading the original OpenAI tokenizer vocabulary and BPE \"merges\" (additionally, Hugging Face tokenizers are also capable of training and loading various tokenizers; see [this GitHub discussion](https://github.com/rasbt/LLMs-from-scratch/discussions/485) by a reader who trained a BPE tokenizer on the Nepali language for more info)"
@@ -245,7 +245,7 @@
    "id": "8c0d4420-a4c7-4813-916a-06f4f46bc3f0",
    "metadata": {},
    "source": [
-    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](http://www.pennelynn.com/Documents/CUJ/HTML/94HTML/19940045.HTM)\" by Philip Gage\n",
+    "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](https://github.com/tpn/pdfs/blob/master/A%20New%20Algorithm%20for%20Data%20Compression%20(1994).pdf)\" by Philip Gage\n",
     "- Before we get to the actual code implementation, the form that is used for LLM tokenizers today can be summarized as described in the following sections."
    ]
   },
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 0000000..4127f71
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,17 @@
+import os
+import requests
+
+
+def pytest_configure(config):
+    """Give ``requests`` sessions a default timeout when --check-links is on."""
+    if getattr(config.option, "check_links", False):
+        default_timeout = float(os.environ.get("CHECK_LINKS_TIMEOUT", "10"))
+        session_cls = requests.sessions.Session
+        unpatched_request = session_cls.request
+        # The wrapper fills in a timeout only when the caller supplied none.
+        def request_with_timeout(self, method, url, **kwargs):
+            if kwargs.get("timeout") is None:
+                kwargs = {**kwargs, "timeout": default_timeout}
+            return unpatched_request(self, method, url, **kwargs)
+
+        session_cls.request = request_with_timeout