diff --git a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb index 6dc17e0..2e8981d 100644 --- a/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb +++ b/ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb @@ -246,7 +246,7 @@ "metadata": {}, "source": [ "- The BPE algorithm was originally described in 1994: \"[A New Algorithm for Data Compression](http://www.pennelynn.com/Documents/CUJ/HTML/94HTML/19940045.HTM)\" by Philip Gage\n", - "- Before we get to the actual code implementation, the form that is used for LLM tokenizers today can be summarized as follows:" + "- Before we get to the actual code implementation, the form that is used for LLM tokenizers today can be summarized as described in the following sections." ] }, { @@ -286,7 +286,7 @@ " \n", "## 1.4 BPE algorithm example\n", "\n", - "### 1.4.1 Concrete example of the encoding part (steps 1 & 2)\n", + "### 1.4.1 Concrete example of the encoding part (steps 1 & 2 in section 1.3)\n", "\n", "- Suppose we have the text (training dataset) `the cat in the hat` from which we want to build the vocabulary for a BPE tokenizer\n", "\n", @@ -348,7 +348,7 @@ "- and so forth\n", "\n", " \n", - "### 1.4.2 Concrete example of the decoding part (steps 3)\n", + "### 1.4.2 Concrete example of the decoding part (step 3 in section 1.3)\n", "\n", "- To restore the original text, we reverse the process by substituting each token ID with its corresponding pair in the reverse order they were introduced\n", "- Start with the final compressed text: `<258>cat in <258>hat`\n", @@ -604,10 +604,10 @@ " break\n", "\n", " # Find the pair with the best (lowest) rank\n", - " min_rank = 1_000_000_000\n", + " min_rank = float(\"inf\")\n", " bigram = None\n", " for p in pairs:\n", - " r = self.bpe_ranks.get(p, 1_000_000_000)\n", + " r = self.bpe_ranks.get(p, float(\"inf\"))\n", " if r < min_rank:\n", " min_rank = r\n", " bigram = p\n",