diff --git a/ch02/01_main-chapter-code/ch02.ipynb b/ch02/01_main-chapter-code/ch02.ipynb index a5e5b12..461d920 100644 --- a/ch02/01_main-chapter-code/ch02.ipynb +++ b/ch02/01_main-chapter-code/ch02.ipynb @@ -41,6 +41,14 @@ "print(\"tiktoken version:\", version(\"tiktoken\"))" ] }, + { + "cell_type": "markdown", + "id": "628b2922-594d-4ff9-bd82-04f1ebdf41f5", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "2417139b-2357-44d2-bd67-23f5d7f52ae7", @@ -49,6 +57,14 @@ "## 2.1 Understanding word embeddings" ] }, + { + "cell_type": "markdown", + "id": "ba08d16f-f237-4166-bf89-0e9fe703e7b4", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "0b6816ae-e927-43a9-b4dd-e47a9b0e1cf6", @@ -57,6 +73,14 @@ "- No code in this section" ] }, + { + "cell_type": "markdown", + "id": "d6b80160-1f10-4aad-a85e-9c79444de9e6", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "eddbb984-8d23-40c5-bbfa-c3c379e7eec3", @@ -65,6 +89,14 @@ "## 2.2 Tokenizing text" ] }, + { + "cell_type": "markdown", + "id": "09872fdb-9d4e-40c4-949d-52a01a43ec4b", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "8cceaa18-833d-46b6-b211-b20c53902805", @@ -224,6 +256,14 @@ "- This is pretty good, and we are now ready to apply this tokenization to the raw text" ] }, + { + "cell_type": "markdown", + "id": "6cbe9330-b587-4262-be9f-497a84ec0e8a", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "code", "execution_count": 7, @@ -278,6 +318,14 @@ "## 2.3 Converting tokens into token IDs" ] }, + { + "cell_type": "markdown", + "id": "177b041d-f739-43b8-bd81-0443ae3a7f8d", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "b5973794-7002-4202-8b12-0900cd779720", @@ -396,6 +444,14 @@ " break" ] }, + { + "cell_type": "markdown", + "id": "67407a9f-0202-4e7c-9ed7-1b3154191ebc", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "4e569647-2589-4c9d-9a5c-aef1c88a0a9a", @@ -429,6 +485,14 @@ " return text" ] }, + { + "cell_type": "markdown", + "id": "cc21d347-ec03-4823-b3d4-9d686e495617", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "c2950a94-6b0d-474e-8ed0-66d0c3c1a95c", @@ -518,6 +582,14 @@ "## 2.4 Adding special context tokens" ] }, + { + "cell_type": "markdown", + "id": "aa7fc96c-e1fd-44fb-b7f5-229d7c7922a4", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "9d709d57-2486-4152-b7f9-d3e4bd8634cd", @@ -537,6 +609,14 @@ "\n" ] }, + { + "cell_type": "markdown", + "id": "52442951-752c-4855-9752-b121a17fef55", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "c661a397-da06-4a86-ac27-072dbe7cb172", @@ -585,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "id": "ce9df29c-6c5b-43f1-8c1a-c7f7b79db78f", "metadata": {}, "outputs": [], @@ -601,7 +681,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "id": "57c3143b-e860-4d3b-a22a-de22b547a6a9", "metadata": {}, "outputs": [ @@ -611,7 +691,7 @@ "1161" ] }, - "execution_count": 18, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -622,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "id": "50e51bb1-ae05-4aa8-a9ff-455b65ed1959", "metadata": {}, "outputs": [ @@ -653,7 +733,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 24, "id": "948861c5-3f30-4712-a234-725f20d26f68", "metadata": {}, "outputs": [], @@ -689,7 +769,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 25, "id": "effcef79-e0a5-4f4a-a43a-31dd94b9250a", "metadata": {}, "outputs": [ @@ -714,7 +794,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 26, "id": "ddfe7346-398d-4bf8-99f1-5b071244ce95", "metadata": {}, "outputs": [ @@ -739,7 +819,7 @@ " 7]" ] }, - "execution_count": 22, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -750,7 +830,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 27, "id": "0c350ff6-2734-4e84-9ec7-d578baa4ae1b", "metadata": {}, "outputs": [ @@ -760,7 +840,7 @@ "'<|unk|>, do you like tea? <|endoftext|> In the sunlit terraces of the <|unk|>.'" ] }, - "execution_count": 23, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -792,7 +872,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 28, "id": "ede1d41f-934b-4bf4-8184-54394a257a94", "metadata": {}, "outputs": [], @@ -802,7 +882,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 29, "id": "48967a77-7d17-42bf-9e92-fc619d63a59e", "metadata": {}, "outputs": [ @@ -823,7 +903,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "id": "6ad3312f-a5f7-4efc-9d7d-8ea09d7b5128", "metadata": {}, "outputs": [], @@ -833,7 +913,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "id": "5ff2cd85-7cfb-4325-b390-219938589428", "metadata": {}, "outputs": [ @@ -855,7 +935,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 32, "id": "d26a48bb-f82e-41a8-a955-a1c9cf9d50ab", "metadata": {}, "outputs": [ @@ -873,6 +953,14 @@ "print(strings)" ] }, + { + "cell_type": "markdown", + "id": "c082d41f-33d7-4827-97d8-993d5a84bb3c", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "abbd7c0d-70f8-4386-a114-907e96c950b0", @@ -881,9 +969,17 @@ "## 2.6 Data sampling with a sliding window" ] }, + { + "cell_type": "markdown", + "id": "39fb44f4-0c43-4a6a-9c2f-9cf31452354c", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 33, "id": "848d5ade-fd1f-46c3-9e31-1426e315c71b", "metadata": {}, "outputs": [ @@ -914,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 34, "id": "e84424a7-646d-45b6-99e3-80d15fb761f2", "metadata": {}, "outputs": [], @@ -924,7 +1020,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 35, "id": "dfbff852-a92f-48c8-a46d-143a0f109f40", "metadata": {}, "outputs": [ @@ -957,7 +1053,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 36, "id": "d97b031e-ed55-409d-95f2-aeb38c6fe366", "metadata": {}, "outputs": [ @@ -982,7 +1078,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 37, "id": "f57bd746-dcbf-4433-8e24-ee213a8c34a1", "metadata": {}, "outputs": [ @@ -1005,6 +1101,14 @@ " print(tokenizer.decode(context), \"---->\", tokenizer.decode([desired]))" ] }, + { + "cell_type": "markdown", + "id": "b59f90fe-fa73-4c2d-bd9b-ce7c2ce2ba00", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "210d2dd9-fc20-4927-8d3d-1466cf41aae1", @@ -1024,7 +1128,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 38, "id": "e1770134-e7f3-4725-a679-e04c3be48cac", "metadata": {}, "outputs": [ @@ -1051,7 +1155,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 39, "id": "74b41073-4c9f-46e2-a1bd-d38e4122b375", "metadata": {}, "outputs": [], @@ -1084,7 +1188,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 40, "id": "5eb30ebe-97b3-43c5-9ff1-a97d621b3c4e", "metadata": {}, "outputs": [], @@ -1114,7 +1218,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 41, "id": "df31d96c-6bfd-4564-a956-6192242d7579", "metadata": {}, "outputs": [], @@ -1125,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 42, "id": "9226d00c-ad9a-4949-a6e4-9afccfc7214f", "metadata": {}, "outputs": [ @@ -1147,7 +1251,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 43, "id": "10deb4bc-4de1-4d20-921e-4b1c7a0e1a6d", "metadata": {}, "outputs": [ @@ -1164,6 +1268,14 @@ "print(second_batch)" ] }, + { + "cell_type": "markdown", + "id": "9cb467e0-bdcd-4dda-b9b0-a738c5d33ac3", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "b1ae6d45-f26e-4b83-9c7b-cff55ffa7d16", @@ -1175,7 +1287,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 44, "id": "1916e7a6-f03d-4f09-91a6-d0bdbac5a58c", "metadata": {}, "outputs": [ @@ -1232,6 +1344,14 @@ "- Usually, these embedding layers are part of the LLM itself and are updated (trained) during model training" ] }, + { + "cell_type": "markdown", + "id": "e85089aa-8671-4e5f-a2b3-ef252004ee4c", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "44e014ca-1fc5-4b90-b6fa-c2097bb92c0b", @@ -1242,12 +1362,12 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 46, "id": "15a6304c-9474-4470-b85d-3991a49fa653", "metadata": {}, "outputs": [], "source": [ - "input_ids = torch.tensor([5, 1, 3, 2])" + "input_ids = torch.tensor([2, 3, 5, 1])" ] }, { @@ -1260,7 +1380,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 47, "id": "93cb2cee-9aa6-4bb8-8977-c65661d16eda", "metadata": {}, "outputs": [], @@ -1282,7 +1402,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 49, "id": "a686eb61-e737-4351-8f1c-222913d47468", "metadata": {}, "outputs": [ @@ -1323,7 +1443,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 50, "id": "e43600ba-f287-4746-8ddf-d0f71a9023ca", "metadata": {}, "outputs": [ @@ -1350,7 +1470,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 51, "id": "50280ead-0363-44c8-8c35-bb885d92c8b7", "metadata": {}, "outputs": [ @@ -1358,10 +1478,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "tensor([[-2.8400, -0.7849, -1.4096],\n", - " [ 0.9178, 1.5810, 1.3010],\n", + "tensor([[ 1.2753, -0.2010, -0.1606],\n", " [-0.4015, 0.9666, -1.1481],\n", - " [ 1.2753, -0.2010, -0.1606]], grad_fn=)\n" + " [-2.8400, -0.7849, -1.4096],\n", + " [ 0.9178, 1.5810, 1.3010]], grad_fn=)\n" ] } ], @@ -1369,6 +1489,14 @@ "print(embedding_layer(input_ids))" ] }, + { + "cell_type": "markdown", + "id": "f33c2741-bf1b-4c60-b7fd-61409d556646", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "c393d270-b950-4bc8-99ea-97d74f2ea0f6", @@ -1377,6 +1505,22 @@ "## 2.8 Encoding word positions" ] }, + { + "cell_type": "markdown", + "id": "9e0b14a2-f3f3-490e-b513-f262dbcf94fa", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "48de37db-d54d-45c4-ab3e-88c0783ad2e4", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "7f187f87-c1f8-4c2e-8050-350bbb972f55", @@ -1535,6 +1679,14 @@ "print(input_embeddings.shape)" ] }, + { + "cell_type": "markdown", + "id": "d1bb0f7e-460d-44db-b366-096adcd84fff", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "id": "63230f2e-258f-4497-9e2e-8deee4530364", diff --git a/ch02/01_main-chapter-code/figures/1.webp b/ch02/01_main-chapter-code/figures/1.webp new file mode 100644 index 0000000..48a4df4 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/1.webp differ diff --git a/ch02/01_main-chapter-code/figures/10.webp b/ch02/01_main-chapter-code/figures/10.webp new file mode 100644 index 0000000..35dd7d5 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/10.webp differ diff --git a/ch02/01_main-chapter-code/figures/11.webp b/ch02/01_main-chapter-code/figures/11.webp new file mode 100644 index 0000000..444d265 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/11.webp differ diff --git a/ch02/01_main-chapter-code/figures/12.webp b/ch02/01_main-chapter-code/figures/12.webp new file mode 100644 index 0000000..2da9d68 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/12.webp differ diff --git a/ch02/01_main-chapter-code/figures/13.webp b/ch02/01_main-chapter-code/figures/13.webp new file mode 100644 index 0000000..fe2e6f4 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/13.webp differ diff --git a/ch02/01_main-chapter-code/figures/14.webp b/ch02/01_main-chapter-code/figures/14.webp new file mode 100644 index 0000000..d8f48e4 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/14.webp differ diff --git a/ch02/01_main-chapter-code/figures/15.webp b/ch02/01_main-chapter-code/figures/15.webp new file mode 100644 index 0000000..61a93ef Binary files /dev/null and b/ch02/01_main-chapter-code/figures/15.webp differ diff --git a/ch02/01_main-chapter-code/figures/16.webp b/ch02/01_main-chapter-code/figures/16.webp new file mode 100644 index 0000000..29d1609 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/16.webp differ diff --git a/ch02/01_main-chapter-code/figures/17.webp b/ch02/01_main-chapter-code/figures/17.webp new file mode 100644 index 0000000..c88db28 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/17.webp differ diff --git a/ch02/01_main-chapter-code/figures/18.webp b/ch02/01_main-chapter-code/figures/18.webp new file mode 100644 index 0000000..a3ae32a Binary files /dev/null and b/ch02/01_main-chapter-code/figures/18.webp differ diff --git a/ch02/01_main-chapter-code/figures/19.webp b/ch02/01_main-chapter-code/figures/19.webp new file mode 100644 index 0000000..b725c2d Binary files /dev/null and b/ch02/01_main-chapter-code/figures/19.webp differ diff --git a/ch02/01_main-chapter-code/figures/2.webp b/ch02/01_main-chapter-code/figures/2.webp new file mode 100644 index 0000000..8be4ae9 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/2.webp differ diff --git a/ch02/01_main-chapter-code/figures/3.webp b/ch02/01_main-chapter-code/figures/3.webp new file mode 100644 index 0000000..b3b26ff Binary files /dev/null and b/ch02/01_main-chapter-code/figures/3.webp differ diff --git a/ch02/01_main-chapter-code/figures/4.webp b/ch02/01_main-chapter-code/figures/4.webp new file mode 100644 index 0000000..54f1a7f Binary files /dev/null and b/ch02/01_main-chapter-code/figures/4.webp differ diff --git a/ch02/01_main-chapter-code/figures/5.webp b/ch02/01_main-chapter-code/figures/5.webp new file mode 100644 index 0000000..10965b0 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/5.webp differ diff --git a/ch02/01_main-chapter-code/figures/6.webp b/ch02/01_main-chapter-code/figures/6.webp new file mode 100644 index 0000000..d8f30f2 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/6.webp differ diff --git a/ch02/01_main-chapter-code/figures/7.webp b/ch02/01_main-chapter-code/figures/7.webp new file mode 100644 index 0000000..ed07e3c Binary files /dev/null and b/ch02/01_main-chapter-code/figures/7.webp differ diff --git a/ch02/01_main-chapter-code/figures/8.webp b/ch02/01_main-chapter-code/figures/8.webp new file mode 100644 index 0000000..6cbed99 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/8.webp differ diff --git a/ch02/01_main-chapter-code/figures/9.webp b/ch02/01_main-chapter-code/figures/9.webp new file mode 100644 index 0000000..ac9d879 Binary files /dev/null and b/ch02/01_main-chapter-code/figures/9.webp differ diff --git a/ch03/01_main-chapter-code/ch03.ipynb b/ch03/01_main-chapter-code/ch03.ipynb index 8e70dda..734bcdf 100644 --- a/ch03/01_main-chapter-code/ch03.ipynb +++ b/ch03/01_main-chapter-code/ch03.ipynb @@ -1865,7 +1865,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.12" } }, "nbformat": 4,