diff --git a/ch05/01_main-chapter-code/ch05.ipynb b/ch05/01_main-chapter-code/ch05.ipynb index 7c6a56f..3d59560 100644 --- a/ch05/01_main-chapter-code/ch05.ipynb +++ b/ch05/01_main-chapter-code/ch05.ipynb @@ -75,7 +75,7 @@ "id": "efd27fcc-2886-47cb-b544-046c2c31f02a", "metadata": {}, "source": [ - "" + "" ] }, { @@ -91,7 +91,7 @@ "id": "f67711d4-8391-4fee-aeef-07ea53dd5841", "metadata": {}, "source": [ - "" + "" ] }, { @@ -195,7 +195,7 @@ "id": "741881f3-cee0-49ad-b11d-b9df3b3ac234", "metadata": {}, "source": [ - "" + "" ] }, { @@ -346,7 +346,7 @@ "id": "384d86a9-0013-476c-bb6b-274fd5f20b29", "metadata": {}, "source": [ - "" + "" ] }, { @@ -440,7 +440,7 @@ "id": "ad90592f-0d5d-4ec8-9ff5-e7675beab10e", "metadata": {}, "source": [ - "" + "" ] }, { @@ -601,7 +601,7 @@ "id": "5bd24b7f-b760-47ad-bc84-86d13794aa54", "metadata": {}, "source": [ - "" + "" ] }, { @@ -945,7 +945,7 @@ "id": "46bdaa07-ba96-4ac1-9d71-b3cc153910d9", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1210,7 +1210,7 @@ "id": "43875e95-190f-4b17-8f9a-35034ba649ec", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1231,7 +1231,7 @@ "- In this section, we finally implement the code for training the LLM\n", "- We focus on a simple training function (if you are interested in augmenting this training function with more advanced techniques, such as learning rate warmup, cosine annealing, and gradient clipping, please refer to [Appendix D](../../appendix-D/01_main-chapter-code))\n", "\n", - "" + "" ] }, { @@ -1464,7 +1464,7 @@ "id": "eb380c42-b31c-4ee1-b8b9-244094537272", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1849,7 +1849,7 @@ "id": "7ae6fffd-2730-4abe-a2d3-781fc4836f17", "metadata": {}, "source": [ - "\n", + "\n", "\n", "- (Please note that the numbers in this figure are truncated to two\n", "digits after the decimal point to reduce visual clutter. The values in the Softmax row should add up to 1.0.)" @@ -2060,7 +2060,7 @@ "source": [ "- Training LLMs is computationally expensive, so it's crucial to be able to save and load LLM weights\n", "\n", - "" + "" ] }, { @@ -2393,7 +2393,7 @@ "id": "20f19d32-5aae-4176-9f86-f391672c8f0d", "metadata": {}, "source": [ - "" + "" ] }, { @@ -2627,7 +2627,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/ch06/01_main-chapter-code/ch06.ipynb b/ch06/01_main-chapter-code/ch06.ipynb index ee4090c..34f0ec4 100644 --- a/ch06/01_main-chapter-code/ch06.ipynb +++ b/ch06/01_main-chapter-code/ch06.ipynb @@ -76,7 +76,7 @@ "id": "a445828a-ff10-4efa-9f60-a2e2aed4c87d", "metadata": {}, "source": [ - "" + "" ] }, { @@ -113,7 +113,7 @@ "id": "6c29ef42-46d9-43d4-8bb4-94974e1665e4", "metadata": {}, "source": [ - "" + "" ] }, { @@ -132,7 +132,7 @@ "id": "0b37a0c4-0bb1-4061-b1fe-eaa4416d52c3", "metadata": {}, "source": [ - "" + "" ] }, { @@ -150,7 +150,7 @@ "id": "5f628975-d2e8-4f7f-ab38-92bb868b7067", "metadata": {}, "source": [ - "" + "" ] }, { @@ -712,7 +712,7 @@ "id": "0829f33f-1428-4f22-9886-7fee633b3666", "metadata": {}, "source": [ - "" + "" ] }, { @@ -887,7 +887,7 @@ "id": "64bcc349-205f-48f8-9655-95ff21f5e72f", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1019,7 +1019,7 @@ "source": [ "- In this section, we initialize the pretrained model we worked with in the previous chapter\n", "\n", - "" + "" ] }, { @@ -1217,7 +1217,7 @@ "id": "d6e9d66f-76b2-40fc-9ec5-3f972a8db9c0", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1550,7 +1550,7 @@ "id": "0be7c1eb-c46c-4065-8525-eea1b8c66d10", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1661,7 +1661,7 @@ "id": "7df9144f-6817-4be4-8d4b-5d4dadfe4a9b", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1704,7 +1704,7 @@ "id": "8df08ae0-e664-4670-b7c5-8a2280d9b41b", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1720,7 +1720,7 @@ "id": "669e1fd1-ace8-44b4-b438-185ed0ba8b33", "metadata": {}, "source": [ - "" + "" ] }, { @@ -1736,7 +1736,7 @@ "id": "557996dd-4c6b-49c4-ab83-f60ef7e1d69e", "metadata": {}, "source": [ - "" + "" ] }, { @@ -2053,7 +2053,7 @@ "id": "979b6222-1dc2-4530-9d01-b6b04fe3de12", "metadata": {}, "source": [ - "" + "" ] }, { @@ -2371,7 +2371,7 @@ "id": "72ebcfa2-479e-408b-9cf0-7421f6144855", "metadata": {}, "source": [ - "" + "" ] }, { @@ -2590,7 +2590,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/ch07/01_main-chapter-code/ch07.ipynb b/ch07/01_main-chapter-code/ch07.ipynb index 6fe9073..936f03d 100644 --- a/ch07/01_main-chapter-code/ch07.ipynb +++ b/ch07/01_main-chapter-code/ch07.ipynb @@ -79,7 +79,7 @@ "id": "264fca98-2f9a-4193-b435-2abfa3b4142f" }, "source": [ - "" + "" ] }, { @@ -111,7 +111,7 @@ "id": "18dc0535-0904-44ed-beaf-9b678292ef35" }, "source": [ - "" + "" ] }, { @@ -123,7 +123,7 @@ "source": [ "- The topics covered in this chapter are summarized in the figure below\n", "\n", - "" + "" ] }, { @@ -312,7 +312,7 @@ "id": "dffa4f70-44d4-4be4-89a9-2159f4885b10" }, "source": [ - "" + "" ] }, { @@ -509,7 +509,7 @@ "id": "233f63bd-9755-4d07-8884-5e2e5345cf27" }, "source": [ - "" + "" ] }, { @@ -521,7 +521,7 @@ "source": [ "- We tackle this dataset batching in several steps, as summarized in the figure below\n", "\n", - "" + "" ] }, { @@ -533,7 +533,7 @@ "source": [ "- First, we implement an `InstructionDataset` class that pre-tokenizes all inputs in the dataset, similar to the `SpamDataset` in chapter 6\n", "\n", - "" + "" ] }, { @@ -627,7 +627,7 @@ "id": "65c4d943-4aa8-4a44-874e-05bc6831fbd3" }, "source": [ - "" + "" ] }, { @@ -710,12 +710,10 @@ }, { "cell_type": "markdown", - "id": "c46832ab-39b7-45f8-b330-ac9adfa10d1b", - "metadata": { - "id": "c46832ab-39b7-45f8-b330-ac9adfa10d1b" - }, + "id": "5673ade5-be4c-4a2c-9a9a-d5c63fb1c424", + "metadata": {}, "source": [ - "" + "" ] }, { @@ -736,7 +734,7 @@ "id": "0386b6fe-3455-4e70-becd-a5a4681ba2ef" }, "source": [ - "" + "" ] }, { @@ -819,7 +817,7 @@ "source": [ "- Next, we introduce an `ignore_index` value to replace all padding token IDs with a new value; the purpose of this `ignore_index` is that we can ignore padding values in the loss function (more on that later)\n", "\n", - "\n", + "\n", "\n", "- Concretely, this means that we replace the token IDs corresponding to `50256` with `-100` as illustrated below" ] @@ -831,7 +829,7 @@ "id": "bd4bed33-956e-4b3f-a09c-586d8203109a" }, "source": [ - "" + "" ] }, { @@ -1085,7 +1083,7 @@ "id": "fab8f0ed-80e8-4fd9-bf84-e5d0e0bc0a39" }, "source": [ - "" + "" ] }, { @@ -1095,6 +1093,7 @@ "id": "bccaf048-ec95-498c-9155-d5b3ccba6c96" }, "source": [ + " \n", "## 7.4 Creating data loaders for an instruction dataset" ] }, @@ -1115,7 +1114,7 @@ "id": "9fffe390-b226-4d5c-983f-9f4da773cb82" }, "source": [ - "" + "" ] }, { @@ -1515,7 +1514,7 @@ "id": "8d1b438f-88af-413f-96a9-f059c6c55fc4" }, "source": [ - "" + "" ] }, { @@ -1746,7 +1745,7 @@ "source": [ "- In this section, we finetune the model\n", "\n", - "\n", + "\n", "\n", "- Note that we can reuse all the loss calculation and training functions that we used in previous chapters" ] @@ -2015,7 +2014,7 @@ "id": "5a25cc88-1758-4dd0-b8bf-c044cbf2dd49" }, "source": [ - "" + "" ] }, { @@ -2271,7 +2270,7 @@ "id": "805b9d30-7336-499f-abb5-4a21be3129f5" }, "source": [ - "" + "" ] }, { @@ -2309,7 +2308,7 @@ "\n", "- In general, before we can use ollama from the command line, we have to either start the ollama application or run `ollama serve` in a separate terminal\n", "\n", - "" + "" ] }, { @@ -2854,7 +2853,7 @@ "- This marks the final chapter of this book\n", "- We covered the major steps of the LLM development cycle: implementing an LLM architecture, pretraining an LLM, and finetuning it\n", "\n", - "\n", + "\n", "\n", "- An optional step that is sometimes followed after instruction finetuning, as described in this chapter, is preference finetuning\n", "- Preference finetuning process can be particularly useful for customizing a model to better align with specific user preferences; see the [../04_preference-tuning-with-dpo](../04_preference-tuning-with-dpo) folder if you are interested in this\n", @@ -2929,7 +2928,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.13.5" } }, "nbformat": 4,