From e99c511721607b55fd77f15f8e992cbf676918a6 Mon Sep 17 00:00:00 2001 From: rasbt Date: Mon, 6 May 2024 21:02:13 -0500 Subject: [PATCH] spelling and consistency improvements --- .../01_main-chapter-code/appendix-E.ipynb | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/appendix-E/01_main-chapter-code/appendix-E.ipynb b/appendix-E/01_main-chapter-code/appendix-E.ipynb index 13d546d..6db9422 100644 --- a/appendix-E/01_main-chapter-code/appendix-E.ipynb +++ b/appendix-E/01_main-chapter-code/appendix-E.ipynb @@ -94,10 +94,22 @@ "\n", "$$W_{\\text{updated}} = W + AB$$\n", "\n", - "- The figure below illustrates these formulas for full finetuning and LoRA side by side\n", - "\n", - "\n", - "\n", + "- The figure below illustrates these formulas for full finetuning and LoRA side by side" + ] + }, + { + "cell_type": "markdown", + "id": "a8a7419d-cae9-4525-bb44-1641f6ef4f3b", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "4edd43c9-8ec5-48e6-b3fc-5fb3c16037cc", + "metadata": {}, + "source": [ "- If you paid close attention, the full finetuning and LoRA depictions in the figure above look slightly different from the formulas I have shown earlier\n", "- That's due to the distributive law of matrix multiplication: we don't have to add the weights with the updated weights but can keep them separate\n", "- For instance, if $x$ is the input data, then we can write the following for regular finetuning:\n", @@ -110,7 +122,7 @@ "\n", "- The fact that we can keep the LoRA weight matrices separate makes LoRA especially attractive\n", "- In practice, this means that we don't have to modify the weights of the pretrained model at all, as we can apply the LoRA matrices on the fly\n", - "- After setting up the dataset and loading the model, we we will implement LoRA in code to make these concepts less abstract" + "- After setting up the dataset and loading the model, we will implement LoRA in the code to make these 
concepts less abstract" ] }, { @@ -129,9 +141,9 @@ "metadata": {}, "source": [ "- This section repeats the code from chapter 6 to load and prepare the dataset\n", - "- Instead of repeating this code, one could copy & paste the LoRA code from section E.3 at the end of the chapter 6 notebook\n", + "- Instead of repeating this code, one could open and run the chapter 6 notebook and then insert the LoRA code from section E.4 there\n", "- (The LoRA code was originally the last section of chapter 6 but was moved to the appendix due to the length of chapter 6)\n", - "- In similar fashion, we could also apply LoRA to the models in chapter 7 for instruction finetuning" + "- In a similar fashion, we could also apply LoRA to the models in chapter 7 for instruction finetuning" ] }, { @@ -467,7 +479,7 @@ "id": "8e951cd6-5e42-44d2-b21f-895cb61004fe", "metadata": {}, "source": [ - "- Lastly, let's calcuate the initial classification accuracy of the non-finetuning model (we expect this to be around 50%, which means that the model is not able to reliably distinguish between spam and non-spam messages, yet)" + "- Lastly, let's calculate the initial classification accuracy of the non-finetuned model (we expect this to be around 50%, which means that the model is not yet able to reliably distinguish between spam and non-spam messages)" ] }, { @@ -1218,7 +1230,7 @@ "id": "13735b3e-f0c3-4dba-ae3d-4141b2878101", "metadata": {}, "source": [ - "- Let's now get to the interesting part and finetune the model reusing the training function from chapter 6\n", + "- Let's now get to the interesting part and finetune the model by reusing the training function from chapter 6\n", "- The training takes about 15 minutes on a M3 MacBook Air laptop computer and less than half a minute on a V100 or A100 GPU" ] }, @@ -1333,7 +1345,7 @@ "id": "aa074723-e3f7-4f7e-a267-855531a037dc", "metadata": {}, "source": [ - "- Note that we previously calculated the accuracy values on 10 batches only; below we 
calculate the accuracies on the full dataset" + "- Note that we previously calculated the accuracy values on 10 batches only; below, we calculate the accuracies on the full dataset" ] }, { @@ -1400,7 +1412,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.12" } }, "nbformat": 4,