diff --git a/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb b/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
index 2142703..a8721b6 100644
--- a/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb
@@ -40,9 +40,7 @@
     " 1. We use the instruction-finetuned LLM to generate multiple responses and have humans rank them based on their preference and/or given preference criteria\n",
     " 2. We use the instruction-finetuned LLM to generate multiple responses and have LLMs rank them based on given preference criteria\n",
     " 3. We use an LLM to generate preferred and dispreferred responses given certain preference criteria\n",
-    "- In this notebook, we take approach 3\n",
-    "\n",
-    "\n",
+    "- In this notebook, we consider approach 3\n",
     "- This notebook uses a 70 billion parameter Llama 3.1-Instruct model through ollama to generate preference labels for an instruction dataset\n",
     "- The expected format of the instruction dataset is as follows:\n",
     "\n",
@@ -589,7 +587,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.6"
   }
  },
 "nbformat": 4,
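
For context on the markdown cell edited above: approach 3 means prompting an LLM to rewrite each known-correct response into a deliberately better ("chosen") and a deliberately worse ("rejected") variant, producing a preference pair per dataset entry. Below is a minimal sketch of that workflow, assuming an ollama server running at its default address (http://localhost:11434) with the llama3.1:70b model pulled; the `query_model` helper, the example entry, and the exact prompt wording are illustrative and not taken from the notebook.

```python
import json
import urllib.request


def query_model(prompt, model="llama3.1:70b", url="http://localhost:11434/api/chat"):
    # Payload for ollama's /api/chat endpoint; a fixed seed and
    # temperature of 0 make the generated labels reproducible.
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "options": {"seed": 123, "temperature": 0},
        "stream": False,
    }
    request = urllib.request.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request) as response:
        result = json.loads(response.read().decode("utf-8"))
    return result["message"]["content"]


# Approach 3: rewrite a known-correct response so it is deliberately
# better or worse, yielding a chosen/rejected pair for preference tuning.
entry = {
    "instruction": "What is the capital of France?",
    "output": "The capital of France is Paris.",
}
prompt = (
    f"Given the input `{entry['instruction']}` "
    f"and the correct output `{entry['output']}`, "
    "slightly rewrite the output to be more polite. "
    "Keep the content otherwise unchanged. "
    "Respond only with the rewritten text."
)
chosen = query_model(prompt)  # politer rewrite -> preferred response
rejected = query_model(prompt.replace("more polite", "more impolite"))  # dispreferred
print({"chosen": chosen, "rejected": rejected})
```

Running this over every entry of an instruction dataset in the format the notebook describes yields the chosen/rejected pairs needed for DPO-style preference tuning.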