Mirror of https://github.com/rasbt/LLMs-from-scratch.git (synced 2026-04-10 12:33:42 +00:00)
Add standalone finetuning and evaluation scripts for chapter 7 (#234)
* add finetuning and eval scripts
* update link
* update links
* fix link
Committed by GitHub
Parent: e1046746e8
Commit: 87deec0f5f
.github/workflows/check-links.yml (8 changes, vendored)
@@ -23,8 +23,12 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest pytest-check-links pytest-retry
+          pip install pytest pytest-check-links
+          # Current version of retry doesn't work well if there are broken non-URL links
+          # pip install pytest pytest-check-links pytest-retry
 
       - name: Check links
         run: |
-          pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org" --retries 2 --retry-delay 5
+          pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org"
+
+          # pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org" --retries 2 --retry-delay 5
.gitignore (3 changes, vendored)
@@ -1,4 +1,6 @@
 # Configs and keys
+ch07/01_main-chapter-code/instruction-data-with-response-standalone.json
+ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
 ch07/02_dataset-utilities/config.json
 ch07/03_model-evaluation/config.json
 
@@ -17,6 +19,7 @@ ch06/01_main-chapter-code/loss-plot.pdf
 ch06/01_main-chapter-code/accuracy-plot.pdf
 
 ch07/01_main-chapter-code/loss-plot.pdf
+ch07/01_main-chapter-code/loss-plot-standalone.pdf
 
 # Checkpoint files
 appendix-A/01_main-chapter-code/model.pth
README.md

@@ -55,7 +55,7 @@ Alternatively, you can view this and other files on GitHub at [https://github.co
 | Ch 3: Coding Attention Mechanisms | - [ch03.ipynb](ch03/01_main-chapter-code/ch03.ipynb)<br/>- [multihead-attention.ipynb](ch03/01_main-chapter-code/multihead-attention.ipynb) (summary) <br/>- [exercise-solutions.ipynb](ch03/01_main-chapter-code/exercise-solutions.ipynb)| [./ch03](./ch03) |
 | Ch 4: Implementing a GPT Model from Scratch | - [ch04.ipynb](ch04/01_main-chapter-code/ch04.ipynb)<br/>- [gpt.py](ch04/01_main-chapter-code/gpt.py) (summary)<br/>- [exercise-solutions.ipynb](ch04/01_main-chapter-code/exercise-solutions.ipynb) | [./ch04](./ch04) |
 | Ch 5: Pretraining on Unlabeled Data | - [ch05.ipynb](ch05/01_main-chapter-code/ch05.ipynb)<br/>- [gpt_train.py](ch05/01_main-chapter-code/gpt_train.py) (summary) <br/>- [gpt_generate.py](ch05/01_main-chapter-code/gpt_generate.py) (summary) <br/>- [exercise-solutions.ipynb](ch05/01_main-chapter-code/exercise-solutions.ipynb) | [./ch05](./ch05) |
-| Ch 6: Finetuning for Text Classification | - [ch06.ipynb](ch06/01_main-chapter-code/ch06.ipynb) <br/>- [gpt-class-finetune.py](ch06/01_main-chapter-code/gpt-class-finetune.py) <br/>- [exercise-solutions.ipynb](ch06/01_main-chapter-code/exercise-solutions.ipynb) | [./ch06](./ch06) |
+| Ch 6: Finetuning for Text Classification | - [ch06.ipynb](ch06/01_main-chapter-code/ch06.ipynb) <br/>- [gpt_class_finetune.py](ch06/01_main-chapter-code/gpt_class_finetune.py) <br/>- [exercise-solutions.ipynb](ch06/01_main-chapter-code/exercise-solutions.ipynb) | [./ch06](./ch06) |
 | Ch 7: Finetuning to Follow Instructions | - [ch07.ipynb](ch07/01_main-chapter-code/ch07.ipynb) | [./ch07](./ch07) |
 | Appendix A: Introduction to PyTorch | - [code-part1.ipynb](appendix-A/01_main-chapter-code/code-part1.ipynb)<br/>- [code-part2.ipynb](appendix-A/01_main-chapter-code/code-part2.ipynb)<br/>- [DDP-script.py](appendix-A/01_main-chapter-code/DDP-script.py)<br/>- [exercise-solutions.ipynb](appendix-A/01_main-chapter-code/exercise-solutions.ipynb) | [./appendix-A](./appendix-A) |
 | Appendix B: References and Further Reading | No code | - |
ch06/01_main-chapter-code/README.md

@@ -9,5 +9,5 @@
 
 ### Optional Code
 
-- [gpt-class-finetune.py](gpt-class-finetune.py) is a standalone Python script file with the code that we implemented in [ch06.ipynb](ch06.ipynb) to finetune the GPT model (you can think of it as a chapter summary)
+- [gpt_class_finetune.py](gpt_class_finetune.py) is a standalone Python script file with the code that we implemented in [ch06.ipynb](ch06.ipynb) to finetune the GPT model (you can think of it as a chapter summary)
 
@@ -10,7 +10,7 @@ import subprocess
 
 
 def test_gpt_class_finetune():
-    command = ["python", "ch06/01_main-chapter-code/gpt-class-finetune.py", "--test_mode"]
+    command = ["python", "ch06/01_main-chapter-code/gpt_class_finetune.py", "--test_mode"]
 
     result = subprocess.run(command, capture_output=True, text=True)
     assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
ch07/01_main-chapter-code/README.md

@@ -10,12 +10,12 @@
 
 - [load-finetuned-model.ipynb](load-finetuned-model.ipynb) is a standalone Jupyter notebook to load the instruction finetuned model we created in this chapter
 
-- [gpt-instruction-finetuning.py](gpt-instruction-finetuning.py) is a standalone Python script to instruction finetune the model as described in the main chapter
+- [gpt_instruction_finetuning.py](gpt_instruction_finetuning.py) is a standalone Python script to instruction finetune the model as described in the main chapter (think of it as a chapter summary focused on the finetuning parts)
 
 Usage:
 
 ```bash
-python gpt-instruction-finetuning.py
+python gpt_instruction_finetuning.py
 ```
 
 ```
@@ -55,3 +55,18 @@ Responses saved as instruction-data-with-response-standalone.json
 Model saved as gpt2-medium355M-sft-standalone.pth
 ```
 
+- [ollama_evaluate.py](ollama_evaluate.py) is a standalone Python script to evaluate the responses of the finetuned model as described in the main chapter (think of it as a chapter summary focused on the evaluation parts)
+
+Usage:
+
+```bash
+python ollama_evaluate.py --file_path instruction-data-with-response-standalone.json
+```
+
+```
+Ollama running: True
+Scoring entries: 100%|███████████████████████████████████████| 110/110 [01:08<00:00, 1.62it/s]
+Number of scores: 110 of 110
+Average score: 51.75
+```
+
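For reference, a minimal sketch of a single entry in instruction-data-with-response-standalone.json as the evaluation script reads it; the field values below are made up, but the keys are the ones used by ollama_evaluate.py further down in this commit:

```python
# Hypothetical entry from instruction-data-with-response-standalone.json.
# format_input() builds the prompt from "instruction" and "input",
# "output" is the reference answer, and "model_response" is what gets scored.
example_entry = {
    "instruction": "Rewrite the sentence using a simile.",
    "input": "The car is very fast.",
    "output": "The car is as fast as lightning.",
    "model_response": "The car is as fast as a bullet.",
}
```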
@@ -2616,7 +2616,7 @@
     }
    ],
    "source": [
-    "def generate_model_scores(json_data, json_key):\n",
+    "def generate_model_scores(json_data, json_key, model=\"llama3\"):\n",
     "    scores = []\n",
     "    for entry in tqdm(json_data, desc=\"Scoring entries\"):\n",
     "        prompt = (\n",
@@ -2626,7 +2626,7 @@
     "            f\" on a scale from 0 to 100, where 100 is the best score. \"\n",
     "            f\"Respond with the integer number only.\"\n",
     "        )\n",
-    "        score = query_model(prompt)\n",
+    "        score = query_model(prompt, model)\n",
     "        try:\n",
     "            scores.append(int(score))\n",
     "        except ValueError:\n",
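For reference, a minimal sketch of how the new `model` parameter changes the call site; it assumes `generate_model_scores` from the cell above and a `test_data` list of entries like the hypothetical example shown earlier, and the alternative judge model name is an assumption:

```python
# Default judge model stays llama3, as before
scores = generate_model_scores(test_data, "model_response")

# The judge model can now be swapped without editing the function body
# ("phi3" is a hypothetical name; any locally pulled Ollama model would do)
scores = generate_model_scores(test_data, "model_response", model="phi3")
```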
ch07/01_main-chapter-code/gpt_instruction_finetuning.py

@@ -259,6 +259,7 @@ def main():
     optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.1)
     num_epochs = 2
 
+    torch.manual_seed(123)
     train_losses, val_losses, tokens_seen = train_model_simple(
         model, train_loader, val_loader, optimizer, device,
         num_epochs=num_epochs, eval_freq=5, eval_iter=5,
@@ -276,7 +277,7 @@ def main():
     #######################################
     # Saving results
     #######################################
-    print("Evaluating models")
+    print("Generating responses")
     for i, entry in tqdm(enumerate(test_data), total=len(test_data)):
 
         input_text = format_input(entry)
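For context, a minimal standalone sketch (plain PyTorch, not code from this commit) of why calling `torch.manual_seed` immediately before the randomized training steps makes the run repeatable:

```python
import torch

torch.manual_seed(123)             # fix the RNG state just before randomized ops
first = torch.randn(3)             # stands in for dropout masks, shuffling, sampling, ...

torch.manual_seed(123)             # same seed at the same point ...
second = torch.randn(3)            # ... reproduces exactly the same draws
print(torch.equal(first, second))  # True
```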
ch07/01_main-chapter-code/ollama_evaluate.py (new file, 120 lines)

@@ -0,0 +1,120 @@
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
#
# A minimal instruction finetuning file based on the code in chapter 7

import json
import psutil
from tqdm import tqdm
import urllib.request


def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
    # Create the data payload as a dictionary
    data = {
        "model": model,
        "seed": 123,        # for deterministic responses
        "temperature": 0,   # for deterministic responses
        "messages": [
            {"role": "user", "content": prompt}
        ]
    }

    # Convert the dictionary to a JSON formatted string and encode it to bytes
    payload = json.dumps(data).encode("utf-8")

    # Create a request object, setting the method to POST and adding necessary headers
    request = urllib.request.Request(url, data=payload, method="POST")
    request.add_header("Content-Type", "application/json")

    # Send the request and capture the response
    response_data = ""
    with urllib.request.urlopen(request) as response:
        # Read and decode the response
        while True:
            line = response.readline().decode("utf-8")
            if not line:
                break
            response_json = json.loads(line)
            response_data += response_json["message"]["content"]

    return response_data


def check_if_running(process_name):
    running = False
    for proc in psutil.process_iter(["name"]):
        if process_name in proc.info["name"]:
            running = True
            break
    return running


def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task. "
        f"Write a response that appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    )

    input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""

    return instruction_text + input_text


def main(file_path):
    ollama_running = check_if_running("ollama")

    if not ollama_running:
        raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
    print("Ollama running:", check_if_running("ollama"))

    with open(file_path, "r") as file:
        test_data = json.load(file)

    model = "llama3"
    scores = generate_model_scores(test_data, "model_response", model)
    print(f"Number of scores: {len(scores)} of {len(test_data)}")
    print(f"Average score: {sum(scores)/len(scores):.2f}\n")


def generate_model_scores(json_data, json_key, model="llama3"):
    scores = []
    for entry in tqdm(json_data, desc="Scoring entries"):
        prompt = (
            f"Given the input `{format_input(entry)}` "
            f"and correct output `{entry['output']}`, "
            f"score the model response `{entry[json_key]}`"
            f" on a scale from 0 to 100, where 100 is the best score. "
            f"Respond with the integer number only."
        )
        score = query_model(prompt, model)
        try:
            scores.append(int(score))
        except ValueError:
            print(f"Could not convert score: {score}")
            continue

    return scores


if __name__ == "__main__":

    import argparse

    parser = argparse.ArgumentParser(
        description="Instruction finetune a GPT model"
    )
    parser.add_argument(
        "--file_path",
        required=True,
        help=(
            "The path to the test dataset `.json` file with the"
            " `'output'` and `'model_response'` keys"
        )
    )
    args = parser.parse_args()

    main(file_path=args.file_path)
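For reference, a minimal sketch of calling these helpers from Python instead of the CLI; it assumes the script is importable as `ollama_evaluate`, a local `ollama serve` with the llama3 model is running, and the response file from the README exists:

```python
import json

from ollama_evaluate import check_if_running, generate_model_scores, query_model

if check_if_running("ollama"):
    # one-off request against the local Ollama chat endpoint
    print(query_model("What do llamas eat?", model="llama3"))

    # quick smoke test: score only the first five saved responses
    with open("instruction-data-with-response-standalone.json", "r") as f:
        test_data = json.load(f)
    print(generate_model_scores(test_data[:5], "model_response", model="llama3"))
```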