From ec5baa1f33b4b76d8bcfa81f86ccc25792b32696 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Sat, 22 Jun 2024 08:57:18 -0500
Subject: [PATCH] Add CI tests for chapter 7 (#239)

---
 .gitignore                         |  3 +-
 .../gpt_instruction_finetuning.py  | 85 ++++++++++++++-----
 ch07/01_main-chapter-code/tests.py | 16 ++++
 3 files changed, 81 insertions(+), 23 deletions(-)
 create mode 100644 ch07/01_main-chapter-code/tests.py

diff --git a/.gitignore b/.gitignore
index c60d0f1..fa641d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 # Configs and keys
-ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
 ch07/02_dataset-utilities/config.json
 ch07/03_model-evaluation/config.json
 
@@ -36,6 +35,8 @@ ch06/02_bonus_additional-experiments/gpt2
 ch06/03_bonus_imdb-classification/gpt2
 
 ch07/01_main-chapter-code/gpt2-medium355M-sft.pth
+ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
+ch07/01_main-chapter-code/Smalltestmodel-sft-standalone.pth
 ch07/01_main-chapter-code/gpt2/
 
 # Datasets
diff --git a/ch07/01_main-chapter-code/gpt_instruction_finetuning.py b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
index 5d6f3d0..6bc6429 100644
--- a/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
+++ b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
@@ -147,7 +147,7 @@ def plot_losses(epochs_seen, tokens_seen, train_losses, val_losses):
     # plt.show()
 
 
-def main():
+def main(test_mode=False):
     #######################################
     # Print package versions
     #######################################
@@ -177,6 +177,12 @@ def main():
     test_data = data[train_portion:train_portion + test_portion]
     val_data = data[train_portion + test_portion:]
 
+    # Use very small subset for testing purposes
+    if test_mode:
+        train_data = train_data[:10]
+        val_data = val_data[:10]
+        test_data = test_data[:10]
+
     print("Training set length:", len(train_data))
     print("Validation set length:", len(val_data))
     print("Test set length:", len(test_data))
@@ -217,31 +223,50 @@ def main():
     #######################################
     # Load pretrained model
     #######################################
-    BASE_CONFIG = {
-        "vocab_size": 50257,     # Vocabulary size
-        "context_length": 1024,  # Context length
-        "drop_rate": 0.0,        # Dropout rate
-        "qkv_bias": True         # Query-key-value bias
-    }
 
-    model_configs = {
-        "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
-        "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
-        "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
-        "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
-    }
+    # Small GPT model for testing purposes
+    if test_mode:
+        BASE_CONFIG = {
+            "vocab_size": 50257,
+            "context_length": 120,
+            "drop_rate": 0.0,
+            "qkv_bias": False,
+            "emb_dim": 12,
+            "n_layers": 1,
+            "n_heads": 2
+        }
+        model = GPTModel(BASE_CONFIG)
+        model.eval()
+        device = "cpu"
+        CHOOSE_MODEL = "Small test model"
 
-    CHOOSE_MODEL = "gpt2-medium (355M)"
+    # Code as it is used in the main chapter
+    else:
+        BASE_CONFIG = {
+            "vocab_size": 50257,     # Vocabulary size
+            "context_length": 1024,  # Context length
+            "drop_rate": 0.0,        # Dropout rate
+            "qkv_bias": True         # Query-key-value bias
+        }
 
-    BASE_CONFIG.update(model_configs[CHOOSE_MODEL])
+        model_configs = {
+            "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
+            "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
+            "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
+            "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
+        }
 
-    model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
-    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
+        CHOOSE_MODEL = "gpt2-medium (355M)"
 
-    model = GPTModel(BASE_CONFIG)
-    load_weights_into_gpt(model, params)
-    model.eval()
-    model.to(device)
+        BASE_CONFIG.update(model_configs[CHOOSE_MODEL])
+
+        model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
+        settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")
+
+        model = GPTModel(BASE_CONFIG)
+        load_weights_into_gpt(model, params)
+        model.eval()
+        model.to(device)
 
     print("Loaded model:", CHOOSE_MODEL)
     print(50*"-")
@@ -259,6 +284,7 @@ def main():
     start_time = time.time()
 
     optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.1)
+    num_epochs = 2
 
     torch.manual_seed(123)
 
@@ -307,4 +333,19 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Finetune a GPT model for instruction following"
+    )
+    parser.add_argument(
+        "--test_mode",
+        default=False,
+        action="store_true",
+        help=("This flag runs the model in test mode for internal testing purposes. "
+              "Otherwise, it runs the model as it is used in the chapter (recommended).")
+    )
+    args = parser.parse_args()
+
+    main(args.test_mode)
diff --git a/ch07/01_main-chapter-code/tests.py b/ch07/01_main-chapter-code/tests.py
new file mode 100644
index 0000000..40ee892
--- /dev/null
+++ b/ch07/01_main-chapter-code/tests.py
@@ -0,0 +1,16 @@
+# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
+# Source for "Build a Large Language Model From Scratch"
+#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
+# Code: https://github.com/rasbt/LLMs-from-scratch
+
+# File for internal use (unit tests)
+
+
+import subprocess
+
+
+def test_gpt_instruction_finetune():
+    command = ["python", "ch07/01_main-chapter-code/gpt_instruction_finetuning.py", "--test_mode"]
+
+    result = subprocess.run(command, capture_output=True, text=True)
+    assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
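
For a sense of why the test-mode configuration above is cheap enough for CI: it defines a model several orders of magnitude smaller than the chapter's GPT-2 medium. A minimal sketch of checking its size, assuming GPTModel is importable from the chapter's previous_chapters.py helper module (as the finetuning script itself relies on) and that the check is run from ch07/01_main-chapter-code/; both are assumptions, not part of the patch:

    # Rough size check for the "Small test model" config added in this patch.
    # Assumes GPTModel is importable from the chapter's previous_chapters.py
    # helper module; the config values are copied verbatim from the diff above.
    from previous_chapters import GPTModel

    TEST_CONFIG = {
        "vocab_size": 50257,
        "context_length": 120,
        "drop_rate": 0.0,
        "qkv_bias": False,
        "emb_dim": 12,
        "n_layers": 1,
        "n_heads": 2,
    }

    model = GPTModel(TEST_CONFIG)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Test model parameters: {total_params:,}")

With emb_dim=12, the two 50257 x 12 token-embedding and output matrices dominate, so the total lands around 1.2 million parameters versus 355 million for the chapter's default model, which is why the test run fits on a CPU.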
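
To exercise the new test locally the way a CI job would, one option is a subprocess call in the same style tests.py itself uses. A sketch, assuming pytest is installed and the repository root is the current working directory (the repo-relative path inside tests.py requires this):

    # Minimal sketch of a local CI-style run. Assumes pytest is installed and
    # the working directory is the repository root, since tests.py refers to
    # the finetuning script by a repo-relative path.
    import subprocess

    result = subprocess.run(
        ["pytest", "ch07/01_main-chapter-code/tests.py", "-v"],
        capture_output=True, text=True,
    )
    print(result.stdout)
    assert result.returncode == 0, f"pytest failed:\n{result.stderr}"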