Mirror of https://github.com/rasbt/LLMs-from-scratch.git, synced 2026-04-10 12:33:42 +00:00.
Add alternative weight loading strategy as backup (#82)
This commit is contained in:
committed by
GitHub
parent
820d5e3ed1
commit
4582995ced
7	ch05/01_main-chapter-code/README.md	(new file)
@@ -0,0 +1,7 @@
|
||||
# Chapter 5: Pretraining on Unlabeled Data
|
||||
|
||||
- [ch05.ipynb](ch05.ipynb) contains all the code as it appears in the chapter
|
||||
- [previous_chapters.py](previous_chapters.py) is a Python module that contains the `MultiHeadAttention` module from the previous chapter, which we import in [ch05.ipynb](ch05.ipynb) to pretrain the GPT model
|
||||
- [train.py](train.py) is a standalone Python script file with the code that we implemented in [ch05.ipynb](ch05.ipynb) to train the GPT model
|
||||
- [generate.py](generate.py) is a standalone Python script file with the code that we implemented in [ch05.ipynb](ch05.ipynb) to load and use the pretrained model weights from OpenAI
|
||||
|
||||
@@ -199,16 +199,17 @@ def main(gpt_config, input_prompt, model_size):
|
||||
gpt = GPTModel(gpt_config)
|
||||
load_weights_into_gpt(gpt, params)
|
||||
gpt.to(device)
|
||||
gpt.eval()
|
||||
|
||||
tokenizer = tiktoken.get_encoding("gpt2")
|
||||
|
||||
token_ids = generate(
|
||||
model=gpt,
|
||||
idx=text_to_token_ids(input_prompt, tokenizer),
|
||||
max_new_tokens=65,
|
||||
max_new_tokens=30,
|
||||
context_size=gpt_config["ctx_len"],
|
||||
top_k=50,
|
||||
temperature=1.5
|
||||
top_k=1,
|
||||
temperature=1.0
|
||||
)
|
||||
|
||||
print("Output text:\n", token_ids_to_text(token_ids, tokenizer))
|
||||
@@ -219,7 +220,7 @@ if __name__ == "__main__":
|
||||
torch.manual_seed(123)
|
||||
|
||||
CHOOSE_MODEL = "gpt2-small"
|
||||
INPUT_PROMPT = "Every effort moves you"
|
||||
INPUT_PROMPT = "Every effort moves"
|
||||
|
||||
BASE_CONFIG = {
|
||||
"vocab_size": 50257, # Vocabulary size
|
||||
|
||||
Reference in New Issue
Block a user