Add alternative weight loading strategy as backup (#82)

This commit is contained in:
Sebastian Raschka
2024-03-20 08:43:18 -05:00
committed by GitHub
parent 820d5e3ed1
commit 4582995ced
10 changed files with 621 additions and 6 deletions

View File

@@ -0,0 +1,7 @@
# Chapter 5: Pretraining on Unlabeled Data
- [ch05.ipynb](ch05.ipynb) contains all the code as it appears in the chapter
- [previous_chapters.py](previous_chapters.py) is a Python module that contains the `MultiHeadAttention` class from the previous chapter, which we import in [ch05.ipynb](ch05.ipynb) to pretrain the GPT model
- [train.py](train.py) is a standalone Python script containing the code that we implemented in [ch05.ipynb](ch05.ipynb) to train the GPT model
- [generate.py](generate.py) is a standalone Python script containing the code that we implemented in [ch05.ipynb](ch05.ipynb) to load and use the pretrained model weights from OpenAI

View File

@@ -199,16 +199,17 @@ def main(gpt_config, input_prompt, model_size):
gpt = GPTModel(gpt_config)
load_weights_into_gpt(gpt, params)
gpt.to(device)
gpt.eval()
tokenizer = tiktoken.get_encoding("gpt2")
token_ids = generate(
model=gpt,
idx=text_to_token_ids(input_prompt, tokenizer),
max_new_tokens=65,
max_new_tokens=30,
context_size=gpt_config["ctx_len"],
top_k=50,
temperature=1.5
top_k=1,
temperature=1.0
)
print("Output text:\n", token_ids_to_text(token_ids, tokenizer))
@@ -219,7 +220,7 @@ if __name__ == "__main__":
torch.manual_seed(123)
CHOOSE_MODEL = "gpt2-small"
INPUT_PROMPT = "Every effort moves you"
INPUT_PROMPT = "Every effort moves"
BASE_CONFIG = {
"vocab_size": 50257, # Vocabulary size