Mirror of https://github.com/rasbt/LLMs-from-scratch.git, synced 2026-04-10 12:33:42 +00:00.
Add alternative weight loading strategy as backup (#82)
This commit is contained in:
committed by
GitHub
parent
820d5e3ed1
commit
4582995ced
7	ch05/01_main-chapter-code/README.md	(new file)
@@ -0,0 +1,7 @@
|
||||
# Chapter 5: Pretraining on Unlabeled Data
|
||||
|
||||
- [ch05.ipynb](ch05.ipynb) contains all the code as it appears in the chapter
|
||||
- [previous_chapters.py](previous_chapters.py) is a Python module that contains the `MultiHeadAttention` module from the previous chapter, which we import in [ch05.ipynb](ch05.ipynb) to pretrain the GPT model
|
||||
- [train.py](train.py) is a standalone Python script file with the code that we implemented in [ch05.ipynb](ch05.ipynb) to train the GPT model
|
||||
- [generate.py](generate.py) is a standalone Python script file with the code that we implemented in [ch05.ipynb](ch05.ipynb) to load and use the pretrained model weights from OpenAI
|
||||
|
||||
@@ -199,16 +199,17 @@ def main(gpt_config, input_prompt, model_size):
|
||||
gpt = GPTModel(gpt_config)
|
||||
load_weights_into_gpt(gpt, params)
|
||||
gpt.to(device)
|
||||
gpt.eval()
|
||||
|
||||
tokenizer = tiktoken.get_encoding("gpt2")
|
||||
|
||||
token_ids = generate(
|
||||
model=gpt,
|
||||
idx=text_to_token_ids(input_prompt, tokenizer),
|
||||
max_new_tokens=65,
|
||||
max_new_tokens=30,
|
||||
context_size=gpt_config["ctx_len"],
|
||||
top_k=50,
|
||||
temperature=1.5
|
||||
top_k=1,
|
||||
temperature=1.0
|
||||
)
|
||||
|
||||
print("Output text:\n", token_ids_to_text(token_ids, tokenizer))
|
||||
@@ -219,7 +220,7 @@ if __name__ == "__main__":
|
||||
torch.manual_seed(123)
|
||||
|
||||
CHOOSE_MODEL = "gpt2-small"
|
||||
INPUT_PROMPT = "Every effort moves you"
|
||||
INPUT_PROMPT = "Every effort moves"
|
||||
|
||||
BASE_CONFIG = {
|
||||
"vocab_size": 50257, # Vocabulary size
|
||||
|
||||
Reference in New Issue
Block a user