fixes for code (#206)

* updated .gitignore

* removed unused GELU import

* fixed model_configs, fixed all tensors on same device

* removed unused tiktoken

* update

* update hparam search

* remove redundant tokenizer argument

---------

Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
Daniel Kleine
2024-06-12 03:59:48 +02:00
committed by GitHub
parent 1a65020d81
commit dcbdc1d2e5
12 changed files with 33 additions and 46 deletions

View File

@@ -53,8 +53,8 @@ def calc_loss_batch(input_batch, target_batch, model, device):
def evaluate_model(model, train_loader, val_loader, device, eval_iter):
model.eval()
with torch.no_grad():
train_loss = calc_loss_loader(train_loader, model, device, num_iters=eval_iter)
val_loss = calc_loss_loader(val_loader, model, device, num_iters=eval_iter)
train_loss = calc_loss_loader(train_loader, model, device, num_batches=eval_iter)
val_loss = calc_loss_loader(val_loader, model, device, num_batches=eval_iter)
model.train()
return train_loss, val_loss

View File

@@ -40,12 +40,12 @@ class GPTDatasetV1(Dataset):
def create_dataloader_v1(txt, batch_size=4, max_length=256,
stride=128, shuffle=True, drop_last=True):
stride=128, shuffle=True, drop_last=True, num_workers=0):
# Initialize the tokenizer
tokenizer = tiktoken.get_encoding("gpt2")
# Create dataset
dataset = GPTDatasetV1(txt, tokenizer, max_length, stride, num_workers=0)
dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)
# Create dataloader
dataloader = DataLoader(