fixes for code (#206)

* updated .gitignore

* removed unused GELU import

* fixed model_configs; ensured all tensors are on the same device

* removed unused tiktoken

* update

* update hparam search

* remove redundant tokenizer argument

---------

Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
Daniel Kleine
2024-06-12 03:59:48 +02:00
committed by GitHub
parent 1a65020d81
commit dcbdc1d2e5
12 changed files with 33 additions and 46 deletions

View File

@@ -262,7 +262,7 @@
},
{
"cell_type": "code",
-"execution_count": 1,
+"execution_count": 6,
"id": "5fee2cf5-61c3-4167-81b5-44ea155bbaf2",
"metadata": {},
"outputs": [],
@@ -282,13 +282,13 @@
},
{
"cell_type": "code",
-"execution_count": 2,
+"execution_count": 7,
"id": "5aa1b0c1-d78a-48fc-ad08-4802458b43f7",
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
-"from gpt import MultiHeadAttention, LayerNorm, GELU, FeedForward\n",
+"from gpt import MultiHeadAttention, LayerNorm, FeedForward\n",
"\n",
"\n",
"class TransformerBlock(nn.Module):\n",
@@ -351,7 +351,7 @@
},
{
"cell_type": "code",
-"execution_count": 3,
+"execution_count": 8,
"id": "1d013d32-c275-4f42-be21-9010f1537227",
"metadata": {},
"outputs": [],