drop_last=True

This commit is contained in:
rasbt
2024-02-25 07:23:38 -06:00
parent 6243726ab3
commit cdcd73ba7f
4 changed files with 21 additions and 14 deletions

View File

@@ -78,7 +78,8 @@
" return self.input_ids[idx], self.target_ids[idx]\n",
"\n",
"\n",
-"def create_dataloader_v1(txt, batch_size=4, max_length=256, stride=128, shuffle=True):\n",
+"def create_dataloader_v1(txt, batch_size=4, max_length=256, \n",
+" stride=128, shuffle=True, drop_last=True):\n",
" # Initialize the tokenizer\n",
" tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
"\n",
@@ -86,12 +87,12 @@
" dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)\n",
"\n",
" # Create dataloader\n",
-" dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)\n",
+" dataloader = DataLoader(\n",
+" dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)\n",
"\n",
" return dataloader\n",
"\n",
"\n",
"\n",
"with open(\"the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
" raw_text = f.read()\n",
"\n",
@@ -163,7 +164,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
-"version": "3.11.4"
+"version": "3.10.12"
}
},
"nbformat": 4,