mirror of
https://github.com/rasbt/LLMs-from-scratch.git
synced 2026-04-10 12:33:42 +00:00
drop_last=True
This commit is contained in:
@@ -78,7 +78,8 @@
|
||||
" return self.input_ids[idx], self.target_ids[idx]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def create_dataloader_v1(txt, batch_size=4, max_length=256, stride=128, shuffle=True):\n",
|
||||
"def create_dataloader_v1(txt, batch_size=4, max_length=256, \n",
|
||||
" stride=128, shuffle=True, drop_last=True):\n",
|
||||
" # Initialize the tokenizer\n",
|
||||
" tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
|
||||
"\n",
|
||||
@@ -86,12 +87,12 @@
|
||||
" dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)\n",
|
||||
"\n",
|
||||
" # Create dataloader\n",
|
||||
" dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)\n",
|
||||
" dataloader = DataLoader(\n",
|
||||
" dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)\n",
|
||||
"\n",
|
||||
" return dataloader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open(\"the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
||||
" raw_text = f.read()\n",
|
||||
"\n",
|
||||
@@ -163,7 +164,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user