Add standalone instruction finetuning script (#233)

This commit is contained in:
Sebastian Raschka
2024-06-20 07:37:47 -05:00
committed by GitHub
parent c595f3da3f
commit 8ccca3a882
3 changed files with 361 additions and 5 deletions

View File

@@ -426,7 +426,7 @@
"outputs": [],
"source": [
"train_portion = int(len(data) * 0.85) # 85% for training\n",
"test_portion = int(len(data) * 0.1) # 10% for testing\n",
"test_portion = int(len(data) * 0.1) # 10% for testing\n",
"val_portion = len(data) - train_portion - test_portion # Remaining 5% for validation\n",
"\n",
"train_data = data[:train_portion]\n",
@@ -1166,7 +1166,8 @@
" batch_size=batch_size,\n",
" collate_fn=customized_collate_fn,\n",
" shuffle=True,\n",
" drop_last=True\n",
" drop_last=True,\n",
" num_workers=num_workers\n",
")"
]
},
@@ -1185,7 +1186,8 @@
" batch_size=batch_size,\n",
" collate_fn=customized_collate_fn,\n",
" shuffle=False,\n",
" drop_last=False\n",
" drop_last=False,\n",
" num_workers=num_workers\n",
")\n",
"\n",
"test_dataset = InstructionDataset(test_data, tokenizer)\n",
@@ -1194,7 +1196,8 @@
" batch_size=batch_size,\n",
" collate_fn=customized_collate_fn,\n",
" shuffle=False,\n",
" drop_last=False\n",
" drop_last=False,\n",
" num_workers=num_workers\n",
")"
]
},