mirror of
https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-04-10 12:33:44 +00:00
Updated basic tutorials: better comments, code revision, and verified that they work with the latest PyTorch version
This commit is contained in:
3
ML/Pytorch/Basics/custom_dataset_txt/get_data.sh
Executable file
3
ML/Pytorch/Basics/custom_dataset_txt/get_data.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
wget https://www.kaggle.com/datasets/e1cd22253a9b23b073794872bf565648ddbe4f17e7fa9e74766ad3707141adeb/download?datasetVersionNumber=1
|
||||
@@ -1,3 +1,15 @@
|
||||
"""
|
||||
Introductory tutorial on how to deal with custom text datasets in PyTorch.
|
||||
Note that there are better ways to do this when dealing with huge text datasets.
|
||||
But this is a good way of understanding how it works and can be used as a starting
|
||||
point, particularly for smaller/medium datasets.
|
||||
|
||||
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
|
||||
* 2020-04-09 Initial coding
|
||||
* 2022-12-19 Updated comments, minor code revision, and checked code still works with latest PyTorch.
|
||||
"""
|
||||
|
||||
|
||||
import os # when loading file paths
|
||||
import pandas as pd # for lookup in annotation file
|
||||
import spacy # for tokenizer
|
||||
@@ -15,8 +27,8 @@ import torchvision.transforms as transforms
|
||||
# of same seq_len and setup dataloader)
|
||||
# Note that loading the image is very easy compared to the text!
|
||||
|
||||
# Download with: python -m spacy download en
|
||||
spacy_eng = spacy.load("en")
|
||||
# Download with: python -m spacy download en_core_web_sm
|
||||
spacy_eng = spacy.load("en_core_web_sm")
|
||||
|
||||
|
||||
class Vocabulary:
|
||||
@@ -130,7 +142,10 @@ def get_loader(
|
||||
|
||||
if __name__ == "__main__":
|
||||
transform = transforms.Compose(
|
||||
[transforms.Resize((224, 224)), transforms.ToTensor(),]
|
||||
[
|
||||
transforms.Resize((224, 224)),
|
||||
transforms.ToTensor(),
|
||||
]
|
||||
)
|
||||
|
||||
loader, dataset = get_loader(
|
||||
|
||||
Reference in New Issue
Block a user