mirror of
https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-02-21 11:18:01 +00:00
91 lines
3.0 KiB
Python
"""
|
||
|
|
Creates a Pytorch dataset to load the Pascal VOC dataset
|
||
|
|
"""
|
||
|
|
|
||
|
|
import torch
|
||
|
|
import os
|
||
|
|
import pandas as pd
|
||
|
|
from PIL import Image
|
||
|
|
|
||
|
|
|
||
|
|
class VOCDataset(torch.utils.data.Dataset):
|
||
|
|
def __init__(
|
||
|
|
self, csv_file, img_dir, label_dir, S=7, B=2, C=20, transform=None,
|
||
|
|
):
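        # csv_file is assumed to be a two-column CSV: column 0 holds the image
        # filename and column 1 the matching label-text filename (see the
        # iloc[index, 0] / iloc[index, 1] lookups in __getitem__ below).
        # S = grid size, B = boxes per cell, C = number of classes (YOLOv1 defaults).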
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.S = S
        self.B = B
        self.C = C

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        boxes = []
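        # Each non-empty line of the label file is assumed to describe one object as
        # "<class_idx> <x_center> <y_center> <width> <height>", with the four box
        # values normalized to [0, 1] relative to the full image
        # (e.g. a line such as "11 0.344 0.611 0.416 0.262").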
        with open(label_path) as f:
            for label in f.readlines():
                class_label, x, y, width, height = [
                    float(x) if float(x) != int(float(x)) else int(x)
                    for x in label.replace("\n", "").split()
                ]

                boxes.append([class_label, x, y, width, height])

        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path)
        boxes = torch.tensor(boxes)
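
        # The transform is called with both the image and the boxes so that any
        # geometric augmentation (resize, crop, flip, ...) can adjust the box
        # coordinates together with the pixels; a plain image-only transform
        # (see the commented-out call below) would not fit this signature.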
        if self.transform:
            # image = self.transform(image)
            image, boxes = self.transform(image, boxes)

        # Convert To Cells
        label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
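        # With C=20 and B=2 each cell holds a 30-vector laid out as:
        #   [0:20]  one-hot class scores
        #   [20]    objectness (1 if this cell owns an object)
        #   [21:25] x_cell, y_cell, width_cell, height_cell of the target box
        #   [25:30] slots for the second predicted box, left as zeros in the target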
        for box in boxes:
            class_label, x, y, width, height = box.tolist()
            class_label = int(class_label)

            # i, j represent the cell row and cell column
            i, j = int(self.S * y), int(self.S * x)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            """
            Calculating the width and height of the box relative to the cell
            is done as follows, using width as the example:

            width_pixels = width * self.image_width
            cell_pixels = self.image_width / self.S

            The width relative to the cell is then
            width_pixels / cell_pixels, which simplifies to the
            formulas below.
            """
            width_cell, height_cell = (
                width * self.S,
                height * self.S,
            )
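            # Quick numeric check with S=7: a box centred at x=0.5, y=0.5 with
            # width=0.2 lands in cell j = int(7 * 0.5) = 3, i = 3, with offsets
            # x_cell = 7 * 0.5 - 3 = 0.5, y_cell = 0.5, and width_cell = 0.2 * 7 = 1.4
            # (i.e. the box is 1.4 cells wide).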

            # If no object already found for specific cell i, j
            # Note: This means we restrict to ONE object per cell!
            if label_matrix[i, j, 20] == 0:
                # Set that there exists an object
                label_matrix[i, j, 20] = 1

                # Box coordinates
                box_coordinates = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                label_matrix[i, j, 21:25] = box_coordinates

                # Set one hot encoding for class_label
                label_matrix[i, j, class_label] = 1

        return image, label_matrix
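

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original repository file).
# Assumptions: the CSV's first column holds image filenames and the second
# column label filenames (matching the iloc[index, 0] / iloc[index, 1]
# lookups above); the paths "data/train.csv", "data/images" and "data/labels"
# are hypothetical placeholders. The small Compose helper illustrates one way
# to build a transform that accepts (image, boxes) and applies image-only
# transforms while passing the boxes through unchanged.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torchvision.transforms as T
    from torch.utils.data import DataLoader

    class Compose:
        """Apply image-only transforms, returning the boxes unchanged."""

        def __init__(self, transforms):
            self.transforms = transforms

        def __call__(self, img, bboxes):
            for t in self.transforms:
                img = t(img)
            return img, bboxes

    transform = Compose([T.Resize((448, 448)), T.ToTensor()])
    dataset = VOCDataset(
        "data/train.csv",            # hypothetical CSV of (image, label) filenames
        img_dir="data/images",       # hypothetical image directory
        label_dir="data/labels",     # hypothetical label directory
        S=7,
        B=2,
        C=20,
        transform=transform,
    )
    loader = DataLoader(dataset, batch_size=4, shuffle=True)

    images, label_matrices = next(iter(loader))
    print(images.shape)          # e.g. torch.Size([4, 3, 448, 448])
    print(label_matrices.shape)  # torch.Size([4, 7, 7, 30])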