# Mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git
# Synced 2026-02-21 11:18:01 +00:00
# 128 lines, 4.4 KiB, Python
"""
|
|
Creates a PyTorch dataset to load the Pascal VOC & MS COCO datasets.
|
|
"""
|
|
|
|
import config
|
|
import numpy as np
|
|
import os
|
|
import pandas as pd
|
|
import torch
|
|
|
|
from PIL import Image, ImageFile
|
|
from torch.utils.data import Dataset, DataLoader
|
|
from utils import (
|
|
cells_to_bboxes,
|
|
iou_width_height as iou,
|
|
non_max_suppression as nms,
|
|
plot_image
|
|
)
|
|
|
|
# Ask PIL to load truncated image files rather than fail on them
# (NOTE(review): exact fallback behaviour is defined by Pillow — confirm).
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
|
|
|
class YOLODataset(Dataset):
    """Dataset yielding an image plus one YOLO target tensor per prediction scale.

    Every row of ``csv_file`` names an image file (column 0, relative to
    ``img_dir``) and its label file (column 1, relative to ``label_dir``).
    Label files are space-delimited text; each row is rolled by four
    positions, so a five-column ``class x y w h`` row becomes
    ``[x, y, w, h, class]``.

    For every scale ``S`` the target has shape
    ``(num_anchors_per_scale, S, S, 6)`` with the last axis laid out as
    ``[objectness, x_cell, y_cell, w_cell, h_cell, class]``. Objectness is
    ``1`` for the anchor assigned to a box, ``-1`` for an anchor that should
    be ignored, and ``0`` otherwise.

    Args:
        csv_file: Path or file-like object readable by ``pandas.read_csv``.
        img_dir: Directory containing the images.
        label_dir: Directory containing the label text files.
        anchors: Three lists of ``(width, height)`` anchors, one list per
            scale; they are concatenated in order.
        image_size: Stored on the instance; not used by this class itself.
        S: Grid sizes, one per scale. Copied, so the caller's sequence (and
            the default) is never shared between instances.
        C: Number of classes; stored on the instance.
        transform: Optional callable invoked as
            ``transform(image=..., bboxes=...)`` and returning a mapping with
            ``"image"`` and ``"bboxes"`` keys.
    """

    def __init__(
        self,
        csv_file,
        img_dir,
        label_dir,
        anchors,
        image_size=416,
        S=(13, 26, 52),
        C=20,
        transform=None,
    ):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.transform = transform
        # Own copy: a shared (default) list would leak mutations across instances.
        self.S = list(S)
        # Anchors of all 3 scales flattened into one (num_anchors, 2) tensor.
        self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])
        self.num_anchors = self.anchors.shape[0]
        self.num_anchors_per_scale = self.num_anchors // 3
        self.C = C
        # Unassigned anchors whose IoU with a box exceeds this are marked "ignore".
        self.ignore_iou_thresh = 0.5

    def __len__(self):
        """Return the number of annotated samples (rows of the CSV)."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load sample ``index`` and build its per-scale target tensors.

        Returns:
            ``(image, targets)`` where ``targets`` is a tuple holding one
            tensor per entry of ``self.S``.
        """
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        # Roll by 4 so the leading column of each label row moves to the end.
        bboxes = np.roll(
            np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
        ).tolist()
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = np.array(Image.open(img_path).convert("RGB"))

        if self.transform:
            augmentations = self.transform(image=image, bboxes=bboxes)
            image = augmentations["image"]
            bboxes = augmentations["bboxes"]

        # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
        targets = [
            torch.zeros((self.num_anchors_per_scale, S, S, 6)) for S in self.S
        ]
        for box in bboxes:
            iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
            # Visit anchors from best to worst shape match.
            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
            x, y, width, height, class_label = box
            has_anchor = [False] * 3  # each scale should have one anchor
            for anchor_idx in anchor_indices:
                # Plain int: avoids tensor floor-division and tensor list indices.
                anchor_idx = int(anchor_idx)
                scale_idx, anchor_on_scale = divmod(
                    anchor_idx, self.num_anchors_per_scale
                )
                S = self.S[scale_idx]
                # Which cell; clamp so a centre exactly on the bottom/right
                # edge (coordinate == 1.0) falls in the last cell instead of
                # indexing out of range.
                i = min(int(S * y), S - 1)
                j = min(int(S * x), S - 1)
                target = targets[scale_idx]
                # Non-zero objectness (1 or -1) means the slot is already used.
                if target[anchor_on_scale, i, j, 0]:
                    continue

                if not has_anchor[scale_idx]:
                    target[anchor_on_scale, i, j, 0] = 1
                    x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
                    # Can be greater than 1 since it's relative to the cell.
                    width_cell, height_cell = width * S, height * S
                    target[anchor_on_scale, i, j, 1:5] = torch.tensor(
                        [x_cell, y_cell, width_cell, height_cell]
                    )
                    target[anchor_on_scale, i, j, 5] = int(class_label)
                    has_anchor[scale_idx] = True
                elif iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                    target[anchor_on_scale, i, j, 0] = -1  # ignore prediction

        return image, tuple(targets)
|
|
|
|
|
|
def test():
    """Visual smoke test: decode dataset targets back to boxes and plot them.

    Builds a ``YOLODataset`` over the COCO paths below using
    ``config.ANCHORS`` and ``config.test_transforms``, then for every sample
    converts the per-scale targets to boxes with ``cells_to_bboxes``, filters
    them with ``nms`` and displays the result with ``plot_image``.
    """
    anchors = config.ANCHORS
    transform = config.test_transforms

    S = [13, 26, 52]
    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        S=S,
        anchors=anchors,
        transform=transform,
    )
    # Scale each scale's anchors by its grid size; the (num_scales, 1, 1) view
    # broadcasts over the anchors of that scale.
    scaled_anchors = torch.tensor(anchors) * torch.tensor(S).view(-1, 1, 1)
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
    for x, y in loader:
        boxes = []

        # One target tensor per scale: iterate the scales themselves rather
        # than a count taken from the anchors-per-scale dimension.
        for scale_targets, anchor in zip(y, scaled_anchors):
            print(anchor.shape)
            print(scale_targets.shape)
            boxes += cells_to_bboxes(
                scale_targets,
                is_preds=False,
                S=scale_targets.shape[2],
                anchors=anchor,
            )[0]
        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the visual smoke test only when this file is executed as a script.
    test()
|