Initial commit

Aladdin Persson
2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions


@@ -0,0 +1,22 @@
import csv

read_train = open("train.txt", "r").readlines()
with open("train.csv", mode="w", newline="") as train_file:
    writer = csv.writer(train_file)
    for line in read_train:
        image_file = line.split("/")[-1].replace("\n", "")
        text_file = image_file.replace(".jpg", ".txt")
        writer.writerow([image_file, text_file])

read_test = open("test.txt", "r").readlines()
with open("test.csv", mode="w", newline="") as test_file:
    writer = csv.writer(test_file)
    for line in read_test:
        image_file = line.split("/")[-1].replace("\n", "")
        text_file = image_file.replace(".jpg", ".txt")
        writer.writerow([image_file, text_file])
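As a quick sanity check, this is the mapping the script performs on a single line (the path below is a made-up example, assuming train.txt holds one image path per line as written by voc_label.py):

line = "/home/user/VOCdevkit/VOC2007/JPEGImages/000001.jpg\n"
image_file = line.split("/")[-1].replace("\n", "")  # "000001.jpg"
text_file = image_file.replace(".jpg", ".txt")      # "000001.txt"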


@@ -0,0 +1,60 @@
#!/usr/bin/env bash
## DOWNLOAD FROM JOSEPH'S WEBSITE (SLOWER DOWNLOAD)
#wget https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
#wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
#wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
## OR DOWNLOAD FROM HERE (FASTER DOWNLOAD)
# VOC2007 DATASET
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
# VOC2012 DATASET
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
# Extract tar files
tar xf VOCtrainval_11-May-2012.tar
tar xf VOCtrainval_06-Nov-2007.tar
tar xf VOCtest_06-Nov-2007.tar
# Need voc_label.py to clean up data from xml files
wget https://pjreddie.com/media/files/voc_label.py
# Run python file to clean data from xml files
python voc_label.py
# Get train by using train+val from 2007 and 2012
# Then we only test on 2007 test set
# Unclear from the paper what they actually used as a dev set
cat 2007_train.txt 2007_val.txt 2012_*.txt > train.txt
cp 2007_test.txt test.txt
# Move txt files we won't be using to clean up a little bit
mkdir old_txt_files
mv 2007* 2012* old_txt_files/
python generate_csv.py
# Gather all images and labels into a single data/ folder
mkdir -p data/images
mkdir -p data/labels
mv VOCdevkit/VOC2007/JPEGImages/*.jpg data/images/
mv VOCdevkit/VOC2012/JPEGImages/*.jpg data/images/
mv VOCdevkit/VOC2007/labels/*.txt data/labels/
mv VOCdevkit/VOC2012/labels/*.txt data/labels/
# We don't need VOCdevkit folder anymore, can remove
# in order to save some space
rm -rf VOCdevkit/
mv test.txt old_txt_files/
mv train.txt old_txt_files/
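After the script finishes, a minimal sketch (an assumed helper, not part of this commit) to verify that every image in data/images has a matching label in data/labels:

import os

images = {f[:-4] for f in os.listdir("data/images") if f.endswith(".jpg")}
labels = {f[:-4] for f in os.listdir("data/labels") if f.endswith(".txt")}
print(f"{len(images)} images, {len(labels)} labels")
print("images missing labels:", sorted(images - labels)[:5])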


@@ -0,0 +1,90 @@
"""
Creates a Pytorch dataset to load the Pascal VOC dataset
"""
import torch
import os
import pandas as pd
from PIL import Image
class VOCDataset(torch.utils.data.Dataset):
def __init__(
self, csv_file, img_dir, label_dir, S=7, B=2, C=20, transform=None,
):
self.annotations = pd.read_csv(csv_file)
self.img_dir = img_dir
self.label_dir = label_dir
self.transform = transform
self.S = S
self.B = B
self.C = C
def __len__(self):
return len(self.annotations)
def __getitem__(self, index):
label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
boxes = []
with open(label_path) as f:
for label in f.readlines():
class_label, x, y, width, height = [
float(x) if float(x) != int(float(x)) else int(x)
for x in label.replace("\n", "").split()
]
boxes.append([class_label, x, y, width, height])
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
image = Image.open(img_path)
boxes = torch.tensor(boxes)
if self.transform:
# image = self.transform(image)
image, boxes = self.transform(image, boxes)
# Convert To Cells
label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
for box in boxes:
class_label, x, y, width, height = box.tolist()
class_label = int(class_label)
# i,j represents the cell row and cell column
i, j = int(self.S * y), int(self.S * x)
x_cell, y_cell = self.S * x - j, self.S * y - i
"""
Calculating the width and height of cell of bounding box,
relative to the cell is done by the following, with
width as the example:
width_pixels = (width*self.image_width)
cell_pixels = (self.image_width)
Then to find the width relative to the cell is simply:
width_pixels/cell_pixels, simplification leads to the
formulas below.
"""
width_cell, height_cell = (
width * self.S,
height * self.S,
)
# If no object already found for specific cell i,j
# Note: This means we restrict to ONE object
# per cell!
if label_matrix[i, j, 20] == 0:
# Set that there exists an object
label_matrix[i, j, 20] = 1
# Box coordinates
box_coordinates = torch.tensor(
[x_cell, y_cell, width_cell, height_cell]
)
label_matrix[i, j, 21:25] = box_coordinates
# Set one hot encoding for class_label
label_matrix[i, j, class_label] = 1
return image, label_matrix
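To make the cell conversion above concrete, a standalone worked example with S=7 and an arbitrary box:

S = 7
x, y, width, height = 0.5, 0.36, 0.2, 0.3        # box relative to whole image
i, j = int(S * y), int(S * x)                    # cell row 2, cell column 3
x_cell, y_cell = S * x - j, S * y - i            # ~ (0.5, 0.52), offset inside cell (2, 3)
width_cell, height_cell = width * S, height * S  # ~ (1.4, 2.1), measured in cell units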


@@ -0,0 +1,124 @@
"""
Implementation of Yolo Loss Function from the original yolo paper
"""
import torch
import torch.nn as nn
from utils import intersection_over_union
class YoloLoss(nn.Module):
"""
Calculate the loss for yolo (v1) model
"""
def __init__(self, S=7, B=2, C=20):
super(YoloLoss, self).__init__()
self.mse = nn.MSELoss(reduction="sum")
"""
S is split size of image (in paper 7),
B is number of boxes (in paper 2),
C is number of classes (in paper and VOC dataset is 20),
"""
self.S = S
self.B = B
self.C = C
# These are from Yolo paper, signifying how much we should
# pay loss for no object (noobj) and the box coordinates (coord)
self.lambda_noobj = 0.5
self.lambda_coord = 5
def forward(self, predictions, target):
# predictions are shaped (BATCH_SIZE, S*S(C+B*5) when inputted
predictions = predictions.reshape(-1, self.S, self.S, self.C + self.B * 5)
# Calculate IoU for the two predicted bounding boxes with target bbox
iou_b1 = intersection_over_union(predictions[..., 21:25], target[..., 21:25])
iou_b2 = intersection_over_union(predictions[..., 26:30], target[..., 21:25])
ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=0)
# Take the box with highest IoU out of the two prediction
# Note that bestbox will be indices of 0, 1 for which bbox was best
iou_maxes, bestbox = torch.max(ious, dim=0)
exists_box = target[..., 20].unsqueeze(3) # in paper this is Iobj_i
# ======================== #
# FOR BOX COORDINATES #
# ======================== #
# Set boxes with no object in them to 0. We only take out one of the two
# predictions, which is the one with highest Iou calculated previously.
box_predictions = exists_box * (
(
bestbox * predictions[..., 26:30]
+ (1 - bestbox) * predictions[..., 21:25]
)
)
box_targets = exists_box * target[..., 21:25]
# Take sqrt of width, height of boxes to ensure that
box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
torch.abs(box_predictions[..., 2:4] + 1e-6)
)
box_targets[..., 2:4] = torch.sqrt(box_targets[..., 2:4])
box_loss = self.mse(
torch.flatten(box_predictions, end_dim=-2),
torch.flatten(box_targets, end_dim=-2),
)
# ==================== #
# FOR OBJECT LOSS #
# ==================== #
# pred_box is the confidence score for the bbox with highest IoU
pred_box = (
bestbox * predictions[..., 25:26] + (1 - bestbox) * predictions[..., 20:21]
)
object_loss = self.mse(
torch.flatten(exists_box * pred_box),
torch.flatten(exists_box * target[..., 20:21]),
)
# ======================= #
# FOR NO OBJECT LOSS #
# ======================= #
#max_no_obj = torch.max(predictions[..., 20:21], predictions[..., 25:26])
#no_object_loss = self.mse(
# torch.flatten((1 - exists_box) * max_no_obj, start_dim=1),
# torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
#)
no_object_loss = self.mse(
torch.flatten((1 - exists_box) * predictions[..., 20:21], start_dim=1),
torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
)
no_object_loss += self.mse(
torch.flatten((1 - exists_box) * predictions[..., 25:26], start_dim=1),
torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1)
)
# ================== #
# FOR CLASS LOSS #
# ================== #
class_loss = self.mse(
torch.flatten(exists_box * predictions[..., :20], end_dim=-2,),
torch.flatten(exists_box * target[..., :20], end_dim=-2,),
)
loss = (
self.lambda_coord * box_loss # first two rows in paper
+ object_loss # third row in paper
+ self.lambda_noobj * no_object_loss # forth row
+ class_loss # fifth row
)
return loss
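A minimal smoke test of the shape contract (a sketch, assuming the loss module above and its utils import are on the path): predictions arrive flat as (N, S*S*(C+B*5)) = (N, 1470), targets as (N, 7, 7, 30), matching how the dataset builds its label matrix.

import torch
from loss import YoloLoss

loss_fn = YoloLoss()
predictions = torch.randn(2, 7 * 7 * 30)  # flat model output
target = torch.zeros(2, 7, 7, 30)         # empty label grid
print(loss_fn(predictions, target))       # a single scalar tensor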


@@ -0,0 +1,119 @@
"""
Implementation of Yolo (v1) architecture
with slight modification with added BatchNorm.
"""
import torch
import torch.nn as nn
"""
Information about architecture config:
Tuple is structured by (kernel_size, filters, stride, padding)
"M" is simply maxpooling with stride 2x2 and kernel 2x2
List is structured by tuples and lastly int with number of repeats
"""
architecture_config = [
(7, 64, 2, 3),
"M",
(3, 192, 1, 1),
"M",
(1, 128, 1, 0),
(3, 256, 1, 1),
(1, 256, 1, 0),
(3, 512, 1, 1),
"M",
[(1, 256, 1, 0), (3, 512, 1, 1), 4],
(1, 512, 1, 0),
(3, 1024, 1, 1),
"M",
[(1, 512, 1, 0), (3, 1024, 1, 1), 2],
(3, 1024, 1, 1),
(3, 1024, 2, 1),
(3, 1024, 1, 1),
(3, 1024, 1, 1),
]
class CNNBlock(nn.Module):
def __init__(self, in_channels, out_channels, **kwargs):
super(CNNBlock, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
self.batchnorm = nn.BatchNorm2d(out_channels)
self.leakyrelu = nn.LeakyReLU(0.1)
def forward(self, x):
return self.leakyrelu(self.batchnorm(self.conv(x)))
class Yolov1(nn.Module):
def __init__(self, in_channels=3, **kwargs):
super(Yolov1, self).__init__()
self.architecture = architecture_config
self.in_channels = in_channels
self.darknet = self._create_conv_layers(self.architecture)
self.fcs = self._create_fcs(**kwargs)
def forward(self, x):
x = self.darknet(x)
return self.fcs(torch.flatten(x, start_dim=1))
def _create_conv_layers(self, architecture):
layers = []
in_channels = self.in_channels
for x in architecture:
if type(x) == tuple:
layers += [
CNNBlock(
in_channels, x[1], kernel_size=x[0], stride=x[2], padding=x[3],
)
]
in_channels = x[1]
elif type(x) == str:
layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
elif type(x) == list:
conv1 = x[0]
conv2 = x[1]
num_repeats = x[2]
for _ in range(num_repeats):
layers += [
CNNBlock(
in_channels,
conv1[1],
kernel_size=conv1[0],
stride=conv1[2],
padding=conv1[3],
)
]
layers += [
CNNBlock(
conv1[1],
conv2[1],
kernel_size=conv2[0],
stride=conv2[2],
padding=conv2[3],
)
]
in_channels = conv2[1]
return nn.Sequential(*layers)
def _create_fcs(self, split_size, num_boxes, num_classes):
S, B, C = split_size, num_boxes, num_classes
# In original paper this should be
# nn.Linear(1024*S*S, 4096),
# nn.LeakyReLU(0.1),
# nn.Linear(4096, S*S*(B*5+C))
return nn.Sequential(
nn.Flatten(),
nn.Linear(1024 * S * S, 496),
nn.Dropout(0.0),
nn.LeakyReLU(0.1),
nn.Linear(496, S * S * (C + B * 5)),
)
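A quick shape check (a minimal sketch, assuming the model module above is importable): with S=7, B=2, C=20 the head outputs S*S*(C + B*5) = 1470 values per 448x448 image.

import torch
from model import Yolov1

model = Yolov1(split_size=7, num_boxes=2, num_classes=20)
x = torch.randn((2, 3, 448, 448))  # two 448x448 RGB images
print(model(x).shape)              # torch.Size([2, 1470])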


@@ -0,0 +1,148 @@
"""
Main file for training Yolo model on Pascal VOC dataset
"""
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
non_max_suppression,
mean_average_precision,
intersection_over_union,
cellboxes_to_boxes,
get_bboxes,
plot_image,
save_checkpoint,
load_checkpoint,
)
from loss import YoloLoss
seed = 123
torch.manual_seed(seed)
# Hyperparameters etc.
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 1000
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
IMG_DIR = "data/images"
LABEL_DIR = "data/labels"
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img, bboxes):
for t in self.transforms:
img, bboxes = t(img), bboxes
return img, bboxes
transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor(),])
def train_fn(train_loader, model, optimizer, loss_fn):
loop = tqdm(train_loader, leave=True)
mean_loss = []
for batch_idx, (x, y) in enumerate(loop):
x, y = x.to(DEVICE), y.to(DEVICE)
out = model(x)
loss = loss_fn(out, y)
mean_loss.append(loss.item())
optimizer.zero_grad()
loss.backward()
optimizer.step()
# update progress bar
loop.set_postfix(loss=loss.item())
print(f"Mean loss was {sum(mean_loss)/len(mean_loss)}")
def main():
model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
optimizer = optim.Adam(
model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
)
loss_fn = YoloLoss()
if LOAD_MODEL:
load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)
train_dataset = VOCDataset(
"data/100examples.csv",
transform=transform,
img_dir=IMG_DIR,
label_dir=LABEL_DIR,
)
test_dataset = VOCDataset(
"data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR,
)
train_loader = DataLoader(
dataset=train_dataset,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
pin_memory=PIN_MEMORY,
shuffle=True,
drop_last=True,
)
test_loader = DataLoader(
dataset=test_dataset,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
pin_memory=PIN_MEMORY,
shuffle=True,
drop_last=True,
)
for epoch in range(EPOCHS):
# for x, y in train_loader:
# x = x.to(DEVICE)
# for idx in range(8):
# bboxes = cellboxes_to_boxes(model(x))
# bboxes = non_max_suppression(bboxes[idx], iou_threshold=0.5, threshold=0.4, box_format="midpoint")
# plot_image(x[idx].permute(1,2,0).to("cpu"), bboxes)
# import sys
# sys.exit()
pred_boxes, target_boxes = get_bboxes(
train_loader, model, iou_threshold=0.5, threshold=0.4
)
mean_avg_prec = mean_average_precision(
pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
)
print(f"Train mAP: {mean_avg_prec}")
#if mean_avg_prec > 0.9:
# checkpoint = {
# "state_dict": model.state_dict(),
# "optimizer": optimizer.state_dict(),
# }
# save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
# import time
# time.sleep(10)
train_fn(train_loader, model, optimizer, loss_fn)
if __name__ == "__main__":
main()
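On the "grad accum?" note next to BATCH_SIZE: a minimal sketch (an assumed modification, not part of this commit) of how gradient accumulation could emulate the paper's batch size of 64 using micro-batches of 16:

ACCUM_STEPS = 4  # 4 * BATCH_SIZE = 64, the paper's effective batch size

def train_fn_accum(train_loader, model, optimizer, loss_fn):
    optimizer.zero_grad()
    for batch_idx, (x, y) in enumerate(tqdm(train_loader, leave=True)):
        x, y = x.to(DEVICE), y.to(DEVICE)
        loss = loss_fn(model(x), y) / ACCUM_STEPS  # scale so the summed gradients average out
        loss.backward()                            # gradients accumulate across micro-batches
        if (batch_idx + 1) % ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()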


@@ -0,0 +1,349 @@
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from collections import Counter
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]  # (N, 1)
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # .clamp(0) is for the case when they do not intersect
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes with each bbox
            specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU above which overlapping bboxes of the
            same class are suppressed
        threshold (float): confidence threshold to remove predicted bboxes
            (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        # Keep only boxes of a different class or with low overlap
        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bbox
            specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU threshold at which a predicted bbox counts as correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # find the number of ground truth bboxes for each training example,
        # so if img 0 has 3 and img 1 has 5 we obtain a dictionary:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t. the same example):
        # amount_bboxes = {0:torch.tensor([0,0,0]), 1:torch.tensor([0,0,0,0,0])}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities, which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only count each ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive, and mark this bounding box as seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IoU is lower than the threshold, the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)
def plot_image(image, boxes):
    """Plots predicted bounding boxes on the image"""
    im = np.array(image)
    height, width, _ = im.shape

    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(im)

    # After slicing, box[0] is x midpoint, box[2] is width,
    # box[1] is y midpoint, box[3] is height
    # Create a Rectangle patch for each box
    for box in boxes:
        box = box[2:]
        assert len(box) == 4, "Got more values than in x, y, w, h, in a box!"
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=1,
            edgecolor="r",
            facecolor="none",
        )
        # Add the patch to the Axes
        ax.add_patch(rect)

    plt.show()
def get_bboxes(
    loader,
    model,
    iou_threshold,
    threshold,
    pred_format="cells",
    box_format="midpoint",
    device="cuda",
):
    all_pred_boxes = []
    all_true_boxes = []

    # make sure model is in eval mode before getting bboxes
    model.eval()
    train_idx = 0

    for batch_idx, (x, labels) in enumerate(loader):
        x = x.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )

            # if batch_idx == 0 and idx == 0:
            #     plot_image(x[idx].permute(1,2,0).to("cpu"), nms_boxes)
            #     print(nms_boxes)

            for nms_box in nms_boxes:
                all_pred_boxes.append([train_idx] + nms_box)

            for box in true_bboxes[idx]:
                # most of the 49 cells have no object, so their confidence
                # is ~0 and they get filtered out here
                if box[1] > threshold:
                    all_true_boxes.append([train_idx] + box)

            train_idx += 1

    model.train()
    return all_pred_boxes, all_true_boxes
def convert_cellboxes(predictions, S=7):
    """
    Converts bounding boxes output from Yolo with
    an image split size of S into entire image ratios
    rather than relative to cell ratios. Tried to do this
    vectorized, but this resulted in quite difficult to read
    code... Use as a black box? Or implement a more intuitive
    version using 2 for loops iterating over range(S) and
    converting the boxes one by one, resulting in a slower
    but more readable implementation.
    """

    predictions = predictions.to("cpu")
    batch_size = predictions.shape[0]
    predictions = predictions.reshape(batch_size, 7, 7, 30)
    bboxes1 = predictions[..., 21:25]
    bboxes2 = predictions[..., 26:30]
    scores = torch.cat(
        (predictions[..., 20].unsqueeze(0), predictions[..., 25].unsqueeze(0)), dim=0
    )
    best_box = scores.argmax(0).unsqueeze(-1)
    best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2
    cell_indices = torch.arange(7).repeat(batch_size, 7, 1).unsqueeze(-1)
    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_y = 1 / S * best_boxes[..., 2:4]
    converted_bboxes = torch.cat((x, y, w_y), dim=-1)
    predicted_class = predictions[..., :20].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(predictions[..., 20], predictions[..., 25]).unsqueeze(
        -1
    )
    converted_preds = torch.cat(
        (predicted_class, best_confidence, converted_bboxes), dim=-1
    )

    return converted_preds


def cellboxes_to_boxes(out, S=7):
    converted_pred = convert_cellboxes(out).reshape(out.shape[0], S * S, -1)
    converted_pred[..., 0] = converted_pred[..., 0].long()
    all_bboxes = []

    for ex_idx in range(out.shape[0]):
        bboxes = []

        for bbox_idx in range(S * S):
            bboxes.append([x.item() for x in converted_pred[ex_idx, bbox_idx, :]])
        all_bboxes.append(bboxes)

    return all_bboxes
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
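Since convert_cellboxes is meant to be used as a black box, here is a small sketch of its contract through cellboxes_to_boxes (with the functions above in scope): flat predictions go in, and out comes one [class, confidence, x, y, w, h] list per cell, with coordinates relative to the whole image.

preds = torch.randn(2, 7 * 7 * 30)  # e.g. raw model output for 2 images
boxes = cellboxes_to_boxes(preds)
print(len(boxes), len(boxes[0]), len(boxes[0][0]))  # 2 examples, 49 boxes, 6 values each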


@@ -0,0 +1,50 @@
import torch
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct Labels of Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    # Slicing idx:idx+1 in order to keep tensor dimensionality
    # Doing ... in indexing to handle any additional dimensions,
    # like for the Yolo algorithm where the shape would be (N, S, S, 4)
    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # Need clamp(0) in case they do not intersect, then we want intersection to be 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)
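A worked example in midpoint format (with the function above in scope): box2 lies entirely inside box1, so intersection = 0.04, union = 0.16 + 0.04 - 0.04 = 0.16, and IoU = 0.04 / 0.16 = 0.25.

box1 = torch.tensor([[0.5, 0.5, 0.4, 0.4]])  # 0.4 x 0.4 box centered at (0.5, 0.5)
box2 = torch.tensor([[0.5, 0.5, 0.2, 0.2]])  # 0.2 x 0.2 box, same center
print(intersection_over_union(box1, box2, box_format="midpoint"))  # tensor([[0.2500]])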


@@ -0,0 +1,112 @@
import torch
from collections import Counter
from iou import intersection_over_union
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bbox
            specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU threshold at which a predicted bbox counts as correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # find the number of ground truth bboxes for each training example,
        # so if img 0 has 3 and img 1 has 5 we obtain a dictionary:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t. the same example):
        # amount_bboxes = {0:torch.tensor([0,0,0]), 1:torch.tensor([0,0,0,0,0])}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities, which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only count each ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive, and mark this bounding box as seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IoU is lower than the threshold, the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)
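A tiny end-to-end check (with the function above in scope, values made up): one image, one ground truth of class 0, and one confident detection that matches it exactly, so AP for class 0 is 1.0, every other class is skipped, and mAP comes out as ~1.0.

pred_boxes = [[0, 0, 0.9, 0.5, 0.5, 0.2, 0.2]]  # [train_idx, class, score, x, y, w, h]
true_boxes = [[0, 0, 1.0, 0.5, 0.5, 0.2, 0.2]]
print(mean_average_precision(pred_boxes, true_boxes))  # tensor(1.0000), up to epsilon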


@@ -0,0 +1,42 @@
import torch
from iou import intersection_over_union
def nms(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes with each bbox
            specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU above which overlapping bboxes of the
            same class are suppressed
        threshold (float): confidence threshold to remove predicted bboxes
            (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        # Keep only boxes of a different class or with low overlap
        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms
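A worked example in corners format (with nms above in scope, values made up): the 0.3-score box falls below the confidence threshold, and the 0.8-score box overlaps the 0.9-score box of the same class with IoU = 0.81 > 0.5, so only the 0.9 detection survives.

bboxes = [
    [0, 0.9, 0.0, 0.0, 1.0, 1.0],  # kept
    [0, 0.8, 0.1, 0.1, 1.0, 1.0],  # suppressed: IoU with the 0.9 box is 0.81
    [0, 0.3, 0.0, 0.0, 1.0, 1.0],  # dropped by the confidence threshold
]
print(nms(bboxes, iou_threshold=0.5, threshold=0.4, box_format="corners"))
# [[0, 0.9, 0.0, 0.0, 1.0, 1.0]]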