mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-02-21 11:18:01 +00:00

Initial commit
22
ML/Pytorch/object_detection/YOLO/data/generate_csv.py
Executable file
@@ -0,0 +1,22 @@
import csv

read_train = open("train.txt", "r").readlines()

with open("train.csv", mode="w", newline="") as train_file:
    writer = csv.writer(train_file)
    for line in read_train:
        image_file = line.split("/")[-1].replace("\n", "")
        text_file = image_file.replace(".jpg", ".txt")
        writer.writerow([image_file, text_file])

read_test = open("test.txt", "r").readlines()

with open("test.csv", mode="w", newline="") as test_file:
    writer = csv.writer(test_file)
    for line in read_test:
        image_file = line.split("/")[-1].replace("\n", "")
        text_file = image_file.replace(".jpg", ".txt")
        writer.writerow([image_file, text_file])
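# The two blocks above are identical apart from the filenames; a small helper
# keeps them in sync (a sketch of ours, not part of the original file):
def txt_to_csv(txt_path, csv_path):
    # Each line of txt_path is a path ending in <name>.jpg; keep only the
    # filename and pair it with its <name>.txt label file.
    with open(txt_path, "r") as f, open(csv_path, mode="w", newline="") as out:
        writer = csv.writer(out)
        for line in f:
            image_file = line.strip().split("/")[-1]
            writer.writerow([image_file, image_file.replace(".jpg", ".txt")])

# txt_to_csv("train.txt", "train.csv")
# txt_to_csv("test.txt", "test.csv")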
60
ML/Pytorch/object_detection/YOLO/data/get_data
Executable file
@@ -0,0 +1,60 @@
#!/usr/bin/env bash

## DOWNLOAD from JOSEPH'S WEBSITE (SLOWER DOWNLOAD)
#wget https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
#wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
#wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar

## OR DOWNLOAD FROM HERE (FASTER DOWNLOAD)
# VOC2007 DATASET
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar

# VOC2012 DATASET
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar

# Extract tar files
tar xf VOCtrainval_11-May-2012.tar
tar xf VOCtrainval_06-Nov-2007.tar
tar xf VOCtest_06-Nov-2007.tar

# Need voc_label.py to clean up the data from the xml files
wget https://pjreddie.com/media/files/voc_label.py

# Run python file to clean data from xml files
python voc_label.py

# Build the train set from train+val of 2007 and 2012,
# then test only on the 2007 test set.
# It is unclear from the paper what they actually use as a dev set.
cat 2007_train.txt 2007_val.txt 2012_*.txt > train.txt
cp 2007_test.txt test.txt

# Move txt files we won't be using to clean up a little bit
mkdir old_txt_files
mv 2007* 2012* old_txt_files/

python generate_csv.py

mkdir data
mkdir data/images
mkdir data/labels

mv VOCdevkit/VOC2007/JPEGImages/*.jpg data/images/
mv VOCdevkit/VOC2012/JPEGImages/*.jpg data/images/
mv VOCdevkit/VOC2007/labels/*.txt data/labels/
mv VOCdevkit/VOC2012/labels/*.txt data/labels/

# We don't need the VOCdevkit folder anymore; remove it
# in order to save some space
rm -rf VOCdevkit/
mv test.txt old_txt_files/
mv train.txt old_txt_files/
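# For reference, voc_label.py writes one object per line into each file in
# data/labels, in the normalized YOLO format
#   <class_id> <x_center> <y_center> <width> <height>
# with all coordinates relative to the image size, e.g. (made-up values):
#   11 0.344 0.611 0.416 0.262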
90
ML/Pytorch/object_detection/YOLO/dataset.py
Executable file
@@ -0,0 +1,90 @@
"""
Creates a Pytorch dataset to load the Pascal VOC dataset
"""

import torch
import os
import pandas as pd
from PIL import Image


class VOCDataset(torch.utils.data.Dataset):
    def __init__(
        self, csv_file, img_dir, label_dir, S=7, B=2, C=20, transform=None,
    ):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.S = S
        self.B = B
        self.C = C

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        boxes = []
        with open(label_path) as f:
            for label in f.readlines():
                # Keep the class id as an int, the coordinates as floats
                class_label, x, y, width, height = [
                    float(x) if float(x) != int(float(x)) else int(x)
                    for x in label.replace("\n", "").split()
                ]

                boxes.append([class_label, x, y, width, height])

        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path)
        boxes = torch.tensor(boxes)

        if self.transform:
            image, boxes = self.transform(image, boxes)

        # Convert To Cells
        label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
        for box in boxes:
            class_label, x, y, width, height = box.tolist()
            class_label = int(class_label)

            # i,j represents the cell row and cell column
            i, j = int(self.S * y), int(self.S * x)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            """
            Calculating the width and height of the bounding box
            relative to the cell is done as follows, with
            width as the example:

            width_pixels = (width * self.image_width)
            cell_pixels = (self.image_width / self.S)

            The width relative to the cell is then simply
            width_pixels / cell_pixels, which simplifies to the
            formulas below.
            """
            width_cell, height_cell = (
                width * self.S,
                height * self.S,
            )

            # If no object already found for specific cell i,j
            # Note: This means we restrict to ONE object
            # per cell!
            if label_matrix[i, j, 20] == 0:
                # Set that there exists an object
                label_matrix[i, j, 20] = 1

                # Box coordinates
                box_coordinates = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                label_matrix[i, j, 21:25] = box_coordinates

                # Set one hot encoding for class_label
                label_matrix[i, j, class_label] = 1

        return image, label_matrix
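# A quick worked check of the cell encoding above (values are ours, for
# illustration), with S = 7 and a box centred at (0.5, 0.5) of width 0.2:
#     i, j = int(7 * 0.5), int(7 * 0.5)   # -> (3, 3), the middle cell
#     x_cell, y_cell = 3.5 - 3, 3.5 - 3   # -> (0.5, 0.5), offsets inside the cell
#     width_cell = 0.2 * 7                # -> 1.4, widths relative to a cell may exceed 1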
124
ML/Pytorch/object_detection/YOLO/loss.py
Executable file
@@ -0,0 +1,124 @@
"""
Implementation of the Yolo Loss Function from the original yolo paper
"""

import torch
import torch.nn as nn
from utils import intersection_over_union


class YoloLoss(nn.Module):
    """
    Calculate the loss for the yolo (v1) model
    """

    def __init__(self, S=7, B=2, C=20):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")

        """
        S is split size of image (in paper 7),
        B is number of boxes (in paper 2),
        C is number of classes (in paper and VOC dataset is 20)
        """
        self.S = S
        self.B = B
        self.C = C

        # These are from the Yolo paper, signifying how much we should
        # weigh the loss for no object (noobj) and the box coordinates (coord)
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self, predictions, target):
        # predictions are shaped (BATCH_SIZE, S*S*(C+B*5)) when inputted
        predictions = predictions.reshape(-1, self.S, self.S, self.C + self.B * 5)

        # Calculate IoU for the two predicted bounding boxes with the target bbox
        iou_b1 = intersection_over_union(predictions[..., 21:25], target[..., 21:25])
        iou_b2 = intersection_over_union(predictions[..., 26:30], target[..., 21:25])
        ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=0)

        # Take the box with the highest IoU out of the two predictions.
        # Note that bestbox will be indices 0, 1 for whichever bbox was best.
        iou_maxes, bestbox = torch.max(ious, dim=0)
        exists_box = target[..., 20].unsqueeze(3)  # in the paper this is Iobj_i

        # ======================== #
        #   FOR BOX COORDINATES    #
        # ======================== #

        # Set boxes with no object in them to 0. We only take out one of the two
        # predictions, which is the one with the highest IoU calculated previously.
        box_predictions = exists_box * (
            bestbox * predictions[..., 26:30]
            + (1 - bestbox) * predictions[..., 21:25]
        )

        box_targets = exists_box * target[..., 21:25]

        # Take sqrt of width, height of boxes so that errors on large boxes
        # weigh less than errors on small boxes (as in the paper); sign/abs
        # handle possibly negative predicted w,h, and the epsilon keeps the
        # gradient of sqrt finite at zero.
        box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
            torch.abs(box_predictions[..., 2:4]) + 1e-6
        )
        box_targets[..., 2:4] = torch.sqrt(box_targets[..., 2:4])

        box_loss = self.mse(
            torch.flatten(box_predictions, end_dim=-2),
            torch.flatten(box_targets, end_dim=-2),
        )

        # ==================== #
        #   FOR OBJECT LOSS    #
        # ==================== #

        # pred_box is the confidence score for the bbox with the highest IoU
        pred_box = (
            bestbox * predictions[..., 25:26] + (1 - bestbox) * predictions[..., 20:21]
        )

        object_loss = self.mse(
            torch.flatten(exists_box * pred_box),
            torch.flatten(exists_box * target[..., 20:21]),
        )

        # ======================= #
        #   FOR NO OBJECT LOSS    #
        # ======================= #

        # Alternative: penalize only the more confident of the two boxes
        # max_no_obj = torch.max(predictions[..., 20:21], predictions[..., 25:26])
        # no_object_loss = self.mse(
        #     torch.flatten((1 - exists_box) * max_no_obj, start_dim=1),
        #     torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
        # )

        no_object_loss = self.mse(
            torch.flatten((1 - exists_box) * predictions[..., 20:21], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
        )

        no_object_loss += self.mse(
            torch.flatten((1 - exists_box) * predictions[..., 25:26], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
        )

        # ================== #
        #   FOR CLASS LOSS   #
        # ================== #

        class_loss = self.mse(
            torch.flatten(exists_box * predictions[..., :20], end_dim=-2),
            torch.flatten(exists_box * target[..., :20], end_dim=-2),
        )

        loss = (
            self.lambda_coord * box_loss  # first two rows in paper
            + object_loss  # third row in paper
            + self.lambda_noobj * no_object_loss  # fourth row
            + class_loss  # fifth row
        )

        return loss
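# A minimal shape sanity check for the loss (our own smoke test, random values):
#     loss_fn = YoloLoss()
#     predictions = torch.randn(8, 7 * 7 * (20 + 2 * 5))  # flat model output
#     target = torch.zeros(8, 7, 7, 30)                   # label_matrix from the dataset
#     print(loss_fn(predictions, target))                 # a scalar tensor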
119
ML/Pytorch/object_detection/YOLO/model.py
Executable file
@@ -0,0 +1,119 @@
"""
Implementation of Yolo (v1) architecture
with the slight modification of an added BatchNorm.
"""

import torch
import torch.nn as nn

"""
Information about the architecture config:
A tuple is structured as (kernel_size, filters, stride, padding)
"M" is simply maxpooling with stride 2x2 and kernel 2x2
A list is structured as tuples followed lastly by an int with the number of repeats
"""

architecture_config = [
    (7, 64, 2, 3),
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
]
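# Tracing the spatial size through this config for a 448x448 input (our own
# bookkeeping): the strided 7x7 conv and each of the four "M" maxpools halve
# the resolution, and the stride-2 3x3 conv near the end halves it once more:
#     448 -> 224 -> 112 -> 56 -> 28 -> 14 -> 7
# so the darknet output is 1024 x 7 x 7, matching nn.Linear(1024 * S * S, ...) below.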
class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(CNNBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        return self.leakyrelu(self.batchnorm(self.conv(x)))


class Yolov1(nn.Module):
    def __init__(self, in_channels=3, **kwargs):
        super(Yolov1, self).__init__()
        self.architecture = architecture_config
        self.in_channels = in_channels
        self.darknet = self._create_conv_layers(self.architecture)
        self.fcs = self._create_fcs(**kwargs)

    def forward(self, x):
        x = self.darknet(x)
        return self.fcs(torch.flatten(x, start_dim=1))

    def _create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == tuple:
                layers += [
                    CNNBlock(
                        in_channels, x[1], kernel_size=x[0], stride=x[2], padding=x[3],
                    )
                ]
                in_channels = x[1]

            elif type(x) == str:
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]

            elif type(x) == list:
                conv1 = x[0]
                conv2 = x[1]
                num_repeats = x[2]

                for _ in range(num_repeats):
                    layers += [
                        CNNBlock(
                            in_channels,
                            conv1[1],
                            kernel_size=conv1[0],
                            stride=conv1[2],
                            padding=conv1[3],
                        )
                    ]
                    layers += [
                        CNNBlock(
                            conv1[1],
                            conv2[1],
                            kernel_size=conv2[0],
                            stride=conv2[2],
                            padding=conv2[3],
                        )
                    ]
                    in_channels = conv2[1]

        return nn.Sequential(*layers)

    def _create_fcs(self, split_size, num_boxes, num_classes):
        S, B, C = split_size, num_boxes, num_classes

        # In the original paper this should be
        # nn.Linear(1024*S*S, 4096),
        # nn.LeakyReLU(0.1),
        # nn.Linear(4096, S*S*(B*5+C))

        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * S * S, 496),
            nn.Dropout(0.0),
            nn.LeakyReLU(0.1),
            nn.Linear(496, S * S * (C + B * 5)),
        )
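# Hypothetical smoke test (ours): verify the output shape of the full model.
#     model = Yolov1(split_size=7, num_boxes=2, num_classes=20)
#     x = torch.randn(2, 3, 448, 448)
#     print(model(x).shape)  # torch.Size([2, 1470]) since 7 * 7 * (20 + 2 * 5) = 1470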
148
ML/Pytorch/object_detection/YOLO/train.py
Executable file
@@ -0,0 +1,148 @@
"""
Main file for training the Yolo model on the Pascal VOC dataset
"""

import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss

seed = 123
torch.manual_seed(seed)

# Hyperparameters etc.
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16  # 64 in the original paper, but I don't have that much VRAM; gradient accumulation could emulate it
WEIGHT_DECAY = 0
EPOCHS = 1000
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
IMG_DIR = "data/images"
LABEL_DIR = "data/labels"


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        for t in self.transforms:
            # Only the image is transformed; the bboxes are relative
            # coordinates, so a pure Resize leaves them valid.
            img, bboxes = t(img), bboxes

        return img, bboxes


transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor()])


def train_fn(train_loader, model, optimizer, loss_fn):
    loop = tqdm(train_loader, leave=True)
    mean_loss = []

    for batch_idx, (x, y) in enumerate(loop):
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        loss = loss_fn(out, y)
        mean_loss.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update progress bar
        loop.set_postfix(loss=loss.item())

    print(f"Mean loss was {sum(mean_loss)/len(mean_loss)}")
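# The BATCH_SIZE comment above hints at gradient accumulation; a minimal sketch
# (ours, not part of the original) of emulating the paper's batch size of 64
# with batches of 16 by stepping the optimizer every accum_steps batches:
def train_fn_accum(train_loader, model, optimizer, loss_fn, accum_steps=4):
    loop = tqdm(train_loader, leave=True)
    optimizer.zero_grad()

    for batch_idx, (x, y) in enumerate(loop):
        x, y = x.to(DEVICE), y.to(DEVICE)
        # scale the loss so the accumulated gradient averages over 4 * 16 = 64 examples
        loss = loss_fn(model(x), y) / accum_steps
        loss.backward()

        if (batch_idx + 1) % accum_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

        loop.set_postfix(loss=loss.item() * accum_steps)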
def main():
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    train_dataset = VOCDataset(
        "data/100examples.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    test_dataset = VOCDataset(
        "data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR,
    )

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    for epoch in range(EPOCHS):
        # Uncomment to visualize predictions on a few training images:
        # for x, y in train_loader:
        #     x = x.to(DEVICE)
        #     for idx in range(8):
        #         bboxes = cellboxes_to_boxes(model(x))
        #         bboxes = non_max_suppression(bboxes[idx], iou_threshold=0.5, threshold=0.4, box_format="midpoint")
        #         plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
        #
        #     import sys
        #     sys.exit()

        pred_boxes, target_boxes = get_bboxes(
            train_loader, model, iou_threshold=0.5, threshold=0.4
        )

        mean_avg_prec = mean_average_precision(
            pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {mean_avg_prec}")

        # Uncomment to save a checkpoint once training mAP is high enough:
        # if mean_avg_prec > 0.9:
        #     checkpoint = {
        #         "state_dict": model.state_dict(),
        #         "optimizer": optimizer.state_dict(),
        #     }
        #     save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
        #     import time
        #     time.sleep(10)

        train_fn(train_loader, model, optimizer, loss_fn)


if __name__ == "__main__":
    main()
349
ML/Pytorch/object_detection/YOLO/utils.py
Normal file
@@ -0,0 +1,349 @@
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from collections import Counter


def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]  # (N, 1)
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # .clamp(0) is for the case when they do not intersect
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)
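# A quick numeric check (our own toy boxes, corners format): (0,0,2,2) and
# (1,1,3,3) intersect in a 1x1 square and each has area 4, so
# IoU = 1 / (4 + 4 - 1) ~= 0.1429:
#     box_a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
#     box_b = torch.tensor([[1.0, 1.0, 3.0, 3.0]])
#     print(intersection_over_union(box_a, box_b, box_format="corners"))  # ~0.1429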
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression on the given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes, with each bbox
        specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU threshold above which a box is suppressed
        threshold (float): confidence threshold to remove predicted bboxes (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        # Keep only boxes of a different class, or boxes of the same class
        # that do not overlap the chosen box too much
        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms
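# A toy run (boxes are ours): two overlapping class-0 boxes and one class-1
# box; the weaker class-0 box overlaps the stronger one with IoU 0.81 > 0.5
# and is suppressed, while the class-1 box is kept regardless of overlap:
#     boxes = [
#         [0, 0.9, 0.0, 0.0, 1.0, 1.0],
#         [0, 0.6, 0.1, 0.1, 1.0, 1.0],
#         [1, 0.8, 0.0, 0.0, 1.0, 1.0],
#     ]
#     non_max_suppression(boxes, iou_threshold=0.5, threshold=0.4, box_format="corners")
#     # -> [[0, 0.9, ...], [1, 0.8, ...]]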
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes, with each bbox
        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU threshold at which a predicted bbox counts as correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # Find the number of ground truth bboxes for each training example.
        # Counter counts how many ground truth bboxes we get per example,
        # so if img 0 has 3 and img 1 has 5 we obtain the dictionary:
        # amount_bboxes = {0: 3, 1: 5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t. the same example):
        # amount_bboxes = {0: torch.tensor([0,0,0]), 1: torch.tensor([0,0,0,0,0])}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities, which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as the detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only count each ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive, and mark this bounding box as seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IoU is lower, the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration of the precision-recall curve
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)
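# A degenerate check (toy values, ours, num_classes=1 for brevity): a single
# detection that matches its ground truth perfectly gives mAP 1.0:
#     preds = [[0, 0, 0.9, 0.5, 0.5, 0.2, 0.2]]   # [train_idx, class, score, x, y, w, h]
#     truths = [[0, 0, 1.0, 0.5, 0.5, 0.2, 0.2]]
#     mean_average_precision(preds, truths, iou_threshold=0.5,
#                            box_format="midpoint", num_classes=1)  # -> tensor(1.)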
def plot_image(image, boxes):
    """Plots predicted bounding boxes on the image"""
    im = np.array(image)
    height, width, _ = im.shape

    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(im)

    # box[0] is x midpoint, box[2] is width
    # box[1] is y midpoint, box[3] is height

    # Create a Rectangle patch for every box
    for box in boxes:
        box = box[2:]
        assert len(box) == 4, "Got more values than x, y, w, h in a box!"
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=1,
            edgecolor="r",
            facecolor="none",
        )
        # Add the patch to the Axes
        ax.add_patch(rect)

    plt.show()


def get_bboxes(
    loader,
    model,
    iou_threshold,
    threshold,
    pred_format="cells",
    box_format="midpoint",
    device="cuda",
):
    all_pred_boxes = []
    all_true_boxes = []

    # make sure the model is in eval mode before we get the bboxes
    model.eval()
    train_idx = 0

    for batch_idx, (x, labels) in enumerate(loader):
        x = x.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )

            # if batch_idx == 0 and idx == 0:
            #     plot_image(x[idx].permute(1, 2, 0).to("cpu"), nms_boxes)
            #     print(nms_boxes)

            for nms_box in nms_boxes:
                all_pred_boxes.append([train_idx] + nms_box)

            for box in true_bboxes[idx]:
                # many will get converted to confidence 0, so filter them out
                if box[1] > threshold:
                    all_true_boxes.append([train_idx] + box)

            train_idx += 1

    model.train()
    return all_pred_boxes, all_true_boxes
def convert_cellboxes(predictions, S=7):
    """
    Converts bounding boxes output from Yolo with
    an image split size of S into entire-image ratios
    rather than relative-to-cell ratios. Tried to do this
    vectorized, but that resulted in quite difficult to read
    code... use it as a black box, or implement a more intuitive
    version with two for loops iterating over range(S) and
    converting the cells one by one: slower but more readable
    (a sketch of that variant follows below).
    """

    predictions = predictions.to("cpu")
    batch_size = predictions.shape[0]
    predictions = predictions.reshape(batch_size, 7, 7, 30)
    bboxes1 = predictions[..., 21:25]
    bboxes2 = predictions[..., 26:30]
    scores = torch.cat(
        (predictions[..., 20].unsqueeze(0), predictions[..., 25].unsqueeze(0)), dim=0
    )
    best_box = scores.argmax(0).unsqueeze(-1)
    best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2
    cell_indices = torch.arange(7).repeat(batch_size, 7, 1).unsqueeze(-1)
    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_y = 1 / S * best_boxes[..., 2:4]
    converted_bboxes = torch.cat((x, y, w_y), dim=-1)
    predicted_class = predictions[..., :20].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(predictions[..., 20], predictions[..., 25]).unsqueeze(
        -1
    )
    converted_preds = torch.cat(
        (predicted_class, best_confidence, converted_bboxes), dim=-1
    )

    return converted_preds
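# As the docstring above suggests, the same conversion can also be written with
# two for loops over range(S); a slower but more readable sketch (ours, not part
# of the original file), assuming the same (N, S, S, 30) layout:
def convert_cellboxes_loops(predictions, S=7):
    predictions = predictions.to("cpu").reshape(-1, S, S, 30)
    batch_size = predictions.shape[0]
    converted = torch.zeros(batch_size, S, S, 6)

    for i in range(S):  # cell row
        for j in range(S):  # cell column
            cell = predictions[:, i, j, :]
            # pick the better of the two predicted boxes for this cell
            best = (cell[:, 25] > cell[:, 20]).unsqueeze(-1)
            box = torch.where(best, cell[:, 26:30], cell[:, 21:25])
            converted[:, i, j, 0] = cell[:, :20].argmax(-1).float()      # class
            converted[:, i, j, 1] = torch.max(cell[:, 20], cell[:, 25])  # confidence
            converted[:, i, j, 2] = (box[:, 0] + j) / S                  # x relative to image
            converted[:, i, j, 3] = (box[:, 1] + i) / S                  # y relative to image
            converted[:, i, j, 4:6] = box[:, 2:4] / S                    # w, h relative to image

    return converted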
def cellboxes_to_boxes(out, S=7):
    converted_pred = convert_cellboxes(out).reshape(out.shape[0], S * S, -1)
    converted_pred[..., 0] = converted_pred[..., 0].long()
    all_bboxes = []

    for ex_idx in range(out.shape[0]):
        bboxes = []

        for bbox_idx in range(S * S):
            bboxes.append([x.item() for x in converted_pred[ex_idx, bbox_idx, :]])
        all_bboxes.append(bboxes)

    return all_bboxes


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
50
ML/Pytorch/object_detection/metrics/iou.py
Normal file
@@ -0,0 +1,50 @@
import torch


def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct Labels of Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    # Slicing idx:idx+1 in order to keep tensor dimensionality
    # Using ... in indexing in case there are additional dimensions,
    # like for the Yolo algorithm, which would have shape (N, S, S, 4)
    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # Need clamp(0) in case they do not intersect; then we want the intersection to be 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)
112
ML/Pytorch/object_detection/metrics/mean_avg_precision.py
Normal file
@@ -0,0 +1,112 @@
import torch
from collections import Counter

from iou import intersection_over_union


def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes, with each bbox
        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU threshold at which a predicted bbox counts as correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # Find the number of ground truth bboxes for each training example.
        # Counter counts how many ground truth bboxes we get per example,
        # so if img 0 has 3 and img 1 has 5 we obtain the dictionary:
        # amount_bboxes = {0: 3, 1: 5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t. the same example):
        # amount_bboxes = {0: torch.tensor([0,0,0]), 1: torch.tensor([0,0,0,0,0])}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities, which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as the detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only count each ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive, and mark this bounding box as seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IoU is lower, the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration of the precision-recall curve
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)
42
ML/Pytorch/object_detection/metrics/nms.py
Normal file
@@ -0,0 +1,42 @@
import torch
from iou import intersection_over_union


def nms(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression on the given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes, with each bbox
        specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU threshold above which a box is suppressed
        threshold (float): confidence threshold to remove predicted bboxes (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        # Keep only boxes of a different class, or boxes of the same class
        # that do not overlap the chosen box too much
        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms