Initial commit

Aladdin Persson
2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions


@@ -0,0 +1,22 @@
import csv

read_train = open("train.txt", "r").readlines()
with open("train.csv", mode="w", newline="") as train_file:
    writer = csv.writer(train_file)
    for line in read_train:
        image_file = line.split("/")[-1].replace("\n", "")
        text_file = image_file.replace(".jpg", ".txt")
        writer.writerow([image_file, text_file])

read_test = open("test.txt", "r").readlines()
with open("test.csv", mode="w", newline="") as test_file:
    writer = csv.writer(test_file)
    for line in read_test:
        image_file = line.split("/")[-1].replace("\n", "")
        text_file = image_file.replace(".jpg", ".txt")
        writer.writerow([image_file, text_file])
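As a quick sanity check, this is the mapping the script performs on a single line (the path below is a made-up example, assuming train.txt holds one image path per line as written by voc_label.py):

line = "/home/user/VOCdevkit/VOC2007/JPEGImages/000001.jpg\n"
image_file = line.split("/")[-1].replace("\n", "")  # "000001.jpg"
text_file = image_file.replace(".jpg", ".txt")      # "000001.txt"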


@@ -0,0 +1,60 @@
#!/usr/bin/env bash
## DOWNLOAD FROM JOSEPH'S WEBSITE (SLOWER DOWNLOAD)
#wget https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
#wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
#wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
## OR DOWNLOAD FROM HERE (FASTER DOWNLOAD)
# VOC2007 DATASET
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
# VOC2012 DATASET
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
# Extract tar files
tar xf VOCtrainval_11-May-2012.tar
tar xf VOCtrainval_06-Nov-2007.tar
tar xf VOCtest_06-Nov-2007.tar
# Need voc_label.py to clean up data from xml files
wget https://pjreddie.com/media/files/voc_label.py
# Run python file to clean data from xml files
python voc_label.py
# Get train by using train+val from 2007 and 2012
# Then we only test on 2007 test set
# Unclear from the paper what they actually used as a dev set
cat 2007_train.txt 2007_val.txt 2012_*.txt > train.txt
cp 2007_test.txt test.txt
# Move txt files we won't be using to clean up a little bit
mkdir old_txt_files
mv 2007* 2012* old_txt_files/
python generate_csv.py
# Gather all images and labels into a single data/ folder
mkdir -p data/images
mkdir -p data/labels
mv VOCdevkit/VOC2007/JPEGImages/*.jpg data/images/
mv VOCdevkit/VOC2012/JPEGImages/*.jpg data/images/
mv VOCdevkit/VOC2007/labels/*.txt data/labels/
mv VOCdevkit/VOC2012/labels/*.txt data/labels/
# We don't need VOCdevkit folder anymore, can remove
# in order to save some space
rm -rf VOCdevkit/
mv test.txt old_txt_files/
mv train.txt old_txt_files/
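After the script finishes, a minimal sketch (an assumed helper, not part of this commit) to verify that every image in data/images has a matching label in data/labels:

import os

images = {f[:-4] for f in os.listdir("data/images") if f.endswith(".jpg")}
labels = {f[:-4] for f in os.listdir("data/labels") if f.endswith(".txt")}
print(f"{len(images)} images, {len(labels)} labels")
print("images missing labels:", sorted(images - labels)[:5])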


@@ -0,0 +1,90 @@
"""
Creates a Pytorch dataset to load the Pascal VOC dataset
"""
import torch
import os
import pandas as pd
from PIL import Image
class VOCDataset(torch.utils.data.Dataset):
def __init__(
self, csv_file, img_dir, label_dir, S=7, B=2, C=20, transform=None,
):
self.annotations = pd.read_csv(csv_file)
self.img_dir = img_dir
self.label_dir = label_dir
self.transform = transform
self.S = S
self.B = B
self.C = C
def __len__(self):
return len(self.annotations)
def __getitem__(self, index):
label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
boxes = []
with open(label_path) as f:
for label in f.readlines():
class_label, x, y, width, height = [
float(x) if float(x) != int(float(x)) else int(x)
for x in label.replace("\n", "").split()
]
boxes.append([class_label, x, y, width, height])
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
image = Image.open(img_path)
boxes = torch.tensor(boxes)
if self.transform:
# image = self.transform(image)
image, boxes = self.transform(image, boxes)
# Convert To Cells
label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
for box in boxes:
class_label, x, y, width, height = box.tolist()
class_label = int(class_label)
# i,j represents the cell row and cell column
i, j = int(self.S * y), int(self.S * x)
x_cell, y_cell = self.S * x - j, self.S * y - i
"""
Calculating the width and height of cell of bounding box,
relative to the cell is done by the following, with
width as the example:
width_pixels = (width*self.image_width)
cell_pixels = (self.image_width)
Then to find the width relative to the cell is simply:
width_pixels/cell_pixels, simplification leads to the
formulas below.
"""
width_cell, height_cell = (
width * self.S,
height * self.S,
)
# If no object already found for specific cell i,j
# Note: This means we restrict to ONE object
# per cell!
if label_matrix[i, j, 20] == 0:
# Set that there exists an object
label_matrix[i, j, 20] = 1
# Box coordinates
box_coordinates = torch.tensor(
[x_cell, y_cell, width_cell, height_cell]
)
label_matrix[i, j, 21:25] = box_coordinates
# Set one hot encoding for class_label
label_matrix[i, j, class_label] = 1
return image, label_matrix
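To make the cell conversion above concrete, a standalone worked example with S=7 and an arbitrary box:

S = 7
x, y, width, height = 0.5, 0.36, 0.2, 0.3        # box relative to whole image
i, j = int(S * y), int(S * x)                    # cell row 2, cell column 3
x_cell, y_cell = S * x - j, S * y - i            # ~ (0.5, 0.52), offset inside cell (2, 3)
width_cell, height_cell = width * S, height * S  # ~ (1.4, 2.1), measured in cell units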


@@ -0,0 +1,124 @@
"""
Implementation of Yolo Loss Function from the original yolo paper
"""
import torch
import torch.nn as nn
from utils import intersection_over_union
class YoloLoss(nn.Module):
"""
Calculate the loss for yolo (v1) model
"""
def __init__(self, S=7, B=2, C=20):
super(YoloLoss, self).__init__()
self.mse = nn.MSELoss(reduction="sum")
"""
S is split size of image (in paper 7),
B is number of boxes (in paper 2),
C is number of classes (in paper and VOC dataset is 20),
"""
self.S = S
self.B = B
self.C = C
# These are from Yolo paper, signifying how much we should
# pay loss for no object (noobj) and the box coordinates (coord)
self.lambda_noobj = 0.5
self.lambda_coord = 5
def forward(self, predictions, target):
# predictions are shaped (BATCH_SIZE, S*S(C+B*5) when inputted
predictions = predictions.reshape(-1, self.S, self.S, self.C + self.B * 5)
# Calculate IoU for the two predicted bounding boxes with target bbox
iou_b1 = intersection_over_union(predictions[..., 21:25], target[..., 21:25])
iou_b2 = intersection_over_union(predictions[..., 26:30], target[..., 21:25])
ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=0)
# Take the box with highest IoU out of the two prediction
# Note that bestbox will be indices of 0, 1 for which bbox was best
iou_maxes, bestbox = torch.max(ious, dim=0)
exists_box = target[..., 20].unsqueeze(3) # in paper this is Iobj_i
# ======================== #
# FOR BOX COORDINATES #
# ======================== #
# Set boxes with no object in them to 0. We only take out one of the two
# predictions, which is the one with highest Iou calculated previously.
box_predictions = exists_box * (
(
bestbox * predictions[..., 26:30]
+ (1 - bestbox) * predictions[..., 21:25]
)
)
box_targets = exists_box * target[..., 21:25]
# Take sqrt of width, height of boxes to ensure that
box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
torch.abs(box_predictions[..., 2:4] + 1e-6)
)
box_targets[..., 2:4] = torch.sqrt(box_targets[..., 2:4])
box_loss = self.mse(
torch.flatten(box_predictions, end_dim=-2),
torch.flatten(box_targets, end_dim=-2),
)
# ==================== #
# FOR OBJECT LOSS #
# ==================== #
# pred_box is the confidence score for the bbox with highest IoU
pred_box = (
bestbox * predictions[..., 25:26] + (1 - bestbox) * predictions[..., 20:21]
)
object_loss = self.mse(
torch.flatten(exists_box * pred_box),
torch.flatten(exists_box * target[..., 20:21]),
)
# ======================= #
# FOR NO OBJECT LOSS #
# ======================= #
#max_no_obj = torch.max(predictions[..., 20:21], predictions[..., 25:26])
#no_object_loss = self.mse(
# torch.flatten((1 - exists_box) * max_no_obj, start_dim=1),
# torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
#)
no_object_loss = self.mse(
torch.flatten((1 - exists_box) * predictions[..., 20:21], start_dim=1),
torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
)
no_object_loss += self.mse(
torch.flatten((1 - exists_box) * predictions[..., 25:26], start_dim=1),
torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1)
)
# ================== #
# FOR CLASS LOSS #
# ================== #
class_loss = self.mse(
torch.flatten(exists_box * predictions[..., :20], end_dim=-2,),
torch.flatten(exists_box * target[..., :20], end_dim=-2,),
)
loss = (
self.lambda_coord * box_loss # first two rows in paper
+ object_loss # third row in paper
+ self.lambda_noobj * no_object_loss # forth row
+ class_loss # fifth row
)
return loss
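A minimal smoke test of the shape contract (a sketch, assuming the loss module above and its utils import are on the path): predictions arrive flat as (N, S*S*(C+B*5)) = (N, 1470), targets as (N, 7, 7, 30), matching how the dataset builds its label matrix.

import torch
from loss import YoloLoss

loss_fn = YoloLoss()
predictions = torch.randn(2, 7 * 7 * 30)  # flat model output
target = torch.zeros(2, 7, 7, 30)         # empty label grid
print(loss_fn(predictions, target))       # a single scalar tensor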


@@ -0,0 +1,119 @@
"""
Implementation of Yolo (v1) architecture
with slight modification with added BatchNorm.
"""
import torch
import torch.nn as nn
"""
Information about architecture config:
Tuple is structured by (kernel_size, filters, stride, padding)
"M" is simply maxpooling with stride 2x2 and kernel 2x2
List is structured by tuples and lastly int with number of repeats
"""
architecture_config = [
(7, 64, 2, 3),
"M",
(3, 192, 1, 1),
"M",
(1, 128, 1, 0),
(3, 256, 1, 1),
(1, 256, 1, 0),
(3, 512, 1, 1),
"M",
[(1, 256, 1, 0), (3, 512, 1, 1), 4],
(1, 512, 1, 0),
(3, 1024, 1, 1),
"M",
[(1, 512, 1, 0), (3, 1024, 1, 1), 2],
(3, 1024, 1, 1),
(3, 1024, 2, 1),
(3, 1024, 1, 1),
(3, 1024, 1, 1),
]
class CNNBlock(nn.Module):
def __init__(self, in_channels, out_channels, **kwargs):
super(CNNBlock, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
self.batchnorm = nn.BatchNorm2d(out_channels)
self.leakyrelu = nn.LeakyReLU(0.1)
def forward(self, x):
return self.leakyrelu(self.batchnorm(self.conv(x)))
class Yolov1(nn.Module):
def __init__(self, in_channels=3, **kwargs):
super(Yolov1, self).__init__()
self.architecture = architecture_config
self.in_channels = in_channels
self.darknet = self._create_conv_layers(self.architecture)
self.fcs = self._create_fcs(**kwargs)
def forward(self, x):
x = self.darknet(x)
return self.fcs(torch.flatten(x, start_dim=1))
def _create_conv_layers(self, architecture):
layers = []
in_channels = self.in_channels
for x in architecture:
if type(x) == tuple:
layers += [
CNNBlock(
in_channels, x[1], kernel_size=x[0], stride=x[2], padding=x[3],
)
]
in_channels = x[1]
elif type(x) == str:
layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
elif type(x) == list:
conv1 = x[0]
conv2 = x[1]
num_repeats = x[2]
for _ in range(num_repeats):
layers += [
CNNBlock(
in_channels,
conv1[1],
kernel_size=conv1[0],
stride=conv1[2],
padding=conv1[3],
)
]
layers += [
CNNBlock(
conv1[1],
conv2[1],
kernel_size=conv2[0],
stride=conv2[2],
padding=conv2[3],
)
]
in_channels = conv2[1]
return nn.Sequential(*layers)
def _create_fcs(self, split_size, num_boxes, num_classes):
S, B, C = split_size, num_boxes, num_classes
# In original paper this should be
# nn.Linear(1024*S*S, 4096),
# nn.LeakyReLU(0.1),
# nn.Linear(4096, S*S*(B*5+C))
return nn.Sequential(
nn.Flatten(),
nn.Linear(1024 * S * S, 496),
nn.Dropout(0.0),
nn.LeakyReLU(0.1),
nn.Linear(496, S * S * (C + B * 5)),
)
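A quick shape check (a minimal sketch, assuming the model module above is importable): with S=7, B=2, C=20 the head outputs S*S*(C + B*5) = 1470 values per 448x448 image.

import torch
from model import Yolov1

model = Yolov1(split_size=7, num_boxes=2, num_classes=20)
x = torch.randn((2, 3, 448, 448))  # two 448x448 RGB images
print(model(x).shape)              # torch.Size([2, 1470])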


@@ -0,0 +1,148 @@
"""
Main file for training Yolo model on Pascal VOC dataset
"""
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
non_max_suppression,
mean_average_precision,
intersection_over_union,
cellboxes_to_boxes,
get_bboxes,
plot_image,
save_checkpoint,
load_checkpoint,
)
from loss import YoloLoss
seed = 123
torch.manual_seed(seed)
# Hyperparameters etc.
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 1000
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
IMG_DIR = "data/images"
LABEL_DIR = "data/labels"
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img, bboxes):
for t in self.transforms:
img, bboxes = t(img), bboxes
return img, bboxes
transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor(),])
def train_fn(train_loader, model, optimizer, loss_fn):
loop = tqdm(train_loader, leave=True)
mean_loss = []
for batch_idx, (x, y) in enumerate(loop):
x, y = x.to(DEVICE), y.to(DEVICE)
out = model(x)
loss = loss_fn(out, y)
mean_loss.append(loss.item())
optimizer.zero_grad()
loss.backward()
optimizer.step()
# update progress bar
loop.set_postfix(loss=loss.item())
print(f"Mean loss was {sum(mean_loss)/len(mean_loss)}")
def main():
model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
optimizer = optim.Adam(
model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
)
loss_fn = YoloLoss()
if LOAD_MODEL:
load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)
train_dataset = VOCDataset(
"data/100examples.csv",
transform=transform,
img_dir=IMG_DIR,
label_dir=LABEL_DIR,
)
test_dataset = VOCDataset(
"data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR,
)
train_loader = DataLoader(
dataset=train_dataset,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
pin_memory=PIN_MEMORY,
shuffle=True,
drop_last=True,
)
test_loader = DataLoader(
dataset=test_dataset,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
pin_memory=PIN_MEMORY,
shuffle=True,
drop_last=True,
)
for epoch in range(EPOCHS):
# for x, y in train_loader:
# x = x.to(DEVICE)
# for idx in range(8):
# bboxes = cellboxes_to_boxes(model(x))
# bboxes = non_max_suppression(bboxes[idx], iou_threshold=0.5, threshold=0.4, box_format="midpoint")
# plot_image(x[idx].permute(1,2,0).to("cpu"), bboxes)
# import sys
# sys.exit()
pred_boxes, target_boxes = get_bboxes(
train_loader, model, iou_threshold=0.5, threshold=0.4
)
mean_avg_prec = mean_average_precision(
pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
)
print(f"Train mAP: {mean_avg_prec}")
#if mean_avg_prec > 0.9:
# checkpoint = {
# "state_dict": model.state_dict(),
# "optimizer": optimizer.state_dict(),
# }
# save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
# import time
# time.sleep(10)
train_fn(train_loader, model, optimizer, loss_fn)
if __name__ == "__main__":
main()
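On the "grad accum?" note next to BATCH_SIZE: a minimal sketch (an assumed modification, not part of this commit) of how gradient accumulation could emulate the paper's batch size of 64 using micro-batches of 16:

ACCUM_STEPS = 4  # 4 * BATCH_SIZE = 64, the paper's effective batch size

def train_fn_accum(train_loader, model, optimizer, loss_fn):
    optimizer.zero_grad()
    for batch_idx, (x, y) in enumerate(tqdm(train_loader, leave=True)):
        x, y = x.to(DEVICE), y.to(DEVICE)
        loss = loss_fn(model(x), y) / ACCUM_STEPS  # scale so the summed gradients average out
        loss.backward()                            # gradients accumulate across micro-batches
        if (batch_idx + 1) % ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()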


@@ -0,0 +1,349 @@
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from collections import Counter
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]  # (N, 1)
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # .clamp(0) is for the case when they do not intersect
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes with each bbox
            specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU above which overlapping bboxes of the
            same class are suppressed
        threshold (float): confidence threshold to remove predicted bboxes
            (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        # Keep only boxes of a different class or with low overlap
        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bbox
            specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU threshold at which a predicted bbox counts as correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # find the number of ground truth bboxes for each training example,
        # so if img 0 has 3 and img 1 has 5 we obtain a dictionary:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t. the same example):
        # amount_bboxes = {0:torch.tensor([0,0,0]), 1:torch.tensor([0,0,0,0,0])}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities, which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only count each ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive, and mark this bounding box as seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IoU is lower than the threshold, the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)
def plot_image(image, boxes):
    """Plots predicted bounding boxes on the image"""
    im = np.array(image)
    height, width, _ = im.shape

    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(im)

    # After slicing, box[0] is x midpoint, box[2] is width,
    # box[1] is y midpoint, box[3] is height
    # Create a Rectangle patch for each box
    for box in boxes:
        box = box[2:]
        assert len(box) == 4, "Got more values than in x, y, w, h, in a box!"
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=1,
            edgecolor="r",
            facecolor="none",
        )
        # Add the patch to the Axes
        ax.add_patch(rect)

    plt.show()
def get_bboxes(
    loader,
    model,
    iou_threshold,
    threshold,
    pred_format="cells",
    box_format="midpoint",
    device="cuda",
):
    all_pred_boxes = []
    all_true_boxes = []

    # make sure model is in eval mode before getting bboxes
    model.eval()
    train_idx = 0

    for batch_idx, (x, labels) in enumerate(loader):
        x = x.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )

            # if batch_idx == 0 and idx == 0:
            #     plot_image(x[idx].permute(1,2,0).to("cpu"), nms_boxes)
            #     print(nms_boxes)

            for nms_box in nms_boxes:
                all_pred_boxes.append([train_idx] + nms_box)

            for box in true_bboxes[idx]:
                # most of the 49 cells have no object, so their confidence
                # is ~0 and they get filtered out here
                if box[1] > threshold:
                    all_true_boxes.append([train_idx] + box)

            train_idx += 1

    model.train()
    return all_pred_boxes, all_true_boxes
def convert_cellboxes(predictions, S=7):
    """
    Converts bounding boxes output from Yolo with
    an image split size of S into entire image ratios
    rather than relative to cell ratios. Tried to do this
    vectorized, but this resulted in quite difficult to read
    code... Use as a black box? Or implement a more intuitive
    version using 2 for loops iterating over range(S) and
    converting the boxes one by one, resulting in a slower
    but more readable implementation.
    """

    predictions = predictions.to("cpu")
    batch_size = predictions.shape[0]
    predictions = predictions.reshape(batch_size, 7, 7, 30)
    bboxes1 = predictions[..., 21:25]
    bboxes2 = predictions[..., 26:30]
    scores = torch.cat(
        (predictions[..., 20].unsqueeze(0), predictions[..., 25].unsqueeze(0)), dim=0
    )
    best_box = scores.argmax(0).unsqueeze(-1)
    best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2
    cell_indices = torch.arange(7).repeat(batch_size, 7, 1).unsqueeze(-1)
    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_y = 1 / S * best_boxes[..., 2:4]
    converted_bboxes = torch.cat((x, y, w_y), dim=-1)
    predicted_class = predictions[..., :20].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(predictions[..., 20], predictions[..., 25]).unsqueeze(
        -1
    )
    converted_preds = torch.cat(
        (predicted_class, best_confidence, converted_bboxes), dim=-1
    )

    return converted_preds


def cellboxes_to_boxes(out, S=7):
    converted_pred = convert_cellboxes(out).reshape(out.shape[0], S * S, -1)
    converted_pred[..., 0] = converted_pred[..., 0].long()
    all_bboxes = []

    for ex_idx in range(out.shape[0]):
        bboxes = []

        for bbox_idx in range(S * S):
            bboxes.append([x.item() for x in converted_pred[ex_idx, bbox_idx, :]])
        all_bboxes.append(bboxes)

    return all_bboxes
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
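Since convert_cellboxes is meant to be used as a black box, here is a small sketch of its contract through cellboxes_to_boxes (with the functions above in scope): flat predictions go in, and out comes one [class, confidence, x, y, w, h] list per cell, with coordinates relative to the whole image.

preds = torch.randn(2, 7 * 7 * 30)  # e.g. raw model output for 2 images
boxes = cellboxes_to_boxes(preds)
print(len(boxes), len(boxes[0]), len(boxes[0][0]))  # 2 examples, 49 boxes, 6 values each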


@@ -0,0 +1,50 @@
import torch
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct Labels of Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    # Slicing idx:idx+1 in order to keep tensor dimensionality
    # Doing ... in indexing to handle any additional dimensions,
    # like for the Yolo algorithm where the shape would be (N, S, S, 4)
    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # Need clamp(0) in case they do not intersect, then we want intersection to be 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)
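A worked example in midpoint format (with the function above in scope): box2 lies entirely inside box1, so intersection = 0.04, union = 0.16 + 0.04 - 0.04 = 0.16, and IoU = 0.04 / 0.16 = 0.25.

box1 = torch.tensor([[0.5, 0.5, 0.4, 0.4]])  # 0.4 x 0.4 box centered at (0.5, 0.5)
box2 = torch.tensor([[0.5, 0.5, 0.2, 0.2]])  # 0.2 x 0.2 box, same center
print(intersection_over_union(box1, box2, box_format="midpoint"))  # tensor([[0.2500]])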


@@ -0,0 +1,112 @@
import torch
from collections import Counter
from iou import intersection_over_union
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates mean average precision

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bbox
            specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU threshold at which a predicted bbox counts as correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # find the number of ground truth bboxes for each training example,
        # so if img 0 has 3 and img 1 has 5 we obtain a dictionary:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t. the same example):
        # amount_bboxes = {0:torch.tensor([0,0,0]), 1:torch.tensor([0,0,0,0,0])}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities, which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only count each ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive, and mark this bounding box as seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IoU is lower than the threshold, the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)
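A tiny end-to-end check (with the function above in scope, values made up): one image, one ground truth of class 0, and one confident detection that matches it exactly, so AP for class 0 is 1.0, every other class is skipped, and mAP comes out as ~1.0.

pred_boxes = [[0, 0, 0.9, 0.5, 0.5, 0.2, 0.2]]  # [train_idx, class, score, x, y, w, h]
true_boxes = [[0, 0, 1.0, 0.5, 0.5, 0.2, 0.2]]
print(mean_average_precision(pred_boxes, true_boxes))  # tensor(1.0000), up to epsilon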


@@ -0,0 +1,42 @@
import torch
from iou import intersection_over_union
def nms(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes with each bbox
            specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU above which overlapping bboxes of the
            same class are suppressed
        threshold (float): confidence threshold to remove predicted bboxes
            (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        # Keep only boxes of a different class or with low overlap
        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms
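A worked example in corners format (with nms above in scope, values made up): the 0.3-score box falls below the confidence threshold, and the 0.8-score box overlaps the 0.9-score box of the same class with IoU = 0.81 > 0.5, so only the 0.9 detection survives.

bboxes = [
    [0, 0.9, 0.0, 0.0, 1.0, 1.0],  # kept
    [0, 0.8, 0.1, 0.1, 1.0, 1.0],  # suppressed: IoU with the 0.9 box is 0.81
    [0, 0.3, 0.0, 0.0, 1.0, 1.0],  # dropped by the confidence threshold
]
print(nms(bboxes, iou_threshold=0.5, threshold=0.4, box_format="corners"))
# [[0, 0.9, 0.0, 0.0, 1.0, 1.0]]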