Files
Machine-Learning-Collection/ML/Kaggles/Dog vs Cat Competition/utils.py
2021-05-27 10:21:14 +02:00

193 lines
5.6 KiB
Python

import torch
import os
import pandas as pd
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
import config
from tqdm import tqdm
from dataset import CatDog
from torch.utils.data import DataLoader
from sklearn.metrics import log_loss
def check_accuracy(
    loader, model, loss_fn, input_shape=None, toggle_eval=True, print_accuracy=True
):
    """
    Compute binary-classification accuracy of `model` over `loader`.

    The model is assumed to emit one logit per example with shape
    (batch, 1); a sigmoid threshold at 0.5 produces hard predictions.

    Args:
        loader: iterable of (x, y) batches.
        model: torch.nn.Module producing shape (batch, 1) logits.
        loss_fn: unused; kept only for backward compatibility with callers.
        input_shape: if given, each batch is reshaped to (batch, *input_shape).
        toggle_eval: switch the model to eval() for the pass and back to
            train() afterwards.
        print_accuracy: additionally print the accuracy and the sklearn
            log loss over all collected predictions.

    Returns:
        Accuracy as a Python float in [0, 1] (the original returned a
        0-dim tensor; `.item()` below fixes that).
    """
    if toggle_eval:
        model.eval()
    device = next(model.parameters()).device

    num_correct = 0
    num_samples = 0
    y_preds = []
    y_true = []

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            if input_shape:
                x = x.reshape(x.shape[0], *input_shape)

            scores = model(x)
            # Compute the sigmoid once and reuse it (original called it twice).
            probs = torch.sigmoid(scores)
            predictions = probs > 0.5
            # Clip probabilities away from 0/1 so the log loss stays finite.
            y_preds.append(torch.clip(probs, 0.005, 0.995).cpu().numpy())
            y_true.append(y.cpu().numpy())
            num_correct += (predictions.squeeze(1) == y).sum().item()
            num_samples += predictions.size(0)

    accuracy = num_correct / num_samples
    if toggle_eval:
        model.train()
    if print_accuracy:
        print(f"Accuracy: {accuracy * 100:.2f}%")
        print(log_loss(np.concatenate(y_true, axis=0), np.concatenate(y_preds, axis=0)))
    return accuracy
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Persist a training checkpoint to disk.

    Args:
        state: picklable object, typically a dict holding model and
            optimizer state_dicts.
        filename: destination path for the serialized checkpoint.
    """
    print("=> Saving checkpoint")
    torch.save(state, filename)
def load_checkpoint(checkpoint, model):
    """Restore model weights from a checkpoint mapping.

    Args:
        checkpoint: mapping with a "state_dict" entry, as produced by
            save_checkpoint.
        model: torch.nn.Module whose parameters are overwritten in place.
    """
    print("=> Loading checkpoint")
    state_dict = checkpoint["state_dict"]
    model.load_state_dict(state_dict)
def create_submission(model, model_name, files_dir):
    """
    Run test-time-augmentation inference and write one submission CSV per
    augmentation.

    For each augmentation (base, horizontal_flip, vertical_flip, coloring,
    rotate, shear) the test set in `files_dir` is predicted with `model`
    and the clipped sigmoid probabilities are written with 1-based ids to
    predictions_test/submission_{model_name}_{aug}.csv.

    Args:
        model: torch.nn.Module producing one logit per image.
        model_name: tag embedded in each output filename.
        files_dir: root directory of the test images, passed to the dataset.
    """

    def _pipeline(*augs):
        # Shared resize -> (optional augmentation) -> normalize -> tensor
        # pipeline; the original repeated this block six times verbatim.
        return A.Compose(
            [
                A.Resize(height=240, width=240),
                *augs,
                A.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                    max_pixel_value=255.0,
                ),
                ToTensorV2(),
            ]
        )

    my_transforms = {
        "base": _pipeline(),
        "horizontal_flip": _pipeline(A.HorizontalFlip(p=1.0)),
        "vertical_flip": _pipeline(A.VerticalFlip(p=1.0)),
        "coloring": _pipeline(A.ColorJitter(p=1.0)),
        "rotate": _pipeline(A.Rotate(p=1.0, limit=45)),
        # NOTE(review): IAAAffine is deprecated/removed in recent
        # albumentations releases; migrate to A.Affine when upgrading.
        "shear": _pipeline(A.IAAAffine(p=1.0)),
    }

    for t in my_transforms:
        predictions = []
        all_files = []
        # BUG FIX: the original referenced undefined `MyDataset`; the
        # dataset class this module imports is CatDog.
        test_dataset = CatDog(root=files_dir, transform=my_transforms[t])
        test_loader = DataLoader(
            test_dataset, batch_size=32, num_workers=4, shuffle=False, pin_memory=True
        )

        model.eval()
        for x, y, filenames in tqdm(test_loader):
            x = x.to(config.DEVICE)
            with torch.no_grad():
                # Clip probabilities away from 0/1 to bound the log loss.
                probs = (
                    torch.clip(torch.sigmoid(model(x)), 0.005, 0.995)
                    .squeeze(1)
                    .cpu()
                    .numpy()
                )
            predictions.append(probs)
            all_files += filenames
        model.train()

        preds = np.concatenate(predictions, axis=0)
        df = pd.DataFrame(
            {
                # 1-based ids; len(preds) replaces the original error-prone
                # (num_batches - 1) * batch_size + last_batch arithmetic.
                "id": np.arange(1, len(preds) + 1),
                "label": preds,
            }
        )
        df.to_csv(f"predictions_test/submission_{model_name}_{t}.csv", index=False)
        print(f"Created submission file for model {model_name} and transform {t}")
def blending_ensemble_data(root_dir="predictions_validation/"):
    """
    Collect per-model validation predictions for a blending ensemble.

    Reads every CSV in `root_dir`: files whose names contain "label" are
    treated as ground-truth labels, all others as model predictions. The
    prediction frames are concatenated column-wise and printed.

    Args:
        root_dir: directory containing the prediction/label CSVs. The
            default preserves the path that was hard-coded originally.

    Returns:
        pandas.DataFrame of all prediction columns, or None when the
        directory contains no prediction CSVs.
    """
    pred_csvs = []
    label_csv = None  # read for parity with the original; not used downstream yet
    for file in os.listdir(root_dir):
        # os.path.join avoids the double slash from root_dir + "/" + file.
        if "label" not in file:
            pred_csvs.append(pd.read_csv(os.path.join(root_dir, file)))
        else:
            label_csv = pd.read_csv(os.path.join(root_dir, file))

    if not pred_csvs:
        # Guard: pd.concat raises ValueError on an empty list.
        print(f"No prediction CSVs found in {root_dir}")
        return None

    all_preds = pd.concat(pred_csvs, axis=1)
    print(all_preds)
    return all_preds
if __name__ == "__main__":
    # Script entry point: assemble the blending-ensemble data from the
    # validation prediction CSVs.
    blending_ensemble_data()