mirror of
https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-04-10 12:33:44 +00:00
DR kaggle
This commit is contained in:
126
ML/Kaggles/DiabeticRetinopathy/train_blend.py
Normal file
126
ML/Kaggles/DiabeticRetinopathy/train_blend.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
from torch import nn
|
||||
from torch import optim
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
from utils import save_checkpoint, load_checkpoint, check_accuracy
|
||||
from sklearn.metrics import cohen_kappa_score
|
||||
import config
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def make_prediction(model, loader, file):
|
||||
preds = []
|
||||
filenames = []
|
||||
model.eval()
|
||||
|
||||
for x, y, files in tqdm(loader):
|
||||
x = x.to(config.DEVICE)
|
||||
with torch.no_grad():
|
||||
predictions = model(x)
|
||||
# Convert MSE floats to integer predictions
|
||||
predictions[predictions < 0.5] = 0
|
||||
predictions[(predictions >= 0.5) & (predictions < 1.5)] = 1
|
||||
predictions[(predictions >= 1.5) & (predictions < 2.5)] = 2
|
||||
predictions[(predictions >= 2.5) & (predictions < 3.5)] = 3
|
||||
predictions[(predictions >= 3.5) & (predictions < 1000000000000)] = 4
|
||||
predictions = predictions.long().view(-1)
|
||||
y = y.view(-1)
|
||||
|
||||
preds.append(predictions.cpu().numpy())
|
||||
filenames += map(list, zip(files[0], files[1]))
|
||||
|
||||
filenames = [item for sublist in filenames for item in sublist]
|
||||
df = pd.DataFrame({"image": filenames, "level": np.concatenate(preds, axis=0)})
|
||||
df.to_csv(file, index=False)
|
||||
model.train()
|
||||
print("Done with predictions")
|
||||
|
||||
|
||||
class MyDataset(Dataset):
|
||||
def __init__(self, csv_file):
|
||||
self.csv = pd.read_csv(csv_file)
|
||||
|
||||
def __len__(self):
|
||||
return self.csv.shape[0]
|
||||
|
||||
def __getitem__(self, index):
|
||||
example = self.csv.iloc[index, :]
|
||||
features = example.iloc[: example.shape[0] - 4].to_numpy().astype(np.float32)
|
||||
labels = example.iloc[-4:-2].to_numpy().astype(np.int64)
|
||||
filenames = example.iloc[-2:].values.tolist()
|
||||
return features, labels, filenames
|
||||
|
||||
|
||||
class MyModel(nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = nn.Sequential(
|
||||
nn.BatchNorm1d((1536 + 1) * 2),
|
||||
nn.Linear((1536+1) * 2, 500),
|
||||
nn.BatchNorm1d(500),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.2),
|
||||
nn.Linear(500, 100),
|
||||
nn.BatchNorm1d(100),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.2),
|
||||
nn.Linear(100, 2),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.model(x)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model = MyModel().to(config.DEVICE)
|
||||
ds = MyDataset(csv_file="train/train_blend.csv")
|
||||
loader = DataLoader(ds, batch_size=256, num_workers=3, pin_memory=True, shuffle=True)
|
||||
ds_val = MyDataset(csv_file="train/val_blend.csv")
|
||||
loader_val = DataLoader(
|
||||
ds_val, batch_size=256, num_workers=3, pin_memory=True, shuffle=True
|
||||
)
|
||||
ds_test = MyDataset(csv_file="train/test_blend.csv")
|
||||
loader_test = DataLoader(
|
||||
ds_test, batch_size=256, num_workers=2, pin_memory=True, shuffle=False
|
||||
)
|
||||
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
|
||||
loss_fn = nn.MSELoss()
|
||||
|
||||
if config.LOAD_MODEL and "linear.pth.tar" in os.listdir():
|
||||
load_checkpoint(torch.load("linear.pth.tar"), model, optimizer, lr=1e-4)
|
||||
model.train()
|
||||
|
||||
for _ in range(5):
|
||||
losses = []
|
||||
for x, y, files in tqdm(loader_val):
|
||||
x = x.to(config.DEVICE).float()
|
||||
y = y.to(config.DEVICE).view(-1).float()
|
||||
|
||||
# forward
|
||||
scores = model(x).view(-1)
|
||||
loss = loss_fn(scores, y)
|
||||
losses.append(loss.item())
|
||||
|
||||
# backward
|
||||
optimizer.zero_grad()
|
||||
loss.backward()
|
||||
|
||||
# gradient descent or adam step
|
||||
optimizer.step()
|
||||
|
||||
print(f"Loss: {sum(losses)/len(losses)}")
|
||||
|
||||
if config.SAVE_MODEL:
|
||||
checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
|
||||
save_checkpoint(checkpoint, filename="linear.pth.tar")
|
||||
|
||||
preds, labels = check_accuracy(loader_val, model)
|
||||
print(cohen_kappa_score(labels, preds, weights="quadratic"))
|
||||
|
||||
preds, labels = check_accuracy(loader, model)
|
||||
print(cohen_kappa_score(labels, preds, weights="quadratic"))
|
||||
|
||||
make_prediction(model, loader_test, "test_preds.csv")
|
||||
Reference in New Issue
Block a user