DR kaggle

2026-04-10 12:33:44 +00:00 · 2021-05-30 16:24:52 +02:00
parent 9675f0d6af
commit 8136ee169f
6 changed files with 565 additions and 0 deletions
--- a/ML/Kaggles/DiabeticRetinopathy/train_blend.py
+++ b/ML/Kaggles/DiabeticRetinopathy/train_blend.py
@@ -0,0 +1,126 @@
+import torch
+from tqdm import tqdm
+import numpy as np
+from torch import nn
+from torch import optim
+from torch.utils.data import DataLoader, Dataset
+from utils import save_checkpoint, load_checkpoint, check_accuracy
+from sklearn.metrics import cohen_kappa_score
+import config
+import os
+import pandas as pd
+
+
+def make_prediction(model, loader, file):
+    preds = []
+    filenames = []
+    model.eval()
+
+    for x, y, files in tqdm(loader):
+        x = x.to(config.DEVICE)
+        with torch.no_grad():
+            predictions = model(x)
+            # Convert MSE floats to integer predictions
+            predictions[predictions < 0.5] = 0
+            predictions[(predictions >= 0.5) & (predictions < 1.5)] = 1
+            predictions[(predictions >= 1.5) & (predictions < 2.5)] = 2
+            predictions[(predictions >= 2.5) & (predictions < 3.5)] = 3
+            predictions[(predictions >= 3.5) & (predictions < 1000000000000)] = 4
+            predictions = predictions.long().view(-1)
+            y = y.view(-1)
+
+            preds.append(predictions.cpu().numpy())
+            filenames += map(list, zip(files[0], files[1]))
+
+    filenames = [item for sublist in filenames for item in sublist]
+    df = pd.DataFrame({"image": filenames, "level": np.concatenate(preds, axis=0)})
+    df.to_csv(file, index=False)
+    model.train()
+    print("Done with predictions")
+
+
+class MyDataset(Dataset):
+    def __init__(self, csv_file):
+        self.csv = pd.read_csv(csv_file)
+
+    def __len__(self):
+        return self.csv.shape[0]
+
+    def __getitem__(self, index):
+        example = self.csv.iloc[index, :]
+        features = example.iloc[: example.shape[0] - 4].to_numpy().astype(np.float32)
+        labels = example.iloc[-4:-2].to_numpy().astype(np.int64)
+        filenames = example.iloc[-2:].values.tolist()
+        return features, labels, filenames
+
+
+class MyModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.BatchNorm1d((1536 + 1) * 2),
+            nn.Linear((1536+1) * 2, 500),
+            nn.BatchNorm1d(500),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(500, 100),
+            nn.BatchNorm1d(100),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(100, 2),
+        )
+
+    def forward(self, x):
+        return self.model(x)
+
+
+if __name__ == "__main__":
+    model = MyModel().to(config.DEVICE)
+    ds = MyDataset(csv_file="train/train_blend.csv")
+    loader = DataLoader(ds, batch_size=256, num_workers=3, pin_memory=True, shuffle=True)
+    ds_val = MyDataset(csv_file="train/val_blend.csv")
+    loader_val = DataLoader(
+        ds_val, batch_size=256, num_workers=3, pin_memory=True, shuffle=True
+    )
+    ds_test = MyDataset(csv_file="train/test_blend.csv")
+    loader_test = DataLoader(
+        ds_test, batch_size=256, num_workers=2, pin_memory=True, shuffle=False
+    )
+    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
+    loss_fn = nn.MSELoss()
+
+    if config.LOAD_MODEL and "linear.pth.tar" in os.listdir():
+        load_checkpoint(torch.load("linear.pth.tar"), model, optimizer, lr=1e-4)
+        model.train()
+
+    for _ in range(5):
+        losses = []
+        for x, y, files in tqdm(loader_val):
+            x = x.to(config.DEVICE).float()
+            y = y.to(config.DEVICE).view(-1).float()
+
+            # forward
+            scores = model(x).view(-1)
+            loss = loss_fn(scores, y)
+            losses.append(loss.item())
+
+            # backward
+            optimizer.zero_grad()
+            loss.backward()
+
+            # gradient descent or adam step
+            optimizer.step()
+
+        print(f"Loss: {sum(losses)/len(losses)}")
+
+    if config.SAVE_MODEL:
+        checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
+        save_checkpoint(checkpoint, filename="linear.pth.tar")
+
+    preds, labels = check_accuracy(loader_val, model)
+    print(cohen_kappa_score(labels, preds, weights="quadratic"))
+
+    preds, labels = check_accuracy(loader, model)
+    print(cohen_kappa_score(labels, preds, weights="quadratic"))
+
+    make_prediction(model, loader_test, "test_preds.csv")