Files
Machine-Learning-Collection/ML/Kaggles/SantanderTransaction/train.py
Aladdin Persson 65b8c80495 Initial commit
2021-01-30 21:49:15 +01:00

56 lines
1.8 KiB
Python

import torch
from sklearn import metrics
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from utils import get_predictions
from dataset import get_data
from torch.utils.data import DataLoader
import torch.nn.functional as F
class NN(nn.Module):
    """Binary classifier that scores paired input columns with a shared layer.

    Each input row is batch-normalised and split into two equal halves.
    Column ``i`` of the first half is paired with column ``i`` of the second
    half, every pair is passed through the same ``Linear(2, hidden_dim)``
    layer, and the flattened hidden activations are reduced to a single
    sigmoid probability per row.

    Args:
        input_size: Total number of input columns. Must be a positive even
            number so the two halves line up pair by pair.
        hidden_dim: Width of the hidden representation computed per pair.

    Raises:
        ValueError: If ``input_size`` is not a positive even number.
    """

    def __init__(self, input_size, hidden_dim):
        super().__init__()
        if input_size <= 0 or input_size % 2 != 0:
            raise ValueError(
                f"input_size must be a positive even number, got {input_size}"
            )
        # Number of (first-half, second-half) column pairs. Stored so that
        # forward() splits at the same point fc2 was sized for, instead of
        # relying on a hard-coded 200.
        self.num_pairs = input_size // 2
        self.bn = nn.BatchNorm1d(input_size)
        self.fc1 = nn.Linear(2, hidden_dim)
        self.fc2 = nn.Linear(self.num_pairs * hidden_dim, 1)

    def forward(self, x):
        """Return one probability per row of ``x``.

        Args:
            x: Tensor of shape ``(N, input_size)``.

        Returns:
            1-D tensor of ``N`` sigmoid outputs.
        """
        batch_size = x.shape[0]
        x = self.bn(x)
        # NOTE(review): the original variable names suggest the first half
        # holds the raw features and the second half engineered ones --
        # confirm against the dataset code.
        first_half = x[:, : self.num_pairs].unsqueeze(2)  # (N, P, 1)
        second_half = x[:, self.num_pairs :].unsqueeze(2)  # (N, P, 1)
        pairs = torch.cat([first_half, second_half], dim=2)  # (N, P, 2)
        # fc1 acts on the last dimension, so the same weights score each pair.
        hidden = F.relu(self.fc1(pairs)).reshape(batch_size, -1)  # (N, P*hidden_dim)
        return torch.sigmoid(self.fc2(hidden)).view(-1)
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Build the network on the chosen device, then the optimizer and loss that
# the training loop uses. BCELoss matches the sigmoid output of NN.forward.
model = NN(input_size=400, hidden_dim=100)
model = model.to(DEVICE)
optimizer = optim.Adam(
    params=model.parameters(),
    lr=2e-3,
    weight_decay=1e-4,
)
loss_fn = nn.BCELoss()

# get_data() comes from the project's dataset module; it is unpacked into
# three datasets plus the ids that accompany the test set.
train_ds, val_ds, test_ds, test_ids = get_data()

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(dataset=train_ds, batch_size=1024, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=1024)
test_loader = DataLoader(dataset=test_ds, batch_size=1024)
# Train for a fixed number of epochs. Validation ROC AUC is reported at the
# START of each epoch, so the first value describes the untrained model and
# the weights produced by the final epoch are not scored by this loop.
for epoch in range(20):
    probabilities, true = get_predictions(val_loader, model, device=DEVICE)
    print(f"VALIDATION ROC: {metrics.roc_auc_score(true, probabilities)}")

    # NOTE(review): get_predictions is defined in utils and is not visible
    # here. If it switches the model to eval mode without restoring it,
    # BatchNorm would stop updating its statistics during training. Calling
    # train() is idempotent, so set the mode explicitly rather than rely on
    # the helper.
    model.train()

    for data, targets in train_loader:
        data = data.to(DEVICE)
        targets = targets.to(DEVICE)

        # forward
        scores = model(data)
        loss = loss_fn(scores, targets)

        # backward: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Final step of the script: hand the trained model and the test data to the
# project's submission helper.
# NOTE(review): get_submission lives in utils and is not visible here;
# presumably it runs the model over test_loader and writes predictions keyed
# by test_ids -- confirm against utils. The import sits here, after training,
# rather than with the other imports at the top of the file.
from utils import get_submission
get_submission(model, test_loader, test_ids, DEVICE)