Files
2021-06-05 13:26:29 +02:00

112 lines
3.4 KiB
Python

import torch
from dataset import FacialKeypointDataset
from torch import nn, optim
import os
import config
from torch.utils.data import DataLoader
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet
from utils import (
load_checkpoint,
save_checkpoint,
get_rmse,
get_submission
)
def train_one_epoch(loader, model, optimizer, loss_fn, scaler, device):
    """Run one optimisation pass over ``loader`` and print the epoch RMSE.

    Targets equal to -1 are treated as missing: the matching predictions are
    overwritten with -1 before the loss is computed, so an element-wise loss
    such as squared error sees no difference at those positions. Missing
    positions are also left out of the element count used for the average.

    Args:
        loader: Iterable yielding ``(data, targets)`` batches.
        model: Callable mapping a data batch to predictions that can be
            indexed with a boolean mask built from ``targets``.
        optimizer: Optimizer that updates the model's parameters.
        loss_fn: Loss callable. The printed figure is a root-mean-square
            error only when this returns a *summed* squared error.
        scaler: Accepted for interface compatibility; not used here.
        device: Device the batches are moved to before the forward pass.

    Returns:
        None. The epoch figure is printed; ``nan`` is printed when no valid
        (non -1) target was seen, instead of raising ``ZeroDivisionError``.
    """
    batch_losses = []
    num_examples = 0

    for data, targets in tqdm(loader):
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        missing = targets == -1
        # Force predictions to equal the placeholder where the label is
        # missing so those positions do not drive the gradient.
        scores[missing] = -1
        loss = loss_fn(scores, targets)

        # Count only the positions that carry a real label.
        num_examples += torch.numel(scores[~missing])
        batch_losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # An empty loader (or one whose targets are all -1) leaves the count at
    # zero; report nan rather than crashing at the end of the epoch.
    if num_examples:
        epoch_rmse = (sum(batch_losses) / num_examples) ** 0.5
    else:
        epoch_rmse = float("nan")
    print(f"Loss average over epoch: {epoch_rmse}")
def _make_loader(dataset, batch_size, shuffle):
    """Wrap ``dataset`` in a DataLoader using the worker/pinning settings from config."""
    return DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=shuffle,
    )


def _build_model():
    """Return a pretrained EfficientNet-b0 with a 30-output linear head, on config.DEVICE."""
    net = EfficientNet.from_pretrained("efficientnet-b0")
    net._fc = nn.Linear(1280, 30)
    return net.to(config.DEVICE)


def _make_optimizer(net):
    """Return an Adam optimizer over ``net`` using the hyper-parameters from config."""
    return optim.Adam(
        net.parameters(),
        lr=config.LEARNING_RATE,
        weight_decay=config.WEIGHT_DECAY,
    )


def _restore(path, net, optimizer):
    """Load the checkpoint at ``path`` and hand it to ``load_checkpoint``."""
    # map_location lets a checkpoint written on a GPU be read on whatever
    # device this run is configured for (e.g. a CPU-only machine).
    checkpoint = torch.load(path, map_location=config.DEVICE)
    load_checkpoint(checkpoint, net, optimizer, config.LEARNING_RATE)


def main():
    """Build the data loaders and models, write a submission, then train.

    Steps, in order:
      1. Create the train / validation / test datasets and their loaders.
      2. Create the EfficientNet-b0 model that is trained, plus two more
         EfficientNet-b0 models (``model_4`` and ``model_15``) that are only
         passed to ``get_submission``.
      3. If ``config.LOAD_MODEL`` is set and ``config.CHECKPOINT_FILE``
         exists, restore all three models from their checkpoint files.
      4. Call ``get_submission`` on the test set.
      5. For each of ``config.NUM_EPOCHS`` epochs: report validation RMSE,
         train for one epoch and, if ``config.SAVE_MODEL`` is set, save a
         checkpoint of the trained model and its optimizer.
    """
    train_ds = FacialKeypointDataset(
        csv_file="data/train_4.csv",
        transform=config.train_transforms,
    )
    train_loader = _make_loader(train_ds, config.BATCH_SIZE, shuffle=True)

    val_ds = FacialKeypointDataset(
        transform=config.val_transforms,
        csv_file="data/val_4.csv",
    )
    val_loader = _make_loader(val_ds, config.BATCH_SIZE, shuffle=False)

    test_ds = FacialKeypointDataset(
        csv_file="data/test.csv",
        transform=config.val_transforms,
        train=False,
    )
    # The test set is iterated one sample at a time.
    test_loader = _make_loader(test_ds, 1, shuffle=False)

    loss_fn = nn.MSELoss(reduction="sum")
    model = _build_model()
    optimizer = _make_optimizer(model)
    # Created and forwarded to train_one_epoch to keep its call signature.
    scaler = torch.cuda.amp.GradScaler()

    # Extra models used only to produce the submission file.
    model_4 = _build_model()
    model_15 = _build_model()

    # os.path.isfile also works when CHECKPOINT_FILE contains a directory
    # component, which a membership test against os.listdir() never matches.
    if config.LOAD_MODEL and os.path.isfile(config.CHECKPOINT_FILE):
        _restore(config.CHECKPOINT_FILE, model, optimizer)
        # load_checkpoint is handed an optimizer; give the submission-only
        # models their own so that whatever optimizer state it restores
        # cannot overwrite the state of the optimizer used for training.
        for path, net in (("b0_4.pth.tar", model_4), ("b0_15.pth.tar", model_15)):
            _restore(path, net, _make_optimizer(net))

    get_submission(test_loader, test_ds, model_15, model_4)

    for _ in range(config.NUM_EPOCHS):
        # Validation is reported before each training pass.
        get_rmse(val_loader, model, loss_fn, config.DEVICE)
        train_one_epoch(train_loader, model, optimizer, loss_fn, scaler, config.DEVICE)

        if config.SAVE_MODEL:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename=config.CHECKPOINT_FILE)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()