# Training script for the Kaggle Facial Keypoint Detection competition.
# Origin: Machine-Learning-Collection/ML/Kaggles/Facial Keypoint Detection Competition/train.py

import torch
from dataset import FacialKeypointDataset
from torch import nn, optim
import os
import config
from torch.utils.data import DataLoader
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet
from utils import (
load_checkpoint,
save_checkpoint,
get_rmse,
get_submission
)
def train_one_epoch(loader, model, optimizer, loss_fn, scaler, device):
    """Run one optimisation pass over ``loader`` and print the epoch metric.

    Targets equal to ``-1`` are treated as a "missing label" sentinel: the
    matching predictions are overwritten with ``-1`` before the loss is
    computed, so with an element-wise loss such as a summed squared error
    those positions contribute no error.

    Args:
        loader: Iterable yielding ``(data, targets)`` batches.
        model: Network called as ``model(data)`` to produce predictions.
        optimizer: Optimizer; zeroed and stepped once per batch.
        loss_fn: Callable ``loss_fn(scores, targets)`` returning a scalar
            tensor. The printed value is a root-mean-square error only when
            this is a *summed* (not averaged) squared error.
        scaler: Unused by this function; accepted so existing call sites
            keep working.
        device: Device each batch is moved to before the forward pass.

    Returns:
        None. The metric is printed to stdout.

    Note:
        This function does not switch the model between train/eval mode;
        the caller is responsible for that.
    """
    total_loss = 0.0
    num_examples = 0

    for data, targets in tqdm(loader):
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        # Copy the sentinel into the predictions so masked positions have
        # zero difference from their target.
        scores[targets == -1] = -1
        loss = loss_fn(scores, targets)

        # Count only the entries that carry a real label.
        num_examples += torch.numel(scores[targets != -1])
        total_loss += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # An empty loader, or one whose targets are all -1, leaves nothing to
    # average over; report NaN instead of raising ZeroDivisionError.
    if num_examples:
        epoch_metric = (total_loss / num_examples) ** 0.5
    else:
        epoch_metric = float("nan")
    print(f"Loss average over epoch: {epoch_metric}")
def _build_keypoint_model():
    """Return a pretrained EfficientNet-b0 with a 30-output head on config.DEVICE."""
    net = EfficientNet.from_pretrained("efficientnet-b0")
    net._fc = nn.Linear(1280, 30)
    return net.to(config.DEVICE)


def main():
    """Build loaders and models, write a submission, then train.

    Steps, in order:
      1. Create the train / validation / test datasets and their loaders.
      2. Create the model to be trained, plus two further EfficientNet-b0
         models (``model_4`` and ``model_15``) that are only handed to
         ``get_submission``.
      3. If ``config.LOAD_MODEL`` is set and ``config.CHECKPOINT_FILE``
         exists, restore all three models from their checkpoints.
      4. Call ``get_submission`` on the test set.
      5. For ``config.NUM_EPOCHS`` epochs: evaluate on the validation set,
         train for one epoch, and save a checkpoint when
         ``config.SAVE_MODEL`` is set.
    """
    train_ds = FacialKeypointDataset(
        csv_file="data/train_4.csv",
        transform=config.train_transforms,
    )
    train_loader = DataLoader(
        train_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=True,
    )

    val_ds = FacialKeypointDataset(
        transform=config.val_transforms,
        csv_file="data/val_4.csv",
    )
    val_loader = DataLoader(
        val_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
    )

    test_ds = FacialKeypointDataset(
        csv_file="data/test.csv",
        transform=config.val_transforms,
        train=False,
    )
    test_loader = DataLoader(
        test_ds,
        batch_size=1,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
    )

    # Summed (not averaged) squared error: train_one_epoch divides the
    # accumulated loss by the number of labelled entries itself.
    loss_fn = nn.MSELoss(reduction="sum")

    model = _build_keypoint_model()
    optimizer = optim.Adam(
        model.parameters(),
        lr=config.LEARNING_RATE,
        weight_decay=config.WEIGHT_DECAY,
    )
    # Passed through to train_one_epoch, which currently does not use it.
    scaler = torch.cuda.amp.GradScaler()

    # Extra models used only for the submission.
    model_4 = _build_keypoint_model()
    model_15 = _build_keypoint_model()

    # os.path.isfile also accepts a checkpoint path that contains a
    # directory component, which a bare-name lookup in os.listdir() of the
    # working directory can never match.
    if config.LOAD_MODEL and os.path.isfile(config.CHECKPOINT_FILE):
        # NOTE(review): all three calls receive the training optimizer. If
        # load_checkpoint restores optimizer state, the last checkpoint
        # loaded wins -- confirm this is intended.
        checkpoints = (
            (config.CHECKPOINT_FILE, model),
            ("b0_4.pth.tar", model_4),
            ("b0_15.pth.tar", model_15),
        )
        for path, net in checkpoints:
            # map_location lets a checkpoint saved on a GPU be loaded on a
            # host where that device is unavailable (e.g. CPU-only).
            state = torch.load(path, map_location=config.DEVICE)
            load_checkpoint(state, net, optimizer, config.LEARNING_RATE)

    get_submission(test_loader, test_ds, model_15, model_4)

    for _ in range(config.NUM_EPOCHS):
        # Validation metric is computed before each training epoch.
        get_rmse(val_loader, model, loss_fn, config.DEVICE)
        train_one_epoch(train_loader, model, optimizer, loss_fn, scaler, config.DEVICE)

        if config.SAVE_MODEL:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename=config.CHECKPOINT_FILE)
if __name__ == "__main__":
    # Run the pipeline only when this file is executed as a script, not
    # when it is imported as a module.
    main()