Initial commit
ML/Pytorch/Basics/albumentations_tutorial/classification.py (new file, 31 lines)

import cv2
import albumentations as A
import numpy as np
from utils import plot_examples
from PIL import Image

image = Image.open("images/elon.jpeg")

transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        A.OneOf([
            A.Blur(blur_limit=3, p=0.5),
            A.ColorJitter(p=0.5),
        ], p=1.0),
    ]
)

images_list = [image]
image = np.array(image)  # Albumentations expects numpy arrays, not PIL images
for i in range(15):
    augmentations = transform(image=image)
    augmented_img = augmentations["image"]
    images_list.append(augmented_img)
plot_examples(images_list)
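The augmentations above are random on every run; Albumentations draws mainly from Python's built-in random module (and NumPy's RNG for some transforms), so the 15 samples differ each time. A minimal sketch for reproducible output, assuming determinism is wanted:

import random

random.seed(42)     # Albumentations samples mainly from Python's random module
np.random.seed(42)  # some transforms also draw from NumPy's RNG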
ML/Pytorch/Basics/albumentations_tutorial/detection.py (new file, 41 lines)

import cv2
import albumentations as A
import numpy as np
from utils import plot_examples
from PIL import Image

image = cv2.imread("images/cat.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
bboxes = [[13, 170, 224, 410]]

# Supported box formats include pascal_voc (x_min, y_min, x_max, y_max), YOLO, COCO

transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        A.OneOf([
            A.Blur(blur_limit=3, p=0.5),
            A.ColorJitter(p=0.5),
        ], p=1.0),
    ],
    bbox_params=A.BboxParams(
        format="pascal_voc", min_area=2048, min_visibility=0.3, label_fields=[]
    ),
)

images_list = [image]
saved_bboxes = [bboxes[0]]
for i in range(15):
    augmentations = transform(image=image, bboxes=bboxes)
    augmented_img = augmentations["image"]

    # Skip samples where cropping/rotation removed the box entirely
    if len(augmentations["bboxes"]) == 0:
        continue

    images_list.append(augmented_img)
    saved_bboxes.append(augmentations["bboxes"][0])

plot_examples(images_list, saved_bboxes)
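The empty label_fields=[] works here because the single box carries no class label. When boxes do have labels, naming a label field keeps the labels aligned with whichever boxes survive cropping and the visibility filter. A minimal sketch (the class_labels name and the "cat" label are illustrative, not part of the original file):

transform = A.Compose(
    [A.HorizontalFlip(p=0.5)],
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["class_labels"]),
)
out = transform(image=image, bboxes=bboxes, class_labels=["cat"])
print(out["bboxes"], out["class_labels"])  # labels stay paired with surviving boxes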
New file, 62 lines (the file path is not shown in this diff; judging by its contents, it is the Albumentations-with-PyTorch example from the same tutorial folder)

import numpy as np
import cv2
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset
import os


class ImageFolder(Dataset):
    def __init__(self, root_dir, transform=None):
        super(ImageFolder, self).__init__()
        self.data = []
        self.root_dir = root_dir
        self.transform = transform
        self.class_names = os.listdir(root_dir)

        # One subdirectory per class; the directory index becomes the label
        for index, name in enumerate(self.class_names):
            files = os.listdir(os.path.join(root_dir, name))
            self.data += list(zip(files, [index] * len(files)))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        img_file, label = self.data[index]
        root_and_dir = os.path.join(self.root_dir, self.class_names[label])
        image = np.array(Image.open(os.path.join(root_and_dir, img_file)))

        if self.transform is not None:
            augmentations = self.transform(image=image)
            image = augmentations["image"]

        return image, label


transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        A.OneOf([
            A.Blur(blur_limit=3, p=0.5),
            A.ColorJitter(p=0.5),
        ], p=1.0),
        A.Normalize(
            mean=[0, 0, 0],
            std=[1, 1, 1],
            max_pixel_value=255,
        ),
        ToTensorV2(),
    ]
)

dataset = ImageFolder(root_dir="cat_dogs", transform=transform)

for x, y in dataset:
    print(x.shape)
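Iterating the Dataset directly yields one (image, label) pair at a time; for training you would normally wrap it in a DataLoader for batching and shuffling. A minimal sketch (batch size and worker count are arbitrary):

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)
for x, y in loader:
    print(x.shape)  # (32, 3, 720, 1280): RandomCrop size, channels-first after ToTensorV2
    break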
Binary files added:
ML/Pytorch/Basics/albumentations_tutorial/images/cat.jpg (81 KiB)
ML/Pytorch/Basics/albumentations_tutorial/images/elon.jpeg (274 KiB)
ML/Pytorch/Basics/albumentations_tutorial/images/mask.jpeg (77 KiB)
plus one 29 KiB image whose path is not shown in this diff (presumably images/second_mask.jpeg, which segmentation.py below expects)
ML/Pytorch/Basics/albumentations_tutorial/segmentation.py (new file, 37 lines)

import cv2
import albumentations as A
import numpy as np
from utils import plot_examples
from PIL import Image

image = Image.open("images/elon.jpeg")
mask = Image.open("images/mask.jpeg")
mask2 = Image.open("images/second_mask.jpeg")

transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        A.OneOf([
            A.Blur(blur_limit=3, p=0.5),
            A.ColorJitter(p=0.5),
        ], p=1.0),
    ]
)

images_list = [image]
image = np.array(image)
mask = np.array(mask)  # np.asarray would work equally well here
mask2 = np.array(mask2)
for i in range(4):
    augmentations = transform(image=image, masks=[mask, mask2])
    augmented_img = augmentations["image"]
    augmented_masks = augmentations["masks"]
    images_list.append(augmented_img)
    images_list.append(augmented_masks[0])
    images_list.append(augmented_masks[1])
plot_examples(images_list)
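Design note: when masks are passed through the masks argument, Albumentations applies only the spatial transforms (Resize, RandomCrop, Rotate, the flips) to them, using nearest-neighbor style interpolation so discrete label values are not blended; pixel-level transforms such as RGBShift, Blur, and ColorJitter touch the image alone. That is why the image and both masks stay aligned across all four iterations. A quick sanity check, as a sketch:

print(augmented_img.shape[:2], augmented_masks[0].shape[:2])  # both (720, 1280)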
ML/Pytorch/Basics/albumentations_tutorial/utils.py (new file, 36 lines)

import cv2
from matplotlib import pyplot as plt


def visualize(image):
    plt.figure(figsize=(10, 10))
    plt.axis("off")
    plt.imshow(image)
    plt.show()


def plot_examples(images, bboxes=None):
    fig = plt.figure(figsize=(15, 15))
    columns = 4
    rows = 5

    for i in range(1, len(images)):
        if bboxes is not None:
            img = visualize_bbox(images[i - 1], bboxes[i - 1], class_name="Elon")
        else:
            img = images[i - 1]
        fig.add_subplot(rows, columns, i)
        plt.imshow(img)
    plt.show()


# From https://albumentations.ai/docs/examples/example_bboxes/
def visualize_bbox(img, bbox, class_name, color=(255, 0, 0), thickness=5):
    """Visualizes a single bounding box on the image"""
    x_min, y_min, x_max, y_max = map(int, bbox)
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, thickness)
    return img
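One quirk in plot_examples: the loop runs i from 1 to len(images) - 1 and draws images[i - 1], so the last image in the list is never shown. If plotting every image is intended, a one-line sketch of the fix:

    for i in range(1, len(images) + 1):  # upper bound raised by one to include the final image

Note also that visualize_bbox draws on the array in place via cv2.rectangle; pass images[i - 1].copy() if the originals must stay untouched.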
ML/Pytorch/Basics/custom_dataset/cats_dogs.csv (new file, 11 lines)

Animal,Label
cat.0.jpg,0
cat.1.jpg,0
cat.2.jpg,0
cat.3.jpg,0
cat.4.jpg,0
cat.5.jpg,0
cat.6.jpg,0
cat.7.jpg,0
dog.0.jpg,1
dog.1.jpg,1
Binary files added:
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.0.jpg (13 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.1.jpg (26 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.2.jpg (29 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.3.jpg (28 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.4.jpg (20 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.5.jpg (20 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.6.jpg (27 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.7.jpg (27 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/dog.0.jpg (24 KiB)
ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/dog.1.jpg (15 KiB)
ML/Pytorch/Basics/custom_dataset/custom_FCNN.py (new file, 131 lines)

# Imports
import pandas as pd
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.nn.functional as F  # All functions that don't have any parameters
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from torch.utils.data import (
    Dataset,
    DataLoader,
)  # Gives easier dataset management and creates mini batches


# Create Fully Connected Network
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


class SoloDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir  # stored but unused: this dataset reads CSV rows, not image files
        self.transform = transform  # likewise unused below

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        x_data = self.annotations.iloc[index, 0:11]
        x_data = torch.tensor(x_data)
        y_label = torch.tensor(int(self.annotations.iloc[index, 11]))

        return (x_data.float(), y_label)


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_classes = 26
learning_rate = 1e-3
batch_size = 5
num_epochs = 30
input_size = 11

# Load Data
dataset = SoloDataset(
    csv_file="power.csv", root_dir="test123", transform=transforms.ToTensor()
)
train_set, test_set = torch.utils.data.random_split(dataset, [2900, 57])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# Model
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(len(train_set))
print(len(test_set))

# Train Network
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses) / len(losses)}")


# Check accuracy on training to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
    )

    model.train()


print("Checking accuracy on Training Set")
check_accuracy(train_loader, model)

print("Checking accuracy on Test Set")
check_accuracy(test_loader, model)
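The split sizes [2900, 57] are hardcoded to match the 2,957 data rows in power.csv; deriving them from the dataset length is more robust if the CSV ever changes. A small sketch (the 98/2 ratio is arbitrary):

train_size = int(0.98 * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])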
ML/Pytorch/Basics/custom_dataset/custom_dataset.py (new file, 130 lines)

"""
Example of how to create a custom dataset in PyTorch. In this case
we have images of cats and dogs in a separate folder and a csv
file containing the name of the jpg file as well as the target
label (0 for cat, 1 for dog).

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-04-03 Initial coding

"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
import torchvision
import os
import pandas as pd
from skimage import io
from torch.utils.data import (
    Dataset,
    DataLoader,
)  # Gives easier dataset management and creates mini batches


class CatsAndDogsDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        if self.transform:
            image = self.transform(image)

        return (image, y_label)


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
in_channel = 3
num_classes = 2
learning_rate = 1e-3
batch_size = 32
num_epochs = 10

# Load Data
dataset = CatsAndDogsDataset(
    csv_file="cats_dogs.csv",
    root_dir="cats_dogs_resized",
    transform=transforms.ToTensor(),
)

# The full dataset is actually much larger (~25k images); only 10 pictures
# are included on GitHub. That is enough to understand the structure, and
# the same code scales directly once you have more images.
train_set, test_set = torch.utils.data.random_split(dataset, [5, 5])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# Model
model = torchvision.models.googlenet(pretrained=True)
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")


# Check accuracy on training to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
    )

    model.train()


print("Checking accuracy on Training Set")
check_accuracy(train_loader, model)

print("Checking accuracy on Test Set")
check_accuracy(test_loader, model)
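One thing worth flagging: torchvision's pretrained GoogLeNet keeps its 1000-way ImageNet head here, while the dataset has two classes; CrossEntropyLoss still runs, but most logits can never be the target. A sketch of swapping in a matching head (this is an addition, not part of the original script):

model.fc = nn.Linear(model.fc.in_features, num_classes)  # 2-way head
model.to(device)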
ML/Pytorch/Basics/custom_dataset/power.csv (new file, 2,958 lines; contents not shown)
ML/Pytorch/Basics/custom_dataset_txt/loader_customtext.py (new file, 142 lines)

import os  # when loading file paths
import pandas as pd  # for lookup in annotation file
import spacy  # for tokenizer
import torch
from torch.nn.utils.rnn import pad_sequence  # pad batch
from torch.utils.data import DataLoader, Dataset
from PIL import Image  # Load img
import torchvision.transforms as transforms


# We want to convert text -> numerical values:
# 1. We need a Vocabulary mapping each word to an index
# 2. We need to set up a PyTorch dataset to load the data
# 3. Set up padding of every batch (all examples in a batch should have
#    the same seq_len) and set up the dataloader
# Note that loading the image is very easy compared to the text!

# Download with: python -m spacy download en
spacy_eng = spacy.load("en")


class Vocabulary:
    def __init__(self, freq_threshold):
        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.freq_threshold = freq_threshold

    def __len__(self):
        return len(self.itos)

    @staticmethod
    def tokenizer_eng(text):
        return [tok.text.lower() for tok in spacy_eng.tokenizer(text)]

    def build_vocabulary(self, sentence_list):
        frequencies = {}
        idx = 4

        for sentence in sentence_list:
            for word in self.tokenizer_eng(sentence):
                if word not in frequencies:
                    frequencies[word] = 1
                else:
                    frequencies[word] += 1

                # Add the word exactly once, when it reaches the threshold
                if frequencies[word] == self.freq_threshold:
                    self.stoi[word] = idx
                    self.itos[idx] = word
                    idx += 1

    def numericalize(self, text):
        tokenized_text = self.tokenizer_eng(text)

        return [
            self.stoi[token] if token in self.stoi else self.stoi["<UNK>"]
            for token in tokenized_text
        ]


class FlickrDataset(Dataset):
    def __init__(self, root_dir, captions_file, transform=None, freq_threshold=5):
        self.root_dir = root_dir
        self.df = pd.read_csv(captions_file)
        self.transform = transform

        # Get img, caption columns
        self.imgs = self.df["image"]
        self.captions = self.df["caption"]

        # Initialize vocabulary and build vocab
        self.vocab = Vocabulary(freq_threshold)
        self.vocab.build_vocabulary(self.captions.tolist())

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        caption = self.captions[index]
        img_id = self.imgs[index]
        img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        numericalized_caption = [self.vocab.stoi["<SOS>"]]
        numericalized_caption += self.vocab.numericalize(caption)
        numericalized_caption.append(self.vocab.stoi["<EOS>"])

        return img, torch.tensor(numericalized_caption)


class MyCollate:
    def __init__(self, pad_idx):
        self.pad_idx = pad_idx

    def __call__(self, batch):
        imgs = [item[0].unsqueeze(0) for item in batch]
        imgs = torch.cat(imgs, dim=0)
        targets = [item[1] for item in batch]
        targets = pad_sequence(targets, batch_first=False, padding_value=self.pad_idx)

        return imgs, targets


def get_loader(
    root_folder,
    annotation_file,
    transform,
    batch_size=32,
    num_workers=8,
    shuffle=True,
    pin_memory=True,
):
    dataset = FlickrDataset(root_folder, annotation_file, transform=transform)

    pad_idx = dataset.vocab.stoi["<PAD>"]

    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        pin_memory=pin_memory,
        collate_fn=MyCollate(pad_idx=pad_idx),
    )

    return loader, dataset


if __name__ == "__main__":
    transform = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor()]
    )

    loader, dataset = get_loader(
        "flickr8k/images/", "flickr8k/captions.txt", transform=transform
    )

    for idx, (imgs, captions) in enumerate(loader):
        print(imgs.shape)
        print(captions.shape)
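A compatibility note: the "en" shorthand was removed in spaCy 3.x, so spacy.load("en") fails on current installs. The modern equivalent:

# python -m spacy download en_core_web_sm
spacy_eng = spacy.load("en_core_web_sm")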
ML/Pytorch/Basics/pytorch_bidirectional_lstm.py (new file, 125 lines)

"""
Example code of a simple bidirectional LSTM on the MNIST dataset.

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-05-09 Initial coding

"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
input_size = 28
sequence_length = 28
num_layers = 2
hidden_size = 256
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 2


# Create a bidirectional LSTM
class BRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        # Times 2 because the forward and backward hidden states are concatenated
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])

        return out


# Load Data
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)

test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Initialize network
model = BRNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible; squeeze the channel dim so each
        # image becomes a (28, 28) sequence of 28 rows
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).squeeze(1)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with accuracy "
        f"{float(num_correct)/float(num_samples)*100:.2f}"
    )

    model.train()


check_accuracy(train_loader, model)
check_accuracy(test_loader, model)
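A subtlety worth knowing: out[:, -1, :] concatenates the forward direction's output at the last time step with the backward direction's output at that same position, and the backward pass has only seen one row of the image by then. A common alternative, sketched below, reads the final hidden state of each direction from h_n instead:

# Inside BRNN.forward (sketch):
out, (h_n, _) = self.lstm(x, (h0, c0))
h_fwd = h_n[-2]  # last layer, forward direction: (batch, hidden_size)
h_bwd = h_n[-1]  # last layer, backward direction: (batch, hidden_size)
out = self.fc(torch.cat([h_fwd, h_bwd], dim=1))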
ML/Pytorch/Basics/pytorch_init_weights.py (new file, 69 lines)

"""
Example code of how to initialize weights for a simple CNN network.

Video explanation: https://youtu.be/xWQ-p_o0Uik
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-04-10 Initial coding

"""

# Imports
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.nn.functional as F  # All functions that don't have any parameters


class CNN(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=6,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=6,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)
        self.initialize_weights()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x

    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight)

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                # This network has no BatchNorm layers, but the branch shows
                # how they would be initialized if added
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)


if __name__ == "__main__":
    model = CNN(in_channels=3, num_classes=10)

    for param in model.parameters():
        print(param)
ML/Pytorch/Basics/pytorch_loadsave.py (new file, 54 lines)

"""
Small code example of how to save and load a checkpoint of a model.
This example doesn't perform any training, so on its own it isn't very useful.
In practice you would save the model as you train, and then load it before
continuing training at a later point.

Video explanation of the code & how to save and load a model: https://youtu.be/g6kQl_EFn84
Got any questions? Leave a comment on YouTube :)

Coded by Aladdin Persson <aladdin dot person at hotmail dot com>
-   2020-04-07 Initial programming

"""

# Imports
import torch
import torchvision
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])


def main():
    # Initialize network
    model = torchvision.models.vgg16(pretrained=False)
    optimizer = optim.Adam(model.parameters())

    checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
    # Try saving a checkpoint
    save_checkpoint(checkpoint)

    # Try loading the checkpoint
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)


if __name__ == "__main__":
    main()
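A common extension is to record how far training got so a run can resume. A sketch, where the epoch value is hypothetical and would come from a real training loop:

checkpoint = {
    "state_dict": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "epoch": epoch,  # hypothetical counter from the training loop
}
save_checkpoint(checkpoint)

loaded = torch.load("my_checkpoint.pth.tar")
load_checkpoint(loaded, model, optimizer)
start_epoch = loaded["epoch"] + 1  # continue from the next epoch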
ML/Pytorch/Basics/pytorch_lr_ratescheduler.py (new file, 107 lines)

"""
Example code of how to use a learning rate scheduler, in this case with a
(very) small and simple feedforward network training on the MNIST dataset.
Here the ReduceLROnPlateau scheduler is used, but it can easily be swapped
for any of the other available schedulers.

Video explanation: https://youtu.be/P31hB37g4Ak
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-04-10 Initial programming

"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_classes = 10
learning_rate = 0.1
batch_size = 128
num_epochs = 100

# Define a very simple model
model = nn.Sequential(nn.Linear(784, 50), nn.ReLU(), nn.Linear(50, 10)).to(device)

# Load Data
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Define Scheduler: cut the LR by factor after `patience` epochs without improvement
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.1, patience=5, verbose=True
)

# Train Network
for epoch in range(1, num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.reshape(data.shape[0], -1)
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward
        loss.backward()

        # gradient descent or adam step
        # (scheduler.step(loss) here would instead step the scheduler per batch)
        optimizer.step()
        optimizer.zero_grad()

    mean_loss = sum(losses) / len(losses)

    # After each epoch do scheduler.step; note that this particular scheduler
    # needs to be sent the loss for that epoch!
    scheduler.step(mean_loss)
    print(f"Cost at epoch {epoch} is {mean_loss}")


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            x = x.reshape(x.shape[0], -1)  # flatten to (batch, 784) for the linear layers

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
    )

    model.train()


check_accuracy(train_loader, model)
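ReduceLROnPlateau mutates the optimizer's learning rate in place, so the easiest way to confirm it fired is to read the param groups. A small sketch:

current_lrs = [group["lr"] for group in optimizer.param_groups]
print(f"Current learning rate(s): {current_lrs}")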
ML/Pytorch/Basics/pytorch_mixed_precision_example.py (new file, 99 lines)

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import DataLoader  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset


# Simple CNN (the unusually wide channel counts presumably make the FP16
# memory/speed difference easier to observe)
class CNN(nn.Module):
    def __init__(self, in_channels=1, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=420, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=420, out_channels=1000, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.fc1 = nn.Linear(1000 * 7 * 7, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x


# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
in_channel = 1
num_classes = 10
learning_rate = 0.001
batch_size = 100
num_epochs = 5

# Load Data
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Initialize network
model = CNN().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Necessary for FP16: the scaler rescales the loss to avoid gradient underflow
scaler = torch.cuda.amp.GradScaler()

# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward: run the forward pass in mixed precision
        with torch.cuda.amp.autocast():
            scores = model(data)
            loss = criterion(scores, targets)

        # backward: scale the loss, then step through the scaler
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}')

    model.train()


check_accuracy(train_loader, model)
check_accuracy(test_loader, model)
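If you later add gradient clipping to this loop, the gradients must be unscaled first or the clip threshold would be applied to scaled values. The documented GradScaler pattern, as a sketch:

optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)  # gradients now in their true (unscaled) range
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
scaler.step(optimizer)
scaler.update()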
ML/Pytorch/Basics/pytorch_pretrain_finetune.py (new file, 123 lines)

"""
Shows a small example of how to load a pretrained model (VGG16) from PyTorch,
and modify it to train on the CIFAR10 dataset. The same method generalizes
well to other datasets, but the modifications to the network may need to change.

Video explanation: https://youtu.be/U4bHxEhMGNk
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-04-08 Initial coding

"""

# Imports
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_classes = 10
learning_rate = 1e-3
batch_size = 1024
num_epochs = 5


# Simple Identity class that lets the input pass through unchanged
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x


# Load pretrained model & modify it
model = torchvision.models.vgg16(pretrained=True)

# For finetuning only the new layers, freeze the pretrained weights by
# setting requires_grad = False. Remove these two lines if you want to
# train the entire model and only load the pretrained weights.
for param in model.parameters():
    param.requires_grad = False

model.avgpool = Identity()
model.classifier = nn.Sequential(
    nn.Linear(512, 100), nn.ReLU(), nn.Linear(100, num_classes)
)
model.to(device)


# Load Data
train_dataset = datasets.CIFAR10(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())
        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses):.5f}")


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
    )

    model.train()


check_accuracy(train_loader, model)
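Because every pretrained parameter is frozen, only the new classifier head actually trains. A quick way to confirm what the optimizer will update, as a sketch:

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"Trainable: {trainable} / {total} parameters")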
ML/Pytorch/Basics/pytorch_progress_bar.py (new file, 41 lines)

import torch
import torch.nn as nn
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader

# Create a simple toy dataset example; normally this would be a custom
# class with __getitem__ etc., which we have done in the custom dataset tutorials
x = torch.randn((1000, 3, 224, 224))
y = torch.randint(low=0, high=10, size=(1000, 1))
ds = TensorDataset(x, y)
loader = DataLoader(ds, batch_size=8)

model = nn.Sequential(
    nn.Conv2d(3, 10, kernel_size=3, padding=1, stride=1),
    nn.Flatten(),
    nn.Linear(10 * 224 * 224, 10),
)

NUM_EPOCHS = 100
for epoch in range(NUM_EPOCHS):
    loop = tqdm(loader)
    for idx, (x, y) in enumerate(loop):
        scores = model(x)

        # Here we would compute the loss, run backward, step the optimizer,
        # and so on; the point is that wrapping the loader in tqdm gives a
        # progress bar essentially for free.

        # At the bottom, attach any additional info to the bar. For loss and
        # accuracy you would obviously use the real computed values; here we
        # just set them to random numbers.
        loop.set_description(f"Epoch [{epoch}/{NUM_EPOCHS}]")
        loop.set_postfix(loss=torch.rand(1).item(), acc=torch.rand(1).item())

# There you go. Hope it was useful :)
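tqdm takes a few options that are handy in this pattern: total sets the bar length explicitly, and leave=False clears each epoch's finished bar instead of stacking them. A sketch:

loop = tqdm(loader, total=len(loader), leave=False)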
ML/Pytorch/Basics/pytorch_rnn_gru_lstm.py (new file, 172 lines)

"""
Example code of a simple RNN, GRU, and LSTM on the MNIST dataset.

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-05-09 Initial coding

"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
input_size = 28
hidden_size = 256
num_layers = 2
num_classes = 10
sequence_length = 28
learning_rate = 0.005
batch_size = 64
num_epochs = 2


# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate RNN
        out, _ = self.rnn(x, h0)
        out = out.reshape(out.shape[0], -1)

        # Classify from the concatenated hidden states of all time steps
        out = self.fc(out)
        return out


# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_GRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)

        # Classify from the concatenated hidden states of all time steps
        out = self.fc(out)
        return out


# Recurrent neural network with LSTM (many-to-one)
class RNN_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, _ = self.lstm(
            x, (h0, c0)
        )  # out: tensor of shape (batch_size, seq_length, hidden_size)
        out = out.reshape(out.shape[0], -1)

        # Classify from the concatenated hidden states of all time steps
        out = self.fc(out)
        return out


# Load Data
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)

test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Initialize network
model = RNN_LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible; squeeze the channel dim so each
        # image becomes a (28, 28) sequence of 28 rows
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0

    # Set model to eval
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).squeeze(1)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with "
        f"accuracy {float(num_correct)/float(num_samples)*100:.2f}"
    )
    # Set model back to train
    model.train()


check_accuracy(train_loader, model)
check_accuracy(test_loader, model)
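Because these models flatten all 28 time steps, the linear layer's size is tied to sequence_length. A common variant, sketched here for the LSTM class, classifies from the last time step only, which decouples the head from the sequence length:

# In __init__ (sketch):
self.fc = nn.Linear(hidden_size, num_classes)

# In forward (sketch):
out, _ = self.lstm(x, (h0, c0))
out = self.fc(out[:, -1, :])  # use only the final time step's hidden state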
ML/Pytorch/Basics/pytorch_simple_CNN.py (new file, 134 lines)

"""
Example code of a simple CNN network training on the MNIST dataset.
The code is intended to show how to create a CNN network as well as
how to initialize the loss, optimizer, etc. in a simple way to get
training working, together with a function that checks accuracy.

Video explanation: https://youtu.be/wnK3uWv_WkU
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-04-08 Initial coding

"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset


# Simple CNN
class CNN(nn.Module):
    def __init__(self, in_channels=1, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
in_channel = 1
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 5

# Load Data
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Initialize network
model = CNN().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
    )

    model.train()


check_accuracy(train_loader, model)
check_accuracy(test_loader, model)
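A quick sanity check on the architecture, as a sketch: two stride-2 poolings reduce 28x28 to 7x7, which is where the 16 * 7 * 7 input size of fc1 comes from.

x = torch.randn(64, 1, 28, 28)
print(CNN()(x).shape)  # torch.Size([64, 10])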
||||
120
ML/Pytorch/Basics/pytorch_simple_fullynet.py
Normal file
@@ -0,0 +1,120 @@
"""
Working code of a simple Fully Connected (FC) network training on the MNIST dataset.
The code is intended to show how to create an FC network as well
as how to initialize loss, optimizer, etc. in a simple way to get
training to work, together with a function that checks accuracy.

Video explanation: https://youtu.be/Jy4wM2X21u0
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-08 Initial coding
"""

# Imports
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions
import torch.optim as optim  # For all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset


# Create Fully Connected Network
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
input_size = 784
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 1

# Load Data
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Initialize network
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Get to correct shape
        data = data.reshape(data.shape[0], -1)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            x = x.reshape(x.shape[0], -1)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
        )

    model.train()


check_accuracy(train_loader, model)
check_accuracy(test_loader, model)
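# An added sanity-check sketch (not part of the original file): a quick way to
# verify the NN class produces the expected output shape before training.
#
#   model = NN(input_size=784, num_classes=10)
#   x = torch.randn(64, 784)
#   print(model(x).shape)  # torch.Size([64, 10])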
28
ML/Pytorch/Basics/pytorch_std_mean.py
Normal file
@@ -0,0 +1,28 @@
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_set = datasets.CIFAR10(root="ds/", transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)


def get_mean_std(loader):
    # var[X] = E[X**2] - E[X]**2
    channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0

    for data, _ in tqdm(loader):
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_sqrd_sum += torch.mean(data ** 2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    std = (channels_sqrd_sum / num_batches - mean ** 2) ** 0.5

    return mean, std


mean, std = get_mean_std(train_loader)
print(mean)
print(std)
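# A small added follow-up sketch (not part of the original file): the
# per-channel mean/std computed above are typically what you plug into
# transforms.Normalize when building the actual training pipeline.
normalize_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # mean/std are 3-element tensors here (one entry per CIFAR10 channel)
        transforms.Normalize(mean=mean.tolist(), std=std.tolist()),
    ]
)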
299
ML/Pytorch/Basics/pytorch_tensorbasics.py
Normal file
@@ -0,0 +1,299 @@
"""
Walkthrough of a lot of different useful tensor operations, where we
go through what I think are four main parts:

1. Initialization of a Tensor
2. Tensor Mathematical Operations and Comparison
3. Tensor Indexing
4. Tensor Reshaping

But also other things such as setting the device (GPU/CPU), converting
between different types (int, float, etc.) and how to convert a tensor to a
numpy array and vice-versa.

"""

import torch

# ================================================================= #
#                        Initializing Tensor                        #
# ================================================================= #

device = "cuda" if torch.cuda.is_available() else "cpu"  # Cuda to run on GPU!

# Initializing a Tensor, in this case of shape 2x3 (2 rows, 3 columns)
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]], dtype=torch.float32, device=device, requires_grad=True
)

# A few tensor attributes
print(
    f"Information about tensor: {my_tensor}"
)  # Prints data of the tensor, device and grad info
print(
    f"Type of Tensor {my_tensor.dtype}"
)  # Prints dtype of the tensor (torch.float32, etc)
print(
    f"Device Tensor is on {my_tensor.device}"
)  # Prints cpu/cuda (followed by gpu number)
print(f"Shape of tensor {my_tensor.shape}")  # Prints shape, in this case 2x3
print(f"Requires gradient: {my_tensor.requires_grad}")  # Prints true/false

# Other common initialization methods (there exist a ton more)
x = torch.empty(size=(3, 3))  # Tensor of shape 3x3 with uninitialized data
x = torch.zeros((3, 3))  # Tensor of shape 3x3 with values of 0
x = torch.rand(
    (3, 3)
)  # Tensor of shape 3x3 with values from uniform distribution in interval [0,1)
x = torch.ones((3, 3))  # Tensor of shape 3x3 with values of 1
x = torch.eye(5, 5)  # Returns identity matrix I (I <-> Eye), matrix of shape 5x5
x = torch.arange(
    start=0, end=5, step=1
)  # Tensor [0, 1, 2, 3, 4], note, can also do: torch.arange(11)
x = torch.linspace(start=0.1, end=1, steps=10)  # x = [0.1, 0.2, ..., 1]
x = torch.empty(size=(1, 5)).normal_(
    mean=0, std=1
)  # Normally distributed with mean=0, std=1
x = torch.empty(size=(1, 5)).uniform_(
    0, 1
)  # Values from a uniform distribution low=0, high=1
x = torch.diag(torch.ones(3))  # Diagonal matrix of shape 3x3

# How to convert initialized tensors to other types (int, float, double)
# These work whether you're on CPU or CUDA!
tensor = torch.arange(4)  # [0, 1, 2, 3] Initialized as int64 by default
print(f"Converted Boolean: {tensor.bool()}")  # Converted to Boolean: 1 if nonzero
print(f"Converted int16 {tensor.short()}")  # Converted to int16
print(
    f"Converted int64 {tensor.long()}"
)  # Converted to int64 (This one is very important, used super often)
print(f"Converted float16 {tensor.half()}")  # Converted to float16
print(
    f"Converted float32 {tensor.float()}"
)  # Converted to float32 (This one is very important, used super often)
print(f"Converted float64 {tensor.double()}")  # Converted to float64

# Array to Tensor conversion and vice-versa
import numpy as np

np_array = np.zeros((5, 5))
tensor = torch.from_numpy(np_array)
np_array_again = (
    tensor.numpy()
)  # np_array_again will be same as np_array (perhaps with numerical round offs)

# =============================================================================== #
#                        Tensor Math & Comparison Operations                      #
# =============================================================================== #

x = torch.tensor([1, 2, 3])
y = torch.tensor([9, 8, 7])

# -- Addition --
z1 = torch.empty(3)
torch.add(x, y, out=z1)  # This is one way
z2 = torch.add(x, y)  # This is another way
z = x + y  # This is my preferred way, simple and clean.

# -- Subtraction --
z = x - y  # We can do similarly as the preferred way of addition

# -- Division (a bit clunky) --
z = torch.true_divide(x, y)  # Will do element-wise division if of equal shape

# -- Inplace Operations --
t = torch.zeros(3)

t.add_(x)  # Whenever we have an operation followed by _ it will mutate the tensor in place
t += x  # Also inplace: t = t + x is NOT inplace, a bit confusing.

# -- Exponentiation (element-wise if vector or matrix) --
z = x.pow(2)  # z = [1, 4, 9]
z = x ** 2  # z = [1, 4, 9]


# -- Simple Comparison --
z = x > 0  # Returns [True, True, True]
z = x < 0  # Returns [False, False, False]

# -- Matrix Multiplication --
x1 = torch.rand((2, 5))
x2 = torch.rand((5, 3))
x3 = torch.mm(x1, x2)  # Matrix multiplication of x1 and x2, out shape: 2x3
x3 = x1.mm(x2)  # Same as the line above

# -- Matrix Exponentiation --
matrix_exp = torch.rand(5, 5)
print(
    matrix_exp.matrix_power(3)
)  # same as matrix_exp (mm) matrix_exp (mm) matrix_exp

# -- Element-wise Multiplication --
z = x * y  # z = [9, 16, 21] = [1*9, 2*8, 3*7]

# -- Dot product --
z = torch.dot(x, y)  # Dot product, in this case z = 1*9 + 2*8 + 3*7

# -- Batch Matrix Multiplication --
batch = 32
n = 10
m = 20
p = 30
tensor1 = torch.rand((batch, n, m))
tensor2 = torch.rand((batch, m, p))
out_bmm = torch.bmm(tensor1, tensor2)  # Will be shape: (batch x n x p)

# -- Example of broadcasting --
x1 = torch.rand((5, 5))
x2 = torch.ones((1, 5))
z = (
    x1 - x2
)  # Shape of z is 5x5: How? The 1x5 vector (x2) is subtracted from each row in the 5x5 (x1)
z = (
    x1 ** x2
)  # Shape of z is 5x5: How? Broadcasting! Element-wise exponentiation for every row

# Other useful tensor operations
sum_x = torch.sum(
    x, dim=0
)  # Sum of x across dim=0 (which is the only dim in our case), sum_x = 6
values, indices = torch.max(x, dim=0)  # Can also do x.max(dim=0)
values, indices = torch.min(x, dim=0)  # Can also do x.min(dim=0)
abs_x = torch.abs(x)  # Returns x where abs function has been applied to every element
z = torch.argmax(x, dim=0)  # Gets index of the maximum value
z = torch.argmin(x, dim=0)  # Gets index of the minimum value
mean_x = torch.mean(x.float(), dim=0)  # mean requires x to be float
z = torch.eq(x, y)  # Element-wise comparison, in this case z = [False, False, False]
sorted_y, indices = torch.sort(y, dim=0, descending=False)

z = torch.clamp(x, min=0)
# All values < 0 set to 0 and values > 0 unchanged (this is exactly the ReLU function)
# If you want values above max_val to be clamped, do torch.clamp(x, min=min_val, max=max_val)

x = torch.tensor([1, 0, 1, 1, 1], dtype=torch.bool)  # True/False values
z = torch.any(x)  # will return True, can also do x.any() instead of torch.any(x)
z = torch.all(
    x
)  # will return False (since not all are True), can also do x.all() instead of torch.all(x)

# ============================================================= #
#                        Tensor Indexing                        #
# ============================================================= #

batch_size = 10
features = 25
x = torch.rand((batch_size, features))

# Get the first example's features
print(x[0].shape)  # shape [25], this is the same as doing x[0,:]

# Get the first feature for all examples
print(x[:, 0].shape)  # shape [10]

# For example: want to access the third example in the batch and its first ten features
print(x[2, 0:10].shape)  # shape: [10]

# We can also use indexing to assign certain elements
x[0, 0] = 100

# Fancy Indexing
x = torch.arange(10)
indices = [2, 5, 8]
print(x[indices])  # x[indices] = [2, 5, 8]

x = torch.rand((3, 5))
rows = torch.tensor([1, 0])
cols = torch.tensor([4, 0])
print(x[rows, cols])  # Gets second row fifth column and first row first column

# More advanced indexing
x = torch.arange(10)
print(x[(x < 2) | (x > 8)])  # will be [0, 1, 9]
print(x[x.remainder(2) == 0])  # will be [0, 2, 4, 6, 8]

# Useful operations for indexing
print(
    torch.where(x > 5, x, x * 2)
)  # gives [0, 2, 4, 6, 8, 10, 6, 7, 8, 9], all values x > 5 yield x, else x*2
x = torch.tensor([0, 0, 1, 2, 2, 3, 4]).unique()  # x = [0, 1, 2, 3, 4]
print(
    x.ndimension()
)  # The number of dimensions, in this case 1. if x.shape were 5x5x5, ndim would be 3
x = torch.arange(10)
print(
    x.numel()
)  # The number of elements in x (in this case it's trivial because it's just a vector)

# ============================================================= #
#                        Tensor Reshaping                       #
# ============================================================= #

x = torch.arange(9)

# Let's say we want to reshape it to be 3x3
x_3x3 = x.view(3, 3)

# We can also do (view and reshape are very similar),
# and the difference, in simple terms (I'm no expert at this),
# is that view acts on contiguous tensors, meaning tensors that are
# stored contiguously in memory, whereas for reshape it doesn't
# matter because it will copy the tensor to make it contiguously
# stored if needed, which might come with some performance loss.
x_3x3 = x.reshape(3, 3)

# If we for example do:
y = x_3x3.t()
print(
    y.is_contiguous()
)  # This will return False and if we try to use view now, it won't work!
# y.view(9) would cause an error, reshape however won't

# This is because in memory it was stored [0, 1, 2, ... 8], whereas now it's [0, 3, 6, 1, 4, 7, 2, 5, 8]
# The jump is no longer 1 in memory for a one-element jump (matrices are stored as a contiguous block,
# and strides are used to construct them). This is a bit complicated and I need to explore it more
# as well; at least you know it's a problem to be cautious of! A solution is the following:
print(y.contiguous().view(9))  # Calling .contiguous() before view makes it work
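# A small added demonstration (not from the original file) of the view/reshape
# point above: view on a non-contiguous tensor raises a RuntimeError, while
# reshape silently copies when it has to.
try:
    y.view(9)
except RuntimeError as e:
    print(f"view failed as expected: {e}")
print(y.reshape(9))  # works, may copy under the hood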
# Moving on to another operation, let's say we want to concatenate two tensors
x1 = torch.rand(2, 5)
x2 = torch.rand(2, 5)
print(torch.cat((x1, x2), dim=0).shape)  # Shape: 4x5
print(torch.cat((x1, x2), dim=1).shape)  # Shape: 2x10

# Let's say we want to unroll x1 into one long vector with 10 elements, we can do:
z = x1.view(-1)  # And -1 will unroll everything

# If we instead have an additional dimension and we wish to keep those as is we can do:
batch = 64
x = torch.rand((batch, 2, 5))
z = x.view(
    batch, -1
)  # And z.shape would be 64x10, this is very useful stuff and is used all the time

# Let's say we want to switch axes so that instead of 64x2x5 we have 64x5x2
# I.e. we want dimension 0 to stay, dimension 1 to become dimension 2, and dimension 2 to become dimension 1
# Basically you tell permute where you want the new dimensions to be; torch.transpose is a special case
# of permute (why?)
z = x.permute(0, 2, 1)
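# An added sketch (not from the original file) answering the "why?" above:
# transpose swaps exactly two dimensions, which is a permute that touches
# only those two and leaves the rest in place.
z_t = x.transpose(1, 2)  # same result as x.permute(0, 2, 1) for this 3-D tensor
print(torch.equal(z, z_t))  # True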
# torch.chunk splits x along a dimension. Here we split dim=1 (of size 2) into
# two chunks, giving two tensors of shape 64x1x5. (Chunking dim=2 of size 5
# instead would give uneven chunks, 64x2x3 and 64x2x2, since 5 isn't evenly
# divisible by 2.)
z = torch.chunk(x, chunks=2, dim=1)
print(z[0].shape)
print(z[1].shape)

# Let's say we want to add an additional dimension
x = torch.arange(
    10
)  # Shape is [10], let's say we want to add an additional dim so we have 1x10
print(x.unsqueeze(0).shape)  # 1x10
print(x.unsqueeze(1).shape)  # 10x1

# Let's say we have x which is 1x1x10 and we want to remove a dim so we have 1x10
x = torch.arange(10).unsqueeze(0).unsqueeze(1)

# Perhaps unsurprisingly
z = x.squeeze(1)  # can also do .squeeze(0); both return 1x10

# Those were some essential tensor operations; hopefully you found them useful!
142
ML/Pytorch/Basics/pytorch_tensorboard_.py
Normal file
@@ -0,0 +1,142 @@
"""
Example code of how to use TensorBoard in PyTorch.
This code uses a lot of different functions from TensorBoard
and tries to have them all in a compact way; it might not be
super clear exactly what each call does, for that I recommend
watching the YouTube video.

Video explanation: https://youtu.be/RLqsxWaQdHE
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-17 Initial coding
"""

# Imports
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions
import torch.optim as optim  # For all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard


# Simple CNN
class CNN(nn.Module):
    def __init__(self, in_channels=1, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels, out_channels=8, kernel_size=3, stride=1, padding=1
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1
        )
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
in_channels = 1
num_classes = 10
num_epochs = 1

# Load Data
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)

# To do a hyperparameter search, include more batch sizes you want to try
# and more learning rates!
batch_sizes = [256]
learning_rates = [0.001]
classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        step = 0
        # Initialize network
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        model.train()
        criterion = nn.CrossEntropyLoss()
        train_loader = DataLoader(
            dataset=train_dataset, batch_size=batch_size, shuffle=True
        )
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        writer = SummaryWriter(
            f"runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}"
        )

        # Visualize model in TensorBoard
        images, _ = next(iter(train_loader))
        writer.add_graph(model, images.to(device))
        writer.close()

        for epoch in range(num_epochs):
            losses = []
            accuracies = []

            for batch_idx, (data, targets) in enumerate(train_loader):
                # Get data to cuda if possible
                data = data.to(device=device)
                targets = targets.to(device=device)

                # forward
                scores = model(data)
                loss = criterion(scores, targets)
                losses.append(loss.item())

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Calculate 'running' training accuracy
                features = data.reshape(data.shape[0], -1)
                img_grid = torchvision.utils.make_grid(data)
                _, predictions = scores.max(1)
                num_correct = (predictions == targets).sum()
                running_train_acc = float(num_correct) / float(data.shape[0])
                accuracies.append(running_train_acc)

                # Plot things to tensorboard
                class_labels = [classes[label] for label in predictions]
                writer.add_image("mnist_images", img_grid)
                writer.add_histogram("fc1", model.fc1.weight)
                writer.add_scalar("Training loss", loss, global_step=step)
                writer.add_scalar(
                    "Training Accuracy", running_train_acc, global_step=step
                )

                if batch_idx == 230:
                    writer.add_embedding(
                        features,
                        metadata=class_labels,
                        label_img=data,
                        global_step=batch_idx,
                    )
                step += 1

        writer.add_hparams(
            {"lr": learning_rate, "bsize": batch_size},
            {
                "accuracy": sum(accuracies) / len(accuracies),
                "loss": sum(losses) / len(losses),
            },
        )
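# A brief added usage note (not part of the original file): with the writer
# logging under "runs/", the dashboard is viewed by running the TensorBoard
# CLI from the same working directory, e.g.:
#
#   tensorboard --logdir runs
#
# and opening the printed localhost URL in a browser.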
155
ML/Pytorch/Basics/pytorch_transforms.py
Normal file
@@ -0,0 +1,155 @@
"""
Shows a small example of how to use transformations (perhaps unnecessarily many)
on the CIFAR10 dataset, training a small CNN toy network.

Video explanation: https://youtu.be/Zvd276j9sZ8
Got any questions? Leave a comment; I'm pretty good at responding on YouTube

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-09 Initial coding
"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions
import torch.optim as optim  # For all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset


# Simple CNN
class CNN(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        self.fc1 = nn.Linear(16 * 8 * 8, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
learning_rate = 1e-4
batch_size = 64
num_epochs = 5


# Initialize model & modify it
# (note: model.classifier below is never called in CNN.forward, so this extra
# head has no effect on this network; it only illustrates swapping in a new
# classifier head)
model = CNN(in_channels=3, num_classes=10)
model.classifier = nn.Sequential(nn.Linear(512, 100), nn.ReLU(), nn.Linear(100, 10))
model.to(device)

# Load Data
my_transforms = transforms.Compose(
    [  # Compose makes it possible to have many transforms
        transforms.Resize((36, 36)),  # Resizes (32,32) to (36,36)
        transforms.RandomCrop((32, 32)),  # Takes a random (32,32) crop
        transforms.ColorJitter(brightness=0.5),  # Changes brightness of image
        transforms.RandomRotation(
            degrees=45
        ),  # A random rotation from -45 to 45 degrees
        transforms.RandomHorizontalFlip(
            p=0.5
        ),  # Flips the image horizontally with probability 0.5
        transforms.RandomVerticalFlip(
            p=0.05
        ),  # Flips image vertically with probability 0.05
        transforms.RandomGrayscale(p=0.2),  # Converts to grayscale with probability 0.2
        transforms.ToTensor(),  # Finally converts PIL image to tensor so we can train w. pytorch
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]
        ),  # Note: these values aren't optimal
    ]
)


train_dataset = datasets.CIFAR10(
    root="dataset/", train=True, transform=my_transforms, download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())
        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses):.5f}")

# Check accuracy on training & test to see how good our model is


def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
        )

    model.train()


check_accuracy(train_loader, model)
@@ -0,0 +1,15 @@
import random, torch, os, numpy as np


def seed_everything(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything()

# Do training etc. after running seed_everything
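# An added caveat sketch (not part of the original snippet): when a DataLoader
# uses num_workers > 0, each worker process should also be seeded, e.g. via a
# hypothetical seed_worker helper passed as worker_init_fn:
#
#   def seed_worker(worker_id):
#       worker_seed = torch.initial_seed() % 2**32
#       np.random.seed(worker_seed)
#       random.seed(worker_seed)
#
#   loader = torch.utils.data.DataLoader(
#       dataset, batch_size=64, num_workers=4, worker_init_fn=seed_worker
#   )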
67
ML/Pytorch/CNN_architectures/lenet5_pytorch.py
Normal file
@@ -0,0 +1,67 @@
"""
An implementation of LeNet CNN architecture.

Video explanation: https://youtu.be/fcOW-Zyb5Bo
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-05 Initial coding
"""

import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions


class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=6,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
        )
        self.conv2 = nn.Conv2d(
            in_channels=6,
            out_channels=16,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
        )
        self.conv3 = nn.Conv2d(
            in_channels=16,
            out_channels=120,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
        )
        self.linear1 = nn.Linear(120, 84)
        self.linear2 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(
            self.conv3(x)
        )  # num_examples x 120 x 1 x 1 --> num_examples x 120
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.linear1(x))
        x = self.linear2(x)
        return x


def test_lenet():
    x = torch.randn(64, 1, 32, 32)
    model = LeNet()
    return model(x)


if __name__ == "__main__":
    out = test_lenet()
    print(out.shape)
166
ML/Pytorch/CNN_architectures/pytorch_inceptionet.py
Normal file
@@ -0,0 +1,166 @@
"""
An implementation of GoogLeNet / InceptionNet from scratch.

Video explanation: https://youtu.be/uQc4Fs7yx5I
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-07 Initial coding
"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions


class GoogLeNet(nn.Module):
    def __init__(self, aux_logits=True, num_classes=1000):
        super(GoogLeNet, self).__init__()
        assert aux_logits == True or aux_logits == False
        self.aux_logits = aux_logits

        # We write in_channels, etc., explicitly in self.conv1; for the rest we
        # keep everything as compact as possible, e.g. kernel_size=3 instead of (3, 3)
        self.conv1 = conv_block(
            in_channels=3,
            out_channels=64,
            kernel_size=(7, 7),
            stride=(2, 2),
            padding=(3, 3),
        )

        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = conv_block(64, 192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # In this order: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1)

        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.dropout = nn.Dropout(p=0.4)
        self.fc1 = nn.Linear(1024, num_classes)

        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)
        else:
            self.aux1 = self.aux2 = None

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        # x = self.conv3(x)
        x = self.maxpool2(x)

        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)

        x = self.inception4a(x)

        # Auxiliary softmax classifier 1
        if self.aux_logits and self.training:
            aux1 = self.aux1(x)

        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)

        # Auxiliary softmax classifier 2
        if self.aux_logits and self.training:
            aux2 = self.aux2(x)

        x = self.inception4e(x)
        x = self.maxpool4(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.dropout(x)
        x = self.fc1(x)

        if self.aux_logits and self.training:
            return aux1, aux2, x
        else:
            return x


class Inception_block(nn.Module):
    def __init__(
        self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
    ):
        super(Inception_block, self).__init__()
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=(1, 1))

        self.branch2 = nn.Sequential(
            conv_block(in_channels, red_3x3, kernel_size=(1, 1)),
            conv_block(red_3x3, out_3x3, kernel_size=(3, 3), padding=(1, 1)),
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, red_5x5, kernel_size=(1, 1)),
            conv_block(red_5x5, out_5x5, kernel_size=(5, 5), padding=(2, 2)),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            conv_block(in_channels, out_1x1pool, kernel_size=(1, 1)),
        )

    def forward(self, x):
        return torch.cat(
            [self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], 1
        )


class InceptionAux(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.7)
        self.pool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = conv_block(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.pool(x)
        x = self.conv(x)
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x


class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(conv_block, self).__init__()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        return self.relu(self.batchnorm(self.conv(x)))


if __name__ == "__main__":
    # N = 3 (Mini batch size)
    x = torch.randn(3, 3, 224, 224)
    model = GoogLeNet(aux_logits=True, num_classes=1000)
    print(model(x)[2].shape)
163
ML/Pytorch/CNN_architectures/pytorch_resnet.py
Normal file
@@ -0,0 +1,163 @@
# -*- coding: utf-8 -*-
"""
From scratch implementation of the famous ResNet models.
The intuition for ResNet is simple and clear, but coding
it didn't feel super clear at first, even when reading PyTorch's own
implementation.

Video explanation:
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-12 Initial coding
"""

import torch
import torch.nn as nn


class block(nn.Module):
    def __init__(
        self, in_channels, intermediate_channels, identity_downsample=None, stride=1
    ):
        super(block, self).__init__()
        self.expansion = 4
        self.conv1 = nn.Conv2d(
            in_channels, intermediate_channels, kernel_size=1, stride=1, padding=0
        )
        self.bn1 = nn.BatchNorm2d(intermediate_channels)
        self.conv2 = nn.Conv2d(
            intermediate_channels,
            intermediate_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
        )
        self.bn2 = nn.BatchNorm2d(intermediate_channels)
        self.conv3 = nn.Conv2d(
            intermediate_channels,
            intermediate_channels * self.expansion,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.bn3 = nn.BatchNorm2d(intermediate_channels * self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
        self.stride = stride

    def forward(self, x):
        identity = x.clone()

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x += identity
        x = self.relu(x)
        return x


class ResNet(nn.Module):
    def __init__(self, block, layers, image_channels, num_classes):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Essentially the entire ResNet architecture is in these 4 lines below
        self.layer1 = self._make_layer(
            block, layers[0], intermediate_channels=64, stride=1
        )
        self.layer2 = self._make_layer(
            block, layers[1], intermediate_channels=128, stride=2
        )
        self.layer3 = self._make_layer(
            block, layers[2], intermediate_channels=256, stride=2
        )
        self.layer4 = self._make_layer(
            block, layers[3], intermediate_channels=512, stride=2
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

    def _make_layer(self, block, num_residual_blocks, intermediate_channels, stride):
        identity_downsample = None
        layers = []

        # If we either halve the spatial size, e.g. 56x56 -> 28x28 (stride=2), or the
        # number of channels changes, we need to adapt the identity (skip connection)
        # so it can be added to the layer that's ahead
        if stride != 1 or self.in_channels != intermediate_channels * 4:
            identity_downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    intermediate_channels * 4,
                    kernel_size=1,
                    stride=stride,
                ),
                nn.BatchNorm2d(intermediate_channels * 4),
            )

        layers.append(
            block(self.in_channels, intermediate_channels, identity_downsample, stride)
        )

        # The expansion size is always 4 for ResNet 50, 101, 152
        self.in_channels = intermediate_channels * 4

        # For example, in the first resnet layer: 256 will be mapped to 64 as
        # intermediate channels, then finally back to 256. Hence no identity
        # downsample is needed, since stride = 1 and the number of channels
        # stays the same.
        for i in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, intermediate_channels))

        return nn.Sequential(*layers)


def ResNet50(img_channel=3, num_classes=1000):
    return ResNet(block, [3, 4, 6, 3], img_channel, num_classes)


def ResNet101(img_channel=3, num_classes=1000):
    return ResNet(block, [3, 4, 23, 3], img_channel, num_classes)


def ResNet152(img_channel=3, num_classes=1000):
    return ResNet(block, [3, 8, 36, 3], img_channel, num_classes)


def test():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    net = ResNet101(img_channel=3, num_classes=1000).to(device)
    y = net(torch.randn(4, 3, 224, 224).to(device))
    print(y.size())


test()
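# An added usage sketch (not part of the original file): the three factory
# functions differ only in the layers list, which controls how many residual
# blocks go into each of the four stages; comparing parameter counts makes
# that concrete.
#
#   for factory in (ResNet50, ResNet101, ResNet152):
#       net = factory(img_channel=3, num_classes=1000)
#       print(factory.__name__, sum(p.numel() for p in net.parameters()))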
119
ML/Pytorch/CNN_architectures/pytorch_vgg_implementation.py
Normal file
@@ -0,0 +1,119 @@
"""
A from scratch implementation of the VGG architecture.

Video explanation: https://youtu.be/ACmuBbuXn20
Got any questions? Leave a comment on YouTube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-05 Initial coding
"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions

VGG_types = {
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG16": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "VGG19": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}


class VGG_net(nn.Module):
    def __init__(self, in_channels=3, num_classes=1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types["VGG16"])

        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == int:
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(x),
                    nn.ReLU(),
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]

        return nn.Sequential(*layers)


if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = VGG_net(in_channels=3, num_classes=1000).to(device)
    print(model)
    ## N = 3 (Mini batch size)
    # x = torch.randn(3, 3, 224, 224).to(device)
    # print(model(x).shape)
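# An added illustration (not part of the original file): create_conv_layers
# maps each int in the config to Conv-BatchNorm-ReLU and each "M" to a 2x2
# max-pool, so a tiny hypothetical config is enough to see the pattern:
#
#   tiny = VGG_net(in_channels=3, num_classes=10)
#   tiny.conv_layers = tiny.create_conv_layers([8, "M", 16, "M"])
#   print(tiny.conv_layers)  # Conv(3->8), BN, ReLU, MaxPool, Conv(8->16), BN, ReLU, MaxPool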
107
ML/Pytorch/GANs/1. SimpleGAN/fc_gan.py
Normal file
@@ -0,0 +1,107 @@
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard


class Discriminator(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.disc = nn.Sequential(
            nn.Linear(in_features, 128),
            nn.LeakyReLU(0.01),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, z_dim, img_dim):
        super().__init__()
        self.gen = nn.Sequential(
            nn.Linear(z_dim, 256),
            nn.LeakyReLU(0.01),
            nn.Linear(256, img_dim),
            nn.Tanh(),  # inputs are normalized to [-1, 1], so make outputs [-1, 1] as well
        )

    def forward(self, x):
        return self.gen(x)


# Hyperparameters etc.
device = "cuda" if torch.cuda.is_available() else "cpu"
lr = 3e-4
z_dim = 64
image_dim = 28 * 28 * 1  # 784
batch_size = 32
num_epochs = 50

disc = Discriminator(image_dim).to(device)
gen = Generator(z_dim, image_dim).to(device)
fixed_noise = torch.randn((batch_size, z_dim)).to(device)
transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

dataset = datasets.MNIST(root="dataset/", transform=transforms, download=True)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
opt_disc = optim.Adam(disc.parameters(), lr=lr)
opt_gen = optim.Adam(gen.parameters(), lr=lr)
criterion = nn.BCELoss()
writer_fake = SummaryWriter("logs/fake")
writer_real = SummaryWriter("logs/real")
step = 0

for epoch in range(num_epochs):
    for batch_idx, (real, _) in enumerate(loader):
        real = real.view(-1, 784).to(device)
        batch_size = real.shape[0]

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        noise = torch.randn(batch_size, z_dim).to(device)
        fake = gen(noise)
        disc_real = disc(real).view(-1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake).view(-1)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake) / 2
        disc.zero_grad()
        lossD.backward(retain_graph=True)
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))),
        # where the second option of maximizing doesn't suffer from
        # saturating gradients
        output = disc(fake).view(-1)
        lossG = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        lossG.backward()
        opt_gen.step()

        if batch_idx == 0:
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(loader)} \
                      Loss D: {lossD:.4f}, loss G: {lossG:.4f}"
            )

            with torch.no_grad():
                fake = gen(fixed_noise).reshape(-1, 1, 28, 28)
                data = real.reshape(-1, 1, 28, 28)
                img_grid_fake = torchvision.utils.make_grid(fake, normalize=True)
                img_grid_real = torchvision.utils.make_grid(data, normalize=True)

                writer_fake.add_image(
                    "Mnist Fake Images", img_grid_fake, global_step=step
                )
                writer_real.add_image(
                    "Mnist Real Images", img_grid_real, global_step=step
                )
                step += 1
96
ML/Pytorch/GANs/2. DCGAN/model.py
Normal file
@@ -0,0 +1,96 @@
"""
Discriminator and Generator implementation from DCGAN paper
"""

import torch
import torch.nn as nn


class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            # input: N x channels_img x 64 x 64
            nn.Conv2d(channels_img, features_d, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            # _block(in_channels, out_channels, kernel_size, stride, padding)
            self._block(features_d, features_d * 2, 4, 2, 1),
            self._block(features_d * 2, features_d * 4, 4, 2, 1),
            self._block(features_d * 4, features_d * 8, 4, 2, 1),
            # After all _block calls, img output is 4x4 (Conv2d below makes it 1x1)
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
            nn.Sigmoid(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),  # batchnorm as in the DCGAN paper
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # Input: N x channels_noise x 1 x 1
            self._block(channels_noise, features_g * 16, 4, 1, 0),  # img: 4x4
            self._block(features_g * 16, features_g * 8, 4, 2, 1),  # img: 8x8
            self._block(features_g * 8, features_g * 4, 4, 2, 1),  # img: 16x16
            self._block(features_g * 4, features_g * 2, 4, 2, 1),  # img: 32x32
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # Output: N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),  # batchnorm as in the DCGAN paper
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)


def initialize_weights(model):
    # Initializes weights according to the DCGAN paper
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
            nn.init.normal_(m.weight.data, 0.0, 0.02)


def test():
    N, in_channels, H, W = 8, 3, 64, 64
    noise_dim = 100
    x = torch.randn((N, in_channels, H, W))
    disc = Discriminator(in_channels, 8)
    assert disc(x).shape == (N, 1, 1, 1), "Discriminator test failed"
    gen = Generator(noise_dim, in_channels, 8)
    z = torch.randn((N, noise_dim, 1, 1))
    assert gen(z).shape == (N, in_channels, H, W), "Generator test failed"


# test()
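# An added note (not part of the original file) on the img-size comments in
# Generator._block: for ConvTranspose2d (dilation=1, output_padding=0) the
# output size follows
#
#   out = (in - 1) * stride - 2 * padding + kernel_size
#
# so with kernel_size=4, stride=2, padding=1 each block doubles the spatial
# size: (in - 1) * 2 - 2 + 4 = 2 * in, giving the 4x4 -> 8x8 -> ... -> 64x64
# progression (and the first block maps 1x1 to (1-1)*1 - 0 + 4 = 4x4).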
105
ML/Pytorch/GANs/2. DCGAN/train.py
Normal file
@@ -0,0 +1,105 @@
"""
Training of DCGAN network on MNIST dataset with Discriminator
and Generator imported from model.py
"""

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import Discriminator, Generator, initialize_weights

# Hyperparameters etc.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LEARNING_RATE = 2e-4  # could also use two lrs, one for gen and one for disc
BATCH_SIZE = 128
IMAGE_SIZE = 64
CHANNELS_IMG = 1
NOISE_DIM = 100
NUM_EPOCHS = 5
FEATURES_DISC = 64
FEATURES_GEN = 64

transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)], [0.5 for _ in range(CHANNELS_IMG)]
        ),
    ]
)

# If you train on MNIST, remember to set channels_img to 1
dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms, download=True
)

# comment mnist above and uncomment below to train on CelebA
# dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
initialize_weights(gen)
initialize_weights(disc)

opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
criterion = nn.BCELoss()

fixed_noise = torch.randn(32, NOISE_DIM, 1, 1).to(device)
writer_real = SummaryWriter("logs/real")
writer_fake = SummaryWriter("logs/fake")
step = 0

gen.train()
disc.train()

for epoch in range(NUM_EPOCHS):
    # Target labels not needed! <3 unsupervised
    for batch_idx, (real, _) in enumerate(dataloader):
        real = real.to(device)
        noise = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1).to(device)
        fake = gen(noise)

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z)))
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0:
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
            )

            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(real[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)

                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)

            step += 1
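# An added sketch (not part of the original file): after training, a natural
# next step is saving the generator weights for later sampling. The filename
# below is hypothetical.
#
#   torch.save(gen.state_dict(), "dcgan_generator.pth")
#
#   # later / elsewhere:
#   gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN)
#   gen.load_state_dict(torch.load("dcgan_generator.pth"))
#   gen.eval()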
98
ML/Pytorch/GANs/3. WGAN/model.py
Normal file
@@ -0,0 +1,98 @@
"""
Discriminator and Generator implementation from DCGAN paper,
with removed Sigmoid() as output from Discriminator (and therefore
it should be called critic)
"""

import torch
import torch.nn as nn


class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            # input: N x channels_img x 64 x 64
            nn.Conv2d(
                channels_img, features_d, kernel_size=4, stride=2, padding=1
            ),
            nn.LeakyReLU(0.2),
            # _block(in_channels, out_channels, kernel_size, stride, padding)
            self._block(features_d, features_d * 2, 4, 2, 1),
            self._block(features_d * 2, features_d * 4, 4, 2, 1),
            self._block(features_d * 4, features_d * 8, 4, 2, 1),
            # After all _block img output is 4x4 (Conv2d below makes into 1x1)
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            nn.InstanceNorm2d(out_channels, affine=True),
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # Input: N x channels_noise x 1 x 1
            self._block(channels_noise, features_g * 16, 4, 1, 0),  # img: 4x4
            self._block(features_g * 16, features_g * 8, 4, 2, 1),  # img: 8x8
            self._block(features_g * 8, features_g * 4, 4, 2, 1),  # img: 16x16
            self._block(features_g * 4, features_g * 2, 4, 2, 1),  # img: 32x32
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # Output: N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)


def initialize_weights(model):
    # Initializes weights according to the DCGAN paper
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
            nn.init.normal_(m.weight.data, 0.0, 0.02)


def test():
    N, in_channels, H, W = 8, 3, 64, 64
    noise_dim = 100
    x = torch.randn((N, in_channels, H, W))
    disc = Discriminator(in_channels, 8)
    assert disc(x).shape == (N, 1, 1, 1), "Discriminator test failed"
    gen = Generator(noise_dim, in_channels, 8)
    z = torch.randn((N, noise_dim, 1, 1))
    assert gen(z).shape == (N, in_channels, H, W), "Generator test failed"


# test()
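# Usage sketch (illustrative, matching the shapes exercised by test() above):
# with the final Sigmoid removed, the critic outputs raw unbounded scores
# rather than probabilities, which is exactly what the Wasserstein loss in
# train.py expects.
#
# critic = Discriminator(channels_img=3, features_d=8)
# scores = critic(torch.randn(8, 3, 64, 64))  # shape (8, 1, 1, 1), no sigmoid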
114
ML/Pytorch/GANs/3. WGAN/train.py
Normal file
@@ -0,0 +1,114 @@
"""
Training of DCGAN network with WGAN loss
"""

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import Discriminator, Generator, initialize_weights

# Hyperparameters etc.
device = "cuda" if torch.cuda.is_available() else "cpu"
LEARNING_RATE = 5e-5
BATCH_SIZE = 64
IMAGE_SIZE = 64
CHANNELS_IMG = 1
Z_DIM = 128
NUM_EPOCHS = 5
FEATURES_CRITIC = 64
FEATURES_GEN = 64
CRITIC_ITERATIONS = 5
WEIGHT_CLIP = 0.01

transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)], [0.5 for _ in range(CHANNELS_IMG)]
        ),
    ]
)

dataset = datasets.MNIST(root="dataset/", transform=transforms, download=True)
# comment out MNIST above and uncomment below if you want to train on CelebA
# dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# initialize gen and disc/critic
gen = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
critic = Discriminator(CHANNELS_IMG, FEATURES_CRITIC).to(device)
initialize_weights(gen)
initialize_weights(critic)

# initialize optimizers (WGAN uses RMSprop rather than Adam)
opt_gen = optim.RMSprop(gen.parameters(), lr=LEARNING_RATE)
opt_critic = optim.RMSprop(critic.parameters(), lr=LEARNING_RATE)

# for tensorboard plotting
fixed_noise = torch.randn(32, Z_DIM, 1, 1).to(device)
writer_real = SummaryWriter("logs/real")
writer_fake = SummaryWriter("logs/fake")
step = 0

gen.train()
critic.train()

for epoch in range(NUM_EPOCHS):
    # Target labels not needed! <3 unsupervised
    for batch_idx, (data, _) in enumerate(loader):
        data = data.to(device)
        cur_batch_size = data.shape[0]

        # Train Critic: max E[critic(real)] - E[critic(fake)]
        for _ in range(CRITIC_ITERATIONS):
            noise = torch.randn(cur_batch_size, Z_DIM, 1, 1).to(device)
            fake = gen(noise)
            critic_real = critic(data).reshape(-1)
            critic_fake = critic(fake).reshape(-1)
            loss_critic = -(torch.mean(critic_real) - torch.mean(critic_fake))
            critic.zero_grad()
            loss_critic.backward(retain_graph=True)
            opt_critic.step()

            # clip critic weights to [-WEIGHT_CLIP, WEIGHT_CLIP]
            for p in critic.parameters():
                p.data.clamp_(-WEIGHT_CLIP, WEIGHT_CLIP)

        # Train Generator: max E[critic(gen_fake)] <-> min -E[critic(gen_fake)]
        gen_fake = critic(fake).reshape(-1)
        loss_gen = -torch.mean(gen_fake)
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0 and batch_idx > 0:
            gen.eval()
            critic.eval()
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(loader)} \
                  Loss D: {loss_critic:.4f}, loss G: {loss_gen:.4f}"
            )

            with torch.no_grad():
                fake = gen(noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(
                    data[:32], normalize=True
                )
                img_grid_fake = torchvision.utils.make_grid(
                    fake[:32], normalize=True
                )

                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)

            step += 1
            gen.train()
            critic.train()
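# Why weight clipping (added remark): the WGAN critic approximates the
# Wasserstein distance through its Kantorovich-Rubinstein dual,
#   W(P_r, P_g) = sup_{||f||_L <= 1} E_{x~P_r}[f(x)] - E_{x~P_g}[f(x)],
# which requires the critic f to be 1-Lipschitz. Clamping every weight to
# [-WEIGHT_CLIP, WEIGHT_CLIP] is the original paper's admittedly crude way of
# enforcing that constraint; WGAN-GP (next folder) replaces it with a
# gradient penalty.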
84
ML/Pytorch/GANs/4. WGAN-GP/model.py
Normal file
@@ -0,0 +1,84 @@
"""
Discriminator and Generator implementation from DCGAN paper
"""

import torch
import torch.nn as nn


class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            # input: N x channels_img x 64 x 64
            nn.Conv2d(channels_img, features_d, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            # _block(in_channels, out_channels, kernel_size, stride, padding)
            self._block(features_d, features_d * 2, 4, 2, 1),
            self._block(features_d * 2, features_d * 4, 4, 2, 1),
            self._block(features_d * 4, features_d * 8, 4, 2, 1),
            # After all _block img output is 4x4 (Conv2d below makes into 1x1)
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(
                in_channels, out_channels, kernel_size, stride, padding, bias=False,
            ),
            nn.InstanceNorm2d(out_channels, affine=True),
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # Input: N x channels_noise x 1 x 1
            self._block(channels_noise, features_g * 16, 4, 1, 0),  # img: 4x4
            self._block(features_g * 16, features_g * 8, 4, 2, 1),  # img: 8x8
            self._block(features_g * 8, features_g * 4, 4, 2, 1),  # img: 16x16
            self._block(features_g * 4, features_g * 2, 4, 2, 1),  # img: 32x32
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # Output: N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels, out_channels, kernel_size, stride, padding, bias=False,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)


def initialize_weights(model):
    # Initializes weights according to the DCGAN paper
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
            nn.init.normal_(m.weight.data, 0.0, 0.02)


def test():
    N, in_channels, H, W = 8, 3, 64, 64
    noise_dim = 100
    x = torch.randn((N, in_channels, H, W))
    disc = Discriminator(in_channels, 8)
    assert disc(x).shape == (N, 1, 1, 1), "Discriminator test failed"
    gen = Generator(noise_dim, in_channels, 8)
    z = torch.randn((N, noise_dim, 1, 1))
    assert gen(z).shape == (N, in_channels, H, W), "Generator test failed"


# test()
111
ML/Pytorch/GANs/4. WGAN-GP/train.py
Normal file
@@ -0,0 +1,111 @@
"""
Training of WGAN-GP
"""

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from utils import gradient_penalty, save_checkpoint, load_checkpoint
from model import Discriminator, Generator, initialize_weights

# Hyperparameters etc.
device = "cuda" if torch.cuda.is_available() else "cpu"
LEARNING_RATE = 1e-4
BATCH_SIZE = 64
IMAGE_SIZE = 64
CHANNELS_IMG = 1
Z_DIM = 100
NUM_EPOCHS = 100
FEATURES_CRITIC = 16
FEATURES_GEN = 16
CRITIC_ITERATIONS = 5
LAMBDA_GP = 10

transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)], [0.5 for _ in range(CHANNELS_IMG)]
        ),
    ]
)

dataset = datasets.MNIST(root="dataset/", transform=transforms, download=True)
# comment out MNIST above and uncomment below for training on CelebA
# dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# initialize gen and disc; note: the discriminator should be called critic,
# according to the WGAN paper (since it no longer outputs in [0, 1])
gen = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
critic = Discriminator(CHANNELS_IMG, FEATURES_CRITIC).to(device)
initialize_weights(gen)
initialize_weights(critic)

# initialize optimizers
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.9))
opt_critic = optim.Adam(critic.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.9))

# for tensorboard plotting
fixed_noise = torch.randn(32, Z_DIM, 1, 1).to(device)
writer_real = SummaryWriter("logs/GAN_MNIST/real")
writer_fake = SummaryWriter("logs/GAN_MNIST/fake")
step = 0

gen.train()
critic.train()

for epoch in range(NUM_EPOCHS):
    # Target labels not needed! <3 unsupervised
    for batch_idx, (real, _) in enumerate(loader):
        real = real.to(device)
        cur_batch_size = real.shape[0]

        # Train Critic: max E[critic(real)] - E[critic(fake)]
        # equivalent to minimizing the negative of that
        for _ in range(CRITIC_ITERATIONS):
            noise = torch.randn(cur_batch_size, Z_DIM, 1, 1).to(device)
            fake = gen(noise)
            critic_real = critic(real).reshape(-1)
            critic_fake = critic(fake).reshape(-1)
            gp = gradient_penalty(critic, real, fake, device=device)
            loss_critic = (
                -(torch.mean(critic_real) - torch.mean(critic_fake)) + LAMBDA_GP * gp
            )
            critic.zero_grad()
            loss_critic.backward(retain_graph=True)
            opt_critic.step()

        # Train Generator: max E[critic(gen_fake)] <-> min -E[critic(gen_fake)]
        gen_fake = critic(fake).reshape(-1)
        loss_gen = -torch.mean(gen_fake)
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0 and batch_idx > 0:
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(loader)} \
                  Loss D: {loss_critic:.4f}, loss G: {loss_gen:.4f}"
            )

            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(real[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)

                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)

            step += 1
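# Full critic objective used above, written out (added remark):
#   L_critic = E[critic(fake)] - E[critic(real)]
#              + LAMBDA_GP * E[(||grad_xhat critic(xhat)||_2 - 1)^2]
# where xhat is a random interpolation between real and fake images (see
# gradient_penalty in utils.py). Minimizing this maximizes
# E[critic(real)] - E[critic(fake)] under a soft 1-Lipschitz constraint,
# so no weight clipping is needed, and Adam with betas=(0.0, 0.9) works
# where plain WGAN required RMSprop.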
35
ML/Pytorch/GANs/4. WGAN-GP/utils.py
Normal file
@@ -0,0 +1,35 @@
import torch
import torch.nn as nn


def gradient_penalty(critic, real, fake, device="cpu"):
    BATCH_SIZE, C, H, W = real.shape
    alpha = torch.rand((BATCH_SIZE, 1, 1, 1)).repeat(1, C, H, W).to(device)
    # interpolated_images inherits a grad_fn through fake (which comes from
    # the generator), so autograd.grad below can differentiate through it
    interpolated_images = real * alpha + fake * (1 - alpha)

    # Calculate critic scores
    mixed_scores = critic(interpolated_images)

    # Take the gradient of the scores with respect to the images
    gradient = torch.autograd.grad(
        inputs=interpolated_images,
        outputs=mixed_scores,
        grad_outputs=torch.ones_like(mixed_scores),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradient = gradient.view(gradient.shape[0], -1)
    gradient_norm = gradient.norm(2, dim=1)
    gradient_penalty = torch.mean((gradient_norm - 1) ** 2)
    return gradient_penalty


def save_checkpoint(state, filename="celeba_wgan_gp.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, gen, disc):
    print("=> Loading checkpoint")
    gen.load_state_dict(checkpoint["gen"])
    disc.load_state_dict(checkpoint["disc"])
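# Quick sanity check for gradient_penalty (illustrative only; the toy critic
# below is an assumption, any module mapping images to scalars works):
#
# crit = lambda x: x.sum(dim=(1, 2, 3), keepdim=True)  # toy linear "critic"
# real = torch.randn(4, 3, 64, 64)
# fake = torch.randn(4, 3, 64, 64, requires_grad=True)
# print(gradient_penalty(crit, real, fake))  # non-negative scalar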
205
ML/Pytorch/GANs/5. ProGAN/model.py
Normal file
@@ -0,0 +1,205 @@
"""
Implementation of ProGAN generator and discriminator with the key
contributions from the paper. We have tried to make the implementation
compact, but a goal is also to keep it readable and understandable.
Specifically, the key points implemented are:

1) Progressive growing (of model and layers)
2) Minibatch std on Discriminator
3) Normalization with PixelNorm
4) Equalized Learning Rate (here I cheated and only did it on Conv layers)
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from math import log2

"""
factors is used in Discriminator and Generator for how much
the channels should be multiplied and expanded for each layer;
specifically, in the first 5 layers the channels stay the same,
whereas when we increase the img_size (towards the later layers)
we decrease the number of channels by 1/2, 1/4, etc.
"""
factors = [1, 1, 1, 1, 1 / 2, 1 / 4, 1 / 4, 1 / 8, 1 / 16]


class WSConv2d(nn.Module):
    """
    Weight scaled Conv2d (Equalized Learning Rate).
    Note that the input is scaled rather than the weights;
    this has the same result.

    Inspired by:
    https://github.com/nvnbny/progressive_growing_of_gans/blob/master/modelUtils.py
    """

    def __init__(
        self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, gain=2
    ):
        super(WSConv2d, self).__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, stride, padding
        )
        self.scale = (gain / (self.conv.weight[0].numel())) ** 0.5

        # initialize conv layer
        nn.init.normal_(self.conv.weight)
        nn.init.zeros_(self.conv.bias)

    def forward(self, x):
        return self.conv(x * self.scale)


class PixelNorm(nn.Module):
    def __init__(self):
        super(PixelNorm, self).__init__()
        self.epsilon = 1e-8

    def forward(self, x):
        return x / torch.sqrt(
            torch.mean(x ** 2, dim=1, keepdim=True) + self.epsilon
        )


class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, use_pixelnorm=True):
        super(ConvBlock, self).__init__()
        self.use_pn = use_pixelnorm
        self.conv1 = WSConv2d(in_channels, out_channels)
        self.conv2 = WSConv2d(out_channels, out_channels)
        self.leaky = nn.LeakyReLU(0.2)
        self.pn = PixelNorm()

    def forward(self, x):
        x = self.leaky(self.conv1(x))
        x = self.pn(x) if self.use_pn else x
        x = self.leaky(self.conv2(x))
        x = self.pn(x) if self.use_pn else x
        return x


class Generator(nn.Module):
    def __init__(self, z_dim, in_channels, img_size, img_channels=3):
        super(Generator, self).__init__()
        self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])

        # initial takes 1x1 -> 4x4
        self.initial = nn.Sequential(
            nn.ConvTranspose2d(z_dim, in_channels, 4, 1, 0),
            nn.LeakyReLU(0.2),
            PixelNorm(),
        )

        # Create progression blocks and rgb layers
        channels = in_channels

        # we need to double the img for log2(img_size/4) steps,
        # and +1 in the loop for the initial 4x4
        for idx in range(int(log2(img_size / 4)) + 1):
            conv_in = channels
            conv_out = int(in_channels * factors[idx])
            self.prog_blocks.append(ConvBlock(conv_in, conv_out))
            self.rgb_layers.append(
                WSConv2d(conv_out, img_channels, kernel_size=1, stride=1, padding=0)
            )
            channels = conv_out

    def fade_in(self, alpha, upscaled, generated):
        # assert 0 <= alpha <= 1, "Alpha not between 0 and 1"
        # assert upscaled.shape == generated.shape
        return torch.tanh(alpha * generated + (1 - alpha) * upscaled)

    def forward(self, x, alpha, steps):
        upscaled = self.initial(x)
        out = self.prog_blocks[0](upscaled)

        if steps == 0:
            return self.rgb_layers[0](out)

        for step in range(1, steps + 1):
            upscaled = F.interpolate(out, scale_factor=2, mode="nearest")
            out = self.prog_blocks[step](upscaled)

        # The number of channels in upscaled stays the same, while out,
        # which has moved through prog_blocks, might change. To ensure we
        # can convert both to rgb we use different rgb_layers:
        # (steps - 1) for upscaled and steps for out.
        final_upscaled = self.rgb_layers[steps - 1](upscaled)
        final_out = self.rgb_layers[steps](out)
        return self.fade_in(alpha, final_upscaled, final_out)


class Discriminator(nn.Module):
    def __init__(self, img_size, z_dim, in_channels, img_channels=3):
        super(Discriminator, self).__init__()
        self.prog_blocks, self.rgb_layers = nn.ModuleList([]), nn.ModuleList([])

        # Create progression blocks and rgb layers
        channels = in_channels
        for idx in range(int(log2(img_size / 4)) + 1):
            conv_in = int(in_channels * factors[idx])
            conv_out = channels
            self.rgb_layers.append(
                WSConv2d(img_channels, conv_in, kernel_size=1, stride=1, padding=0)
            )
            self.prog_blocks.append(ConvBlock(conv_in, conv_out, use_pixelnorm=False))
            channels = conv_in

        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
        # +1 to in_channels because we concatenate from minibatch std
        self.conv = WSConv2d(in_channels + 1, z_dim, kernel_size=4, stride=1, padding=0)
        self.linear = nn.Linear(z_dim, 1)

    def fade_in(self, alpha, downscaled, out):
        """Used to fade in the downscaled (avg-pooled) input with the output from the CNN"""
        # assert 0 <= alpha <= 1, "Alpha needs to be between [0, 1]"
        # assert downscaled.shape == out.shape
        return alpha * out + (1 - alpha) * downscaled

    def minibatch_std(self, x):
        batch_statistics = (
            torch.std(x, dim=0)
            .mean()
            .repeat(x.shape[0], 1, x.shape[2], x.shape[3])
        )
        return torch.cat([x, batch_statistics], dim=1)

    def forward(self, x, alpha, steps):
        out = self.rgb_layers[steps](x)  # convert from rgb as initial step

        if steps == 0:  # i.e., image is 4x4
            out = self.minibatch_std(out)
            out = self.conv(out)
            return self.linear(out.view(-1, out.shape[1]))

        # index steps which has the "reverse" fade_in
        downscaled = self.rgb_layers[steps - 1](self.avg_pool(x))
        out = self.avg_pool(self.prog_blocks[steps](out))
        out = self.fade_in(alpha, downscaled, out)

        for step in range(steps - 1, 0, -1):
            downscaled = self.avg_pool(out)
            out = self.prog_blocks[step](downscaled)

        out = self.minibatch_std(out)
        out = self.conv(out)
        return self.linear(out.view(-1, out.shape[1]))


if __name__ == "__main__":
    import time

    Z_DIM = 100
    IN_CHANNELS = 16
    img_size = 512
    num_steps = int(log2(img_size / 4))
    x = torch.randn((5, Z_DIM, 1, 1))
    gen = Generator(Z_DIM, IN_CHANNELS, img_size=img_size)
    disc = Discriminator(img_size, Z_DIM, IN_CHANNELS)
    start = time.time()
    with torch.autograd.profiler.profile(use_cuda=True) as prof:
        z = gen(x, alpha=0.5, steps=num_steps)
    print(prof)
    gen_time = time.time() - start
    t = time.time()
    out = disc(z, 0.01, num_steps)
    disc_time = time.time() - t
    print(gen_time, disc_time)
    # print(disc(z, 0.01, num_steps).shape)
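# Worked example of the WSConv2d runtime scale (added remark): the scale is
# the He-style constant sqrt(gain / fan_in), where fan_in is
# conv.weight[0].numel() = in_channels * k * k. For a 3x3 conv with 512 input
# channels, fan_in = 512 * 9 = 4608 and scale = sqrt(2 / 4608) ~= 0.0208.
# Weights are drawn from N(0, 1) and multiplied by this constant on every
# forward pass, which equalizes the effective learning rate across layers
# regardless of their fan-in.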
5
ML/Pytorch/GANs/5. ProGAN/test.py
Normal file
@@ -0,0 +1,5 @@
def func(x=1, y=2, **kwargs):
    print(x, y)


print(func(x=3, y=4))  # prints "3 4", then "None" since func returns nothing
165
ML/Pytorch/GANs/5. ProGAN/train.py
Normal file
@@ -0,0 +1,165 @@
"""Training of ProGAN using WGAN-GP loss"""

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from utils import gradient_penalty, plot_to_tensorboard, save_checkpoint, load_checkpoint
from model import Discriminator, Generator
from math import log2
from tqdm import tqdm
import time

torch.backends.cudnn.benchmark = True
torch.manual_seed(0)

# Hyperparameters etc.
device = "cuda" if torch.cuda.is_available() else "cpu"
LEARNING_RATE = 1e-4
BATCH_SIZES = [128, 128, 64, 16, 8, 4, 2, 2, 1]
IMAGE_SIZE = 128
CHANNELS_IMG = 3
Z_DIM = 128
IN_CHANNELS = 128
CRITIC_ITERATIONS = 1
LAMBDA_GP = 10
NUM_STEPS = int(log2(IMAGE_SIZE / 4)) + 1
PROGRESSIVE_EPOCHS = [2 ** i for i in range(int(log2(IMAGE_SIZE / 4) + 1))]
PROGRESSIVE_EPOCHS = [8 for i in range(int(log2(IMAGE_SIZE / 4) + 1))]  # overrides the doubling schedule above
fixed_noise = torch.randn(8, Z_DIM, 1, 1).to(device)
NUM_WORKERS = 4


def get_loader(image_size):
    transform = transforms.Compose(
        [
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize(
                [0.5 for _ in range(CHANNELS_IMG)],
                [0.5 for _ in range(CHANNELS_IMG)],
            ),
        ]
    )
    batch_size = BATCH_SIZES[int(log2(image_size / 4))]
    dataset = datasets.ImageFolder(root="celeb_dataset", transform=transform)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )
    return loader, dataset


def train_fn(
    critic,
    gen,
    loader,
    dataset,
    step,
    alpha,
    opt_critic,
    opt_gen,
    tensorboard_step,
    writer,
):
    start = time.time()
    total_time = 0
    training = tqdm(loader, leave=True)
    for batch_idx, (real, _) in enumerate(training):
        real = real.to(device)
        cur_batch_size = real.shape[0]
        model_start = time.time()

        # Train Critic: max E[critic(real)] - E[critic(fake)]
        # which is equivalent to minimizing the negative of the expression
        for _ in range(CRITIC_ITERATIONS):
            critic.zero_grad()
            noise = torch.randn(cur_batch_size, Z_DIM, 1, 1).to(device)
            fake = gen(noise, alpha, step)
            critic_real = critic(real, alpha, step).reshape(-1)
            critic_fake = critic(fake, alpha, step).reshape(-1)
            gp = gradient_penalty(critic, real, fake, alpha, step, device=device)
            loss_critic = (
                -(torch.mean(critic_real) - torch.mean(critic_fake))
                + LAMBDA_GP * gp
            )
            loss_critic.backward(retain_graph=True)
            opt_critic.step()

        # Train Generator: max E[critic(gen_fake)] <-> min -E[critic(gen_fake)]
        gen.zero_grad()
        fake = gen(noise, alpha, step)
        gen_fake = critic(fake, alpha, step).reshape(-1)
        loss_gen = -torch.mean(gen_fake)
        loss_gen.backward()
        opt_gen.step()

        # Update alpha and ensure it stays at most 1
        alpha += cur_batch_size / (
            (PROGRESSIVE_EPOCHS[step] * 0.5) * len(dataset)
        )
        alpha = min(alpha, 1)
        total_time += time.time() - model_start

        if batch_idx % 300 == 0:
            with torch.no_grad():
                fixed_fakes = gen(fixed_noise, alpha, step)
            plot_to_tensorboard(
                writer, loss_critic, loss_gen, real, fixed_fakes, tensorboard_step
            )
            tensorboard_step += 1

    print(f"Fraction spent on model training: {total_time / (time.time() - start)}")
    return tensorboard_step, alpha


def main():
    # initialize gen and disc; note: the discriminator should be called critic,
    # according to the WGAN paper (since it no longer outputs in [0, 1])
    gen = Generator(Z_DIM, IN_CHANNELS, img_size=IMAGE_SIZE, img_channels=CHANNELS_IMG).to(device)
    critic = Discriminator(IMAGE_SIZE, Z_DIM, IN_CHANNELS, img_channels=CHANNELS_IMG).to(device)

    # initialize optimizers
    opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.99))
    opt_critic = optim.Adam(critic.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.99))

    # for tensorboard plotting
    writer = SummaryWriter("logs/gan")

    load_checkpoint(torch.load("celeba_wgan_gp.pth.tar"), gen, critic)
    gen.train()
    critic.train()

    tensorboard_step = 0
    for step, num_epochs in enumerate(PROGRESSIVE_EPOCHS):
        alpha = 0.01
        if step < 3:
            continue

        if step == 4:
            print(f"Img size is: {4 * 2 ** step}")

        loader, dataset = get_loader(4 * 2 ** step)
        for epoch in range(num_epochs):
            print(f"Epoch [{epoch + 1}/{num_epochs}]")
            tensorboard_step, alpha = train_fn(
                critic,
                gen,
                loader,
                dataset,
                step,
                alpha,
                opt_critic,
                opt_gen,
                tensorboard_step,
                writer,
            )

        checkpoint = {
            "gen": gen.state_dict(),
            "critic": critic.state_dict(),
            "opt_gen": opt_gen.state_dict(),
            "opt_critic": opt_critic.state_dict(),
        }
        save_checkpoint(checkpoint)


if __name__ == "__main__":
    main()
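# Alpha schedule check (added remark): each batch adds
# cur_batch_size / (0.5 * PROGRESSIVE_EPOCHS[step] * len(dataset)) to alpha,
# and one epoch's batch sizes sum to len(dataset), so alpha reaches 1.0 after
# exactly half of that stage's epochs. The new resolution is therefore fully
# faded in halfway through the stage and trains at alpha = 1 for the rest.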
54
ML/Pytorch/GANs/5. ProGAN/utils.py
Normal file
@@ -0,0 +1,54 @@
import torch
import torchvision
import torch.nn as nn


# Plot losses occasionally and write images to tensorboard
def plot_to_tensorboard(
    writer, loss_critic, loss_gen, real, fake, tensorboard_step
):
    writer.add_scalar("Loss Critic", loss_critic, global_step=tensorboard_step)

    with torch.no_grad():
        # take out (up to) 8 examples
        img_grid_real = torchvision.utils.make_grid(real[:8], normalize=True)
        img_grid_fake = torchvision.utils.make_grid(fake[:8], normalize=True)
        writer.add_image("Real", img_grid_real, global_step=tensorboard_step)
        writer.add_image("Fake", img_grid_fake, global_step=tensorboard_step)


def gradient_penalty(critic, real, fake, alpha, train_step, device="cpu"):
    BATCH_SIZE, C, H, W = real.shape
    beta = torch.rand((BATCH_SIZE, 1, 1, 1)).repeat(1, C, H, W).to(device)
    interpolated_images = real * beta + fake * (1 - beta)

    # Calculate critic scores
    mixed_scores = critic(interpolated_images, alpha, train_step)

    # Take the gradient of the scores with respect to the images
    gradient = torch.autograd.grad(
        inputs=interpolated_images,
        outputs=mixed_scores,
        grad_outputs=torch.ones_like(mixed_scores),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradient = gradient.view(gradient.shape[0], -1)
    gradient_norm = gradient.norm(2, dim=1)
    gradient_penalty = torch.mean((gradient_norm - 1) ** 2)
    return gradient_penalty


def save_checkpoint(state, filename="celeba_wgan_gp.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, gen, disc, opt_gen=None, opt_disc=None):
    print("=> Loading checkpoint")
    gen.load_state_dict(checkpoint["gen"])
    disc.load_state_dict(checkpoint["critic"])

    if opt_gen is not None and opt_disc is not None:
        opt_gen.load_state_dict(checkpoint["opt_gen"])
        opt_disc.load_state_dict(checkpoint["opt_critic"])
After Width: | Height: | Size: 101 KiB |
@@ -0,0 +1,29 @@
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np


class CarvanaDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        img_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(
            self.mask_dir, self.images[index].replace(".jpg", "_mask.gif")
        )
        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.float32)
        mask[mask == 255.0] = 1.0

        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]

        return image, mask
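# Usage sketch (illustrative; the paths below just mirror the constants in
# train.py and are not hardcoded here):
#
# ds = CarvanaDataset(
#     image_dir="data/train_images/",
#     mask_dir="data/train_masks/",
#     transform=None,
# )
# img, mask = ds[0]  # img: HxWx3 uint8 array; mask: HxW float32 in {0.0, 1.0}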
@@ -0,0 +1,76 @@
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF


class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)


class UNET(nn.Module):
    def __init__(
        self, in_channels=3, out_channels=1, features=[64, 128, 256, 512],
    ):
        super(UNET, self).__init__()
        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Down part of UNET
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Up part of UNET
        for feature in reversed(features):
            self.ups.append(
                nn.ConvTranspose2d(
                    feature * 2, feature, kernel_size=2, stride=2,
                )
            )
            self.ups.append(DoubleConv(feature * 2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []

        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        for idx in range(0, len(self.ups), 2):
            x = self.ups[idx](x)
            skip_connection = skip_connections[idx // 2]

            if x.shape != skip_connection.shape:
                x = TF.resize(x, size=skip_connection.shape[2:])

            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.ups[idx + 1](concat_skip)

        return self.final_conv(x)


def test():
    x = torch.randn((3, 1, 161, 161))
    model = UNET(in_channels=1, out_channels=1)
    preds = model(x)
    assert preds.shape == x.shape


if __name__ == "__main__":
    test()
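# Why the TF.resize in forward (added remark): MaxPool2d floors odd spatial
# sizes (161 -> 80 -> 40 -> 20 -> 10), so after four ConvTranspose2d doublings
# the decoder produces a 160x160 tensor while the outermost skip connection is
# 161x161. Resizing x to the skip's spatial size before concatenation is what
# lets test() above pass for odd input sizes.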
@@ -0,0 +1,124 @@
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from model import UNET
from utils import (
    load_checkpoint,
    save_checkpoint,
    get_loaders,
    check_accuracy,
    save_predictions_as_imgs,
)

# Hyperparameters etc.
LEARNING_RATE = 1e-4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
NUM_EPOCHS = 3
NUM_WORKERS = 2
IMAGE_HEIGHT = 160  # 1280 originally
IMAGE_WIDTH = 240  # 1918 originally
PIN_MEMORY = True
LOAD_MODEL = True
TRAIN_IMG_DIR = "data/train_images/"
TRAIN_MASK_DIR = "data/train_masks/"
VAL_IMG_DIR = "data/val_images/"
VAL_MASK_DIR = "data/val_masks/"


def train_fn(loader, model, optimizer, loss_fn, scaler):
    loop = tqdm(loader)

    for batch_idx, (data, targets) in enumerate(loop):
        data = data.to(device=DEVICE)
        targets = targets.float().unsqueeze(1).to(device=DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = loss_fn(predictions, targets)

        # backward
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # update tqdm loop
        loop.set_postfix(loss=loss.item())


def main():
    train_transform = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Rotate(limit=35, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.1),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    val_transforms = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    check_accuracy(val_loader, model, device=DEVICE)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(
            val_loader, model, folder="saved_images/", device=DEVICE
        )


if __name__ == "__main__":
    main()
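# Note on the loss pairing (added remark): UNET's final layer is a plain 1x1
# Conv2d with no sigmoid, so BCEWithLogitsLoss (sigmoid + BCE fused, which is
# more numerically stable) is the right choice here; the explicit
# torch.sigmoid only appears at evaluation time in utils.check_accuracy and
# utils.save_predictions_as_imgs. For multi-class masks one would raise
# out_channels and switch to nn.CrossEntropyLoss (an extension this repo
# does not implement).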
@@ -0,0 +1,93 @@
import torch
import torchvision
from dataset import CarvanaDataset
from torch.utils.data import DataLoader


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])


def get_loaders(
    train_dir,
    train_maskdir,
    val_dir,
    val_maskdir,
    batch_size,
    train_transform,
    val_transform,
    num_workers=4,
    pin_memory=True,
):
    train_ds = CarvanaDataset(
        image_dir=train_dir,
        mask_dir=train_maskdir,
        transform=train_transform,
    )

    train_loader = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=True,
    )

    val_ds = CarvanaDataset(
        image_dir=val_dir,
        mask_dir=val_maskdir,
        transform=val_transform,
    )

    val_loader = DataLoader(
        val_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=False,
    )

    return train_loader, val_loader


def check_accuracy(loader, model, device="cuda"):
    num_correct = 0
    num_pixels = 0
    dice_score = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
            num_correct += (preds == y).sum()
            num_pixels += torch.numel(preds)
            dice_score += (2 * (preds * y).sum()) / (
                (preds + y).sum() + 1e-8
            )

    print(
        f"Got {num_correct}/{num_pixels} with acc {num_correct/num_pixels*100:.2f}"
    )
    print(f"Dice score: {dice_score/len(loader)}")
    model.train()


def save_predictions_as_imgs(
    loader, model, folder="saved_images/", device="cuda"
):
    model.eval()
    for idx, (x, y) in enumerate(loader):
        x = x.to(device=device)
        with torch.no_grad():
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
        torchvision.utils.save_image(
            preds, f"{folder}/pred_{idx}.png"
        )
        torchvision.utils.save_image(y.unsqueeze(1), f"{folder}{idx}.png")

    model.train()
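# Dice score refresher for the binary case computed above (added remark):
#   dice = 2 * |pred AND truth| / (|pred| + |truth|)
# e.g. if a prediction covers 90 of 100 true foreground pixels and adds 10
# false positives, dice = 2 * 90 / (100 + 100) = 0.90, whereas plain pixel
# accuracy can look near-perfect on these car images simply because most
# pixels are background.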
131
ML/Pytorch/more_advanced/GANs/DCGAN_mnist.py
Normal file
@@ -0,0 +1,131 @@
"""
Example code of how to code GANs, and more specifically DCGAN;
for more information about DCGANs read: https://arxiv.org/abs/1511.06434

We then train the DCGAN on the MNIST dataset (toy dataset of handwritten digits)
and then generate our own. You can apply this more generally on really any dataset,
but MNIST is simple enough to get the overall idea.

Video explanation: https://youtu.be/5RYETbFFQ7s
Got any questions? Leave a comment on youtube :)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-20 Initial coding

"""

# Imports
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini batches
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
from model_utils import (
    Discriminator,
    Generator,
)  # Import the models we've defined (from DCGAN paper)

# Hyperparameters
lr = 0.0005
batch_size = 64
image_size = 64
channels_img = 1
channels_noise = 256
num_epochs = 10

# For how many channels Generator and Discriminator should use
features_d = 16
features_g = 16

my_transforms = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

dataset = datasets.MNIST(
    root="dataset/", train=True, transform=my_transforms, download=True
)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create discriminator and generator
netD = Discriminator(channels_img, features_d).to(device)
netG = Generator(channels_noise, channels_img, features_g).to(device)

# Setup Optimizer for G and D
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(0.5, 0.999))

netG.train()
netD.train()

criterion = nn.BCELoss()

real_label = 1
fake_label = 0

fixed_noise = torch.randn(64, channels_noise, 1, 1).to(device)
writer_real = SummaryWriter("runs/GAN_MNIST/test_real")
writer_fake = SummaryWriter("runs/GAN_MNIST/test_fake")
step = 0

print("Starting Training...")

for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(dataloader):
        data = data.to(device)
        batch_size = data.shape[0]

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        netD.zero_grad()
        label = (torch.ones(batch_size) * 0.9).to(device)
        output = netD(data).reshape(-1)
        lossD_real = criterion(output, label)
        D_x = output.mean().item()

        noise = torch.randn(batch_size, channels_noise, 1, 1).to(device)
        fake = netG(noise)
        label = (torch.ones(batch_size) * 0.1).to(device)

        output = netD(fake.detach()).reshape(-1)
        lossD_fake = criterion(output, label)

        lossD = lossD_real + lossD_fake
        lossD.backward()
        optimizerD.step()

        ### Train Generator: max log(D(G(z)))
        netG.zero_grad()
        label = torch.ones(batch_size).to(device)
        output = netD(fake).reshape(-1)
        lossG = criterion(output, label)
        lossG.backward()
        optimizerG.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0:
            step += 1
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {lossD:.4f}, loss G: {lossG:.4f} D(x): {D_x:.4f}"
            )

            with torch.no_grad():
                fake = netG(fixed_noise)
                img_grid_real = torchvision.utils.make_grid(data[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)
                writer_real.add_image(
                    "Mnist Real Images", img_grid_real, global_step=step
                )
                writer_fake.add_image(
                    "Mnist Fake Images", img_grid_fake, global_step=step
                )
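# Note on the 0.9 / 0.1 targets above (added remark): the discriminator is
# trained against smoothed labels rather than hard 1s and 0s, a common GAN
# training trick that keeps D from becoming overconfident and starving G of
# gradients. The unused real_label / fake_label constants are leftovers from
# the hard-label version.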
4
ML/Pytorch/more_advanced/GANs/README.md
Normal file
@@ -0,0 +1,4 @@
### Generative Adversarial Network

DCGAN_mnist.py: main file that trains the network
model_utils.py: Generator and Discriminator implementations
76
ML/Pytorch/more_advanced/GANs/model_utils.py
Normal file
@@ -0,0 +1,76 @@
"""
Discriminator and Generator implementation from DCGAN paper
that we import in the main (DCGAN_mnist.py) file.
"""

import torch
import torch.nn as nn


class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            # N x channels_img x 64 x 64
            nn.Conv2d(channels_img, features_d, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            # N x features_d x 32 x 32
            nn.Conv2d(features_d, features_d * 2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_d * 2),
            nn.LeakyReLU(0.2),
            nn.Conv2d(
                features_d * 2, features_d * 4, kernel_size=4, stride=2, padding=1
            ),
            nn.BatchNorm2d(features_d * 4),
            nn.LeakyReLU(0.2),
            nn.Conv2d(
                features_d * 4, features_d * 8, kernel_size=4, stride=2, padding=1
            ),
            nn.BatchNorm2d(features_d * 8),
            nn.LeakyReLU(0.2),
            # N x features_d*8 x 4 x 4
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
            # N x 1 x 1 x 1
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.net(x)


class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()

        self.net = nn.Sequential(
            # N x channels_noise x 1 x 1
            nn.ConvTranspose2d(
                channels_noise, features_g * 16, kernel_size=4, stride=1, padding=0
            ),
            nn.BatchNorm2d(features_g * 16),
            nn.ReLU(),
            # N x features_g*16 x 4 x 4
            nn.ConvTranspose2d(
                features_g * 16, features_g * 8, kernel_size=4, stride=2, padding=1
            ),
            nn.BatchNorm2d(features_g * 8),
            nn.ReLU(),
            nn.ConvTranspose2d(
                features_g * 8, features_g * 4, kernel_size=4, stride=2, padding=1
            ),
            nn.BatchNorm2d(features_g * 4),
            nn.ReLU(),
            nn.ConvTranspose2d(
                features_g * 4, features_g * 2, kernel_size=4, stride=2, padding=1
            ),
            nn.BatchNorm2d(features_g * 2),
            nn.ReLU(),
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def forward(self, x):
        return self.net(x)
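# Quick shape check in the style of the test() helpers elsewhere in this repo
# (illustrative, not part of the original file; sizes match DCGAN_mnist.py):
#
# netD = Discriminator(channels_img=1, features_d=16)
# netG = Generator(channels_noise=256, channels_img=1, features_g=16)
# assert netD(torch.randn(8, 1, 64, 64)).shape == (8, 1, 1, 1)
# assert netG(torch.randn(8, 256, 1, 1)).shape == (8, 1, 64, 64)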
242
ML/Pytorch/more_advanced/Seq2Seq/seq2seq.py
Normal file
@@ -0,0 +1,242 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
import numpy as np
import spacy
import random
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint

spacy_ger = spacy.load("de")
spacy_eng = spacy.load("en")


def tokenize_ger(text):
    return [tok.text for tok in spacy_ger.tokenizer(text)]


def tokenize_eng(text):
    return [tok.text for tok in spacy_eng.tokenizer(text)]


german = Field(tokenize=tokenize_ger, lower=True, init_token="<sos>", eos_token="<eos>")

english = Field(
    tokenize=tokenize_eng, lower=True, init_token="<sos>", eos_token="<eos>"
)

train_data, valid_data, test_data = Multi30k.splits(
    exts=(".de", ".en"), fields=(german, english)
)

german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)


class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(Encoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)

    def forward(self, x):
        # x shape: (seq_length, N) where N is batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (seq_length, N, embedding_size)

        outputs, (hidden, cell) = self.rnn(embedding)
        # outputs shape: (seq_length, N, hidden_size)

        return hidden, cell


class Decoder(nn.Module):
    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(Decoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        # x shape: (N) where N is the batch size; we want it to be (1, N) since
        # seq_length is 1 here, because we are sending in a single word and not a sentence
        x = x.unsqueeze(0)

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (1, N, embedding_size)

        outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        # outputs shape: (1, N, hidden_size)

        predictions = self.fc(outputs)

        # predictions shape: (1, N, length_target_vocabulary); to send it to
        # the loss function we want it to be (N, length_target_vocabulary),
        # so we just remove the first dim
        predictions = predictions.squeeze(0)

        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        hidden, cell = self.encoder(source)

        # Grab the first input to the Decoder, which will be the <SOS> token
        x = target[0]

        for t in range(1, target_len):
            # Use previous hidden, cell as context from encoder at start
            output, hidden, cell = self.decoder(x, hidden, cell)

            # Store next output prediction
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output.argmax(1)

            # With probability teacher_force_ratio we take the actual next word,
            # otherwise we take the word the Decoder predicted.
            # Teacher Forcing is used so that the model gets used to seeing
            # similar inputs at training and testing time; if teacher forcing is 1,
            # then inputs at test time might be completely different from what the
            # network is used to. This was a long comment.
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs


### We're ready to define everything we need for training our Seq2Seq model ###

# Training hyperparameters
num_epochs = 100
learning_rate = 0.001
batch_size = 64

# Model hyperparameters
load_model = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024  # Needs to be the same for both RNNs
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

# Tensorboard to get a nice loss plot
writer = SummaryWriter("runs/loss_plot")
step = 0

train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)


sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
    save_checkpoint(checkpoint)

    model.eval()

    translated_sentence = translate_sentence(
        model, sentence, german, english, device, max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()

    for batch_idx, batch in enumerate(train_iterator):
        # Get input and targets and move them to cuda
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Forward prop
        output = model(inp_data, target)

        # Output is of shape (trg_len, batch_size, output_dim), but CrossEntropyLoss
        # doesn't take input in that form. For example, for MNIST we want the
        # output to be (N, 10) and targets just (N). Here we can view it in a
        # similar way: we have output_words * batch_size examples to send into
        # our cost function, so we need to do some reshaping.
        # Let's also remove the start token while we're at it.
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues; makes sure grads are
        # within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # Plot to tensorboard
        writer.add_scalar("Training loss", loss, global_step=step)
        step += 1


score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score * 100:.2f}")
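# Two details worth calling out in Seq2Seq.forward (added remarks):
# 1) outputs[0] is never written (the loop starts at t=1), which is why both
#    output and target are sliced with [1:] before the loss; position 0 would
#    otherwise compare an all-zero prediction against <sos>.
# 2) teacher_force_ratio=0.5 means that at each step the decoder input is a
#    coin flip between the ground-truth token and the decoder's own previous
#    argmax, trading training stability against exposure bias at test time.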
84
ML/Pytorch/more_advanced/Seq2Seq/utils.py
Normal file
@@ -0,0 +1,84 @@
import torch
import spacy
from torchtext.data.metrics import bleu_score


def translate_sentence(model, sentence, german, english, device, max_length=50):
    # Load German tokenizer
    spacy_ger = spacy.load("de")

    # Create tokens using spacy, everything lowercased (which is what our vocab is)
    if type(sentence) == str:
        tokens = [token.text.lower() for token in spacy_ger(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    # Add <SOS> and <EOS> at the beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    # Go through each German token and convert it to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    # Build encoder hidden, cell state
    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)

    outputs = [english.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if output.argmax(1).item() == english.vocab.stoi["<eos>"]:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    # remove start token
    return translated_sentence[1:]


def bleu(data, model, german, english, device):
    targets = []
    outputs = []

    for example in data:
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate_sentence(model, src, german, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
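translate_sentence above is plain greedy decoding: each step feeds back the argmax token until <eos> or max_length is hit. A hypothetical usage sketch (the example sentence is made up; model, german, english and device are assumed to come from the training script):

# Hypothetical usage; assumes a trained model and the Fields from the training script
example = "ein pferd geht unter einer brücke neben einem boot."  # "a horse walks under a bridge next to a boat."
tokens = translate_sentence(model, example, german, english, device, max_length=50)
print(" ".join(tokens))  # greedy decode, typically ending in "<eos>"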
279
ML/Pytorch/more_advanced/Seq2Seq_attention/seq2seq_attention.py
Normal file
@@ -0,0 +1,279 @@
import random
import torch
import torch.nn as nn
import torch.optim as optim
import spacy
from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator

"""
To install spacy languages do:
python -m spacy download en
python -m spacy download de
"""
spacy_ger = spacy.load("de")
spacy_eng = spacy.load("en")


def tokenize_ger(text):
    return [tok.text for tok in spacy_ger.tokenizer(text)]


def tokenize_eng(text):
    return [tok.text for tok in spacy_eng.tokenizer(text)]


german = Field(tokenize=tokenize_ger, lower=True, init_token="<sos>", eos_token="<eos>")

english = Field(
    tokenize=tokenize_eng, lower=True, init_token="<sos>", eos_token="<eos>"
)

train_data, valid_data, test_data = Multi30k.splits(
    exts=(".de", ".en"), fields=(german, english)
)

german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)


class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, bidirectional=True)

        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        # x: (seq_length, N) where N is batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (seq_length, N, embedding_size)

        encoder_states, (hidden, cell) = self.rnn(embedding)
        # encoder_states shape: (seq_length, N, hidden_size * 2) since bidirectional

        # Pass the concatenated forward and backward hidden/cell states through a
        # linear layer so they can be input to the decoder, which is not bidirectional.
        # Index slicing ([idx:idx+1]) keeps the first dimension.
        hidden = self.fc_hidden(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
        cell = self.fc_cell(torch.cat((cell[0:1], cell[1:2]), dim=2))

        return encoder_states, hidden, cell
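The encoder's LSTM is bidirectional, so hidden comes back as (num_layers * 2, N, hidden_size), and fc_hidden fuses the forward and backward states into the single (1, N, hidden_size) tensor the unidirectional decoder expects. A standalone shape check under the same assumptions (num_layers = 1, toy sizes):

import torch
import torch.nn as nn

seq_len, N, emb, hid = 10, 3, 8, 16
rnn = nn.LSTM(emb, hid, num_layers=1, bidirectional=True)
fc_hidden = nn.Linear(hid * 2, hid)

x = torch.randn(seq_len, N, emb)
encoder_states, (hidden, cell) = rnn(x)
print(encoder_states.shape)  # (10, 3, 32): forward and backward concatenated
print(hidden.shape)          # (2, 3, 16): one entry per direction

fused = fc_hidden(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
print(fused.shape)           # (1, 3, 16): ready for the unidirectional decoder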
class Decoder(nn.Module):
    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(hidden_size * 2 + embedding_size, hidden_size, num_layers)

        self.energy = nn.Linear(hidden_size * 3, 1)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p)
        self.softmax = nn.Softmax(dim=0)
        self.relu = nn.ReLU()

    def forward(self, x, encoder_states, hidden, cell):
        x = x.unsqueeze(0)
        # x: (1, N) where N is the batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (1, N, embedding_size)

        sequence_length = encoder_states.shape[0]
        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        # h_reshaped: (seq_length, N, hidden_size)

        energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim=2)))
        # energy: (seq_length, N, 1)

        attention = self.softmax(energy)
        # attention: (seq_length, N, 1), softmax taken over the sequence dimension

        # attention: (seq_length, N, 1), snk
        # encoder_states: (seq_length, N, hidden_size*2), snl
        # we want context_vector: (1, N, hidden_size*2), i.e. knl
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)

        rnn_input = torch.cat((context_vector, embedding), dim=2)
        # rnn_input: (1, N, hidden_size*2 + embedding_size)

        outputs, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        # outputs shape: (1, N, hidden_size)

        predictions = self.fc(outputs).squeeze(0)
        # predictions: (N, output_size), one score per word in the vocabulary

        return predictions, hidden, cell
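The einsum "snk,snl->knl" is the attention-weighted sum over source positions s; with k = 1 it reduces to weighting each encoder state and summing along the sequence axis. A quick numerical check with toy shapes matching the comments above:

import torch

seq_len, N, hid2 = 10, 3, 32
attention = torch.softmax(torch.randn(seq_len, N, 1), dim=0)   # (s, n, k=1)
encoder_states = torch.randn(seq_len, N, hid2)                 # (s, n, l)

context = torch.einsum("snk,snl->knl", attention, encoder_states)  # (1, n, l)

# Equivalent with explicit broadcasting: weight each state, sum over s
manual = (attention * encoder_states).sum(dim=0, keepdim=True)
print(torch.allclose(context, manual))  # True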
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        encoder_states, hidden, cell = self.encoder(source)

        # First input will be <SOS> token
        x = target[0]

        for t in range(1, target_len):
            # At every time step use encoder_states and update hidden, cell
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)

            # Store prediction for current time step
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output.argmax(1)

            # With probability teacher_force_ratio we feed in the actual next word,
            # otherwise the word the Decoder predicted. Teacher forcing keeps the
            # inputs the model sees during training similar to those at test time;
            # with a ratio of 1, test-time inputs could look completely unfamiliar.
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs


### We're ready to define everything we need for training our Seq2Seq model ###
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
load_model = False
save_model = True

# Training hyperparameters
num_epochs = 100
learning_rate = 3e-4
batch_size = 32

# Model hyperparameters
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 1
enc_dropout = 0.0
dec_dropout = 0.0

# Tensorboard to get nice loss plot
writer = SummaryWriter("runs/loss_plot")
step = 0

train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

# English: "a boat with several men on it is being pulled ashore by a large team of horses."
sentence = (
    "ein boot mit mehreren männern darauf wird von einem großen "
    "pferdegespann ans ufer gezogen."
)

for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

    model.eval()

    translated_sentence = translate_sentence(
        model, sentence, german, english, device, max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()

    for batch_idx, batch in enumerate(train_iterator):
        # Get input and targets and move them to the device
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Forward prop
        output = model(inp_data, target)

        # Output is of shape (trg_len, batch_size, output_dim), but CrossEntropyLoss
        # expects (N, num_classes) inputs and (N,) targets (as with MNIST: (N, 10)
        # and (N)). We therefore flatten the trg_len and batch dimensions into one,
        # and while we're at it we also remove the start token.
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradients; keeps grads within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # Plot to tensorboard
        writer.add_scalar("Training loss", loss, global_step=step)
        step += 1

# running on the entire test set takes a while
score = bleu(test_data[1:100], model, german, english, device)
print(f"BLEU score {score * 100:.2f}")
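clip_grad_norm_ above rescales all gradients in place whenever their global L2 norm exceeds max_norm, and returns the pre-clip norm, which is handy for logging. A minimal standalone sketch with a toy model (not from the repo):

import torch
import torch.nn as nn

net = nn.Linear(10, 10)
loss = net(torch.randn(4, 10)).pow(2).sum() * 100  # deliberately large loss
loss.backward()

total_norm = torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1)
print(f"pre-clip grad norm: {total_norm:.2f}")   # typically well above 1 here

clipped = torch.sqrt(sum(p.grad.pow(2).sum() for p in net.parameters()))
print(f"post-clip grad norm: {clipped:.2f}")     # at most ~1 after rescaling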
79
ML/Pytorch/more_advanced/Seq2Seq_attention/utils.py
Normal file
@@ -0,0 +1,79 @@
import torch
import spacy
from torchtext.data.metrics import bleu_score


def translate_sentence(model, sentence, german, english, device, max_length=50):
    # Load German tokenizer
    spacy_ger = spacy.load("de")

    # Create tokens using spacy, everything lowercased (which is what our vocab is)
    if type(sentence) == str:
        tokens = [token.text.lower() for token in spacy_ger(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    # Add <SOS> and <EOS> at the beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    # Go through each German token and convert it to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    # Build encoder states and hidden, cell state
    with torch.no_grad():
        outputs_encoder, hiddens, cells = model.encoder(sentence_tensor)

    outputs = [english.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hiddens, cells = model.decoder(
                previous_word, outputs_encoder, hiddens, cells
            )
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if output.argmax(1).item() == english.vocab.stoi["<eos>"]:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    # remove start token
    return translated_sentence[1:]


def bleu(data, model, german, english, device):
    targets = []
    outputs = []

    for example in data:
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate_sentence(model, src, german, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
12
ML/Pytorch/more_advanced/image_captioning/README.md
Normal file
@@ -0,0 +1,12 @@
### Image Captioning

Download the dataset used: https://www.kaggle.com/dataset/e1cd22253a9b23b073794872bf565648ddbe4f17e7fa9e74766ad3707141adeb

Then place the images folder and captions.txt inside a folder named Flickr8k.

train.py: for training the network

model.py: creates the EncoderCNN and DecoderRNN and hooks them together

get_loader.py: loads the data and builds the vocabulary

utils.py: load model, save model, print a few test cases downloaded online
142
ML/Pytorch/more_advanced/image_captioning/get_loader.py
Normal file
@@ -0,0 +1,142 @@
import os  # when loading file paths
import pandas as pd  # for lookup in annotation file
import spacy  # for tokenizer
import torch
from torch.nn.utils.rnn import pad_sequence  # pad batch
from torch.utils.data import DataLoader, Dataset
from PIL import Image  # Load img
import torchvision.transforms as transforms


# We want to convert text -> numerical values:
# 1. We need a Vocabulary mapping each word to an index
# 2. We need to set up a Pytorch dataset to load the data
# 3. Set up padding of every batch (all examples should be
#    of the same seq_len) and set up the dataloader
# Note that loading the image is very easy compared to the text!

# Download with: python -m spacy download en
spacy_eng = spacy.load("en")


class Vocabulary:
    def __init__(self, freq_threshold):
        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.freq_threshold = freq_threshold

    def __len__(self):
        return len(self.itos)

    @staticmethod
    def tokenizer_eng(text):
        return [tok.text.lower() for tok in spacy_eng.tokenizer(text)]

    def build_vocabulary(self, sentence_list):
        frequencies = {}
        idx = 4

        for sentence in sentence_list:
            for word in self.tokenizer_eng(sentence):
                if word not in frequencies:
                    frequencies[word] = 1
                else:
                    frequencies[word] += 1

                if frequencies[word] == self.freq_threshold:
                    self.stoi[word] = idx
                    self.itos[idx] = word
                    idx += 1

    def numericalize(self, text):
        tokenized_text = self.tokenizer_eng(text)

        return [
            self.stoi[token] if token in self.stoi else self.stoi["<UNK>"]
            for token in tokenized_text
        ]
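Vocabulary only admits words whose frequency reaches freq_threshold; everything else maps to <UNK> at numericalize time. A quick sanity check of the class above (threshold lowered to 1 so every word gets in; assumes the spacy English model is installed, which the module already requires):

vocab = Vocabulary(freq_threshold=1)
vocab.build_vocabulary(["a dog runs", "a cat sleeps"])
print(len(vocab))                         # 9: four special tokens + five words
print(vocab.numericalize("a dog flies"))  # [4, 5, 3]: unseen "flies" -> <UNK>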
class FlickrDataset(Dataset):
    def __init__(self, root_dir, captions_file, transform=None, freq_threshold=5):
        self.root_dir = root_dir
        self.df = pd.read_csv(captions_file)
        self.transform = transform

        # Get img, caption columns
        self.imgs = self.df["image"]
        self.captions = self.df["caption"]

        # Initialize vocabulary and build vocab
        self.vocab = Vocabulary(freq_threshold)
        self.vocab.build_vocabulary(self.captions.tolist())

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        caption = self.captions[index]
        img_id = self.imgs[index]
        img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        numericalized_caption = [self.vocab.stoi["<SOS>"]]
        numericalized_caption += self.vocab.numericalize(caption)
        numericalized_caption.append(self.vocab.stoi["<EOS>"])

        return img, torch.tensor(numericalized_caption)


class MyCollate:
    def __init__(self, pad_idx):
        self.pad_idx = pad_idx

    def __call__(self, batch):
        imgs = [item[0].unsqueeze(0) for item in batch]
        imgs = torch.cat(imgs, dim=0)
        targets = [item[1] for item in batch]
        targets = pad_sequence(targets, batch_first=False, padding_value=self.pad_idx)

        return imgs, targets
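MyCollate pads every caption in the batch to the longest one, so the loader yields a (max_len, batch) tensor since batch_first=False. A standalone illustration with dummy index sequences:

import torch
from torch.nn.utils.rnn import pad_sequence

captions = [torch.tensor([1, 5, 9, 2]), torch.tensor([1, 7, 2])]  # ragged lengths
padded = pad_sequence(captions, batch_first=False, padding_value=0)
print(padded.shape)   # torch.Size([4, 2]): (max_len, batch)
print(padded[:, 1])   # tensor([1, 7, 2, 0]): shorter caption padded with <PAD> = 0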
def get_loader(
    root_folder,
    annotation_file,
    transform,
    batch_size=32,
    num_workers=8,
    shuffle=True,
    pin_memory=True,
):
    dataset = FlickrDataset(root_folder, annotation_file, transform=transform)

    pad_idx = dataset.vocab.stoi["<PAD>"]

    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        pin_memory=pin_memory,
        collate_fn=MyCollate(pad_idx=pad_idx),
    )

    return loader, dataset


if __name__ == "__main__":
    transform = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor()]
    )

    loader, dataset = get_loader(
        "flickr8k/images/", "flickr8k/captions.txt", transform=transform
    )

    for idx, (imgs, captions) in enumerate(loader):
        print(imgs.shape)
        print(captions.shape)
66
ML/Pytorch/more_advanced/image_captioning/model.py
Normal file
@@ -0,0 +1,66 @@
import torch
import torch.nn as nn
import torchvision.models as models


class EncoderCNN(nn.Module):
    def __init__(self, embed_size, train_CNN=False):
        super(EncoderCNN, self).__init__()
        self.train_CNN = train_CNN
        self.inception = models.inception_v3(pretrained=True, aux_logits=False)
        self.inception.fc = nn.Linear(self.inception.fc.in_features, embed_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, images):
        features = self.inception(images)
        return self.dropout(self.relu(features))


class DecoderRNN(nn.Module):
    def __init__(self, embed_size, hidden_size, vocab_size, num_layers):
        super(DecoderRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, features, captions):
        embeddings = self.dropout(self.embed(captions))
        embeddings = torch.cat((features.unsqueeze(0), embeddings), dim=0)
        hiddens, _ = self.lstm(embeddings)
        outputs = self.linear(hiddens)
        return outputs


class CNNtoRNN(nn.Module):
    def __init__(self, embed_size, hidden_size, vocab_size, num_layers):
        super(CNNtoRNN, self).__init__()
        self.encoderCNN = EncoderCNN(embed_size)
        self.decoderRNN = DecoderRNN(embed_size, hidden_size, vocab_size, num_layers)

    def forward(self, images, captions):
        features = self.encoderCNN(images)
        outputs = self.decoderRNN(features, captions)
        return outputs

    def caption_image(self, image, vocabulary, max_length=50):
        result_caption = []

        with torch.no_grad():
            x = self.encoderCNN(image).unsqueeze(0)
            states = None

            for _ in range(max_length):
                hiddens, states = self.decoderRNN.lstm(x, states)
                output = self.decoderRNN.linear(hiddens.squeeze(0))
                predicted = output.argmax(1)
                result_caption.append(predicted.item())
                x = self.decoderRNN.embed(predicted).unsqueeze(0)

                if vocabulary.itos[predicted.item()] == "<EOS>":
                    break

        return [vocabulary.itos[idx] for idx in result_caption]
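In DecoderRNN.forward the image features act as the first "word": they are unsqueezed to (1, N, embed_size) and prepended to the caption embeddings, so the LSTM sees a sequence of length caption_len + 1. A standalone shape sketch with dummy tensors (toy sizes, not repo code):

import torch
import torch.nn as nn

N, embed_size, hidden_size, vocab_size, cap_len = 4, 256, 256, 1000, 12
features = torch.randn(N, embed_size)                  # EncoderCNN output
captions = torch.randint(0, vocab_size, (cap_len, N))  # (seq_len, N) token ids

embed = nn.Embedding(vocab_size, embed_size)
embeddings = torch.cat((features.unsqueeze(0), embed(captions)), dim=0)
print(embeddings.shape)  # torch.Size([13, 4, 256]): image fills step 0

lstm = nn.LSTM(embed_size, hidden_size, num_layers=1)
hiddens, _ = lstm(embeddings)
outputs = nn.Linear(hidden_size, vocab_size)(hiddens)
print(outputs.shape)     # torch.Size([13, 4, 1000]): logits per step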
BIN
ML/Pytorch/more_advanced/image_captioning/test_examples/boat.png
Normal file
|
After Width: | Height: | Size: 369 KiB |
BIN
ML/Pytorch/more_advanced/image_captioning/test_examples/bus.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
|
After Width: | Height: | Size: 92 KiB |
BIN
ML/Pytorch/more_advanced/image_captioning/test_examples/dog.jpg
Normal file
|
After Width: | Height: | Size: 133 KiB |
|
After Width: | Height: | Size: 641 KiB |
96
ML/Pytorch/more_advanced/image_captioning/train.py
Normal file
@@ -0,0 +1,96 @@
import torch
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from utils import save_checkpoint, load_checkpoint, print_examples
from get_loader import get_loader
from model import CNNtoRNN


def train():
    transform = transforms.Compose(
        [
            transforms.Resize((356, 356)),
            transforms.RandomCrop((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    train_loader, dataset = get_loader(
        root_folder="flickr8k/images",
        annotation_file="flickr8k/captions.txt",
        transform=transform,
        num_workers=2,
    )

    torch.backends.cudnn.benchmark = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    load_model = False
    save_model = False
    train_CNN = False

    # Hyperparameters
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    # for tensorboard
    writer = SummaryWriter("runs/flickr")
    step = 0

    # initialize model, loss, etc.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Only finetune the final fc layer of the CNN; the rest of the backbone
    # stays frozen unless train_CNN is True
    for name, param in model.encoderCNN.inception.named_parameters():
        if "fc.weight" in name or "fc.bias" in name:
            param.requires_grad = True
        else:
            param.requires_grad = train_CNN

    if load_model:
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    model.train()

    for epoch in range(num_epochs):
        # Uncomment the line below to see a couple of test cases
        # print_examples(model, device, dataset)

        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in tqdm(
            enumerate(train_loader), total=len(train_loader), leave=False
        ):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # Feed all caption tokens except the last; the image feature fills
            # the first time step, so outputs align with the full caption
            outputs = model(imgs, captions[:-1])
            loss = criterion(
                outputs.reshape(-1, outputs.shape[2]), captions.reshape(-1)
            )

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


if __name__ == "__main__":
    train()
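The named_parameters loop freezes the whole Inception backbone except its replaced fc layer (unless train_CNN is set). A short sanity check you could drop inside train() right after that loop, as a sketch, to confirm what will actually receive gradients:

# Sketch: count trainable vs. total parameters (assumes `model` from train())
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / {total:,}")  # only fc + decoder when train_CNN=False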
69
ML/Pytorch/more_advanced/image_captioning/utils.py
Normal file
@@ -0,0 +1,69 @@
import torch
import torchvision.transforms as transforms
from PIL import Image


def print_examples(model, device, dataset):
    transform = transforms.Compose(
        [
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    model.eval()
    test_img1 = transform(
        Image.open("test_examples/dog.jpg").convert("RGB")
    ).unsqueeze(0)
    print("Example 1 CORRECT: Dog on a beach by the ocean")
    print(
        "Example 1 OUTPUT: "
        + " ".join(model.caption_image(test_img1.to(device), dataset.vocab))
    )
    test_img2 = transform(
        Image.open("test_examples/child.jpg").convert("RGB")
    ).unsqueeze(0)
    print("Example 2 CORRECT: Child holding red frisbee outdoors")
    print(
        "Example 2 OUTPUT: "
        + " ".join(model.caption_image(test_img2.to(device), dataset.vocab))
    )
    test_img3 = transform(
        Image.open("test_examples/bus.png").convert("RGB")
    ).unsqueeze(0)
    print("Example 3 CORRECT: Bus driving by parked cars")
    print(
        "Example 3 OUTPUT: "
        + " ".join(model.caption_image(test_img3.to(device), dataset.vocab))
    )
    test_img4 = transform(
        Image.open("test_examples/boat.png").convert("RGB")
    ).unsqueeze(0)
    print("Example 4 CORRECT: A small boat in the ocean")
    print(
        "Example 4 OUTPUT: "
        + " ".join(model.caption_image(test_img4.to(device), dataset.vocab))
    )
    test_img5 = transform(
        Image.open("test_examples/horse.png").convert("RGB")
    ).unsqueeze(0)
    print("Example 5 CORRECT: A cowboy riding a horse in the desert")
    print(
        "Example 5 OUTPUT: "
        + " ".join(model.caption_image(test_img5.to(device), dataset.vocab))
    )
    model.train()


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    step = checkpoint["step"]
    return step
BIN
ML/Pytorch/more_advanced/neuralstyle/annahathaway.png
Normal file
|
After Width: | Height: | Size: 121 KiB |
112
ML/Pytorch/more_advanced/neuralstyle/nst.py
Normal file
@@ -0,0 +1,112 @@
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.utils import save_image


class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        # In convx_y, x increases by 1 after each maxpool and y counts the conv
        # layers between maxpools. These string indices (0, 5, 10, ...) therefore
        # correspond to conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 in the NST paper
        self.chosen_features = ["0", "5", "10", "19", "28"]

        # We don't need to run anything further than conv5_1 (the 28th module in vgg),
        # since we don't actually care about the output of VGG: the only thing
        # that is modified is the generated image (i.e., the input).
        self.model = models.vgg19(pretrained=True).features[:29]

    def forward(self, x):
        # Store relevant features
        features = []

        # Go through each layer in the model; if the layer is in chosen_features,
        # store its activation. At the end we return the activations for exactly
        # the layers listed in chosen_features
        for layer_num, layer in enumerate(self.model):
            x = layer(x)

            if str(layer_num) in self.chosen_features:
                features.append(x)

        return features
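The indices in chosen_features refer to positions in models.vgg19(...).features; indices 0, 5, 10, 19 and 28 land on the first conv of each block, i.e. conv1_1 through conv5_1. A quick way to inspect them (pretrained weights aren't needed just to look at the structure):

import torchvision.models as models

features = models.vgg19(pretrained=False).features
for i in ["0", "5", "10", "19", "28"]:
    print(i, features[int(i)])
# 0  Conv2d(3, 64, ...)    -> conv1_1
# 5  Conv2d(64, 128, ...)  -> conv2_1
# 28 Conv2d(512, 512, ...) -> conv5_1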
def load_image(image_name):
    image = Image.open(image_name)
    image = loader(image).unsqueeze(0)
    return image.to(device)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
imsize = 356

# Here we may want to use the normalization constants of the original VGG
# network (to get values similar to what the net was trained on), but I
# found it didn't matter much so I didn't end up using them. If you do
# use them, make sure to un-normalize afterwards so the images don't look weird.
loader = transforms.Compose(
    [
        transforms.Resize((imsize, imsize)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

original_img = load_image("annahathaway.png")
style_img = load_image("style.jpg")

# Initialize generated as white noise or as a clone of the original image.
# The clone seemed to work better for me.
# generated = torch.randn(original_img.data.shape, device=device, requires_grad=True)
generated = original_img.clone().requires_grad_(True)
model = VGG().to(device).eval()

# Hyperparameters
total_steps = 6000
learning_rate = 0.001
alpha = 1
beta = 0.01
optimizer = optim.Adam([generated], lr=learning_rate)

for step in range(total_steps):
    # Obtain the convolution features of the specifically chosen layers
    generated_features = model(generated)
    original_img_features = model(original_img)
    style_features = model(style_img)

    # Loss is 0 initially
    style_loss = original_loss = 0

    # iterate through all the features for the chosen layers
    for gen_feature, orig_feature, style_feature in zip(
        generated_features, original_img_features, style_features
    ):
        # batch_size will just be 1
        batch_size, channel, height, width = gen_feature.shape
        original_loss += torch.mean((gen_feature - orig_feature) ** 2)

        # Compute Gram matrix of the generated image's features
        G = gen_feature.view(channel, height * width).mm(
            gen_feature.view(channel, height * width).t()
        )
        # Compute Gram matrix of the style image's features
        A = style_feature.view(channel, height * width).mm(
            style_feature.view(channel, height * width).t()
        )
        style_loss += torch.mean((G - A) ** 2)

    total_loss = alpha * original_loss + beta * style_loss
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if step % 200 == 0:
        print(total_loss)
        save_image(generated, "generated.png")
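The Gram matrix G captures which channels fire together while discarding spatial layout, which is why matching it transfers texture rather than content. A small standalone version of the same computation (toy sizes):

import torch

channel, height, width = 3, 4, 4
feature = torch.randn(1, channel, height, width)  # one feature map, batch_size = 1

# Flatten spatial dims, then correlate every channel with every other channel
f = feature.view(channel, height * width)  # (C, H*W)
G = f.mm(f.t())                            # (C, C) Gram matrix
print(G.shape)                   # torch.Size([3, 3])
print(torch.allclose(G, G.t()))  # True: Gram matrices are symmetric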
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img1.png
Normal file
|
After Width: | Height: | Size: 310 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img2.png
Normal file
|
After Width: | Height: | Size: 282 KiB |