Initial commit
131
ML/Pytorch/more_advanced/GANs/DCGAN_mnist.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""
|
||||
Example code of how to code GANs and more specifically DCGAN,
|
||||
for more information about DCGANs read: https://arxiv.org/abs/1511.06434
|
||||
|
||||
We then train the DCGAN on the MNIST dataset (toy dataset of handwritten digits)
|
||||
and then generate our own. You can apply this more generally on really any dataset
|
||||
but MNIST is simple enough to get the overall idea.
|
||||
|
||||
Video explanation: https://youtu.be/5RYETbFFQ7s
|
||||
Got any questions leave a comment on youtube :)
|
||||
|
||||
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
|
||||
* 2020-04-20 Initial coding
|
||||
|
||||
"""
|
||||
|
||||
# Imports
|
||||
import torch
|
||||
import torchvision
|
||||
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
|
||||
import torch.optim as optim # For all Optimization algorithms, SGD, Adam, etc.
|
||||
import torchvision.datasets as datasets # Has standard datasets we can import in a nice way
|
||||
import torchvision.transforms as transforms # Transformations we can perform on our dataset
|
||||
from torch.utils.data import (
|
||||
DataLoader,
|
||||
) # Gives easier dataset managment and creates mini batches
|
||||
from torch.utils.tensorboard import SummaryWriter # to print to tensorboard
|
||||
from model_utils import (
|
||||
Discriminator,
|
||||
Generator,
|
||||
) # Import our models we've defined (from DCGAN paper)
|
||||
|
||||
# Hyperparameters
lr = 0.0005
batch_size = 64
image_size = 64       # MNIST images (28x28) are resized to 64x64 for DCGAN
channels_img = 1      # MNIST is grayscale
channels_noise = 256  # dimensionality of the generator's latent vector
num_epochs = 10

# For how many channels Generator and Discriminator should use
features_d = 16
features_g = 16

# Resize to 64x64, convert to tensor, then normalize to roughly [-1, 1]
# (matches the generator's Tanh output range).
my_transforms = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

dataset = datasets.MNIST(
    root="dataset/", train=True, transform=my_transforms, download=True
)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create discriminator and generator
netD = Discriminator(channels_img, features_d).to(device)
netG = Generator(channels_noise, channels_img, features_g).to(device)

# Setup Optimizer for G and D
# betas=(0.5, 0.999) follows the DCGAN paper's recommendation.
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(0.5, 0.999))

netG.train()
netD.train()

criterion = nn.BCELoss()

# NOTE(review): these two constants are defined but the loop below uses the
# smoothed values 0.9 / 0.1 directly instead.
real_label = 1
fake_label = 0

# Fixed latent batch so generated samples are comparable across training steps.
fixed_noise = torch.randn(64, channels_noise, 1, 1).to(device)
writer_real = SummaryWriter(f"runs/GAN_MNIST/test_real")
writer_fake = SummaryWriter(f"runs/GAN_MNIST/test_fake")
step = 0

print("Starting Training...")

for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(dataloader):
        data = data.to(device)
        # NOTE(review): shadows the hyperparameter `batch_size`; needed because
        # the last batch of an epoch can be smaller than 64.
        batch_size = data.shape[0]

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        netD.zero_grad()
        # One-sided label smoothing: real targets are 0.9 instead of 1.0.
        label = (torch.ones(batch_size) * 0.9).to(device)
        output = netD(data).reshape(-1)
        lossD_real = criterion(output, label)
        D_x = output.mean().item()

        noise = torch.randn(batch_size, channels_noise, 1, 1).to(device)
        fake = netG(noise)
        # Smoothed fake targets (0.1 instead of 0.0).
        label = (torch.ones(batch_size) * 0.1).to(device)

        # detach() so the generator is not updated by the discriminator's loss.
        output = netD(fake.detach()).reshape(-1)
        lossD_fake = criterion(output, label)

        lossD = lossD_real + lossD_fake
        lossD.backward()
        optimizerD.step()

        ### Train Generator: max log(D(G(z)))
        netG.zero_grad()
        label = torch.ones(batch_size).to(device)
        output = netD(fake).reshape(-1)
        lossG = criterion(output, label)
        lossG.backward()
        optimizerG.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0:
            step += 1
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {lossD:.4f}, loss G: {lossG:.4f} D(x): {D_x:.4f}"
            )

            with torch.no_grad():
                fake = netG(fixed_noise)
                img_grid_real = torchvision.utils.make_grid(data[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)
                writer_real.add_image(
                    "Mnist Real Images", img_grid_real, global_step=step
                )
                writer_fake.add_image(
                    "Mnist Fake Images", img_grid_fake, global_step=step
                )
4
ML/Pytorch/more_advanced/GANs/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
### Generative Adversarial Network
|
||||
|
||||
DCGAN_mnist.py: main file for training the network
|
||||
model_utils.py: Generator and discriminator implementation
|
||||
76
ML/Pytorch/more_advanced/GANs/model_utils.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Discriminator and Generator implementation from DCGAN paper
|
||||
that we import in the main (DCGAN_mnist.py) file.
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class Discriminator(nn.Module):
    """DCGAN discriminator.

    Maps a batch of (N, channels_img, 64, 64) images to a (N, 1, 1, 1)
    probability of each image being real.
    """

    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        # Every stride-2 conv halves the spatial resolution; channel counts
        # double at each step, as in the DCGAN paper.
        layers = [
            # N x channels_img x 64 x 64 -> N x features_d x 32 x 32
            nn.Conv2d(channels_img, features_d, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
        ]
        for mult in (1, 2, 4):
            layers += [
                nn.Conv2d(
                    features_d * mult, features_d * mult * 2,
                    kernel_size=4, stride=2, padding=1,
                ),
                nn.BatchNorm2d(features_d * mult * 2),
                nn.LeakyReLU(0.2),
            ]
        layers += [
            # N x features_d*8 x 4 x 4 -> N x 1 x 1 x 1
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
            nn.Sigmoid(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the image batch through the convolutional stack."""
        return self.net(x)
|
||||
|
||||
|
||||
class Generator(nn.Module):
    """DCGAN generator.

    Maps latent noise of shape (N, channels_noise, 1, 1) to images of shape
    (N, channels_img, 64, 64) with values in [-1, 1] (Tanh output).
    """

    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()

        def up_block(in_ch, out_ch, stride, padding):
            # ConvTranspose upsampling step followed by BatchNorm + ReLU.
            return [
                nn.ConvTranspose2d(
                    in_ch, out_ch, kernel_size=4, stride=stride, padding=padding
                ),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            ]

        modules = []
        # N x channels_noise x 1 x 1 -> N x features_g*16 x 4 x 4
        modules += up_block(channels_noise, features_g * 16, 1, 0)
        # 4x4 -> 8x8 -> 16x16 -> 32x32, halving channels each time
        modules += up_block(features_g * 16, features_g * 8, 2, 1)
        modules += up_block(features_g * 8, features_g * 4, 2, 1)
        modules += up_block(features_g * 4, features_g * 2, 2, 1)
        # 32x32 -> N x channels_img x 64 x 64
        modules += [
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            nn.Tanh(),
        ]
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run the latent batch through the transposed-conv stack."""
        return self.net(x)
|
||||
242
ML/Pytorch/more_advanced/Seq2Seq/seq2seq.py
Normal file
@@ -0,0 +1,242 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torchtext.datasets import Multi30k
|
||||
from torchtext.data import Field, BucketIterator
|
||||
import numpy as np
|
||||
import spacy
|
||||
import random
|
||||
from torch.utils.tensorboard import SummaryWriter # to print to tensorboard
|
||||
from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint
|
||||
|
||||
spacy_ger = spacy.load("de")
|
||||
spacy_eng = spacy.load("en")
|
||||
|
||||
|
||||
def tokenize_ger(text):
    """Tokenize a German string into a list of token strings using spaCy."""
    return [token.text for token in spacy_ger.tokenizer(text)]
|
||||
|
||||
|
||||
def tokenize_eng(text):
    """Tokenize an English string into a list of token strings using spaCy."""
    return [token.text for token in spacy_eng.tokenizer(text)]
|
||||
|
||||
|
||||
# torchtext Fields describe how raw text is tokenized and numericalized;
# both sides get <sos>/<eos> markers and are lower-cased.
german = Field(tokenize=tokenize_ger, lower=True, init_token="<sos>", eos_token="<eos>")

english = Field(
    tokenize=tokenize_eng, lower=True, init_token="<sos>", eos_token="<eos>"
)

# Multi30k: a German-English translation dataset of ~30k sentence pairs.
train_data, valid_data, test_data = Multi30k.splits(
    exts=(".de", ".en"), fields=(german, english)
)

# Vocabularies are built from the training split only; tokens seen fewer
# than min_freq times map to <unk>.
german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)
|
||||
|
||||
|
||||
class Encoder(nn.Module):
    """LSTM encoder: embeds a source token sequence and returns the final
    (hidden, cell) states used to initialize the decoder."""

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(Encoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)

    def forward(self, x):
        """x: (seq_length, N) LongTensor of source token indices.

        Returns the LSTM's final hidden and cell states, each of shape
        (num_layers, N, hidden_size); per-token outputs are discarded.
        """
        embedded = self.dropout(self.embedding(x))
        # embedded: (seq_length, N, embedding_size)
        _, (hidden, cell) = self.rnn(embedded)
        return hidden, cell
|
||||
|
||||
|
||||
class Decoder(nn.Module):
    """Single-step LSTM decoder: consumes one token per call together with
    the previous (hidden, cell) state and returns vocabulary logits."""

    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(Decoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """x: (N,) batch of current tokens; hidden/cell from the previous step.

        Returns (logits of shape (N, output_size), hidden, cell).
        """
        # Add a length-1 sequence dimension: the decoder runs one time step.
        step_input = self.dropout(self.embedding(x.unsqueeze(0)))
        # step_input: (1, N, embedding_size)

        rnn_out, (hidden, cell) = self.rnn(step_input, (hidden, cell))
        # rnn_out: (1, N, hidden_size)

        # Project to vocabulary logits and drop the singleton time dimension
        # so the loss function sees (N, output_size).
        predictions = self.fc(rnn_out).squeeze(0)
        return predictions, hidden, cell
|
||||
|
||||
|
||||
class Seq2Seq(nn.Module):
    """Wires the Encoder and Decoder together for training with teacher forcing."""

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        """source: (src_len, N); target: (trg_len, N).

        Returns logits of shape (trg_len, N, vocab_size); position 0 is left
        as zeros (the <sos> slot is never predicted).
        NOTE(review): relies on the module-level globals `english` and `device`.
        """
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        # Encode the whole source sentence into a single context state.
        hidden, cell = self.encoder(source)

        # Decoding starts from the <sos> token of each target sentence.
        x = target[0]
        for step in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[step] = output

            # Index of the highest-scoring vocabulary entry per batch element.
            best_guess = output.argmax(1)

            # Teacher forcing: with probability teacher_force_ratio feed the
            # ground-truth next token, otherwise feed the model's own guess —
            # this keeps train-time inputs similar to what the model will see
            # at inference time.
            use_teacher = random.random() < teacher_force_ratio
            x = target[step] if use_teacher else best_guess

        return outputs
|
||||
|
||||
|
||||
### We're ready to define everything we need for training our Seq2Seq model ###

# Training hyperparameters
num_epochs = 100
learning_rate = 0.001
batch_size = 64

# Model hyperparameters
load_model = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024  # Needs to be the same for both RNN's
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

# Tensorboard to get nice loss plot
writer = SummaryWriter(f"runs/loss_plot")
step = 0

# sort_within_batch + sort_key groups sentences of similar length together,
# which reduces the amount of padding per batch.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Padding positions are excluded from the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)


# Example sentence used to eyeball translation quality once per epoch.
sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    # Checkpoint at the start of every epoch.
    checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
    save_checkpoint(checkpoint)

    # Switch to eval mode (disables dropout) for the example translation.
    model.eval()

    translated_sentence = translate_sentence(
        model, sentence, german, english, device, max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()

    for batch_idx, batch in enumerate(train_iterator):
        # Get input and targets and get to cuda
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Forward prop
        output = model(inp_data, target)

        # Output is of shape (trg_len, batch_size, output_dim) but Cross Entropy Loss
        # doesn't take input in that form. For example if we have MNIST we want to have
        # output to be: (N, 10) and targets just (N). Here we can view it in a similar
        # way that we have output_words * batch_size that we want to send in into
        # our cost function, so we need to do some reshaping.
        # Let's also remove the start token while we're at it
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues, makes sure grads are
        # within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # Plot to tensorboard
        writer.add_scalar("Training loss", loss, global_step=step)
        step += 1


# Evaluate BLEU on a slice of the test set (full test set is slow).
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score*100:.2f}")
|
||||
84
ML/Pytorch/more_advanced/Seq2Seq/utils.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import torch
|
||||
import spacy
|
||||
from torchtext.data.metrics import bleu_score
|
||||
import sys
|
||||
|
||||
|
||||
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedy-decode an English translation of a German sentence.

    Args:
        model: Seq2Seq model exposing .encoder and .decoder submodules.
        sentence: raw German string or a pre-tokenized list of tokens.
        german, english: torchtext Fields holding the vocabularies.
        device: torch device the model lives on.
        max_length: hard cap on the number of generated tokens.

    Returns:
        List of predicted English tokens (without the leading <sos>).
    """
    # Load german tokenizer.
    # NOTE(review): this reloads the spaCy model on every call, which is slow;
    # consider loading it once at module level.
    spacy_ger = spacy.load("de")

    # Tokenize and lower-case — the vocabulary was built on lower-cased text.
    if isinstance(sentence, str):
        tokens = [token.text.lower() for token in spacy_ger(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    # Go through each german token and convert to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Shape (seq_len, 1): a batch containing one sentence.
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    # Build encoder hidden, cell state
    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)

    outputs = [english.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Stop once the model emits the end-of-sentence token.
        if best_guess == english.vocab.stoi["<eos>"]:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    # remove start token
    return translated_sentence[1:]
|
||||
|
||||
|
||||
def bleu(data, model, german, english, device):
    """Compute the corpus BLEU score of `model` over torchtext examples."""
    targets = []
    outputs = []

    for example in data:
        fields = vars(example)
        src = fields["src"]
        trg = fields["trg"]

        # Translate the source and drop the trailing <eos> token.
        hypothesis = translate_sentence(model, src, german, english, device)[:-1]

        # bleu_score expects a list of references per hypothesis.
        targets.append([trg])
        outputs.append(hypothesis)

    return bleu_score(outputs, targets)
|
||||
|
||||
|
||||
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Serialize a training-state dict (model/optimizer state) to `filename`."""
    print("=> Saving checkpoint")
    torch.save(state, filename)
|
||||
|
||||
|
||||
def load_checkpoint(checkpoint, model, optimizer):
    """Restore model and optimizer state from a dict made by save_checkpoint."""
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
|
||||
279
ML/Pytorch/more_advanced/Seq2Seq_attention/seq2seq_attention.py
Normal file
@@ -0,0 +1,279 @@
|
||||
import random
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import numpy as np
|
||||
import spacy
|
||||
from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint
|
||||
from torch.utils.tensorboard import SummaryWriter # to print to tensorboard
|
||||
from torchtext.datasets import Multi30k
|
||||
from torchtext.data import Field, BucketIterator
|
||||
|
||||
"""
|
||||
To install spacy languages do:
|
||||
python -m spacy download en
|
||||
python -m spacy download de
|
||||
"""
|
||||
spacy_ger = spacy.load("de")
|
||||
spacy_eng = spacy.load("en")
|
||||
|
||||
|
||||
def tokenize_ger(text):
    """Tokenize a German string into a list of token strings using spaCy."""
    return [token.text for token in spacy_ger.tokenizer(text)]
|
||||
|
||||
|
||||
def tokenize_eng(text):
    """Tokenize an English string into a list of token strings using spaCy."""
    return [token.text for token in spacy_eng.tokenizer(text)]
|
||||
|
||||
|
||||
# torchtext Fields describe how raw text is tokenized and numericalized;
# both sides get <sos>/<eos> markers and are lower-cased.
german = Field(tokenize=tokenize_ger, lower=True, init_token="<sos>", eos_token="<eos>")

english = Field(
    tokenize=tokenize_eng, lower=True, init_token="<sos>", eos_token="<eos>"
)

# Multi30k: a German-English translation dataset of ~30k sentence pairs.
train_data, valid_data, test_data = Multi30k.splits(
    exts=(".de", ".en"), fields=(german, english)
)

# Vocabularies are built from the training split only; tokens seen fewer
# than min_freq times map to <unk>.
german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)
|
||||
|
||||
|
||||
class Encoder(nn.Module):
    """Bidirectional LSTM encoder.

    Produces per-token states for the attention mechanism plus initial
    (hidden, cell) states for the unidirectional decoder.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, bidirectional=True)

        # Project the concatenated forward/backward states down to hidden_size
        # so they can seed the (unidirectional) decoder.
        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        """x: (seq_length, N) source token indices.

        Returns (encoder_states, hidden, cell) where encoder_states has shape
        (seq_length, N, hidden_size*2).
        """
        embedded = self.dropout(self.embedding(x))
        # embedded: (seq_length, N, embedding_size)

        encoder_states, (hidden, cell) = self.rnn(embedded)

        # hidden/cell arrive as (2, N, hidden_size): index 0 is the forward
        # direction, index 1 the backward one. Slicing with idx:idx+1 keeps
        # the leading layer dimension while concatenating the directions.
        fused_hidden = torch.cat((hidden[0:1], hidden[1:2]), dim=2)
        fused_cell = torch.cat((cell[0:1], cell[1:2]), dim=2)

        return encoder_states, self.fc_hidden(fused_hidden), self.fc_cell(fused_cell)
|
||||
|
||||
|
||||
class Decoder(nn.Module):
    """One-step LSTM decoder with additive attention over the encoder states."""

    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        # Input is the attention context (hidden_size*2) concatenated with
        # the embedded previous token.
        self.rnn = nn.LSTM(hidden_size * 2 + embedding_size, hidden_size, num_layers)

        # Scores (decoder hidden ++ encoder state) pairs: hidden_size*3 -> 1.
        self.energy = nn.Linear(hidden_size * 3, 1)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p)
        self.softmax = nn.Softmax(dim=0)  # normalize over source positions
        self.relu = nn.ReLU()

    def forward(self, x, encoder_states, hidden, cell):
        """x: (N,) previous tokens; encoder_states: (seq_length, N, hidden_size*2).

        Returns (logits of shape (N, output_size), hidden, cell).
        """
        # Add a length-1 time dimension and embed: (1, N, embedding_size).
        embedded = self.dropout(self.embedding(x.unsqueeze(0)))

        # Broadcast the decoder hidden state along the source sequence so each
        # source position can be scored against it.
        seq_len = encoder_states.shape[0]
        repeated_hidden = hidden.repeat(seq_len, 1, 1)

        scores = self.relu(
            self.energy(torch.cat((repeated_hidden, encoder_states), dim=2))
        )
        # scores/attention: (seq_length, N, 1)
        attention = self.softmax(scores)

        # Attention-weighted sum of encoder states:
        # (s, n, 1) einsum (s, n, l) -> (1, n, l), i.e. one context per batch element.
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)

        # rnn_input: (1, N, hidden_size*2 + embedding_size)
        rnn_input = torch.cat((context_vector, embedded), dim=2)
        rnn_out, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        # rnn_out: (1, N, hidden_size)

        predictions = self.fc(rnn_out).squeeze(0)
        return predictions, hidden, cell
|
||||
|
||||
|
||||
class Seq2Seq(nn.Module):
    """Wires the attention Encoder and Decoder together with teacher forcing."""

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        """source: (src_len, N); target: (trg_len, N).

        Returns logits of shape (trg_len, N, vocab_size); position 0 is left
        as zeros (the <sos> slot is never predicted).
        NOTE(review): relies on the module-level globals `english` and `device`.
        """
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        # Encode once; the per-token encoder states feed the attention.
        encoder_states, hidden, cell = self.encoder(source)

        # Decoding starts from the <sos> token of each target sentence.
        x = target[0]
        for step in range(1, target_len):
            # Every step attends over encoder_states and updates hidden/cell.
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)
            outputs[step] = output

            # Index of the highest-scoring vocabulary entry per batch element.
            best_guess = output.argmax(1)

            # Teacher forcing: with probability teacher_force_ratio feed the
            # ground-truth next token, otherwise the model's own prediction —
            # this keeps train-time inputs similar to inference-time inputs.
            use_teacher = random.random() < teacher_force_ratio
            x = target[step] if use_teacher else best_guess

        return outputs
|
||||
|
||||
|
||||
### We're ready to define everything we need for training our Seq2Seq model ###
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
load_model = False
save_model = True

# Training hyperparameters
num_epochs = 100
learning_rate = 3e-4
batch_size = 32

# Model hyperparameters
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 1  # the encoder's direction-fusing logic assumes a single layer
enc_dropout = 0.0
dec_dropout = 0.0

# Tensorboard to get nice loss plot
writer = SummaryWriter(f"runs/loss_plot")
step = 0

# sort_within_batch + sort_key groups sentences of similar length together,
# which reduces the amount of padding per batch.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Padding positions are excluded from the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)
|
||||
|
||||
# Example sentence used to eyeball translation quality once per epoch.
# NOTE: the trailing space before the line break is required — Python's
# implicit string concatenation would otherwise fuse "großen" and
# "pferdegespann" into one out-of-vocabulary token.
sentence = (
    "ein boot mit mehreren männern darauf wird von einem großen "
    "pferdegespann ans ufer gezogen."
)
|
||||
|
||||
for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    # Checkpoint at the start of every epoch.
    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

    # Switch to eval mode (disables dropout) for the example translation.
    model.eval()

    translated_sentence = translate_sentence(
        model, sentence, german, english, device, max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()

    for batch_idx, batch in enumerate(train_iterator):
        # Get input and targets and get to cuda
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Forward prop
        output = model(inp_data, target)

        # Output is of shape (trg_len, batch_size, output_dim) but Cross Entropy Loss
        # doesn't take input in that form. For example if we have MNIST we want to have
        # output to be: (N, 10) and targets just (N). Here we can view it in a similar
        # way that we have output_words * batch_size that we want to send in into
        # our cost function, so we need to do some reshaping.
        # Let's also remove the start token while we're at it
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues, makes sure grads are
        # within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # Plot to tensorboard
        writer.add_scalar("Training loss", loss, global_step=step)
        step += 1

# running on entire test data takes a while
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score * 100:.2f}")
|
||||
79
ML/Pytorch/more_advanced/Seq2Seq_attention/utils.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import torch
|
||||
import spacy
|
||||
from torchtext.data.metrics import bleu_score
|
||||
import sys
|
||||
|
||||
|
||||
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedy-decode an English translation using the attention Seq2Seq model.

    Accepts either a raw German string (tokenized with spaCy) or a
    pre-tokenized list; returns the predicted English tokens without the
    leading <sos>.
    """
    # Load german tokenizer
    spacy_ger = spacy.load("de")

    # Lower-case everything, matching how the vocabulary was built.
    if type(sentence) == str:
        token_list = [tok.text.lower() for tok in spacy_ger(sentence)]
    else:
        token_list = [tok.lower() for tok in sentence]

    # Wrap the sentence with the <sos>/<eos> markers the model was trained on.
    token_list = [german.init_token] + token_list + [german.eos_token]

    # Numericalize and shape as (seq_len, 1): a batch containing one sentence.
    indices = [german.vocab.stoi[tok] for tok in token_list]
    sentence_tensor = torch.LongTensor(indices).unsqueeze(1).to(device)

    # Encode once; decoding then proceeds one token at a time.
    with torch.no_grad():
        outputs_encoder, hiddens, cells = model.encoder(sentence_tensor)

    generated = [english.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([generated[-1]]).to(device)

        with torch.no_grad():
            output, hiddens, cells = model.decoder(
                previous_word, outputs_encoder, hiddens, cells
            )
            best_guess = output.argmax(1).item()

        generated.append(best_guess)

        # Stop as soon as the model emits the end-of-sentence token.
        if best_guess == english.vocab.stoi["<eos>"]:
            break

    # Map indices back to words and drop the start token.
    return [english.vocab.itos[idx] for idx in generated][1:]
|
||||
|
||||
|
||||
def bleu(data, model, german, english, device):
    """Compute the corpus BLEU score of `model` over torchtext examples."""
    targets = []
    outputs = []

    for example in data:
        fields = vars(example)
        src = fields["src"]
        trg = fields["trg"]

        # Translate the source and drop the trailing <eos> token.
        hypothesis = translate_sentence(model, src, german, english, device)[:-1]

        # bleu_score expects a list of references per hypothesis.
        targets.append([trg])
        outputs.append(hypothesis)

    return bleu_score(outputs, targets)
|
||||
|
||||
|
||||
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Serialize a training-state dict (model/optimizer state) to `filename`."""
    print("=> Saving checkpoint")
    torch.save(state, filename)
|
||||
|
||||
|
||||
def load_checkpoint(checkpoint, model, optimizer):
    """Restore model and optimizer parameters from a checkpoint dict.

    Expects the keys "state_dict" and "optimizer", as written by
    save_checkpoint call sites in this file.
    """
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
|
||||
12
ML/Pytorch/more_advanced/image_captioning/README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
### Image Captioning
|
||||
|
||||
Download the dataset used: https://www.kaggle.com/dataset/e1cd22253a9b23b073794872bf565648ddbe4f17e7fa9e74766ad3707141adeb
|
||||
Then place the images folder and captions.txt inside a folder named Flickr8k.
|
||||
|
||||
train.py: For training the network
|
||||
|
||||
model.py: creating the encoderCNN, decoderRNN and hooking them together
|
||||
|
||||
get_loader.py: Loading the data, creating vocabulary
|
||||
|
||||
utils.py: Load model, save model, printing few test cases downloaded online
|
||||
142
ML/Pytorch/more_advanced/image_captioning/get_loader.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import os # when loading file paths
|
||||
import pandas as pd # for lookup in annotation file
|
||||
import spacy # for tokenizer
|
||||
import torch
|
||||
from torch.nn.utils.rnn import pad_sequence # pad batch
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
from PIL import Image # Load img
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
# We want to convert text -> numerical values
|
||||
# 1. We need a Vocabulary mapping each word to a index
|
||||
# 2. We need to setup a Pytorch dataset to load the data
|
||||
# 3. Setup padding of every batch (all examples should be
|
||||
# of same seq_len and setup dataloader)
|
||||
# Note that loading the image is very easy compared to the text!
|
||||
|
||||
# Download with: python -m spacy download en
|
||||
spacy_eng = spacy.load("en")
|
||||
|
||||
|
||||
class Vocabulary:
    """Word-level vocabulary with special tokens and a frequency threshold.

    A word is added to the mapping only once it has been seen at least
    ``freq_threshold`` times across the training captions.
    """

    def __init__(self, freq_threshold):
        # index -> token and token -> index, pre-seeded with special tokens.
        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.freq_threshold = freq_threshold

    def __len__(self):
        return len(self.itos)

    @staticmethod
    def tokenizer_eng(text):
        """Lower-cased spaCy tokenization of *text*."""
        return [tok.text.lower() for tok in spacy_eng.tokenizer(text)]

    def build_vocabulary(self, sentence_list):
        """Populate stoi/itos from *sentence_list*, honoring the threshold."""
        counts = {}
        next_index = 4  # indices 0-3 are reserved for the special tokens

        for sentence in sentence_list:
            for word in self.tokenizer_eng(sentence):
                counts[word] = counts.get(word, 0) + 1
                # Register the word exactly once: when it reaches the threshold.
                if counts[word] == self.freq_threshold:
                    self.stoi[word] = next_index
                    self.itos[next_index] = word
                    next_index += 1

    def numericalize(self, text):
        """Map *text* to a list of indices, using <UNK> for unknown words."""
        unk = self.stoi["<UNK>"]
        return [self.stoi.get(token, unk) for token in self.tokenizer_eng(text)]
|
||||
|
||||
|
||||
class FlickrDataset(Dataset):
    """Flickr8k dataset yielding (image tensor, numericalized caption) pairs.

    The captions file must be a CSV with "image" and "caption" columns; a
    Vocabulary is built from all captions at construction time.
    """

    def __init__(self, root_dir, captions_file, transform=None, freq_threshold=5):
        self.root_dir = root_dir
        self.df = pd.read_csv(captions_file)
        self.transform = transform

        # Image file names and their corresponding caption strings.
        self.imgs = self.df["image"]
        self.captions = self.df["caption"]

        # Build the vocabulary once, over every caption in the file.
        self.vocab = Vocabulary(freq_threshold)
        self.vocab.build_vocabulary(self.captions.tolist())

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        image_path = os.path.join(self.root_dir, self.imgs[index])
        image = Image.open(image_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        # Caption as vocabulary indices, wrapped in <SOS> ... <EOS>.
        token_ids = [self.vocab.stoi["<SOS>"]]
        token_ids.extend(self.vocab.numericalize(self.captions[index]))
        token_ids.append(self.vocab.stoi["<EOS>"])

        return image, torch.tensor(token_ids)
|
||||
|
||||
|
||||
class MyCollate:
    """Batch collator: stacks images and pads captions to a common length."""

    def __init__(self, pad_idx):
        # Index used to pad the shorter captions within a batch.
        self.pad_idx = pad_idx

    def __call__(self, batch):
        images, captions = zip(*batch)
        # (C, H, W) images -> one (N, C, H, W) batch tensor.
        stacked_images = torch.cat([img.unsqueeze(0) for img in images], dim=0)
        # Variable-length captions -> (max_len, N), padded with pad_idx.
        padded_captions = pad_sequence(
            captions, batch_first=False, padding_value=self.pad_idx
        )
        return stacked_images, padded_captions
|
||||
|
||||
|
||||
def get_loader(
    root_folder,
    annotation_file,
    transform,
    batch_size=32,
    num_workers=8,
    shuffle=True,
    pin_memory=True,
):
    """Build a DataLoader (and the underlying FlickrDataset) for Flickr8k.

    Returns ``(loader, dataset)``; the dataset is returned as well so
    callers can access the vocabulary built from the captions.
    """
    dataset = FlickrDataset(root_folder, annotation_file, transform=transform)

    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        pin_memory=pin_memory,
        # Pad captions with the vocabulary's <PAD> index when batching.
        collate_fn=MyCollate(pad_idx=dataset.vocab.stoi["<PAD>"]),
    )

    return loader, dataset
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: build the loader and print the shape of each batch.
    transform = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor(),]
    )

    loader, dataset = get_loader(
        "flickr8k/images/", "flickr8k/captions.txt", transform=transform
    )

    # imgs: (batch, 3, 224, 224); captions: (max_caption_len, batch)
    for idx, (imgs, captions) in enumerate(loader):
        print(imgs.shape)
        print(captions.shape)
|
||||
66
ML/Pytorch/more_advanced/image_captioning/model.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import statistics
|
||||
import torchvision.models as models
|
||||
|
||||
|
||||
class EncoderCNN(nn.Module):
    """Image encoder: pretrained Inception v3 whose final fc layer is
    replaced to emit an *embed_size*-dimensional feature vector."""

    def __init__(self, embed_size, train_CNN=False):
        super(EncoderCNN, self).__init__()
        # Stored for reference only; fine-tuning is actually controlled in
        # train.py by toggling requires_grad on inception's parameters.
        self.train_CNN = train_CNN
        # NOTE(review): newer torchvision releases reject pretrained=True
        # together with aux_logits=False — confirm the installed torchvision
        # accepts this combination.
        self.inception = models.inception_v3(pretrained=True, aux_logits=False)
        # Swap the 1000-class head for a linear projection to embed_size.
        self.inception.fc = nn.Linear(self.inception.fc.in_features, embed_size)
        self.relu = nn.ReLU()
        self.times = []  # unused in this class; kept as-is
        self.dropout = nn.Dropout(0.5)

    def forward(self, images):
        # images -> (batch, embed_size) features, passed through ReLU + dropout.
        features = self.inception(images)
        return self.dropout(self.relu(features))
|
||||
|
||||
|
||||
class DecoderRNN(nn.Module):
    """Caption decoder: an LSTM that consumes the image feature vector as
    its first time step, followed by the embedded caption tokens."""

    def __init__(self, embed_size, hidden_size, vocab_size, num_layers):
        super(DecoderRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, features, captions):
        # captions: (seq_len, batch) -> (seq_len, batch, embed_size)
        token_embeddings = self.dropout(self.embed(captions))
        # Prepend the image features as the step-0 "token".
        sequence = torch.cat((features.unsqueeze(0), token_embeddings), dim=0)
        lstm_out, _ = self.lstm(sequence)
        # Project hidden states to vocabulary logits.
        return self.linear(lstm_out)
|
||||
|
||||
|
||||
class CNNtoRNN(nn.Module):
    """Full captioning model: EncoderCNN features feed a DecoderRNN."""

    def __init__(self, embed_size, hidden_size, vocab_size, num_layers):
        super(CNNtoRNN, self).__init__()
        self.encoderCNN = EncoderCNN(embed_size)
        self.decoderRNN = DecoderRNN(embed_size, hidden_size, vocab_size, num_layers)

    def forward(self, images, captions):
        # Encode the images, then let the decoder score each caption position.
        return self.decoderRNN(self.encoderCNN(images), captions)

    def caption_image(self, image, vocabulary, max_length=50):
        """Greedy decoding: generate up to *max_length* tokens for *image*.

        Returns the generated caption as a list of token strings (including
        "<EOS>" when it is produced within the length limit).
        """
        generated = []

        with torch.no_grad():
            # Image features act as the first LSTM input; shape (1, batch, embed).
            x = self.encoderCNN(image).unsqueeze(0)
            states = None

            for _ in range(max_length):
                hiddens, states = self.decoderRNN.lstm(x, states)
                logits = self.decoderRNN.linear(hiddens.squeeze(0))
                predicted = logits.argmax(1)
                generated.append(predicted.item())
                # Feed the predicted token back in as the next input.
                x = self.decoderRNN.embed(predicted).unsqueeze(0)

                if vocabulary.itos[predicted.item()] == "<EOS>":
                    break

        return [vocabulary.itos[idx] for idx in generated]
|
||||
BIN
ML/Pytorch/more_advanced/image_captioning/test_examples/boat.png
Normal file
|
After Width: | Height: | Size: 369 KiB |
BIN
ML/Pytorch/more_advanced/image_captioning/test_examples/bus.png
Normal file
|
After Width: | Height: | Size: 866 KiB |
|
After Width: | Height: | Size: 92 KiB |
BIN
ML/Pytorch/more_advanced/image_captioning/test_examples/dog.jpg
Normal file
|
After Width: | Height: | Size: 133 KiB |
|
After Width: | Height: | Size: 641 KiB |
96
ML/Pytorch/more_advanced/image_captioning/train.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import torchvision.transforms as transforms
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from utils import save_checkpoint, load_checkpoint, print_examples
|
||||
from get_loader import get_loader
|
||||
from model import CNNtoRNN
|
||||
|
||||
|
||||
def train():
    """Train the CNN->RNN captioning model on Flickr8k.

    Builds the data loader, model, loss and optimizer, optionally restores
    a checkpoint, and runs the training loop while logging to TensorBoard.
    """
    transform = transforms.Compose(
        [
            transforms.Resize((356, 356)),
            transforms.RandomCrop((299, 299)),  # Inception v3 input size
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    train_loader, dataset = get_loader(
        root_folder="flickr8k/images",
        annotation_file="flickr8k/captions.txt",
        transform=transform,
        num_workers=2,
    )

    torch.backends.cudnn.benchmark = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    load_model = False
    save_model = False
    train_CNN = False

    # Hyperparameters
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    # for tensorboard
    writer = SummaryWriter("runs/flickr")
    step = 0

    # initialize model, loss etc
    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    # Ignore <PAD> positions when computing the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Only finetune the CNN's replaced fc layer unless train_CNN is set.
    for name, param in model.encoderCNN.inception.named_parameters():
        if "fc.weight" in name or "fc.bias" in name:
            param.requires_grad = True
        else:
            param.requires_grad = train_CNN

    if load_model:
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    model.train()

    for epoch in range(num_epochs):
        # Uncomment the line below to see a couple of test cases
        # print_examples(model, device, dataset)

        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in tqdm(
            enumerate(train_loader), total=len(train_loader), leave=False
        ):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # Feed captions without the final token; the decoder prepends the
            # image features, so outputs align with the full-length captions.
            outputs = model(imgs, captions[:-1])
            loss = criterion(
                outputs.reshape(-1, outputs.shape[2]), captions.reshape(-1)
            )

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            # BUG FIX: was loss.backward(loss), which passes the loss value as
            # the gradient argument and scales every gradient by it; plain
            # backward() computes the correct gradients for a scalar loss.
            loss.backward()
            optimizer.step()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: run training when executed as a script.
    train()
|
||||
69
ML/Pytorch/more_advanced/image_captioning/utils.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import torch
|
||||
import torchvision.transforms as transforms
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def print_examples(model, device, dataset):
    """Caption a few bundled test images and print the model's output next
    to a human-written reference caption.

    Temporarily switches *model* to eval mode and restores train mode at
    the end. Expects the images under ``test_examples/``.
    """
    transform = transforms.Compose(
        [
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    # (image path, reference caption) pairs; the original code repeated this
    # stanza five times — a data-driven loop prints identical output.
    examples = [
        ("test_examples/dog.jpg", "Dog on a beach by the ocean"),
        ("test_examples/child.jpg", "Child holding red frisbee outdoors"),
        ("test_examples/bus.png", "Bus driving by parked cars"),
        ("test_examples/boat.png", "A small boat in the ocean"),
        ("test_examples/horse.png", "A cowboy riding a horse in the desert"),
    ]

    model.eval()
    for i, (path, correct) in enumerate(examples, start=1):
        img = transform(Image.open(path).convert("RGB")).unsqueeze(0)
        print(f"Example {i} CORRECT: {correct}")
        print(
            f"Example {i} OUTPUT: "
            + " ".join(model.caption_image(img.to(device), dataset.vocab))
        )
    model.train()
|
||||
|
||||
|
||||
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Serialize *state* (model/optimizer state dicts plus step) to *filename*."""
    print("=> Saving checkpoint")
    torch.save(state, filename)
|
||||
|
||||
|
||||
def load_checkpoint(checkpoint, model, optimizer):
    """Restore model/optimizer state from *checkpoint* and return the
    training step that was stored alongside it."""
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    return checkpoint["step"]
|
||||
BIN
ML/Pytorch/more_advanced/neuralstyle/annahathaway.png
Normal file
|
After Width: | Height: | Size: 121 KiB |
112
ML/Pytorch/more_advanced/neuralstyle/nst.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from PIL import Image
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.models as models
|
||||
from torchvision.utils import save_image
|
||||
|
||||
|
||||
class VGG(nn.Module):
    """VGG-19 feature extractor returning the activations of the layers the
    NST paper uses (conv1_1, conv2_1, conv3_1, conv4_1, conv5_1)."""

    def __init__(self):
        super(VGG, self).__init__()
        # Indices into vgg19.features for conv1_1..conv5_1: the first number
        # in convx_y advances after each maxpool, the second counts conv
        # layers within a pool block.
        self.chosen_features = ["0", "5", "10", "19", "28"]

        # Nothing past conv5_1 (module 28) is needed: VGG's classifier output
        # is never used — only these intermediate activations matter, and the
        # generated image (the input) is what gets optimized.
        self.model = models.vgg19(pretrained=True).features[:29]

    def forward(self, x):
        """Run *x* through the truncated VGG and collect chosen activations."""
        collected = []
        for index, layer in enumerate(self.model):
            x = layer(x)
            if str(index) in self.chosen_features:
                collected.append(x)
        return collected
|
||||
|
||||
|
||||
def load_image(image_name):
    """Open *image_name*, apply the module-level `loader` transform, and
    return a (1, C, H, W) tensor on the module-level `device`."""
    tensor = loader(Image.open(image_name)).unsqueeze(0)
    return tensor.to(device)
|
||||
|
||||
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
imsize = 356  # both content and style images are resized to imsize x imsize

# Here we may want to use the Normalization constants used in the original
# VGG network (to get similar values net was originally trained on), but
# I found it didn't matter too much so I didn't end of using it. If you
# use it make sure to normalize back so the images don't look weird.
loader = transforms.Compose(
    [
        transforms.Resize((imsize, imsize)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

original_img = load_image("annahathaway.png")
style_img = load_image("style.jpg")

# initialized generated as white noise or clone of original image.
# Clone seemed to work better for me.

# generated = torch.randn(original_img.data.shape, device=device, requires_grad=True)
generated = original_img.clone().requires_grad_(True)
model = VGG().to(device).eval()

# Hyperparameters
total_steps = 6000
learning_rate = 0.001
alpha = 1      # weight of the content (original-image) loss
beta = 0.01    # weight of the style loss
# Note: only the generated *image* is optimized — the network stays frozen.
optimizer = optim.Adam([generated], lr=learning_rate)

for step in range(total_steps):
    # Obtain the convolution features in specifically chosen layers
    generated_features = model(generated)
    original_img_features = model(original_img)
    style_features = model(style_img)

    # Loss is 0 initially
    style_loss = original_loss = 0

    # iterate through all the features for the chosen layers
    for gen_feature, orig_feature, style_feature in zip(
        generated_features, original_img_features, style_features
    ):

        # batch_size will just be 1
        batch_size, channel, height, width = gen_feature.shape
        # Content loss: MSE between generated and original activations.
        original_loss += torch.mean((gen_feature - orig_feature) ** 2)
        # Compute Gram Matrix of generated
        G = gen_feature.view(channel, height * width).mm(
            gen_feature.view(channel, height * width).t()
        )
        # Compute Gram Matrix of Style
        A = style_feature.view(channel, height * width).mm(
            style_feature.view(channel, height * width).t()
        )
        # Style loss: MSE between the two Gram matrices.
        style_loss += torch.mean((G - A) ** 2)

    total_loss = alpha * original_loss + beta * style_loss
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Periodically report progress and overwrite the output image.
    if step % 200 == 0:
        print(total_loss)
        save_image(generated, "generated.png")
|
||||
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img1.png
Normal file
|
After Width: | Height: | Size: 310 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img2.png
Normal file
|
After Width: | Height: | Size: 282 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img3.png
Normal file
|
After Width: | Height: | Size: 294 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img4.png
Normal file
|
After Width: | Height: | Size: 284 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img5.png
Normal file
|
After Width: | Height: | Size: 293 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img6.png
Normal file
|
After Width: | Height: | Size: 270 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img7.png
Normal file
|
After Width: | Height: | Size: 294 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/output/img8.png
Normal file
|
After Width: | Height: | Size: 308 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/style.jpg
Normal file
|
After Width: | Height: | Size: 215 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style1.jpg
Normal file
|
After Width: | Height: | Size: 215 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style2.jpg
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style3.jpg
Normal file
|
After Width: | Height: | Size: 280 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style4.jpg
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style5.jpg
Normal file
|
After Width: | Height: | Size: 807 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style6.jpg
Normal file
|
After Width: | Height: | Size: 86 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style7.jpg
Normal file
|
After Width: | Height: | Size: 120 KiB |
BIN
ML/Pytorch/more_advanced/neuralstyle/styles/style8.jpg
Normal file
|
After Width: | Height: | Size: 549 KiB |
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
Seq2Seq using Transformers on the Multi30k
|
||||
dataset. In this video I utilize Pytorch
|
||||
inbuilt Transformer modules, and have a
|
||||
separate implementation for Transformers
|
||||
from scratch. Training this model for a
|
||||
while (not too long) gives a BLEU score
|
||||
of ~35, and I think training for longer
|
||||
would give even better results.
|
||||
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import spacy
|
||||
from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from torchtext.datasets import Multi30k
|
||||
from torchtext.data import Field, BucketIterator
|
||||
|
||||
"""
|
||||
To install spacy languages do:
|
||||
python -m spacy download en
|
||||
python -m spacy download de
|
||||
"""
|
||||
# spaCy language models; install with the commands in the docstring above.
spacy_ger = spacy.load("de")
spacy_eng = spacy.load("en")


def tokenize_ger(text):
    """Tokenize a German sentence into a list of token strings."""
    return [tok.text for tok in spacy_ger.tokenizer(text)]


def tokenize_eng(text):
    """Tokenize an English sentence into a list of token strings."""
    return [tok.text for tok in spacy_eng.tokenizer(text)]


# Torchtext fields: lower-case everything and wrap sentences in <sos>/<eos>.
german = Field(tokenize=tokenize_ger, lower=True, init_token="<sos>", eos_token="<eos>")

english = Field(
    tokenize=tokenize_eng, lower=True, init_token="<sos>", eos_token="<eos>"
)

# German-to-English sentence pairs from the Multi30k dataset.
train_data, valid_data, test_data = Multi30k.splits(
    exts=(".de", ".en"), fields=(german, english)
)

# Vocabularies are built from the training split only.
german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)
|
||||
|
||||
|
||||
class Transformer(nn.Module):
    """Seq2seq translation model built on nn.Transformer.

    Token plus learned positional embeddings feed the standard
    encoder/decoder stack; a final linear layer maps decoder states to
    target-vocabulary logits of shape (trg_len, N, trg_vocab_size).
    """

    def __init__(
        self,
        embedding_size,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        num_heads,
        num_encoder_layers,
        num_decoder_layers,
        forward_expansion,
        dropout,
        max_len,
        device,
    ):
        super(Transformer, self).__init__()
        # Token and learned positional embeddings for source and target.
        self.src_word_embedding = nn.Embedding(src_vocab_size, embedding_size)
        self.src_position_embedding = nn.Embedding(max_len, embedding_size)
        self.trg_word_embedding = nn.Embedding(trg_vocab_size, embedding_size)
        self.trg_position_embedding = nn.Embedding(max_len, embedding_size)

        self.device = device
        self.transformer = nn.Transformer(
            embedding_size,
            num_heads,
            num_encoder_layers,
            num_decoder_layers,
            forward_expansion,  # passed positionally as dim_feedforward
            dropout,
        )
        self.fc_out = nn.Linear(embedding_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)
        self.src_pad_idx = src_pad_idx

    def make_src_mask(self, src):
        """Boolean (N, src_len) mask that is True at <pad> positions."""
        src_mask = src.transpose(0, 1) == self.src_pad_idx
        return src_mask.to(self.device)

    def forward(self, src, trg):
        src_len, N = src.shape
        trg_len, _ = trg.shape

        def positions(length):
            # Column vector 0..length-1, broadcast over the batch dimension.
            return (
                torch.arange(0, length).unsqueeze(1).expand(length, N).to(self.device)
            )

        # Word + position embeddings, with dropout.
        embed_src = self.dropout(
            self.src_word_embedding(src)
            + self.src_position_embedding(positions(src_len))
        )
        embed_trg = self.dropout(
            self.trg_word_embedding(trg)
            + self.trg_position_embedding(positions(trg_len))
        )

        # Padding mask for the encoder; causal mask for the decoder.
        src_padding_mask = self.make_src_mask(src)
        trg_mask = self.transformer.generate_square_subsequent_mask(trg_len).to(
            self.device
        )

        out = self.transformer(
            embed_src,
            embed_trg,
            src_key_padding_mask=src_padding_mask,
            tgt_mask=trg_mask,
        )
        return self.fc_out(out)
|
||||
|
||||
|
||||
# We're ready to define everything we need for training our Seq2Seq model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

load_model = True
save_model = True

# Training hyperparameters
num_epochs = 10000
learning_rate = 3e-4
batch_size = 32

# Model hyperparameters
src_vocab_size = len(german.vocab)
trg_vocab_size = len(english.vocab)
embedding_size = 512
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
dropout = 0.10
max_len = 100
forward_expansion = 4
# NOTE(review): the source side is German, so this arguably should be
# german.vocab.stoi["<pad>"]; both Fields likely assign <pad> the same
# index, but confirm before relying on it.
src_pad_idx = english.vocab.stoi["<pad>"]

# Tensorboard to get nice loss plot
writer = SummaryWriter("runs/loss_plot")
step = 0

# Bucket sentences of similar length together to minimize padding.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len,
    device,
).to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Reduce the learning rate when the epoch mean loss plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.1, patience=10, verbose=True
)

# Ignore <pad> positions in the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

# Fixed example sentence, translated each epoch to track progress.
sentence = "ein pferd geht unter einer brücke neben einem boot."

for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

    # Translate the example sentence in eval mode (disables dropout).
    model.eval()
    translated_sentence = translate_sentence(
        model, sentence, german, english, device, max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")
    model.train()
    losses = []

    for batch_idx, batch in enumerate(train_iterator):
        # Get input and targets and get to cuda
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Forward prop: decoder input is the target without its last token.
        output = model(inp_data, target[:-1, :])

        # Output is of shape (trg_len, batch_size, output_dim) but Cross Entropy Loss
        # doesn't take input in that form. For example if we have MNIST we want to have
        # output to be: (N, 10) and targets just (N). Here we can view it in a similar
        # way that we have output_words * batch_size that we want to send in into
        # our cost function, so we need to do some reshapin.
        # Let's also remove the start token while we're at it
        output = output.reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()

        loss = criterion(output, target)
        losses.append(loss.item())

        # Back prop
        loss.backward()
        # Clip to avoid exploding gradient issues, makes sure grads are
        # within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # plot to tensorboard
        writer.add_scalar("Training loss", loss, global_step=step)
        step += 1

    mean_loss = sum(losses) / len(losses)
    scheduler.step(mean_loss)

# running on entire test data takes a while
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score * 100:.2f}")
|
||||
70
ML/Pytorch/more_advanced/seq2seq_transformer/utils.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import torch
|
||||
import spacy
|
||||
from torchtext.data.metrics import bleu_score
|
||||
import sys
|
||||
|
||||
|
||||
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedily translate a German sentence (string or token list) to English.

    Returns the predicted English tokens with the leading <sos> removed;
    <eos> is kept when produced within *max_length* decoding steps.
    """
    # Load german tokenizer
    spacy_ger = spacy.load("de")

    # Tokenize, lower-cased to match the vocabulary.
    if type(sentence) == str:
        tokens = [token.text.lower() for token in spacy_ger(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    # Wrap with <sos>/<eos>, then map tokens to vocabulary indices.
    tokens = [german.init_token] + tokens + [german.eos_token]
    token_indices = [german.vocab.stoi[token] for token in tokens]

    # (src_len, 1) tensor — a batch containing the single sentence.
    sentence_tensor = torch.LongTensor(token_indices).unsqueeze(1).to(device)

    outputs = [english.vocab.stoi["<sos>"]]
    for _ in range(max_length):
        trg_tensor = torch.LongTensor(outputs).unsqueeze(1).to(device)

        with torch.no_grad():
            output = model(sentence_tensor, trg_tensor)

        # Greedy choice: highest-scoring token at the last decoder position.
        best_guess = output.argmax(2)[-1, :].item()
        outputs.append(best_guess)

        if best_guess == english.vocab.stoi["<eos>"]:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]
    # remove start token
    return translated_sentence[1:]
|
||||
|
||||
|
||||
def bleu(data, model, german, english, device):
    """Corpus BLEU score of *model* over the torchtext examples in *data*."""
    references = []
    hypotheses = []

    for example in data:
        fields = vars(example)
        source_tokens = fields["src"]
        reference_tokens = fields["trg"]

        # Translate, then drop the trailing <eos> token from the hypothesis.
        hypothesis = translate_sentence(model, source_tokens, german, english, device)
        hypothesis = hypothesis[:-1]  # remove <eos> token

        references.append([reference_tokens])
        hypotheses.append(hypothesis)

    return bleu_score(hypotheses, references)
|
||||
|
||||
|
||||
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Write *state* (model/optimizer state dicts) to *filename* via torch.save."""
    print("=> Saving checkpoint")
    torch.save(state, filename)
|
||||
|
||||
|
||||
def load_checkpoint(checkpoint, model, optimizer):
    """Restore model and optimizer parameters from a checkpoint dict
    containing "state_dict" and "optimizer" entries."""
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
|
||||
4
ML/Pytorch/more_advanced/torchtext/mydata/test.csv
Normal file
@@ -0,0 +1,4 @@
|
||||
name,quote,score
|
||||
Jocko,You must own everything in your world. There is no one else to blame.,1
|
||||
Bruce Lee,"Do not pray for an easy life, pray for the strength to endure a difficult one.",1
|
||||
Potato guy,"Stand tall, and rice like a potato!",0
|
||||
|
3
ML/Pytorch/more_advanced/torchtext/mydata/test.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{"name": "Jocko", "quote": "You must own everything in your world. There is no one else to blame.", "score":1}
|
||||
{"name": "Bruce", "quote": "Do not pray for an easy life, pray for the strength to endure a difficult one.", "score":1}
|
||||
{"name": "Random Potato", "quote": "Stand tall, and rice like a potato!", "score":0}
|
||||
4
ML/Pytorch/more_advanced/torchtext/mydata/test.tsv
Normal file
@@ -0,0 +1,4 @@
|
||||
name quote score
|
||||
Jocko You must own everything in your world. There is no one else to blame. 1
|
||||
Bruce Lee Do not pray for an easy life, pray for the strength to endure a difficult one. 1
|
||||
Potato guy Stand tall, and rice like a potato! 0
|
||||
|
4
ML/Pytorch/more_advanced/torchtext/mydata/train.csv
Normal file
@@ -0,0 +1,4 @@
|
||||
name,quote,score
|
||||
Jocko,You must own everything in your world. There is no one else to blame.,1
|
||||
Bruce Lee,"Do not pray for an easy life, pray for the strength to endure a difficult one.",1
|
||||
Potato guy,"Stand tall, and rice like a potato!",0
|
||||
|
3
ML/Pytorch/more_advanced/torchtext/mydata/train.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{"name": "Jocko", "quote": "You must own everything in your world. There is no one else to blame.", "score":1}
|
||||
{"name": "Bruce", "quote": "Do not pray for an easy life, pray for the strength to endure a difficult one.", "score":1}
|
||||
{"name": "Random Potato", "quote": "Stand tall, and rice like a potato!", "score":0}
|
||||
4
ML/Pytorch/more_advanced/torchtext/mydata/train.tsv
Normal file
@@ -0,0 +1,4 @@
|
||||
name quote score
|
||||
Jocko You must own everything in your world. There is no one else to blame. 1
|
||||
Bruce Lee Do not pray for an easy life, pray for the strength to endure a difficult one. 1
|
||||
Potato guy Stand tall, and rice like a potato! 0
|
||||
|
111
ML/Pytorch/more_advanced/torchtext/torchtext_tutorial1.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import spacy
|
||||
from torchtext.data import Field, TabularDataset, BucketIterator
|
||||
|
||||
######### Loading from JSON/CSV/TSV files #########

# STEPS:
# 1. Specify how preprocessing should be done -> Fields
# 2. Use Dataset to load the data -> TabularDataset (JSON/CSV/TSV Files)
# 3. Construct an iterator to do batching & padding -> BucketIterator

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# English tokenizer model; install it first with: python -m spacy download en
spacy_en = spacy.load("en")
|
||||
|
||||
|
||||
def tokenize(text):
    """Split *text* into a list of token strings using the spaCy English tokenizer."""
    return [token.text for token in spacy_en.tokenizer(text)]
|
||||
|
||||
|
||||
# Field objects define how each column is tokenized / numericalized.
quote = Field(sequential=True, use_vocab=True, tokenize=tokenize, lower=True)
# Scores are already numeric labels, so no vocabulary is built for them.
score = Field(sequential=False, use_vocab=False)

# Map raw column names to (batch attribute name, Field) pairs.
fields = {"quote": ("q", quote), "score": ("s", score)}

train_data, test_data = TabularDataset.splits(
    path="mydata", train="train.json", test="test.json", format="json", fields=fields
)

# Alternative loaders for the same data in CSV or TSV form:
# # train_data, test_data = TabularDataset.splits(
# #     path='mydata',
# #     train='train.csv',
# #     test='test.csv',
# #     format='csv',
# #     fields=fields)

# # train_data, test_data = TabularDataset.splits(
# #     path='mydata',
# #     train='train.tsv',
# #     test='test.tsv',
# #     format='tsv',
# #     fields=fields)

# Build the vocabulary and attach pretrained 100-d GloVe vectors.
quote.build_vocab(train_data, max_size=10000, min_freq=1, vectors="glove.6B.100d")

# BucketIterator batches examples of similar length to minimize padding.
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data), batch_size=2, device=device
)
|
||||
|
||||
######### Training a simple LSTM on this toy data of ours #########
|
||||
class RNN_LSTM(nn.Module):
    """A minimal LSTM text classifier: embedding -> LSTM -> linear head.

    The hidden output of the last time step is projected to a single
    logit, intended for use with ``BCEWithLogitsLoss``.
    """

    def __init__(self, input_size, embed_size, hidden_size, num_layers):
        """
        Args:
            input_size: vocabulary size for the embedding table.
            embed_size: dimensionality of the token embeddings.
            hidden_size: LSTM hidden state size.
            num_layers: number of stacked LSTM layers.
        """
        super(RNN_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embed_size)
        self.rnn = nn.LSTM(embed_size, hidden_size, num_layers)
        self.fc_out = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Classify a batch of token-index sequences.

        Args:
            x: LongTensor of shape (seq_len, batch) — sequence-first, as
               produced by torchtext's BucketIterator.

        Returns:
            Tensor of shape (batch, 1) with one unnormalized logit per example.
        """
        # Fix: derive the device from the input instead of relying on the
        # module-level ``device`` global, so the model also works when
        # imported elsewhere or run on differently-placed inputs.
        h0 = torch.zeros(self.num_layers, x.size(1), self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(1), self.hidden_size, device=x.device)

        embedded = self.embedding(x)
        outputs, _ = self.rnn(embedded, (h0, c0))
        # Only the final time step is used for classification.
        prediction = self.fc_out(outputs[-1, :, :])

        return prediction
|
||||
|
||||
|
||||
# Hyperparameters
input_size = len(quote.vocab)
hidden_size = 512
num_layers = 2
embedding_size = 100  # must match the GloVe vectors loaded above (glove.6B.100d)
learning_rate = 0.005
num_epochs = 10

# Initialize network
model = RNN_LSTM(input_size, embedding_size, hidden_size, num_layers).to(device)

# (NOT COVERED IN YOUTUBE VIDEO): Load the pretrained embeddings onto our model
pretrained_embeddings = quote.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)

# Loss and optimizer: binary classification on a raw logit.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    for batch_idx, batch in enumerate(train_iterator):
        # Get data to cuda if possible
        data = batch.q.to(device=device)
        targets = batch.s.to(device=device)

        # forward
        scores = model(data)
        # Squeeze the trailing logit dim and cast targets to float for BCE.
        loss = criterion(scores.squeeze(1), targets.type_as(scores))

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent
        optimizer.step()
||||
45
ML/Pytorch/more_advanced/torchtext/torchtext_tutorial2.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import spacy
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator

"""
To install spacy languages use:
python -m spacy download en
python -m spacy download de
"""

# spaCy tokenizer models for both languages.
spacy_eng = spacy.load("en")
spacy_ger = spacy.load("de")


def tokenize_eng(text):
    """Tokenize English text with spaCy, returning a list of token strings."""
    return [tok.text for tok in spacy_eng.tokenizer(text)]


def tokenize_ger(text):
    """Tokenize German text with spaCy, returning a list of token strings."""
    return [tok.text for tok in spacy_ger.tokenizer(text)]


# Fields describe how each side of the translation pair is preprocessed.
english = Field(sequential=True, use_vocab=True, tokenize=tokenize_eng, lower=True)
german = Field(sequential=True, use_vocab=True, tokenize=tokenize_ger, lower=True)

# Multi30k German->English translation dataset; exts picks the file suffixes.
train_data, validation_data, test_data = Multi30k.splits(
    exts=(".de", ".en"), fields=(german, english)
)

# Words must appear at least twice in the training set to enter the vocab.
english.build_vocab(train_data, max_size=10000, min_freq=2)
german.build_vocab(train_data, max_size=10000, min_freq=2)

# BucketIterator batches sentences of similar length to minimize padding.
train_iterator, validation_iterator, test_iterator = BucketIterator.splits(
    (train_data, validation_data, test_data), batch_size=64, device="cuda"
)

for batch in train_iterator:
    print(batch)

# string to integer (stoi)
print(f'Index of the word (the) is: {english.vocab.stoi["the"]}')

# print integer to string (itos)
print(f"Word of the index (1612) is: {english.vocab.itos[1612]}")
print(f"Word of the index (0) is: {english.vocab.itos[0]}")
||||
64
ML/Pytorch/more_advanced/torchtext/torchtext_tutorial3.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import spacy
import pandas as pd
from torchtext.data import Field, BucketIterator, TabularDataset
from sklearn.model_selection import train_test_split

### Load data from two text files where each row is a sentence ###
english_txt = open("train_WMT_english.txt", encoding="utf8").read().split("\n")
german_txt = open("train_WMT_german.txt", encoding="utf8").read().split("\n")

# Keep only a small slice (rows 1..99) so the example runs quickly.
raw_data = {
    "English": [line for line in english_txt[1:100]],
    "German": [line for line in german_txt[1:100]],
}

df = pd.DataFrame(raw_data, columns=["English", "German"])

# create train and test set
train, test = train_test_split(df, test_size=0.1)

# Get train, test data to json and csv format which can be read by torchtext
train.to_json("train.json", orient="records", lines=True)
test.to_json("test.json", orient="records", lines=True)

train.to_csv("train.csv", index=False)
test.to_csv("test.csv", index=False)

### Now we're back to where we were in previous Tutorials ###

"""
To install spacy languages use:
python -m spacy download en
python -m spacy download de
"""

spacy_eng = spacy.load("en")
spacy_ger = spacy.load("de")


def tokenize_eng(text):
    """Tokenize English text with spaCy, returning a list of token strings."""
    return [tok.text for tok in spacy_eng.tokenizer(text)]


def tokenize_ger(text):
    """Tokenize German text with spaCy, returning a list of token strings."""
    return [tok.text for tok in spacy_ger.tokenizer(text)]


english = Field(sequential=True, use_vocab=True, tokenize=tokenize_eng, lower=True)
german = Field(sequential=True, use_vocab=True, tokenize=tokenize_ger, lower=True)

# Map JSON keys to (batch attribute name, Field) pairs.
fields = {"English": ("eng", english), "German": ("ger", german)}

train_data, test_data = TabularDataset.splits(
    path="", train="train.json", test="test.json", format="json", fields=fields
)

english.build_vocab(train_data, max_size=10000, min_freq=2)
german.build_vocab(train_data, max_size=10000, min_freq=2)

# BucketIterator batches sentences of similar length to minimize padding.
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data), batch_size=32, device="cuda"
)

for batch in train_iterator:
    print(batch)
||||
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
A from scratch implementation of Transformer network,
|
||||
following the paper Attention is all you need with a
|
||||
few minor differences. I tried to make it as clear as
|
||||
possible to understand and also went through the code
|
||||
on my youtube channel!
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class SelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The embedding is split into ``heads`` independent slices of size
    ``embed_size // heads``; attention is computed per head and the head
    outputs are concatenated and mixed by a final linear layer.
    """

    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            self.head_dim * heads == embed_size
        ), "Embedding size needs to be divisible by heads"

        # Per-head projections (applied to already-split head slices).
        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        # Mixes the concatenated head outputs back to embed_size.
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        """Attend *query* over *keys*/*values*; mask==0 positions are ignored.

        All three inputs are (batch, seq_len, embed_size); the output has the
        same shape as *query*.
        """
        batch = query.shape[0]
        value_len = values.shape[1]
        key_len = keys.shape[1]
        query_len = query.shape[1]

        # Split the embedding dimension into per-head slices.
        values = values.reshape(batch, value_len, self.heads, self.head_dim)
        keys = keys.reshape(batch, key_len, self.heads, self.head_dim)
        query = query.reshape(batch, query_len, self.heads, self.head_dim)

        values = self.values(values)  # (batch, value_len, heads, head_dim)
        keys = self.keys(keys)  # (batch, key_len, heads, head_dim)
        queries = self.queries(query)  # (batch, query_len, heads, head_dim)

        # Raw attention scores via einsum:
        # (batch, query_len, heads, d) x (batch, key_len, heads, d)
        #   -> (batch, heads, query_len, key_len)
        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])

        if mask is not None:
            # Masked positions get a huge negative score -> ~0 after softmax.
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        # Softmax over key positions, scaled for numerical stability.
        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)

        # Weighted sum of values, then merge the heads back together:
        # (batch, heads, query_len, key_len) x (batch, value_len, heads, d)
        #   -> (batch, query_len, heads, d) -> (batch, query_len, heads*d)
        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            batch, query_len, self.heads * self.head_dim
        )

        # Final linear mix; shape stays (batch, query_len, embed_size).
        return self.fc_out(out)
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
    """One encoder-style block: self-attention then a position-wise
    feed-forward network, each wrapped with a residual connection,
    LayerNorm, and dropout."""

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)

        # MLP that temporarily expands the embedding dimension by
        # forward_expansion before projecting back down.
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size),
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        """Run one attention + feed-forward round; output shape matches *query*."""
        attended = self.attention(value, key, query, mask)

        # Residual around attention (the residual uses the query input),
        # then normalization and dropout.
        normed = self.dropout(self.norm1(attended + query))
        # Residual around the feed-forward sub-layer.
        return self.dropout(self.norm2(self.feed_forward(normed) + normed))
|
||||
|
||||
|
||||
class Encoder(nn.Module):
    """Transformer encoder: token + learned positional embeddings followed
    by a stack of TransformerBlocks.

    NOTE(review): the constructor argument order (..., heads, device,
    forward_expansion, dropout, ...) differs from Decoder's
    (..., heads, forward_expansion, dropout, device, ...); callers pass
    these positionally, so keep the two orders straight.
    """

    def __init__(
        self,
        src_vocab_size,
        embed_size,
        num_layers,
        heads,
        device,
        forward_expansion,
        dropout,
        max_length,
    ):

        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        # Learned positional embeddings; max_length caps the usable sequence length.
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout=dropout,
                    forward_expansion=forward_expansion,
                )
                for _ in range(num_layers)
            ]
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        """Embed token ids *x* of shape (N, seq_len) and run the block stack.

        Returns a tensor of shape (N, seq_len, embed_size).
        """
        N, seq_length = x.shape
        # Position ids 0..seq_len-1, one row per example in the batch.
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(
            (self.word_embedding(x) + self.position_embedding(positions))
        )

        # In the Encoder the query, key, value are all the same, it's in the
        # decoder this will change. This might look a bit odd in this case.
        for layer in self.layers:
            out = layer(out, out, out, mask)

        return out
|
||||
|
||||
|
||||
class DecoderBlock(nn.Module):
    """Decoder layer: masked self-attention over the target sequence, then
    a TransformerBlock performing cross-attention onto the encoder output."""

    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.norm = nn.LayerNorm(embed_size)
        self.attention = SelfAttention(embed_size, heads=heads)
        self.transformer_block = TransformerBlock(
            embed_size, heads, dropout, forward_expansion
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        """One decoder round; *value*/*key* come from the encoder output."""
        # Causal self-attention: trg_mask hides future target positions.
        self_attended = self.attention(x, x, x, trg_mask)
        # Residual + norm + dropout produces the cross-attention query.
        cross_query = self.dropout(self.norm(self_attended + x))
        # Cross-attention onto the encoder; src_mask hides source padding.
        return self.transformer_block(value, key, cross_query, src_mask)
|
||||
|
||||
|
||||
class Decoder(nn.Module):
    """Transformer decoder: embeds target tokens plus positions, applies a
    stack of DecoderBlocks against the encoder output, and projects to
    target-vocabulary logits.

    NOTE(review): the constructor argument order (..., heads,
    forward_expansion, dropout, device, ...) differs from Encoder's
    (..., heads, device, forward_expansion, dropout, ...); callers pass
    these positionally, so keep the two orders straight.
    """

    def __init__(
        self,
        trg_vocab_size,
        embed_size,
        num_layers,
        heads,
        forward_expansion,
        dropout,
        device,
        max_length,
    ):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        # Learned positional embeddings; max_length caps the usable sequence length.
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )
        # Projects each position's embedding to target-vocabulary logits.
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        """Decode target ids *x* of shape (N, trg_len) against *enc_out*.

        Returns logits of shape (N, trg_len, trg_vocab_size).
        """
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout((self.word_embedding(x) + self.position_embedding(positions)))

        for layer in self.layers:
            # enc_out supplies both value and key for cross-attention.
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        out = self.fc_out(x)

        return out
|
||||
|
||||
|
||||
class Transformer(nn.Module):
    """Full encoder-decoder Transformer for sequence-to-sequence tasks.

    Builds the source-padding mask and the causal target mask internally,
    so callers only provide raw token-id tensors.
    """

    def __init__(
        self,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        trg_pad_idx,
        embed_size=512,
        num_layers=6,
        forward_expansion=4,
        heads=8,
        dropout=0,
        device="cpu",
        max_length=100,
    ):

        super(Transformer, self).__init__()

        self.encoder = Encoder(
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length,
        )

        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length,
        )

        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Boolean mask that is False (0) at source padding positions."""
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        # (N, 1, 1, src_len)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        """Lower-triangular causal mask so position i only attends to j <= i.

        NOTE(review): this mask does not additionally zero out target
        padding positions — presumably acceptable for training with
        shifted targets, but worth confirming.
        """
        N, trg_len = trg.shape
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(
            N, 1, trg_len, trg_len
        )

        return trg_mask.to(self.device)

    def forward(self, src, trg):
        """Encode *src* and decode *trg*; returns (N, trg_len, trg_vocab_size) logits."""
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: run a tiny Transformer forward pass on toy token ids.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Two example "sentences" of token ids (0 acts as padding).
    x = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0], [1, 8, 7, 3, 4, 5, 6, 7, 2]]).to(
        device
    )
    trg = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0], [1, 5, 6, 2, 4, 7, 6, 2]]).to(device)

    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = 10
    trg_vocab_size = 10
    model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx, device=device).to(
        device
    )
    # Feed the target without its last token (teacher-forcing convention).
    out = model(x, trg[:, :-1])
    # Expected shape: (N, trg_len - 1, trg_vocab_size) = (2, 7, 10).
    print(out.shape)
||||