Initial commit

Aladdin Persson
2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions

View File

@@ -0,0 +1,216 @@
"""
Training a Pointer Network, a modified Seq2Seq-with-attention
architecture, on the task of sorting arrays. The decoder's attention
distribution over the input positions is used directly as the output,
so the network learns to "point" at the next element in sorted order.
"""
from torch.utils.data import (
Dataset,
DataLoader,
)
import random
import torch
import torch.nn as nn
import torch.optim as optim
from utils import sort_array, save_checkpoint, load_checkpoint
from torch.utils.tensorboard import SummaryWriter  # for logging to TensorBoard
class SortArray(Dataset):
def __init__(self, batch_size, min_int, max_int, min_size, max_size):
self.batch_size = batch_size
self.min_int = min_int
self.max_int = max_int + 1
self.min_size = min_size
self.max_size = max_size + 1
self.start_tok = torch.tensor([-1]).expand(1, self.batch_size)
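        # -1 acts as an <SOS>-style start token; expand gives it shape
        # (1, batch_size) so it can be prepended to the target index rows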
def __len__(self):
return 10000 // self.batch_size
def __getitem__(self, index):
        size_of_array = torch.randint(
            low=self.min_size, high=self.max_size, size=(1,)
        ).item()
        # uniform floats shifted and scaled into the configured [min_int, max_int] range
        unsorted_arr = self.min_int + torch.rand(size=(size_of_array, self.batch_size)) * (
            self.max_int - self.min_int
        )
        # unsorted_arr = torch.randint(
        #     low=self.min_int, high=self.max_int, size=(size_of_array, self.batch_size)
        # )
sorted_arr, indices = torch.sort(unsorted_arr, dim=0)
return unsorted_arr.float(), torch.cat((self.start_tok, indices), 0)
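# A quick sketch of what one sample from SortArray looks like (shapes only, values
# are random): with batch_size=32 and a drawn sequence length of 5, unsorted_arr
# has shape (5, 32) and the returned target has shape (6, 32), where row 0 is the
# -1 start token and rows 1..5 are the indices that would sort each column.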
class Encoder(nn.Module):
def __init__(self, hidden_size, num_layers):
super(Encoder, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.rnn = nn.LSTM(1, hidden_size, num_layers)
def forward(self, x):
embedding = x.unsqueeze(2)
# embedding shape: (seq_length, N, 1)
encoder_states, (hidden, cell) = self.rnn(embedding)
# encoder_states: (seq_length, N, hidden_size)
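        # hidden, cell: (num_layers, N, hidden_size); the decoder starts from these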
return encoder_states, hidden, cell
class Decoder(nn.Module):
def __init__(self, hidden_size, num_layers, units=100):
super(Decoder, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.rnn = nn.LSTM(hidden_size + 1, hidden_size, num_layers)
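        # input size is hidden_size + 1: the attention context vector concatenated
        # with the scalar fed in at each step (the start token or the previously
        # predicted index)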
self.energy = nn.Linear(hidden_size * 2, units)
self.fc = nn.Linear(units, 1)
self.softmax = nn.Softmax(dim=0)
self.relu = nn.ReLU()
def forward(self, x, encoder_states, hidden, cell):
sequence_length = encoder_states.shape[0]
batch_size = encoder_states.shape[1]
h_reshaped = hidden.repeat(sequence_length, 1, 1)
energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim=2)))
energy = self.fc(energy)
# energy: (seq_length, N, 1)
attention = self.softmax(energy)
# attention: (seq_length, N, 1), snk
# encoder_states: (seq_length, N, hidden_size), snl
# we want context_vector: (1, N, hidden_size), i.e knl
context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)
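        # context_vector: (1, N, hidden_size), the attention-weighted sum of the
        # encoder states over the sequence dimension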
rnn_input = torch.cat([context_vector, x.unsqueeze(0).unsqueeze(2)], dim=2)
        # rnn_input: (1, N, hidden_size + 1)
_, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
return attention.squeeze(2), energy.squeeze(2), hidden, cell
class Seq2Seq(nn.Module):
def __init__(self, encoder, decoder):
super(Seq2Seq, self).__init__()
self.encoder = encoder
self.decoder = decoder
def forward(self, source, target, teacher_force_ratio=0.5):
batch_size = source.shape[1]
target_len = target.shape[0]
outputs = torch.zeros(target_len, batch_size, target_len - 1).to(device)
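        # outputs[t] will hold, for every element in the batch, one score per
        # input position (there are target_len - 1 of them)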
encoder_states, hidden, cell = self.encoder(source)
# First input will be <SOS> token
x = target[0]
predictions = torch.zeros(target_len, batch_size)
for t in range(1, target_len):
# At every time step use encoder_states and update hidden, cell
attention, energy, hidden, cell = self.decoder(
x, encoder_states, hidden, cell
)
# Store prediction for current time step
outputs[t] = energy.permute(1, 0)
            # Get the input position the Decoder points to (argmax of the attention)
best_guess = attention.argmax(0)
predictions[t, :] = best_guess
            # With probability teacher_force_ratio we feed the ground-truth next
            # index, otherwise the index the Decoder just predicted. Teacher
            # forcing helps training converge, but if it is always 1 the model
            # never sees its own predictions, so inputs at test time can look
            # very different from what it saw during training.
x = target[t] if random.random() < teacher_force_ratio else best_guess
return outputs, predictions[1:, :]
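# Note on the returned values: outputs (target_len, N, seq_len) are the raw
# attention scores used as logits for CrossEntropyLoss; predictions
# (target_len - 1, N) are the argmax indices, i.e. the predicted sort order.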
### We're ready to define everything we need for training our Seq2Seq model ###
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
load_model = False
save_model = True
# Training hyperparameters
num_epochs = 1000
learning_rate = 3e-5
batch_size = 32
hidden_size = 1024
num_layers = 1  # the current implementation only supports a single LSTM layer
min_int = 1
max_int = 10
min_size = 2
max_size = 15
# Tensorboard to get nice plots etc
writer = SummaryWriter("runs/loss_plot2")
step = 0
encoder_net = Encoder(hidden_size, num_layers).to(device)
decoder_net = Decoder(hidden_size, num_layers).to(device)
model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
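# CrossEntropyLoss over input positions: each decoding step is a classification
# over which position of the input array comes next in sorted order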
if load_model:
load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)
# The following is for testing the network; uncomment it if you want
# to try sorting a few arrays interactively
# sort_array(encoder_net, decoder_net, device)
dataset = SortArray(batch_size, min_int, max_int, min_size, max_size)
train_loader = DataLoader(dataset, batch_size=1, shuffle=False)
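# batch_size=1 here because SortArray already returns a full (seq_len, batch_size)
# batch per __getitem__; the extra DataLoader dimension is squeezed away below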
for epoch in range(num_epochs):
print(f"[Epoch {epoch} / {num_epochs}]")
if save_model:
checkpoint = {
"state_dict": model.state_dict(),
"optimizer": optimizer.state_dict(),
"steps": step,
}
save_checkpoint(checkpoint)
for batch_idx, (unsorted_arrs, sorted_arrs) in enumerate(train_loader):
inp_data = unsorted_arrs.squeeze(0).to(device)
target = sorted_arrs.squeeze(0).to(device)
# Forward prop
output, prediction = model(inp_data, target)
        # Remove the first element of output (because the loop in Seq2Seq starts
        # at t = 1), then reshape so that output is (N*seq_len, seq_len) and
        # target is (N*seq_len), as CrossEntropyLoss expects
output = output[1:].reshape(-1, output.shape[2])
target = target[1:].reshape(-1)
optimizer.zero_grad()
loss = criterion(output, target)
# Back prop
loss.backward()
        # Clip gradients to avoid exploding-gradient issues and keep the
        # updates within a healthy range
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
# Gradient descent step
optimizer.step()
# plot to tensorboard
writer.add_scalar("Training loss", loss, global_step=step)
step += 1
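# A minimal sketch of trying out the trained model interactively once training
# has finished (same idea as the commented-out call further up); "[3.2, 1.5, 9.0]"
# is just an example input:
# sort_array(encoder_net, decoder_net, device, arr="[3.2, 1.5, 9.0]")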

View File

@@ -0,0 +1,75 @@
import torch
def ask_user():
print("Write your array as a list [i,j,k..] with arbitrary positive numbers")
array = input("Input q if you want to quit \n")
return array
def sort_array(encoder, decoder, device, arr=None):
"""
A very simple example of use of the model
Input: encoder nn.Module
decoder nn.Module
device
array to sort (optional)
"""
if arr is None:
arr = ask_user()
with torch.no_grad():
while arr != "q":
            # Parse the user's string into a list (eval is used for simplicity);
            # max_len counts the decimal digits so the printed result can be
            # rounded back and avoid float artifacts
            arr = eval(arr)
lengths = [
len(str(elem).split(".")[1]) if len(str(elem).split(".")) > 1 else 0
for elem in arr
]
max_len = max(lengths)
source = torch.tensor(arr, dtype=torch.float).to(device).unsqueeze(1)
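            # source: (seq_len, 1), i.e. a "batch" containing a single array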
batch_size = source.shape[1]
target_len = source.shape[0] + 1
outputs = torch.zeros(target_len, batch_size, target_len - 1).to(device)
encoder_states, hidden, cell = encoder(source)
# First input will be <SOS> token
x = torch.tensor([-1], dtype=torch.float).to(device)
predictions = torch.zeros((target_len)).to(device)
for t in range(1, target_len):
# At every time step use encoder_states and update hidden, cell
attention, energy, hidden, cell = decoder(
x, encoder_states, hidden, cell
)
# Store prediction for current time step
outputs[t] = energy.permute(1, 0)
                # Get the input position the Decoder points to
best_guess = attention.argmax(0)
predictions[t] = best_guess.item()
x = torch.tensor([best_guess.item()], dtype=torch.float).to(device)
output = [
round(source[predictions[1:].long()][i, :].item(), max_len)
for i in range(source.shape[0])
]
print(f"Here's the result: {output}")
arr = ask_user()
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
print("=> Saving checkpoint")
torch.save(state, filename)
def load_checkpoint(checkpoint, model, optimizer): # , steps):
print("=> Loading checkpoint")
model.load_state_dict(checkpoint["state_dict"])
optimizer.load_state_dict(checkpoint["optimizer"])
# steps = checkpoint['steps']
# return steps
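# Example usage of the checkpoint helpers (a sketch; model and optimizer would
# come from the training script):
# save_checkpoint({"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()})
# load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)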