""" Example code of a simple RNN, GRU, LSTM on the MNIST dataset. Programmed by Aladdin Persson * 2020-05-09 Initial coding * 2022-12-16 Updated with more detailed comments, docstrings to functions, and checked code still functions as intended. """ # Imports import torch import torch.nn.functional as F # Parameterless functions, like (some) activation functions import torchvision.datasets as datasets # Standard datasets import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation from torch import optim # For optimizers like SGD, Adam, etc. from torch import nn # All neural network modules from torch.utils.data import ( DataLoader, ) # Gives easier dataset managment by creating mini batches etc. from tqdm import tqdm # For a nice progress bar! # Set device device = "cuda" if torch.cuda.is_available() else "cpu" # Hyperparameters input_size = 28 hidden_size = 256 num_layers = 2 num_classes = 10 sequence_length = 28 learning_rate = 0.005 batch_size = 64 num_epochs = 3 # Recurrent neural network (many-to-one) class RNN(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(RNN, self).__init__() self.hidden_size = hidden_size self.num_layers = num_layers self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size * sequence_length, num_classes) def forward(self, x): # Set initial hidden and cell states h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) # Forward propagate LSTM out, _ = self.rnn(x, h0) out = out.reshape(out.shape[0], -1) # Decode the hidden state of the last time step out = self.fc(out) return out # Recurrent neural network with GRU (many-to-one) class RNN_GRU(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(RNN_GRU, self).__init__() self.hidden_size = hidden_size self.num_layers = num_layers self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size * sequence_length, num_classes) def forward(self, x): # Set initial hidden and cell states h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) # Forward propagate LSTM out, _ = self.gru(x, h0) out = out.reshape(out.shape[0], -1) # Decode the hidden state of the last time step out = self.fc(out) return out # Recurrent neural network with LSTM (many-to-one) class RNN_LSTM(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(RNN_LSTM, self).__init__() self.hidden_size = hidden_size self.num_layers = num_layers self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size * sequence_length, num_classes) def forward(self, x): # Set initial hidden and cell states h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) # Forward propagate LSTM out, _ = self.lstm( x, (h0, c0) ) # out: tensor of shape (batch_size, seq_length, hidden_size) out = out.reshape(out.shape[0], -1) # Decode the hidden state of the last time step out = self.fc(out) return out # Load Data train_dataset = datasets.MNIST( root="dataset/", train=True, transform=transforms.ToTensor(), download=True ) test_dataset = datasets.MNIST( root="dataset/", train=False, transform=transforms.ToTensor(), download=True ) train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = DataLoader(dataset=test_dataset, 
# Initialize network (try out just using simple RNN, or GRU, and then compare with LSTM)
model = RNN_LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Get data to cuda if possible
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0

    # Set model to eval
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).squeeze(1)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    # Toggle model back to train
    model.train()
    return num_correct / num_samples


print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")
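
# Note (added, not part of the original script): the fc layers above flatten the
# outputs of all 28 time steps (hidden_size * sequence_length features). A common
# alternative for many-to-one classification is to feed only the last time step's
# hidden state to the linear layer; a minimal sketch with the same hyperparameters
# is shown below for illustration.
class RNN_LSTM_LastStep(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_LSTM_LastStep, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)  # only the last hidden state

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))  # out: (batch_size, seq_length, hidden_size)
        out = self.fc(out[:, -1, :])  # classify from the final time step only
        return out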