Mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git

Initial commit
ML/Projects/DeepSort/sort_w_attention.py (new file, 216 lines)
"""
Training a Pointer Network, which is a modified
Seq2Seq with attention network, for the task of
sorting arrays.
"""

from torch.utils.data import (
    Dataset,
    DataLoader,
)
import random
import torch
import torch.nn as nn
import torch.optim as optim
from utils import sort_array, save_checkpoint, load_checkpoint
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard


class SortArray(Dataset):
    def __init__(self, batch_size, min_int, max_int, min_size, max_size):
        self.batch_size = batch_size
        self.min_int = min_int
        self.max_int = max_int + 1
        self.min_size = min_size
        self.max_size = max_size + 1
        self.start_tok = torch.tensor([-1]).expand(1, self.batch_size)

    def __len__(self):
        return 10000 // self.batch_size

    def __getitem__(self, index):
        size_of_array = torch.randint(
            low=self.min_size, high=self.max_size, size=(1, 1)
        )

        unsorted_arr = torch.rand(size=(size_of_array, self.batch_size)) * (
            self.max_int - self.min_int
        )
        # unsorted_arr = torch.randint(
        #     low=self.min_int, high=self.max_int, size=(size_of_array, self.batch_size)
        # )
        sorted_arr, indices = torch.sort(unsorted_arr, dim=0)

        return unsorted_arr.float(), torch.cat((self.start_tok, indices), 0)


class Encoder(nn.Module):
    def __init__(self, hidden_size, num_layers):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.LSTM(1, hidden_size, num_layers)

    def forward(self, x):
        embedding = x.unsqueeze(2)
        # embedding shape: (seq_length, N, 1)

        encoder_states, (hidden, cell) = self.rnn(embedding)
        # encoder_states: (seq_length, N, hidden_size)

        return encoder_states, hidden, cell


class Decoder(nn.Module):
    def __init__(self, hidden_size, num_layers, units=100):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.LSTM(hidden_size + 1, hidden_size, num_layers)
        self.energy = nn.Linear(hidden_size * 2, units)
        self.fc = nn.Linear(units, 1)
        self.softmax = nn.Softmax(dim=0)
        self.relu = nn.ReLU()

    def forward(self, x, encoder_states, hidden, cell):
        sequence_length = encoder_states.shape[0]
        batch_size = encoder_states.shape[1]

        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim=2)))
        energy = self.fc(energy)

        # energy: (seq_length, N, 1)
        attention = self.softmax(energy)

        # attention: (seq_length, N, 1), snk
        # encoder_states: (seq_length, N, hidden_size), snl
        # we want context_vector: (1, N, hidden_size), i.e. knl
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)
        rnn_input = torch.cat([context_vector, x.unsqueeze(0).unsqueeze(2)], dim=2)

        # rnn_input: (1, N, hidden_size + 1), i.e. context vector concatenated with the input value
        _, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        return attention.squeeze(2), energy.squeeze(2), hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]

        # device is defined at module level below, before training starts
        outputs = torch.zeros(target_len, batch_size, target_len - 1).to(device)
        encoder_states, hidden, cell = self.encoder(source)

        # First input will be the <SOS> token
        x = target[0]
        predictions = torch.zeros(target_len, batch_size)

        for t in range(1, target_len):
            # At every time step use encoder_states and update hidden, cell
            attention, energy, hidden, cell = self.decoder(
                x, encoder_states, hidden, cell
            )

            # Store prediction for current time step
            outputs[t] = energy.permute(1, 0)

            # Get the best position the Decoder predicted (index into the input array)
            best_guess = attention.argmax(0)
            predictions[t, :] = best_guess

            # With probability teacher_force_ratio we take the actual next target,
            # otherwise we take the index the Decoder predicted.
            # Teacher forcing is used so that the model gets used to seeing
            # similar inputs at training and testing time; if teacher forcing is 1,
            # then inputs at test time might be completely different from what the
            # network is used to.
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs, predictions[1:, :]


### We're ready to define everything we need for training our Seq2Seq model ###
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
load_model = False
save_model = True

# Training hyperparameters
num_epochs = 1000
learning_rate = 3e-5
batch_size = 32
hidden_size = 1024
num_layers = 1  # Current implementation only supports a single layer
min_int = 1
max_int = 10
min_size = 2
max_size = 15

# Tensorboard to get nice plots etc.
writer = SummaryWriter(f"runs/loss_plot2")
step = 0

encoder_net = Encoder(hidden_size, num_layers).to(device)
decoder_net = Decoder(hidden_size, num_layers).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

criterion = nn.CrossEntropyLoss()

if load_model:
    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

# The following is for testing the network; uncomment it if you want
# to try out a few arrays interactively
# sort_array(encoder_net, decoder_net, device)

dataset = SortArray(batch_size, min_int, max_int, min_size, max_size)
train_loader = DataLoader(dataset, batch_size=1, shuffle=False)

for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "steps": step,
        }
        save_checkpoint(checkpoint)

    for batch_idx, (unsorted_arrs, sorted_arrs) in enumerate(train_loader):
        inp_data = unsorted_arrs.squeeze(0).to(device)
        target = sorted_arrs.squeeze(0).to(device)

        # Forward prop
        output, prediction = model(inp_data, target)

        # Remove the first element of the output (because of how we did the loop in Seq2Seq,
        # starting at t = 1), then reshape so that we obtain (N*seq_len, seq_len)
        # and target will be (N*seq_len)
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues, makes sure grads are
        # within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # plot to tensorboard
        writer.add_scalar("Training loss", loss, global_step=step)
        step += 1
ML/Projects/DeepSort/utils.py (new file, 75 lines)
import torch


def ask_user():
    print("Write your array as a list [i,j,k..] with arbitrary positive numbers")
    array = input("Input q if you want to quit \n")
    return array


def sort_array(encoder, decoder, device, arr=None):
    """
    A very simple example of using the model
    Input: encoder nn.Module
           decoder nn.Module
           device
           array to sort (optional)
    """

    if arr is None:
        arr = ask_user()

    with torch.no_grad():
        while arr != "q":
            # Avoid numerical errors by rounding to max_len
            arr = eval(arr)
            lengths = [
                len(str(elem).split(".")[1]) if len(str(elem).split(".")) > 1 else 0
                for elem in arr
            ]
            max_len = max(lengths)
            source = torch.tensor(arr, dtype=torch.float).to(device).unsqueeze(1)
            batch_size = source.shape[1]
            target_len = source.shape[0] + 1

            outputs = torch.zeros(target_len, batch_size, target_len - 1).to(device)
            encoder_states, hidden, cell = encoder(source)

            # First input will be the <SOS> token
            x = torch.tensor([-1], dtype=torch.float).to(device)
            predictions = torch.zeros((target_len)).to(device)

            for t in range(1, target_len):
                # At every time step use encoder_states and update hidden, cell
                attention, energy, hidden, cell = decoder(
                    x, encoder_states, hidden, cell
                )

                # Store prediction for current time step
                outputs[t] = energy.permute(1, 0)

                # Get the best position the Decoder predicted (index into the input array)
                best_guess = attention.argmax(0)
                predictions[t] = best_guess.item()
                x = torch.tensor([best_guess.item()], dtype=torch.float).to(device)

            output = [
                round(source[predictions[1:].long()][i, :].item(), max_len)
                for i in range(source.shape[0])
            ]

            print(f"Here's the result: {output}")
            arr = ask_user()


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):  # , steps):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    # steps = checkpoint['steps']
    # return steps
ML/Projects/Exploring_MNIST/README.md (new file, 45 lines)
# Exploring the MNIST dataset with PyTorch

The goal of this small project is to try out different models and see what test accuracies they can reach on the MNIST dataset. I checked some popular models (LeNet, VGG, Inception net, ResNet) and will likely try more in the future as I learn additional network architectures. I used an exponential learning rate decay and data augmentation; at first I simply copied every augmentation other people were using, but I learned that RandomHorizontalFlip might not be so useful when learning to recognize digits (heh). I also used a fairly standard weight decay of 5e-4. My training procedure was to first split off a validation set of about 10,000 examples and make sure the model reached high accuracy on it with the current hyperparameters. After confirming that it wasn't just overfitting the training set, I switched to training on all 60,000 examples, and once training accuracy reached roughly 99.9% I evaluated on the test set.
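As a concrete illustration of the setup described above, the flip-free augmentation and the exponential learning rate decay can be wired up roughly as follows. This is only a sketch: the padding value, the `gamma`, and the stand-in model are assumptions for illustration, not the exact settings behind the results table.

```python
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

# Augmentation/normalization for training. RandomHorizontalFlip is deliberately
# left out, since a horizontally flipped digit is generally no longer the same digit.
transform_train = transforms.Compose([
    transforms.Pad(2),  # 28x28 -> 32x32, matching --init-padding in train.py
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# SGD with the ~5e-4 weight decay mentioned above, plus an exponential LR decay.
model = nn.Sequential(nn.Flatten(), nn.Linear(32 * 32, 10))  # stand-in model
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)  # assumed gamma

for epoch in range(3):
    # ... run one epoch of training here ...
    scheduler.step()  # decay the learning rate once per epoch
```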
## Accuracy

| Model | Number of epochs | Training set acc. | Test set acc. |
| ----------------- | ----------- | ----------------- | ----------- |
| [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) | 150 | 99.69% | 99.12% |
| [VGG13](https://arxiv.org/abs/1409.1556) | 100 | 99.95% | 99.67% |
| [VGG16](https://arxiv.org/abs/1409.1556) | 100 | 99.92% | 99.68% |
| [GoogLeNet](https://arxiv.org/abs/1409.4842) | 100 | 99.90% | 99.71% |
| [ResNet101](https://arxiv.org/abs/1512.03385) | 100 | 99.90% | 99.68% |

TODO: MobileNet, ResNext, SqueezeNet, .., ?

### Comments and things to improve
I believe LeNet has more potential, as it isn't really overfitting the training set yet and would benefit from more epochs. The original paper by LeCun et al. (1998) reported roughly 99.1% test accuracy, which is similar to my result, although we should keep in mind the computational limitations of that time. Training it a bit longer, to around 99.8-99.9% training accuracy, might push it to perhaps 99.2-99.3% test accuracy if we're lucky. The other models have performed quite well so far and are, at least to my understanding, close to the current state of the art. To really maximize accuracy you would train an ensemble of models and average their predictions, but I haven't done that here since I don't find it that interesting. This was mostly about learning different network architectures and checking that they work as intended. If you find anything I can improve, or any mistakes, please tell me and I'll do my best to fix it!
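For reference, the prediction-averaging ensemble mentioned above would look roughly like the sketch below (assuming the models are already trained and take the same input format; this is not part of the current code):

```python
import torch

@torch.no_grad()
def ensemble_predict(models, x):
    """Average softmax probabilities over several trained models, then take the argmax."""
    for m in models:
        m.eval()
    probs = torch.stack([torch.softmax(m(x), dim=1) for m in models]).mean(dim=0)
    return probs.argmax(dim=1)
```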
### How to run

```bash
usage: train.py [-h] [--resume PATH] [--lr LR] [--weight-decay R]
                [--momentum M] [--epochs N] [--batch-size N]
                [--log-interval N] [--seed S] [--number-workers S]
                [--init-padding S] [--create-validationset] [--save-model]

PyTorch MNIST

optional arguments:
  --resume PATH            Saved model. (ex: PATH = checkpoint/mnist_LeNet.pth.tar)
  --batch-size N           (ex: --batch-size 64), default is 128.
  --epochs N               (ex: --epochs 10), default is 100.
  --lr LR                  learning rate (ex: --lr 0.01), default is 0.001.
  --momentum M             SGD with momentum (ex: --momentum 0.5), default is 0.9.
  --seed S                 random seed (ex: --seed 3), default is 1.
  --log-interval N         print accuracy every N mini-batches (ex: --log-interval 50), default is 240.
  --init-padding S         Initial padding on images (ex: --init-padding 5), default is 2 to make 28x28 into 32x32.
  --create-validationset   to create a validation set
  --save-model             to save weights
  --weight-decay R         What weight decay you want (ex: --weight-decay 1e-4), default is 1e-5.
  --number-workers S       How many num workers you want in PyTorch (ex: --number-workers 2), default is 0.


Example of a run:
python train.py --save-model --resume checkpoint/mnist_LeNet.pth.tar --weight-decay 1e-5 --number-workers 2
```
ML/Projects/Exploring_MNIST/networks/googLeNet.py (new file, 109 lines)
import torch
import torch.nn as nn
import torch.nn.functional as F


class Inception(nn.Module):
    def __init__(
        self, in_channels, out1x1, out3x3reduced, out3x3, out5x5reduced, out5x5, outpool
    ):
        super().__init__()

        self.branch_1 = BasicConv2d(in_channels, out1x1, kernel_size=1, stride=1)

        self.branch_2 = nn.Sequential(
            BasicConv2d(in_channels, out3x3reduced, kernel_size=1),
            BasicConv2d(out3x3reduced, out3x3, kernel_size=3, padding=1),
        )

        # In the original GoogLeNet paper this branch is a 5x5 conv, but Inception-v2 showed
        # that it is more efficient to instead use two 3x3 convs, which is what I am doing here!
        self.branch_3 = nn.Sequential(
            BasicConv2d(in_channels, out5x5reduced, kernel_size=1),
            BasicConv2d(out5x5reduced, out5x5, kernel_size=3, padding=1),
            BasicConv2d(out5x5, out5x5, kernel_size=3, padding=1),
        )

        self.branch_4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, outpool, kernel_size=1),
        )

    def forward(self, x):
        y1 = self.branch_1(x)
        y2 = self.branch_2(x)
        y3 = self.branch_3(x)
        y4 = self.branch_4(x)

        return torch.cat([y1, y2, y3, y4], 1)


class GoogLeNet(nn.Module):
    def __init__(self, img_channel):
        super().__init__()

        self.first_layers = nn.Sequential(
            BasicConv2d(img_channel, 192, kernel_size=3, padding=1)
        )

        self._3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self._3b = Inception(256, 128, 128, 192, 32, 96, 64)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self._4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self._4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self._4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self._4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self._4e = Inception(528, 256, 160, 320, 32, 128, 128)

        self._5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self._5b = Inception(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(kernel_size=8, stride=1)
        self.linear = nn.Linear(1024, 10)

    def forward(self, x):
        out = self.first_layers(x)

        out = self._3a(out)
        out = self._3b(out)
        out = self.maxpool(out)

        out = self._4a(out)
        out = self._4b(out)
        out = self._4c(out)
        out = self._4d(out)
        out = self._4e(out)
        out = self.maxpool(out)

        out = self._5a(out)
        out = self._5b(out)

        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)

        return out


class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)


def test():
    net = GoogLeNet(1)
    x = torch.randn(3, 1, 32, 32)
    y = net(x)
    print(y.size())


# test()
New file, 4 lines (path not shown in the diff; its contents match the networks.import_all_networks module imported by train.py)
from networks.vgg import VGG
from networks.lenet import LeNet
from networks.resnet import ResNet, residual_template, ResNet50, ResNet101, ResNet152
from networks.googLeNet import BasicConv2d, Inception, GoogLeNet
ML/Projects/Exploring_MNIST/networks/lenet.py (new file, 60 lines)
import torch
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    def __init__(self, in_channels, init_weights=True, num_classes=10):
        super(LeNet, self).__init__()

        self.num_classes = num_classes

        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

        # Initialize weights after the layers exist; calling this before the layers
        # are defined would silently do nothing.
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        z1 = self.conv1(x)  # 6 x 28 x 28
        a1 = F.relu(z1)  # 6 x 28 x 28
        a1 = F.max_pool2d(a1, kernel_size=2, stride=2)  # 6 x 14 x 14
        z2 = self.conv2(a1)  # 16 x 10 x 10
        a2 = F.relu(z2)  # 16 x 10 x 10
        a2 = F.max_pool2d(a2, kernel_size=2, stride=2)  # 16 x 5 x 5
        flatten_a2 = a2.view(a2.size(0), -1)
        z3 = self.fc1(flatten_a2)
        a3 = F.relu(z3)
        z4 = self.fc2(a3)
        a4 = F.relu(z4)
        z5 = self.fc3(a4)
        return z5

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


def test_lenet():
    net = LeNet(1)
    x = torch.randn(64, 1, 32, 32)
    y = net(x)
    print(y.size())


# Kept commented out (like the other network files) so importing this module
# does not run the test.
# test_lenet()
ML/Projects/Exploring_MNIST/networks/resnet.py (new file, 151 lines)
import torch
import torch.nn as nn


class residual_template(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1, identity_downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(
            out_channels, out_channels * self.expansion, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.identity_downsample = identity_downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.identity_downsample is not None:
            residual = self.identity_downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, residual_template, layers, image_channel, num_classes=10):
        self.in_channels = 64
        super().__init__()

        self.conv1 = nn.Conv2d(
            in_channels=image_channel,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(
            residual_template, layers[0], channels=64, stride=1
        )
        self.layer2 = self._make_layer(
            residual_template, layers[1], channels=128, stride=2
        )
        self.layer3 = self._make_layer(
            residual_template, layers[2], channels=256, stride=2
        )
        self.layer4 = self._make_layer(
            residual_template, layers[3], channels=512, stride=2
        )
        self.avgpool = nn.AvgPool2d(kernel_size=4, stride=1)
        self.fc = nn.Linear(512 * residual_template.expansion, num_classes)

        # initialize weights for conv layers, batch norm layers
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, residual_template, num_residuals_blocks, channels, stride):
        identity_downsample = None

        if stride != 1 or self.in_channels != channels * residual_template.expansion:
            identity_downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    channels * residual_template.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(channels * residual_template.expansion),
            )

        layers = []
        layers.append(
            residual_template(self.in_channels, channels, stride, identity_downsample)
        )
        self.in_channels = channels * residual_template.expansion

        for i in range(1, num_residuals_blocks):
            layers.append(residual_template(self.in_channels, channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def ResNet50(img_channel):
    return ResNet(residual_template, [3, 4, 6, 3], img_channel)


def ResNet101(img_channel):
    return ResNet(residual_template, [3, 4, 23, 3], img_channel)


def ResNet152(img_channel):
    return ResNet(residual_template, [3, 8, 36, 3], img_channel)


def test():
    net = ResNet152(img_channel=1)
    y = net(torch.randn(64, 1, 32, 32))
    print(y.size())


# test()
ML/Projects/Exploring_MNIST/networks/vgg.py (new file, 139 lines)
import torch
import torch.nn as nn


VGG_types = {
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG16": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "VGG19": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}


class VGG(nn.Module):
    def __init__(
        self, vgg_type, in_channels, init_weights=True, batch_norm=True, num_classes=10
    ):
        super().__init__()

        self.batch_norm = batch_norm
        self.in_channels = in_channels

        self.layout = self.create_architecture(VGG_types[vgg_type])
        self.fc = nn.Linear(512, num_classes)

        # self.fcs = nn.Sequential(
        #     nn.Linear(512 * 1 * 1, 4096),
        #     nn.ReLU(inplace=False),
        #     nn.Dropout(),
        #     nn.Linear(4096, 4096),
        #     nn.ReLU(inplace=False),
        #     nn.Dropout(),
        #     nn.Linear(4096, num_classes),
        # )

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        out = self.layout(x)
        out = out.view(out.size(0), -1)
        out = self.fc(out)

        return out

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def create_architecture(self, architecture):
        layers = []

        for x in architecture:
            if type(x) == int:
                out_channels = x

                conv2d = nn.Conv2d(
                    self.in_channels, out_channels, kernel_size=3, padding=1
                )

                if self.batch_norm:
                    layers += [
                        conv2d,
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU(inplace=False),
                    ]
                else:
                    layers += [conv2d, nn.ReLU(inplace=False)]

                self.in_channels = out_channels

            elif x == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]

        return nn.Sequential(*layers)


def test():
    net = VGG("VGG16", 1)
    x = torch.randn(64, 1, 32, 32)
    y = net(x)
    print(y.size())


# test()
ML/Projects/Exploring_MNIST/train.py (new file, 264 lines)
import argparse
import os
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data

import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.backends.cudnn as cudnn

from torch.utils.data import DataLoader, SubsetRandomSampler
from networks.import_all_networks import *
from utils.import_utils import *


class Train_MNIST(object):
    def __init__(self):
        self.best_acc = 0
        self.in_channels = 1  # 1 because MNIST is grayscale
        self.dataset = mnist_data  # Class imported from utils that loads the data
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.dtype = torch.float32

        self.args = self.prepare_args()
        self.transform_train, self.transform_test = self.prepare_transformations()

        if self.args.create_validationset:
            (
                self.loader_train,
                self.loader_validation,
                self.loader_test,
            ) = self.prepare_data()
            self.data_check_acc = self.loader_validation
        else:
            self.loader_train, self.loader_test = self.prepare_data()
            self.data_check_acc = self.loader_train

    def prepare_args(self):
        parser = argparse.ArgumentParser(description="PyTorch MNIST")
        parser.add_argument("--resume", default="", type=str, metavar="PATH",
                            help="path to latest checkpoint (default: none)")
        parser.add_argument("--lr", default=0.001, type=float, metavar="LR",
                            help="initial learning rate")
        parser.add_argument("--weight-decay", default=1e-5, type=float, metavar="R",
                            help="L2 regularization lambda")
        parser.add_argument("--momentum", default=0.9, type=float, metavar="M",
                            help="SGD with momentum")
        parser.add_argument("--epochs", type=int, default=100, metavar="N",
                            help="number of epochs to train (default: 100)")
        parser.add_argument("--batch-size", type=int, default=128, metavar="N",
                            help="input batch size for training (default: 128)")
        parser.add_argument("--log-interval", type=int, default=240, metavar="N",
                            help="how many batches to wait before logging training status")
        parser.add_argument("--seed", type=int, default=1, metavar="S",
                            help="random seed (default: 1)")
        parser.add_argument("--number-workers", type=int, default=0, metavar="S",
                            help="number of workers (default: 0)")
        parser.add_argument("--init-padding", type=int, default=2, metavar="S",
                            help="Initial padding on the images (default: 2, because MNIST is 28x28 and we make it 32x32)")
        parser.add_argument("--create-validationset", action="store_true", default=False,
                            help="If you want to use a validation set (default: False). Default size = 10%")
        parser.add_argument("--save-model", action="store_true", default=False,
                            help="If you want to save this model (default: False).")
        args = parser.parse_args()
        return args

    def prepare_transformations(self):
        transform_train = transforms.Compose(
            [
                transforms.Pad(self.args.init_padding),
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        transform_test = transforms.Compose(
            [
                transforms.Pad(self.args.init_padding),
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        return transform_train, transform_test

    def prepare_data(self, shuffle=True):
        data = self.dataset(
            shuffle,
            self.transform_train,
            self.transform_test,
            self.args.number_workers,
            self.args.create_validationset,
            self.args.batch_size,
            validation_size=0.1,
            random_seed=self.args.seed,
        )

        if self.args.create_validationset:
            loader_train, loader_validation, loader_test = data.main()
            return loader_train, loader_validation, loader_test
        else:
            loader_train, loader_test = data.main()
            return loader_train, loader_test

    def train(self):
        criterion = nn.CrossEntropyLoss()
        iter = 0

        # vis_plotting = visdom_plotting()
        loss_list, batch_list, epoch_list, validation_acc_list, training_acc_list = (
            [],
            [],
            [0],
            [0],
            [0],
        )

        for epoch in range(self.args.epochs):
            for batch_idx, (x, y) in enumerate(self.loader_train):
                self.model.train()
                x = x.to(device=self.device, dtype=self.dtype)
                y = y.to(device=self.device, dtype=torch.long)

                scores = self.model(x)
                loss = criterion(scores, y)

                loss_list.append(loss.item())
                batch_list.append(iter + 1)
                iter += 1

                if batch_idx % self.args.log_interval == 0:
                    print(f"Batch {batch_idx}, epoch {epoch}, loss = {loss.item()}")
                    print()
                    self.model.eval()
                    train_acc = check_accuracy(self.data_check_acc, self.model)
                    # validation_acc = self.check_accuracy(self.data_check_acc)
                    validation_acc = 0
                    validation_acc_list.append(validation_acc)
                    training_acc_list.append(train_acc)
                    epoch_list.append(epoch + 0.5)
                    print()
                    print()
                    # call to plot in visdom
                    # vis_plotting.create_plot(loss_list, batch_list, validation_acc_list, epoch_list, training_acc_list)

                    # save checkpoint
                    if train_acc > self.best_acc and self.args.save_model:
                        self.best_acc = train_acc
                        save_checkpoint(
                            self.filename,
                            self.model,
                            self.optimizer,
                            self.best_acc,
                            epoch,
                        )

                self.model.train()
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

    def choose_network(self):
        self.model = LeNet(
            in_channels=self.in_channels, init_weights=True, num_classes=10
        )
        self.filename = "checkpoint/mnist_LeNet.pth.tar"

        # self.model = VGG('VGG16', in_channels = self.in_channels)
        # self.filename = 'checkpoint/mnist_VGG16.pth.tar'

        # self.model = ResNet50(img_channel=1)
        # self.filename = 'checkpoint/mnist_ResNet.pth.tar'

        # self.model = GoogLeNet(img_channel=1)
        # self.filename = 'checkpoint/mnist_GoogLeNet.pth.tar'

        self.model = self.model.to(self.device)

    def main(self):
        if __name__ == "__main__":
            self.choose_network()
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=self.args.lr,
                weight_decay=self.args.weight_decay,
                momentum=self.args.momentum,
            )
            cudnn.benchmark = True

            if self.args.resume:
                self.model.eval()
                (
                    self.model,
                    self.optimizer,
                    self.checkpoint,
                    self.start_epoch,
                    self.best_acc,
                ) = load_model(self.args, self.model, self.optimizer)
            else:
                load_model(self.args, self.model, self.optimizer)

            self.train()


## Mnist
network = Train_MNIST()
Train_MNIST.main(network)
ML/Projects/Exploring_MNIST/utils/import_utils.py (new file, 2 lines)
from utils.mnist_data import mnist_data
from utils.utils import check_accuracy, save_checkpoint, visdom_plotting, load_model
ML/Projects/Exploring_MNIST/utils/mnist_data.py (new file, 94 lines)
import numpy as np
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, SubsetRandomSampler


class mnist_data(object):
    def __init__(
        self,
        shuffle,
        transform_train,
        transform_test,
        num_workers=0,
        create_validation_set=True,
        batch_size=128,
        validation_size=0.2,
        random_seed=1,
    ):
        self.shuffle = shuffle
        self.validation_size = validation_size
        self.transform_train = transform_train
        self.transform_test = transform_test
        self.random_seed = random_seed
        self.create_validation_set = create_validation_set
        self.batch_size = batch_size
        self.num_workers = num_workers

    def download_data(self):
        mnist_trainset = datasets.MNIST(
            root="./data", train=True, download=True, transform=self.transform_train
        )
        mnist_testset = datasets.MNIST(
            root="./data", train=False, download=True, transform=self.transform_test
        )

        return mnist_trainset, mnist_testset

    def create_validationset(self, mnist_trainset):
        num_train = len(mnist_trainset)
        indices = list(range(num_train))
        split = int(self.validation_size * num_train)

        if self.shuffle:
            np.random.seed(self.random_seed)
            np.random.shuffle(indices)

        train_idx, valid_idx = indices[split:], indices[:split]

        train_sampler = SubsetRandomSampler(train_idx)
        validation_sampler = SubsetRandomSampler(valid_idx)

        loader_train = DataLoader(
            dataset=mnist_trainset,
            batch_size=self.batch_size,
            sampler=train_sampler,
            num_workers=self.num_workers,
        )
        loader_validation = DataLoader(
            dataset=mnist_trainset,
            batch_size=self.batch_size,
            sampler=validation_sampler,
            num_workers=self.num_workers,
        )

        return loader_train, loader_validation

    def main(self):
        mnist_trainset, mnist_testset = self.download_data()

        if self.create_validation_set:
            loader_train, loader_validation = self.create_validationset(mnist_trainset)
            loader_test = DataLoader(
                dataset=mnist_testset,
                batch_size=self.batch_size,
                shuffle=False,
                num_workers=self.num_workers,
            )

            return loader_train, loader_validation, loader_test

        else:
            loader_train = DataLoader(
                dataset=mnist_trainset,
                batch_size=self.batch_size,
                shuffle=self.shuffle,
                num_workers=self.num_workers,
            )
            loader_test = DataLoader(
                dataset=mnist_testset,
                batch_size=self.batch_size,
                shuffle=False,
                num_workers=self.num_workers,
            )

            return loader_train, loader_test
ML/Projects/Exploring_MNIST/utils/utils.py (new file, 130 lines)
import torch
import visdom
import os

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float32


def save_checkpoint(filename, model, optimizer, train_acc, epoch):
    save_state = {
        "state_dict": model.state_dict(),
        "acc": train_acc,
        "epoch": epoch + 1,
        "optimizer": optimizer.state_dict(),
    }
    print()
    print("Saving current parameters")
    print("___________________________________________________________")

    torch.save(save_state, filename)


def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training or validation set")
    else:
        print("Checking accuracy on test set")
    num_correct = 0
    num_samples = 0
    # model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = (float(num_correct) / num_samples) * 100.0
        print("Got %d / %d correct (%.2f)" % (num_correct, num_samples, acc))
    return acc


def load_model(args, model, optimizer):
    if args.resume:
        model.eval()
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint["epoch"]
            best_acc = checkpoint["acc"]
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint["epoch"]
                )
            )
            return model, optimizer, checkpoint, start_epoch, best_acc
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print("No pretrained model. Starting from scratch!")


class visdom_plotting(object):
    def __init__(self):
        self.viz = visdom.Visdom()

        self.cur_batch_win = None
        self.cur_batch_win_opts = {
            "title": "Epoch Loss Trace",
            "xlabel": "Batch Number",
            "ylabel": "Loss",
            "width": 600,
            "height": 400,
        }

        self.cur_validation_acc = None
        self.cur_validation_acc_opts = {
            "title": "Validation accuracy",
            "xlabel": "Epochs",
            "ylabel": "Validation Accuracy",
            "width": 600,
            "height": 400,
        }

        self.cur_training_acc = None
        self.cur_training_acc_opts = {
            "title": "Training accuracy",
            "xlabel": "Epochs",
            "ylabel": "Train Accuracy",
            "width": 600,
            "height": 400,
        }

    def create_plot(
        self, loss_list, batch_list, validation_acc_list, epoch_list, training_acc_list
    ):

        if self.viz.check_connection():
            self.cur_batch_win = self.viz.line(
                torch.FloatTensor(loss_list),
                torch.FloatTensor(batch_list),
                win=self.cur_batch_win,
                name="current_batch_loss",
                update=(None if self.cur_batch_win is None else "replace"),
                opts=self.cur_batch_win_opts,
            )

            self.cur_validation_acc = self.viz.line(
                torch.FloatTensor(validation_acc_list),
                torch.FloatTensor(epoch_list),
                win=self.cur_validation_acc,
                name="current_validation_accuracy",
                update=(None if self.cur_validation_acc is None else "replace"),
                opts=self.cur_validation_acc_opts,
            )

            self.cur_training_acc = self.viz.line(
                torch.FloatTensor(training_acc_list),
                torch.FloatTensor(epoch_list),
                # use the training-accuracy window here (previously the validation
                # window was reused, which overwrote the validation plot)
                win=self.cur_training_acc,
                name="current_training_accuracy",
                update=(None if self.cur_training_acc is None else "replace"),
                opts=self.cur_training_acc_opts,
            )
ML/Projects/spam_classifier_naive_bayes/build_vocabulary.py (new file, 41 lines)
# -*- coding: utf-8 -*-
"""
We want to go through each word in all emails,
check whether the word is an actual English word
by comparing it with nltk.corpus words, and if it is,
add it to our vocabulary.
"""

import pandas as pd
import nltk
from nltk.corpus import words

vocabulary = {}
data = pd.read_csv("data/emails.csv")
nltk.download("words")
set_words = set(words.words())


def build_vocabulary(curr_email):
    idx = len(vocabulary)
    for word in curr_email:
        if word.lower() not in vocabulary and word.lower() in set_words:
            # Store the lowercased word so lookups and insertions use the same key
            vocabulary[word.lower()] = idx
            idx += 1


if __name__ == "__main__":
    for i in range(data.shape[0]):
        curr_email = data.iloc[i, :][0].split()
        print(
            f"Current email is {i}/{data.shape[0]} and the \
length of vocab is currently {len(vocabulary)}"
        )

        build_vocabulary(curr_email)

    # Write dictionary to the vocabulary.txt file
    file = open("vocabulary.txt", "w")
    file.write(str(vocabulary))
    file.close()
New file, 44 lines (path not shown in the diff; this is the spam-classifier script that builds the X, y dataset from the vocabulary)
# -*- coding: utf-8 -*-

"""
Having created our vocabulary, we now need to create
the dataset X, y, which we build by computing a frequency
vector for each email. For example, if our vocabulary
has the words

[aardvark, ..., buy, ... money, .... zulu]

we go through each email and count up how many times each
word was repeated, so for a specific example this might look
like:

[0, ..., 4, ... 2, .... 0]

And perhaps, since both "buy" and "money" occur, this email might be
spam.
"""
import pandas as pd
import numpy as np
import ast

data = pd.read_csv("data/emails.csv")
file = open("vocabulary.txt", "r")
contents = file.read()
vocabulary = ast.literal_eval(contents)

X = np.zeros((data.shape[0], len(vocabulary)))
y = np.zeros((data.shape[0]))

for i in range(data.shape[0]):
    email = data.iloc[i, :][0].split()

    for email_word in email:
        if email_word.lower() in vocabulary:
            # Index with the lowercased word to match how the vocabulary was built
            X[i, vocabulary[email_word.lower()]] += 1

    y[i] = data.iloc[i, :][1]

# Save stored numpy arrays
np.save("data/X.npy", X)
np.save("data/y.npy", y)
ML/Projects/spam_classifier_naive_bayes/data/emails.csv (new file, 5729 lines; diff suppressed because one or more lines are too long)
ML/Projects/spam_classifier_naive_bayes/naivebayes.py (new file, 68 lines)
"""
Naive Bayes Classifier Implementation from scratch

To run the code, structure the data in the following way:
X of size: (num_training_examples, num_features)
y of size: (num_training_examples, )

Where the classes are 0, 1, 2, etc. Then an example run looks like:
NB = NaiveBayes(X, y)
NB.fit(X, y)
predictions = NB.predict(X)

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-21 Initial coding
"""
import numpy as np


class NaiveBayes:
    def __init__(self, X, y):
        self.num_examples, self.num_features = X.shape
        self.num_classes = len(np.unique(y))
        self.eps = 1e-6

    def fit(self, X, y):
        # fit needs the labels to compute the per-class statistics
        self.classes_mean = {}
        self.classes_variance = {}
        self.classes_prior = {}

        for c in range(self.num_classes):
            X_c = X[y == c]

            self.classes_mean[str(c)] = np.mean(X_c, axis=0)
            self.classes_variance[str(c)] = np.var(X_c, axis=0)
            self.classes_prior[str(c)] = X_c.shape[0] / X.shape[0]

    def predict(self, X):
        probs = np.zeros((self.num_examples, self.num_classes))

        for c in range(self.num_classes):
            prior = self.classes_prior[str(c)]
            probs_c = self.density_function(
                X, self.classes_mean[str(c)], self.classes_variance[str(c)]
            )
            probs[:, c] = probs_c + np.log(prior)

        return np.argmax(probs, 1)

    def density_function(self, x, mean, sigma):
        # Log-probability from the Gaussian density function:
        # log N(x; mu, sigma) = -k/2 * log(2*pi) - 1/2 * sum(log(sigma)) - 1/2 * sum((x - mu)^2 / sigma)
        const = -self.num_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum(
            np.log(sigma + self.eps)
        )
        probs = 0.5 * np.sum(np.power(x - mean, 2) / (sigma + self.eps), 1)
        return const - probs


if __name__ == "__main__":
    # For spam emails (make sure to run build_vocabulary etc. first so the .npy files exist)
    X = np.load("data/X.npy")
    y = np.load("data/y.npy")

    NB = NaiveBayes(X, y)
    NB.fit(X, y)
    y_pred = NB.predict(X)

    print(f"Accuracy: {sum(y_pred==y)/X.shape[0]}")
ML/Projects/text_generation_babynames/data/example_names.txt (new file, 19 lines)
Niela
Elia
Leneth
Ley
Ira
Bernandel
Gelico
Marti
Ednie
Ozel
Marin
Elithon
Mirce
Elie
Elvar
Domarine
Artha
Audrey
Davyd
ML/Projects/text_generation_babynames/data/names.txt (new file, 1055767 lines; diff suppressed because it is too large)
ML/Projects/text_generation_babynames/data/shakespeare_larger.txt (new file, 167204 lines; diff suppressed because it is too large)
ML/Projects/text_generation_babynames/data/shakespeare_tiny.txt (new file, 40000 lines; diff suppressed because it is too large)
ML/Projects/text_generation_babynames/generating_names.py (new file, 144 lines)
"""
Text generation using a character-level LSTM; specifically we want to
generate new names as inspiration for those having a baby :)

Although this is for name generation, the code is general in the
sense that you can send in any large text file (Shakespeare text, etc.)
and it will generate similar text.

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-05-09 Initial coding
"""

import torch
import torch.nn as nn
import string
import random
import sys
import unidecode
from torch.utils.tensorboard import SummaryWriter  # needed for the logging in train()

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Get characters from string.printable
all_characters = string.printable
n_characters = len(all_characters)

# Read large text file (Note: can be any text file, not limited to just names)
file = unidecode.unidecode(open("data/names.txt").read())


class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        out = self.embed(x)
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        out = self.fc(out.reshape(out.shape[0], -1))
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        return hidden, cell


class Generator:
    def __init__(self):
        self.chunk_len = 250
        self.num_epochs = 5000
        self.batch_size = 1
        self.print_every = 50
        self.hidden_size = 256
        self.num_layers = 2
        self.lr = 0.003

    def char_tensor(self, string):
        tensor = torch.zeros(len(string)).long()
        for c in range(len(string)):
            tensor[c] = all_characters.index(string[c])
        return tensor

    def get_random_batch(self):
        start_idx = random.randint(0, len(file) - self.chunk_len)
        end_idx = start_idx + self.chunk_len + 1
        text_str = file[start_idx:end_idx]
        text_input = torch.zeros(self.batch_size, self.chunk_len)
        text_target = torch.zeros(self.batch_size, self.chunk_len)

        for i in range(self.batch_size):
            text_input[i, :] = self.char_tensor(text_str[:-1])
            text_target[i, :] = self.char_tensor(text_str[1:])

        return text_input.long(), text_target.long()

    def generate(self, initial_str="A", predict_len=100, temperature=0.85):
        hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)
        initial_input = self.char_tensor(initial_str)
        predicted = initial_str

        for p in range(len(initial_str) - 1):
            _, (hidden, cell) = self.rnn(
                initial_input[p].view(1).to(device), hidden, cell
            )

        last_char = initial_input[-1]

        for p in range(predict_len):
            output, (hidden, cell) = self.rnn(
                last_char.view(1).to(device), hidden, cell
            )
            output_dist = output.data.view(-1).div(temperature).exp()
            top_char = torch.multinomial(output_dist, 1)[0]
            predicted_char = all_characters[top_char]
            predicted += predicted_char
            last_char = self.char_tensor(predicted_char)

        return predicted

    # input_size, hidden_size, num_layers, output_size
    def train(self):
        self.rnn = RNN(
            n_characters, self.hidden_size, self.num_layers, n_characters
        ).to(device)

        optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter(f"runs/names0")  # for tensorboard

        print("=> Starting training")

        for epoch in range(1, self.num_epochs + 1):
            inp, target = self.get_random_batch()
            hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)

            self.rnn.zero_grad()
            loss = 0
            inp = inp.to(device)
            target = target.to(device)

            for c in range(self.chunk_len):
                output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
                loss += criterion(output, target[:, c])

            loss.backward()
            optimizer.step()
            loss = loss.item() / self.chunk_len

            if epoch % self.print_every == 0:
                print(f"Loss: {loss}")
                print(self.generate())

            writer.add_scalar("Training loss", loss, global_step=epoch)


gennames = Generator()
gennames.train()