updated and checked CNN architectures still work with latest PyTorch

This commit is contained in:
Aladdin Persson
2022-12-20 12:13:12 +01:00
parent 28a6abea27
commit b6985eccc9
5 changed files with 100 additions and 77 deletions

View File

@@ -1,12 +1,9 @@
 """
 An implementation of LeNet CNN architecture.
-Video explanation: https://youtu.be/fcOW-Zyb5Bo
-Got any questions leave a comment on youtube :)
 Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
 * 2020-04-05 Initial coding
+* 2022-12-20 Update comments, code revision, checked still works with latest PyTorch version
 """
 import torch
@@ -17,27 +14,27 @@ class LeNet(nn.Module):
     def __init__(self):
         super(LeNet, self).__init__()
         self.relu = nn.ReLU()
-        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
         self.conv1 = nn.Conv2d(
             in_channels=1,
             out_channels=6,
-            kernel_size=(5, 5),
-            stride=(1, 1),
-            padding=(0, 0),
+            kernel_size=5,
+            stride=1,
+            padding=0,
         )
         self.conv2 = nn.Conv2d(
             in_channels=6,
             out_channels=16,
-            kernel_size=(5, 5),
-            stride=(1, 1),
-            padding=(0, 0),
+            kernel_size=5,
+            stride=1,
+            padding=0,
         )
         self.conv3 = nn.Conv2d(
             in_channels=16,
             out_channels=120,
-            kernel_size=(5, 5),
-            stride=(1, 1),
-            padding=(0, 0),
+            kernel_size=5,
+            stride=1,
+            padding=0,
         )
         self.linear1 = nn.Linear(120, 84)
         self.linear2 = nn.Linear(84, 10)
@@ -64,4 +61,4 @@ def test_lenet():
 if __name__ == "__main__":
     out = test_lenet()
     print(out.shape)
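
The tuple-to-int change above is purely cosmetic: nn.Conv2d and nn.AvgPool2d expand an int kernel_size/stride/padding into a square tuple, so behaviour is unchanged. A standalone sketch of that equivalence (illustrative only, not part of the commit), assuming the classic 1 x 32 x 32 LeNet input:

import torch
import torch.nn as nn

# An int kernel_size/stride/padding is expanded to a square tuple internally,
# so kernel_size=5 behaves exactly like kernel_size=(5, 5).
conv_int = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0)
conv_tup = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(5, 5), stride=(1, 1), padding=(0, 0))

x = torch.randn(64, 1, 32, 32)  # batch of classic 1 x 32 x 32 LeNet inputs
print(conv_int(x).shape)  # torch.Size([64, 6, 28, 28])
print(conv_tup(x).shape)  # same shape; only the notation changed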

View File

@@ -1,3 +1,12 @@
+"""
+An implementation of EfficientNet CNN architecture.
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+* 2021-02-05 Initial coding
+* 2022-12-20 Update comments, code revision, checked still works with latest PyTorch version
+"""
 import torch
 import torch.nn as nn
 from math import ceil
@@ -25,9 +34,10 @@ phi_values = {
     "b7": (6, 600, 0.5),
 }
 class CNNBlock(nn.Module):
     def __init__(
         self, in_channels, out_channels, kernel_size, stride, padding, groups=1
     ):
         super(CNNBlock, self).__init__()
         self.cnn = nn.Conv2d(
@@ -40,16 +50,17 @@ class CNNBlock(nn.Module):
             bias=False,
         )
         self.bn = nn.BatchNorm2d(out_channels)
         self.silu = nn.SiLU()  # SiLU <-> Swish
     def forward(self, x):
         return self.silu(self.bn(self.cnn(x)))
 class SqueezeExcitation(nn.Module):
     def __init__(self, in_channels, reduced_dim):
         super(SqueezeExcitation, self).__init__()
         self.se = nn.Sequential(
             nn.AdaptiveAvgPool2d(1),  # C x H x W -> C x 1 x 1
             nn.Conv2d(in_channels, reduced_dim, 1),
             nn.SiLU(),
             nn.Conv2d(reduced_dim, in_channels, 1),
@@ -59,17 +70,18 @@ class SqueezeExcitation(nn.Module):
     def forward(self, x):
         return x * self.se(x)
 class InvertedResidualBlock(nn.Module):
     def __init__(
         self,
         in_channels,
         out_channels,
         kernel_size,
         stride,
         padding,
         expand_ratio,
         reduction=4,  # squeeze excitation
         survival_prob=0.8,  # for stochastic depth
     ):
         super(InvertedResidualBlock, self).__init__()
         self.survival_prob = 0.8
@@ -80,12 +92,21 @@ class InvertedResidualBlock(nn.Module):
         if self.expand:
             self.expand_conv = CNNBlock(
-                in_channels, hidden_dim, kernel_size=3, stride=1, padding=1,
+                in_channels,
+                hidden_dim,
+                kernel_size=3,
+                stride=1,
+                padding=1,
             )
         self.conv = nn.Sequential(
             CNNBlock(
-                hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim,
+                hidden_dim,
+                hidden_dim,
+                kernel_size,
+                stride,
+                padding,
+                groups=hidden_dim,
             ),
             SqueezeExcitation(hidden_dim, reduced_dim),
             nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
@@ -96,7 +117,9 @@ class InvertedResidualBlock(nn.Module):
         if not self.training:
             return x
-        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
+        binary_tensor = (
+            torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
+        )
         return torch.div(x, self.survival_prob) * binary_tensor
     def forward(self, inputs):
@@ -122,8 +145,8 @@ class EfficientNet(nn.Module):
     def calculate_factors(self, version, alpha=1.2, beta=1.1):
         phi, res, drop_rate = phi_values[version]
-        depth_factor = alpha ** phi
-        width_factor = beta ** phi
+        depth_factor = alpha**phi
+        width_factor = beta**phi
         return width_factor, depth_factor, drop_rate
     def create_features(self, width_factor, depth_factor, last_channels):
@@ -132,7 +155,7 @@ class EfficientNet(nn.Module):
         in_channels = channels
         for expand_ratio, channels, repeats, stride, kernel_size in base_model:
-            out_channels = 4*ceil(int(channels*width_factor) / 4)
+            out_channels = 4 * ceil(int(channels * width_factor) / 4)
             layers_repeats = ceil(repeats * depth_factor)
             for layer in range(layers_repeats):
@@ -141,9 +164,9 @@ class EfficientNet(nn.Module):
                         in_channels,
                         out_channels,
                         expand_ratio=expand_ratio,
-                        stride = stride if layer == 0 else 1,
+                        stride=stride if layer == 0 else 1,
                         kernel_size=kernel_size,
-                        padding=kernel_size//2,  # if k=1:pad=0, k=3:pad=1, k=5:pad=2
+                        padding=kernel_size // 2,  # if k=1:pad=0, k=3:pad=1, k=5:pad=2
                     )
                 )
                 in_channels = out_channels
@@ -170,6 +193,8 @@ def test():
         num_classes=num_classes,
     ).to(device)
     print(model(x).shape)  # (num_examples, num_classes)
-test()
+if __name__ == "__main__":
+    test()
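
For reference, the compound-scaling math inside calculate_factors can be checked on its own. The sketch below is not part of the commit; only the "b7" tuple is visible in this diff, so the baseline "b0" entry (0, 224, 0.2) used here is an assumption:

from math import ceil

# (phi, resolution, drop_rate); "b7" matches the diff above, "b0" is assumed.
phi_values = {"b0": (0, 224, 0.2), "b7": (6, 600, 0.5)}

def calculate_factors(version, alpha=1.2, beta=1.1):
    phi, res, drop_rate = phi_values[version]
    depth_factor = alpha**phi  # how many extra repeats each stage gets
    width_factor = beta**phi   # how much channel counts grow
    return width_factor, depth_factor, drop_rate

width, depth, _ = calculate_factors("b7")
print(f"b7: depth x{depth:.2f}, width x{width:.2f}")  # roughly x2.99 deeper, x1.77 wider than b0
print(4 * ceil(int(32 * width) / 4))  # widths are rounded up to multiples of 4, e.g. 32 -> 56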

View File

@@ -1,15 +1,11 @@
 """
 An implementation of GoogLeNet / InceptionNet from scratch.
-Video explanation: https://youtu.be/uQc4Fs7yx5I
-Got any questions leave a comment on youtube :)
 Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
 * 2020-04-07 Initial coding
+* 2022-12-20 Update comments, code revision, checked still works with latest PyTorch version
 """
-# Imports
 import torch
 from torch import nn
@@ -25,9 +21,9 @@ class GoogLeNet(nn.Module):
         self.conv1 = conv_block(
             in_channels=3,
             out_channels=64,
-            kernel_size=(7, 7),
-            stride=(2, 2),
-            padding=(3, 3),
+            kernel_size=7,
+            stride=2,
+            padding=3,
         )
         self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
@@ -37,7 +33,7 @@ class GoogLeNet(nn.Module):
         # In this order: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
         self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
         self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
-        self.maxpool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1)
+        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
         self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
         self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
@@ -63,7 +59,6 @@ class GoogLeNet(nn.Module):
         x = self.conv1(x)
         x = self.maxpool1(x)
         x = self.conv2(x)
-        # x = self.conv3(x)
         x = self.maxpool2(x)
         x = self.inception3a(x)
@@ -104,21 +99,21 @@ class Inception_block(nn.Module):
         self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
     ):
         super(Inception_block, self).__init__()
-        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=(1, 1))
+        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)
         self.branch2 = nn.Sequential(
-            conv_block(in_channels, red_3x3, kernel_size=(1, 1)),
-            conv_block(red_3x3, out_3x3, kernel_size=(3, 3), padding=(1, 1)),
+            conv_block(in_channels, red_3x3, kernel_size=1),
+            conv_block(red_3x3, out_3x3, kernel_size=(3, 3), padding=1),
         )
         self.branch3 = nn.Sequential(
-            conv_block(in_channels, red_5x5, kernel_size=(1, 1)),
-            conv_block(red_5x5, out_5x5, kernel_size=(5, 5), padding=(2, 2)),
+            conv_block(in_channels, red_5x5, kernel_size=1),
+            conv_block(red_5x5, out_5x5, kernel_size=5, padding=2),
         )
         self.branch4 = nn.Sequential(
-            nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
-            conv_block(in_channels, out_1x1pool, kernel_size=(1, 1)),
+            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
+            conv_block(in_channels, out_1x1pool, kernel_size=1),
         )
     def forward(self, x):
@@ -144,7 +139,6 @@ class InceptionAux(nn.Module):
         x = self.relu(self.fc1(x))
         x = self.dropout(x)
         x = self.fc2(x)
         return x
@@ -160,7 +154,8 @@ class conv_block(nn.Module):
 if __name__ == "__main__":
-    # N = 3 (Mini batch size)
-    x = torch.randn(3, 3, 224, 224)
+    BATCH_SIZE = 5
+    x = torch.randn(BATCH_SIZE, 3, 224, 224)
     model = GoogLeNet(aux_logits=True, num_classes=1000)
     print(model(x)[2].shape)
+    assert model(x)[2].shape == torch.Size([BATCH_SIZE, 1000])
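
The new assert works because an Inception block concatenates its four branches along the channel dimension, so the in_channels of each following block equals out_1x1 + out_3x3 + out_5x5 + out_1x1pool of the previous one. A quick standalone check of that bookkeeping (not part of the commit):

# Constructor argument order, as in the comment above:
# in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool.
# The reduction channels (red_3x3, red_5x5) stay internal to their branches.
def inception_out_channels(out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
    return out_1x1 + out_3x3 + out_5x5 + out_1x1pool

assert inception_out_channels(64, 96, 128, 16, 32, 32) == 256    # inception3a -> 3b
assert inception_out_channels(128, 128, 192, 32, 96, 64) == 480  # inception3b -> 4a
print("channel bookkeeping is consistent")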

View File

@@ -5,11 +5,9 @@ The intuition for ResNet is simple and clear, but to code
 it didn't feel super clear at first, even when reading Pytorch own
 implementation.
-Video explanation:
-Got any questions leave a comment on youtube :)
 Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
 * 2020-04-12 Initial coding
+* 2022-12-20 Update comments, code revision, checked still works with latest PyTorch version
 """
 import torch
@@ -20,10 +18,15 @@ class block(nn.Module):
     def __init__(
         self, in_channels, intermediate_channels, identity_downsample=None, stride=1
     ):
-        super(block, self).__init__()
+        super().__init__()
         self.expansion = 4
         self.conv1 = nn.Conv2d(
-            in_channels, intermediate_channels, kernel_size=1, stride=1, padding=0, bias=False
+            in_channels,
+            intermediate_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=False,
         )
         self.bn1 = nn.BatchNorm2d(intermediate_channels)
         self.conv2 = nn.Conv2d(
@@ -32,7 +35,7 @@ class block(nn.Module):
             kernel_size=3,
             stride=stride,
             padding=1,
-            bias=False
+            bias=False,
         )
         self.bn2 = nn.BatchNorm2d(intermediate_channels)
         self.conv3 = nn.Conv2d(
@@ -41,7 +44,7 @@ class block(nn.Module):
             kernel_size=1,
             stride=1,
             padding=0,
-            bias=False
+            bias=False,
         )
         self.bn3 = nn.BatchNorm2d(intermediate_channels * self.expansion)
         self.relu = nn.ReLU()
@@ -72,7 +75,9 @@ class ResNet(nn.Module):
     def __init__(self, block, layers, image_channels, num_classes):
         super(ResNet, self).__init__()
         self.in_channels = 64
-        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        self.conv1 = nn.Conv2d(
+            image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
+        )
         self.bn1 = nn.BatchNorm2d(64)
         self.relu = nn.ReLU()
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
@@ -124,7 +129,7 @@ class ResNet(nn.Module):
                     intermediate_channels * 4,
                     kernel_size=1,
                     stride=stride,
-                    bias=False
+                    bias=False,
                 ),
                 nn.BatchNorm2d(intermediate_channels * 4),
             )
@@ -158,9 +163,13 @@ def ResNet152(img_channel=3, num_classes=1000):
 def test():
-    net = ResNet101(img_channel=3, num_classes=1000)
-    y = net(torch.randn(4, 3, 224, 224)).to("cuda")
+    BATCH_SIZE = 4
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    net = ResNet101(img_channel=3, num_classes=1000).to(device)
+    y = net(torch.randn(BATCH_SIZE, 3, 224, 224)).to(device)
+    assert y.size() == torch.Size([BATCH_SIZE, 1000])
     print(y.size())
-test()
+if __name__ == "__main__":
+    test()
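
The rewritten test drops the hard-coded .to("cuda") that crashed on CPU-only machines, though the input tensor is still created on the CPU, which would mismatch a CUDA-resident model. A standalone sketch of the same idea (the smoke_test helper is hypothetical, not part of the repo) that creates the input on the model's device and assumes a model returning a single logits tensor:

import torch

def smoke_test(model, batch_size=4, image_size=224, num_classes=1000):
    # Device-agnostic forward-shape check for models that return one logits tensor.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Create the input directly on the model's device; moving only the output
    # with .to(device) would fail once the model itself sits on the GPU.
    x = torch.randn(batch_size, 3, image_size, image_size, device=device)
    y = model(x)
    assert y.shape == torch.Size([batch_size, num_classes])
    return y

# Example: smoke_test(ResNet101(img_channel=3, num_classes=1000))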

View File

@@ -1,12 +1,9 @@
 """
 A from scratch implementation of the VGG architecture.
-Video explanation: https://youtu.be/ACmuBbuXn20
-Got any questions leave a comment on youtube :)
 Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
 * 2020-04-05 Initial coding
+* 2022-12-20 Update comments, code revision, checked still works with latest PyTorch version
 """
 # Imports
@@ -113,7 +110,7 @@ class VGG_net(nn.Module):
 if __name__ == "__main__":
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = VGG_net(in_channels=3, num_classes=1000).to(device)
-    print(model)
-    ## N = 3 (Mini batch size)
-    # x = torch.randn(3, 3, 224, 224).to(device)
-    # print(model(x).shape)
+    BATCH_SIZE = 3
+    x = torch.randn(3, 3, 224, 224).to(device)
+    assert model(x).shape == torch.Size([BATCH_SIZE, 1000])
+    print(model(x).shape)
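
One small note on the new VGG check: the randn call still hard-codes a batch of 3 instead of reusing BATCH_SIZE, and model(x) is evaluated twice (once for the assert, once for the print). A slightly tightened sketch of the same __main__ block, assuming the VGG_net class defined earlier in that file:

import torch

if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = VGG_net(in_channels=3, num_classes=1000).to(device)
    BATCH_SIZE = 3
    # Reuse BATCH_SIZE so the input and the assert stay in sync,
    # and run the forward pass only once.
    x = torch.randn(BATCH_SIZE, 3, 224, 224).to(device)
    out = model(x)
    assert out.shape == torch.Size([BATCH_SIZE, 1000])
    print(out.shape)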