Initial commit

This commit is contained in:
Aladdin Persson
2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
[Original Paper - ImageNet Classification with Deep Convolutional Neural Networks (2012)](https://www.cs.toronto.edu/~hinton/absps/imagenet.pdf)
Some questions I had when I was reading the paper
- [What does the term saturating nonlinearities mean?](https://stats.stackexchange.com/questions/174295/what-does-the-term-saturating-nonlinearities-mean)
- [What Is Saturating Gradient Problem](https://datascience.stackexchange.com/questions/27665/what-is-saturating-gradient-problem)
- [Why ReLU is better than the other activation functions](https://datascience.stackexchange.com/questions/23493/why-relu-is-better-than-the-other-activation-functions)
- [Why does overlapped pooling help reduce overfitting in conv nets?](https://stats.stackexchange.com/questions/283261/why-does-overlapped-pooling-help-reduce-overfitting-in-conv-nets)
- [Importance of local response normalization in CNN](https://stats.stackexchange.com/questions/145768/importance-of-local-response-normalization-in-cnn)
- [What Is Local Response Normalization In Convolutional Neural Networks](https://prateekvjoshi.com/2016/04/05/what-is-local-response-normalization-in-convolutional-neural-networks/)

View File

@@ -0,0 +1,113 @@
# Tensorflow v2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from tensorflow.keras.layers import (
Conv2D,
Dense,
Dropout,
Flatten,
Input,
Lambda,
MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing
tf.config.run_functions_eagerly(True)
def AlexNet(input_shape: typing.Tuple[int, ...], classes: int = 1000) -> Model:
    """
    Implementation of the AlexNet architecture (Krizhevsky et al., 2012).

    Arguments:
    input_shape -- shape of the images of the dataset, e.g. (224, 224, 3)
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras

    Note:
    when you read the paper, you will notice that the channels (filters) in the diagram are only
    half of what is written below. That is because the diagram shows the model for one GPU
    (I guess for simplicity). However, during the ILSVRC, they ran the network across 2 NVIDIA GTX 580 3GB GPUs.
    Also, the paper used Local Response Normalization, done here in Keras with a Lambda layer.
    You can also use a BatchNormalization layer instead.
    """
    # NOTE: no @tf.function here. This function builds the Keras graph once and
    # returns a Model; wrapping model construction in tf.function causes retracing
    # and repeated layer/variable creation (the original needed
    # tf.config.run_functions_eagerly(True) to work around that).

    # convert input shape into tensor
    X_input = Input(input_shape)

    # NOTE: layers 1-5 are conv-layers
    # layer 1
    X = Conv2D(
        filters = 96,
        kernel_size = (11, 11),
        strides = (4, 4),
        activation = "relu",
        padding = "same",
    )(X_input)
    X = MaxPooling2D(pool_size = (3, 3), strides = (2, 2))(X)
    # Local Response Normalization as in the paper
    X = Lambda(tf.nn.local_response_normalization)(X)

    # layer 2
    X = Conv2D(
        filters = 256,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)
    X = MaxPooling2D(pool_size = (3, 3), strides = (2, 2))(X)
    X = Lambda(tf.nn.local_response_normalization)(X)

    # layer 3
    X = Conv2D(
        filters = 384,
        kernel_size = (3, 3),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)

    # layer 4
    X = Conv2D(
        filters = 384,
        kernel_size = (3, 3),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)

    # layer 5
    X = Conv2D(
        filters = 256,
        kernel_size = (3, 3),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)
    X = MaxPooling2D(pool_size = (3, 3), strides = (2, 2))(X)
    X = Lambda(tf.nn.local_response_normalization)(X)

    # NOTE: layers 6-7 are fully-connected layers
    # layer 6
    X = Flatten()(X)
    X = Dense(units = 4096, activation = 'relu')(X)
    X = Dropout(0.5)(X)

    # layer 7
    X = Dense(units = 4096, activation = 'relu')(X)
    X = Dropout(0.5)(X)

    # layer 8 (classification layer)
    # use sigmoid for binary classification and softmax for multiclass classification
    X = Dense(units = classes, activation = "softmax")(X)

    model = Model(inputs = X_input, outputs = X, name = "AlexNet")
    return model

View File

@@ -0,0 +1,9 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from alexnet import AlexNet
if __name__ == "__main__":
    # build AlexNet with ImageNet-sized inputs and print its layer summary
    alexnet_model = AlexNet(input_shape = (224, 224, 3), classes = 1000)
    alexnet_model.summary()

View File

@@ -0,0 +1,14 @@
[Original Paper - Going Deeper with Convolutions (2014)](https://arxiv.org/abs/1409.4842)
[Related Video](https://www.youtube.com/watch?v=uQc4Fs7yx5I)
![meme](https://i.imgur.com/m91bhbe.png)
- [Review: GoogLeNet (Inception v1)](https://medium.com/coinmonks/paper-review-of-googlenet-inception-v1-winner-of-ilsvlc-2014-image-classification-c2b3565a64e7)
- [Understanding GoogLeNet Model CNN Architecture](https://www.geeksforgeeks.org/understanding-googlenet-model-cnn-architecture/)
- [Ensemble Methods in Machine Learning: What are They and Why Use Them?](https://towardsdatascience.com/ensemble-methods-in-machine-learning-what-are-they-and-why-use-them-68ec3f9fef5f)
- [Neural Networks Ensemble](https://towardsdatascience.com/neural-networks-ensemble-33f33bea7df3)
- [Multiscale Methods and Machine Learning](https://www.kdnuggets.com/2018/03/multiscale-methods-machine-learning.html)
- [What do the terms “dense” and “sparse” mean in the context of neural networks?](https://stats.stackexchange.com/questions/266996/what-do-the-terms-dense-and-sparse-mean-in-the-context-of-neural-networks)
- [The Sparse Future of Deep Learning](https://towardsdatascience.com/the-sparse-future-of-deep-learning-bce05e8e094a)
- [Understanding Auxiliary Loss](https://stats.stackexchange.com/a/436203)

View File

@@ -0,0 +1,163 @@
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from tensorflow.keras.layers import (
Activation,
AveragePooling2D,
BatchNormalization,
Conv2D,
Dense,
Dropout,
Flatten,
MaxPooling2D,
concatenate,
)
import tensorflow as tf
import typing
def convolution_block(
    X: tf.Tensor,
    filters: int,
    kernel_size: int,
    stride: int = 1,
    padding: str = 'valid',
) -> tf.Tensor:
    """
    Convolution block for GoogLeNet: Conv2D -> BatchNormalization -> ReLU.

    Arguments:
    X -- input tensor of shape (m, H, W, filters)
    filters -- defining the number of filters in the CONV layer
    kernel_size -- integer, specifying the shape of the CONV's window
    stride -- integer specifying the stride to be used
    padding -- padding type, "same" or "valid". Default is "valid"

    Returns:
    X -- output tensor of shape (m, H, W, filters)
    """
    # NOTE: no @tf.function here. This helper constructs fresh Keras layers each
    # call; decorating it with tf.function would retrace and create new
    # variables per invocation, which is exactly what tf.function forbids.
    X = Conv2D(
        filters = filters,
        kernel_size = (kernel_size, kernel_size),
        strides = (stride, stride),
        padding = padding,
    )(X)
    # batch normalization is not in the original paper because it was not invented at that time;
    # it is used here because it improves performance
    X = BatchNormalization()(X)
    X = Activation("relu")(X)
    return X
def inception_block(
    X: tf.Tensor,
    filters_1x1: int,
    filters_3x3_reduce: int,
    filters_3x3: int,
    filters_5x5_reduce: int,
    filters_5x5: int,
    pool_size: int,
) -> tf.Tensor:
    """
    Inception block for GoogLeNet: four parallel branches concatenated by channel.

    Arguments:
    X -- input tensor of shape (m, H, W, filters)
    filters_1x1 -- number of filters for (1x1 conv) in first branch
    filters_3x3_reduce -- number of filters for (1x1 conv) dimensionality reduction before (3x3 conv) in second branch
    filters_3x3 -- number of filters for (3x3 conv) in second branch
    filters_5x5_reduce -- number of filters for (1x1 conv) dimensionality reduction before (5x5 conv) in third branch
    filters_5x5 -- number of filters for (5x5 conv) in third branch
    pool_size -- number of filters for (1x1 conv) after max pooling in fourth branch

    Returns:
    X -- output tensor with the four branch outputs concatenated along the channel axis
    """
    # NOTE: no @tf.function here -- this builds new Keras layers on every call
    # (see convolution_block).

    # first branch: 1x1 convolution
    conv_1x1 = convolution_block(
        X,
        filters = filters_1x1,
        kernel_size = 1,
        padding = "same"
    )

    # second branch: 1x1 reduction followed by 3x3 convolution
    conv_3x3 = convolution_block(
        X,
        filters = filters_3x3_reduce,
        kernel_size = 1,
        padding = "same"
    )
    conv_3x3 = convolution_block(
        conv_3x3,
        filters = filters_3x3,
        kernel_size = 3,
        padding = "same"
    )

    # third branch: 1x1 reduction followed by 5x5 convolution
    conv_5x5 = convolution_block(
        X,
        filters = filters_5x5_reduce,
        kernel_size = 1,
        padding = "same"
    )
    conv_5x5 = convolution_block(
        conv_5x5,
        filters = filters_5x5,
        kernel_size = 5,
        padding = "same"
    )

    # fourth branch: max pooling followed by 1x1 projection
    # NOTE(review): the paper specifies 3x3 max pooling in the inception module;
    # (2, 2) here is a deviation -- confirm whether this is intentional.
    pool_projection = MaxPooling2D(
        pool_size = (2, 2),
        strides = (1, 1),
        padding = "same",
    )(X)
    pool_projection = convolution_block(
        pool_projection,
        filters = pool_size,
        kernel_size = 1,
        padding = "same"
    )

    # concat by channel/filter axis (axis 3 for NHWC tensors)
    return concatenate(inputs = [conv_1x1, conv_3x3, conv_5x5, pool_projection], axis = 3)
def auxiliary_block(
    X: tf.Tensor,
    classes: int,
) -> tf.Tensor:
    """
    Auxiliary classifier block for GoogLeNet.

    Refer to the original paper, page 8, for the auxiliary layer specification:
    5x5 average pooling (stride 3), 1x1 conv with 128 filters, FC-1024,
    dropout 70%, and a softmax classification layer.

    Arguments:
    X -- input tensor of shape (m, H, W, filters)
    classes -- number of classes for classification

    Returns:
    X -- softmax class probabilities of shape (m, classes)
    """
    # NOTE: no @tf.function here -- this builds new Keras layers on every call
    # (see convolution_block).
    X = AveragePooling2D(
        pool_size = (5, 5),
        padding = "same",
        strides = (3, 3),
    )(X)
    X = convolution_block(
        X,
        filters = 128,
        kernel_size = 1,
        stride = 1,
        padding = "same",
    )
    X = Flatten()(X)
    X = Dense(units = 1024, activation = "relu")(X)
    # paper uses a 70% dropout rate on the auxiliary classifier
    X = Dropout(rate = 0.7)(X)
    X = Dense(units = classes)(X)
    X = Activation("softmax")(X)
    return X

View File

@@ -0,0 +1,219 @@
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from block import (
auxiliary_block,
convolution_block,
inception_block,
)
from tensorflow.keras.layers import (
AveragePooling2D,
Dense,
Dropout,
Input,
MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing
tf.config.run_functions_eagerly(True)
def GoogLeNet(input_shape: typing.Tuple[int, ...] = (224, 224, 3), classes: int = 1000) -> Model:
    """
    Implementation of the popular GoogLeNet aka Inception v1 architecture.

    Refer to the original paper, page 6 - table 1, for the inception block filter sizes.

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- number of classes for classification

    Returns:
    model -- a Model() instance in Keras with three outputs:
             [main classifier, auxiliary classifier 1, auxiliary classifier 2]
    """
    # NOTE: no @tf.function here. Model construction must run eagerly; wrapping
    # it in tf.function caused retracing/variable-creation problems that the
    # original worked around with tf.config.run_functions_eagerly(True).

    # convert input shape into tensor
    X_input = Input(input_shape)

    # NOTE: auxiliary layers are only used in the training phase to improve performance
    # because they act as regularization and prevent the vanishing gradient problem
    auxiliary1 = None  # to store auxiliary layers classification value
    auxiliary2 = None

    # layer 1 (convolution block)
    X = convolution_block(
        X = X_input,
        filters = 64,
        kernel_size = 7,
        stride = 2,
        padding = "same",
    )

    # layer 2 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 3 (convolution block)
    # 1x1 reduce
    X = convolution_block(
        X,
        filters = 64,
        kernel_size = 1,
        stride = 1,
        padding = "same",
    )
    X = convolution_block(
        X,
        filters = 192,
        kernel_size = 3,
        stride = 1,
        padding = "same",
    )

    # layer 4 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 5 (inception 3a)
    X = inception_block(
        X,
        filters_1x1 = 64,
        filters_3x3_reduce = 96,
        filters_3x3 = 128,
        filters_5x5_reduce = 16,
        filters_5x5 = 32,
        pool_size = 32,
    )

    # layer 6 (inception 3b)
    X = inception_block(
        X,
        filters_1x1 = 128,
        filters_3x3_reduce = 128,
        filters_3x3 = 192,
        filters_5x5_reduce = 32,
        filters_5x5 = 96,
        pool_size = 64,
    )

    # layer 7 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 8 (inception 4a)
    X = inception_block(
        X,
        filters_1x1 = 192,
        filters_3x3_reduce = 96,
        filters_3x3 = 208,
        filters_5x5_reduce = 16,
        filters_5x5 = 48,
        pool_size = 64,
    )

    # First Auxiliary Softmax Classifier
    auxiliary1 = auxiliary_block(X, classes = classes)

    # layer 9 (inception 4b)
    X = inception_block(
        X,
        filters_1x1 = 160,
        filters_3x3_reduce = 112,
        filters_3x3 = 224,
        filters_5x5_reduce = 24,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # layer 10 (inception 4c)
    X = inception_block(
        X,
        filters_1x1 = 128,
        filters_3x3_reduce = 128,
        filters_3x3 = 256,
        filters_5x5_reduce = 24,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # layer 11 (inception 4d)
    X = inception_block(
        X,
        filters_1x1 = 112,
        filters_3x3_reduce = 144,
        filters_3x3 = 288,
        filters_5x5_reduce = 32,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # Second Auxiliary Softmax Classifier
    auxiliary2 = auxiliary_block(X, classes = classes)

    # layer 12 (inception 4e)
    X = inception_block(
        X,
        filters_1x1 = 256,
        filters_3x3_reduce = 160,
        filters_3x3 = 320,
        filters_5x5_reduce = 32,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 13 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 14 (inception 5a)
    X = inception_block(
        X,
        filters_1x1 = 256,
        filters_3x3_reduce = 160,
        filters_3x3 = 320,
        filters_5x5_reduce = 32,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 15 (inception 5b)
    X = inception_block(
        X,
        filters_1x1 = 384,
        filters_3x3_reduce = 192,
        filters_3x3 = 384,
        filters_5x5_reduce = 48,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 16 (average pool)
    X = AveragePooling2D(
        pool_size = (7, 7),
        padding = "same",
        strides = (1, 1),
    )(X)

    # layer 17 (dropout 40%)
    X = Dropout(rate = 0.4)(X)

    # layer 18 (fully-connected layer with softmax activation)
    X = Dense(units = classes, activation='softmax')(X)

    model = Model(X_input, outputs = [X, auxiliary1, auxiliary2], name='GoogLeNet/Inception-v1')
    return model

View File

@@ -0,0 +1,9 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from googlenet import GoogLeNet
if __name__ == "__main__":
    # build GoogLeNet with ImageNet-sized inputs and print its layer summary
    googlenet_model = GoogLeNet(input_shape = (224, 224, 3))
    googlenet_model.summary()

View File

@@ -0,0 +1,5 @@
[Original Paper - Gradient-Based Learning Applied to Document Recognition (1998)](http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf)
[Related Video](https://www.youtube.com/watch?v=fcOW-Zyb5Bo)
Some other useful links
- [Understanding and Implementing LeNet-5 CNN Architecture](https://towardsdatascience.com/understanding-and-implementing-lenet-5-cnn-architecture-deep-learning-a2d531ebc342)

View File

@@ -0,0 +1,78 @@
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from tensorflow.keras.layers import (
AveragePooling2D,
Conv2D,
Dense,
Flatten,
Input,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing
tf.config.run_functions_eagerly(True)
def LeNet5(input_shape: typing.Tuple[int, ...], classes: int = 1000) -> Model:
    """
    Implementation of the classic LeNet-5 architecture (LeCun et al., 1998).

    Arguments:
    input_shape -- shape of the images of the dataset, e.g. (32, 32, 1)
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras

    Note:
    to keep it close to the original, tanh activation is used instead of ReLU.
    However, based on newer papers, the rectified linear unit (ReLU) performs
    much faster than tanh activation.
    """
    # NOTE: no @tf.function here -- this function builds the Keras graph once;
    # decorating a model builder with tf.function causes retracing and repeated
    # variable creation.

    # convert input shape into tensor
    X_input = Input(input_shape)

    # layer 1: 6 feature maps of 5x5, followed by 2x2 average (subsampling) pooling
    X = Conv2D(
        filters = 6,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "tanh",
        padding = "valid",
    )(X_input)
    X = AveragePooling2D(pool_size = (2, 2), strides = (2, 2), padding = "valid")(X)

    # layer 2: 16 feature maps of 5x5, followed by 2x2 average pooling
    X = Conv2D(
        filters = 16,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "tanh",
        padding = "valid",
    )(X)
    X = AveragePooling2D(pool_size = (2, 2), strides = (2, 2), padding = "valid")(X)

    # layer 3: 120 feature maps of 5x5
    X = Conv2D(
        filters = 120,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "tanh",
        padding = "valid",
    )(X)

    # layer 4: fully-connected with 84 units
    X = Flatten()(X)
    X = Dense(units = 84, activation = "tanh")(X)

    # layer 5 (classification layer)
    X = Dense(units = classes, activation = "softmax")(X)

    model = Model(inputs = X_input, outputs = X, name = "LeNet5")
    return model

View File

@@ -0,0 +1,9 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from lenet5 import LeNet5
if __name__ == "__main__":
    # build LeNet-5 for 32x32 grayscale digits and print its layer summary
    lenet_model = LeNet5(input_shape = (32, 32, 1), classes = 10)
    lenet_model.summary()

View File

@@ -0,0 +1,7 @@
[Original Paper - Deep Residual Learning for Image Recognition (2015)](https://arxiv.org/abs/1512.03385)
[Related Video](https://www.youtube.com/watch?v=DkNIBBBvcPs&ab_channel=AladdinPersson)
Some questions that came to my mind when I was reading the paper
- [How do bottleneck architectures work in neural networks?](https://stats.stackexchange.com/questions/205150/how-do-bottleneck-architectures-work-in-neural-networks)
- [What does dotted line mean in ResNet?](https://stats.stackexchange.com/questions/457787/what-does-dotted-line-mean-in-resnet) `referring to Figure 3, 34-layer residual, from the paper`

View File

@@ -0,0 +1,105 @@
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from tensorflow.keras.layers import (
Activation,
Add,
BatchNormalization,
Conv2D,
)
import tensorflow as tf
import typing
def block(
    X: tf.Tensor,
    kernel_size: int,
    filters: typing.List[int],
    stage_no: int,
    block_name: str,
    is_conv_layer: bool = False,
    stride: int = 2
) -> tf.Tensor:
    """
    Bottleneck block (1x1 -> kxk -> 1x1) for a residual network.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    kernel_size -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage_no -- integer, used to name the layers, depending on their position in the network
    block_name -- string/character, used to name the layers, depending on their position in the network
    is_conv_layer -- to identify if identity downsample is needed
    stride -- integer specifying the stride to be used

    Returns:
    X -- output of the block, tensor of shape (n_H, n_W, n_C)
    """
    # NOTE: no @tf.function here -- this helper constructs fresh Keras layers
    # each call; tf.function would retrace and create new variables per call.

    # layer names
    conv_name_base = "res" + str(stage_no) + block_name + "_branch"
    bn_name_base = "bn" + str(stage_no) + block_name + "_branch"

    # filters
    F1, F2, F3 = filters

    # save the input value for the shortcut path
    X_shortcut = X

    # First component
    # NOTE: if conv_layer, you need to do downsampling
    X = Conv2D(
        filters = F1,
        kernel_size = (1, 1),
        strides = (stride, stride) if is_conv_layer else (1, 1),
        padding = "valid",
        name = conv_name_base + "2a",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + "2a")(X)
    X = Activation("relu")(X)

    # Second component
    X = Conv2D(
        filters = F2,
        kernel_size = (kernel_size, kernel_size),
        strides = (1, 1),
        padding = "same",
        name = conv_name_base + "2b",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + "2b")(X)
    X = Activation("relu")(X)

    # Third component (no ReLU before the addition)
    X = Conv2D(
        filters = F3,
        kernel_size = (1, 1),
        strides = (1, 1),
        padding = "valid",
        name = conv_name_base + "2c",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + "2c")(X)

    # NOTE: if is_conv_layer, you need to downsample X_shortcut to match the output (X) channel
    # so they can be added together
    if is_conv_layer:
        X_shortcut = Conv2D(
            filters = F3,
            kernel_size = (1, 1),
            strides = (stride, stride),
            padding = "valid",
            name = conv_name_base + "1",
            kernel_initializer = "glorot_uniform",
        )(X_shortcut)
        X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + "1")(X_shortcut)

    # add shortcut to the main path, then apply the final ReLU
    X = Add()([X, X_shortcut])
    X = Activation("relu")(X)

    return X

View File

@@ -0,0 +1,157 @@
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from block import block
from tensorflow.keras.layers import (
Activation,
AveragePooling2D,
BatchNormalization,
Conv2D,
Dense,
Flatten,
Input,
MaxPooling2D,
ZeroPadding2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing
tf.config.run_functions_eagerly(True)
def ResNet(name: str, layers: typing.List[int], input_shape: typing.Tuple[int, ...] = (64, 64, 3), classes: int = 6) -> Model:
    """
    Implementation of the popular ResNet architecture.

    Arguments:
    name -- name of the architecture
    layers -- number of blocks per layer, e.g. [3, 4, 6, 3] for ResNet50
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras

    Model Architecture:
        Resnet50:
            CONV2D -> BATCHNORM -> RELU -> MAXPOOL    // conv1
            -> CONVBLOCK -> IDBLOCK * 2               // conv2_x
            -> CONVBLOCK -> IDBLOCK * 3               // conv3_x
            -> CONVBLOCK -> IDBLOCK * 5               // conv4_x
            -> CONVBLOCK -> IDBLOCK * 2               // conv5_x
            -> AVGPOOL
            -> TOPLAYER

        Resnet101:
            CONV2D -> BATCHNORM -> RELU -> MAXPOOL    // conv1
            -> CONVBLOCK -> IDBLOCK * 2               // conv2_x
            -> CONVBLOCK -> IDBLOCK * 3               // conv3_x
            -> CONVBLOCK -> IDBLOCK * 22              // conv4_x
            -> CONVBLOCK -> IDBLOCK * 2               // conv5_x
            -> AVGPOOL
            -> TOPLAYER

        Resnet152:
            CONV2D -> BATCHNORM -> RELU -> MAXPOOL    // conv1
            -> CONVBLOCK -> IDBLOCK * 2               // conv2_x
            -> CONVBLOCK -> IDBLOCK * 7               // conv3_x
            -> CONVBLOCK -> IDBLOCK * 35              // conv4_x
            -> CONVBLOCK -> IDBLOCK * 2               // conv5_x
            -> AVGPOOL
            -> TOPLAYER
    """
    # NOTE: no @tf.function here -- model builders must run eagerly; tf.function
    # would retrace and recreate layers/variables on every call.

    # get layers (layer1 is always the same so no need to provide)
    layer2, layer3, layer4, layer5 = layers

    # convert input shape into tensor
    X_input = Input(input_shape)

    # zero-padding
    X = ZeroPadding2D((3, 3))(X_input)

    # conv1
    X = Conv2D(
        filters = 64,
        kernel_size = (7, 7),
        strides = (2, 2),
        name = "conv1",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = "bn_conv1")(X)
    X = Activation("relu")(X)
    X = MaxPooling2D((3, 3), strides = (2, 2))(X)

    # conv2_x
    X = make_layer(X, layers = layer2, kernel_size = 3, filters = [64, 64, 256], stride = 1, stage_no = 2)
    # conv3_x
    X = make_layer(X, layers = layer3, kernel_size = 3, filters = [128, 128, 512], stride = 2, stage_no = 3)
    # conv4_x
    X = make_layer(X, layers = layer4, kernel_size = 3, filters = [256, 256, 1024], stride = 2, stage_no = 4)
    # conv5_x
    # NOTE(review): the paper downsamples in conv5_x as well (stride 2);
    # stride = 1 here is a deviation -- confirm whether this is intentional.
    X = make_layer(X, layers = layer5, kernel_size = 3, filters = [512, 512, 2048], stride = 1, stage_no = 5)

    # average pooling
    X = AveragePooling2D((2, 2), name = "avg_pool")(X)

    # output layer
    X = Flatten()(X)
    X = Dense(
        classes,
        activation = "softmax",
        name="fc" + str(classes),
        kernel_initializer = "glorot_uniform"
    )(X)

    model = Model(inputs = X_input, outputs = X, name = name)
    return model
def make_layer(X: tf.Tensor, layers: int, kernel_size: int, filters: typing.List[int], stride: int, stage_no: int) -> tf.Tensor:
    """
    Build one conv-identity stage for ResNet.

    Arguments:
    X -- input tensor
    layers -- number of blocks in the stage
    kernel_size -- size of the kernel for the blocks
    filters -- number of filters/channels per block
    stride -- stride for downsampling the input
    stage_no -- stage number, used only for naming the layers

    Returns:
    X -- output tensor
    """
    # the first block of a stage is a convolution block ("a") that downsamples
    X = block(
        X,
        kernel_size = kernel_size,
        filters = filters,
        stage_no = stage_no,
        block_name = "a",
        is_conv_layer = True,
        stride = stride,
    )

    # the remaining blocks are identity blocks named "b", "c", "d", ...
    for offset in range(1, layers):
        X = block(
            X,
            kernel_size = kernel_size,
            filters = filters,
            stage_no = stage_no,
            block_name = chr(ord("a") + offset),
        )

    return X

View File

@@ -0,0 +1,10 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from resnet import ResNet
if __name__ == "__main__":
    # test ResNet50: stage depths [3, 4, 6, 3] per the paper
    resnet50 = ResNet(name = "Resnet50", layers = [3, 4, 6, 3], input_shape = (64, 64, 3), classes = 6)
    resnet50.summary()

View File

@@ -0,0 +1,13 @@
[Original Paper - Very Deep Convolutional Networks for Large-Scale Image Recognition (2014)](https://arxiv.org/abs/1409.1556)
[Related Video](https://www.youtube.com/watch?v=ACmuBbuXn20)
Some questions I had when I was reading the paper
- [What does 1x1 convolution mean in a neural network?](https://stats.stackexchange.com/questions/194142/what-does-1x1-convolution-mean-in-a-neural-network)
- [A guide to receptive field arithmetic for Convolutional Neural Networks](https://medium.com/mlreview/a-guide-to-receptive-field-arithmetic-for-convolutional-neural-networks-e0f514068807)
Some other useful links
- [VGGNet summary](https://medium.com/coinmonks/paper-review-of-vggnet-1st-runner-up-of-ilsvlc-2014-image-classification-d02355543a11)
- [VGGNet in Keras](https://towardsdatascience.com/step-by-step-vgg16-implementation-in-keras-for-beginners-a833c686ae6c)
- [VGGNet with Batch Normalization](https://gist.github.com/jjangsangy/38d644606130f05b806a4261c493a820)
This code is inspired by [VGGNet implement from scratch in PyTorch by aladdinpersson](https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/CNN_architectures/pytorch_vgg_implementation.py).

View File

@@ -0,0 +1,21 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from vggnet import VGGNet
# Integer value represents output channel after performing the convolution layer
# 'M' represents the max pooling layer
# After convolution blocks; flatten the output and use 4096x4096x1000 Linear Layers
# with soft-max at the end
# Integer value represents output channel after performing the convolution layer
# 'M' represents the max pooling layer
# After convolution blocks; flatten the output and use 4096x4096x1000 Linear Layers
# with soft-max at the end
#
# The number in each variant name (VGG11, VGG13, ...) is the total count of
# weighted layers (conv + fully-connected) in that configuration.
VGG_types = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

if __name__ == "__main__":
    # test VGGNet16
    model = VGGNet(name = "VGGNet16", architecture = VGG_types["VGG16"], input_shape=(224, 224, 3), classes = 1000)
    model.summary()

View File

@@ -0,0 +1,126 @@
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from tensorflow.keras.layers import (
Activation,
BatchNormalization,
Conv2D,
Dense,
Dropout,
Flatten,
Input,
MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing
tf.config.run_functions_eagerly(True)
def VGGNet(
    name: str,
    architecture: typing.List[ typing.Union[int, str] ],
    input_shape: typing.Tuple[int, ...],
    classes: int = 1000
) -> Model:
    """
    Implementation of the VGGNet architecture (Simonyan & Zisserman, 2014).

    Arguments:
    name -- name of the architecture
    architecture -- number of output channels per convolution layer in VGGNet,
                    with 'M' entries marking max-pooling layers
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras
    """
    # NOTE: no @tf.function here -- this function builds the Keras graph once;
    # decorating a model builder with tf.function causes retracing and repeated
    # variable creation.

    # convert input shape into tensor
    X_input = Input(input_shape)

    # make convolution layers
    X = make_conv_layer(X_input, architecture)

    # flatten the output and make fully connected layers
    X = Flatten()(X)
    X = make_dense_layer(X, 4096)
    X = make_dense_layer(X, 4096)

    # classification layer
    X = Dense(units = classes, activation = "softmax")(X)

    model = Model(inputs = X_input, outputs = X, name = name)
    return model
def make_conv_layer(
    X: tf.Tensor,
    architecture: typing.List[ typing.Union[int, str] ],
    activation: str = 'relu'
) -> tf.Tensor:
    """
    Create the convolution layers for VGGNet.

    In VGGNet
    - the kernel is always 3x3 for conv-layers with padding 1 and stride 1.
    - 2x2 kernel for max pooling with stride of 2.

    Arguments:
    X -- input tensor
    architecture -- number of output channels per convolution layer; 'M' marks max pooling
    activation -- type of activation method

    Returns:
    X -- output tensor
    """
    for output in architecture:
        # convolution layer
        # isinstance is the idiomatic (and subclass-safe) type check
        if isinstance(output, int):
            out_channels = output
            X = Conv2D(
                filters = out_channels,
                kernel_size = (3, 3),
                strides = (1, 1),
                padding = "same"
            )(X)
            X = BatchNormalization()(X)
            # relu activation (the default) zeroes negative values before
            # they are passed to the next layer
            X = Activation(activation)(X)

        # max-pooling layer ('M' entries)
        else:
            X = MaxPooling2D(
                pool_size = (2, 2),
                strides = (2, 2)
            )(X)

    return X
def make_dense_layer(X: tf.Tensor, output_units: int, dropout: float = 0.5, activation: str = 'relu') -> tf.Tensor:
    """
    Create one dense (fully-connected) layer for VGGNet:
    Dense -> BatchNormalization -> activation -> Dropout.

    Arguments:
    X -- input tensor
    output_units -- output tensor size
    dropout -- dropout rate for regularization
    activation -- type of activation method

    Returns:
    X -- output tensor
    """
    X = Dense(units = output_units)(X)
    X = BatchNormalization()(X)
    X = Activation(activation)(X)
    X = Dropout(dropout)(X)
    return X