Mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git

Initial commit
ML/TensorFlow/CNN_architectures/AlexNet/README.md (new file, 9 lines)
@@ -0,0 +1,9 @@
[Original Paper - ImageNet Classification with Deep Convolutional Neural Networks (2012)](https://www.cs.toronto.edu/~hinton/absps/imagenet.pdf)

Some questions I had when I was reading the paper
- [What does the term saturating nonlinearities mean?](https://stats.stackexchange.com/questions/174295/what-does-the-term-saturating-nonlinearities-mean)
- [What Is Saturating Gradient Problem](https://datascience.stackexchange.com/questions/27665/what-is-saturating-gradient-problem)
- [Why ReLU is better than the other activation functions](https://datascience.stackexchange.com/questions/23493/why-relu-is-better-than-the-other-activation-functions)
- [Why does overlapped pooling help reduce overfitting in conv nets?](https://stats.stackexchange.com/questions/283261/why-does-overlapped-pooling-help-reduce-overfitting-in-conv-nets)
- [Importance of local response normalization in CNN](https://stats.stackexchange.com/questions/145768/importance-of-local-response-normalization-in-cnn)
- [What Is Local Response Normalization In Convolutional Neural Networks](https://prateekvjoshi.com/2016/04/05/what-is-local-response-normalization-in-convolutional-neural-networks/)
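Since several of the links above ask what local response normalization actually computes, here is a minimal sketch (my addition, not part of the repository) applying `tf.nn.local_response_normalization` to a dummy activation tensor with the hyperparameters reported in the paper (n = 5, k = 2, alpha = 1e-4, beta = 0.75); the exact shapes are assumptions for illustration.

```python
# Hedged sketch, assuming TF 2.x; not part of the original repo.
import tensorflow as tf

# dummy activations: batch of 1, 8x8 spatial, 96 channels (as after AlexNet's first conv layer)
x = tf.random.normal([1, 8, 8, 96])

# LRN divides each activation by a power of the squared sum over neighbouring channels:
# output = input / (bias + alpha * sum_over_window(input^2)) ** beta
y = tf.nn.local_response_normalization(
    x,
    depth_radius = 2,   # window of 2*2+1 = 5 channels, i.e. n = 5 in the paper
    bias = 2.0,         # k
    alpha = 1e-4,
    beta = 0.75,
)
print(y.shape)  # (1, 8, 8, 96): same shape, values rescaled per channel neighbourhood
```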
ML/TensorFlow/CNN_architectures/AlexNet/alexnet.py (new file, 113 lines)
@@ -0,0 +1,113 @@
# Tensorflow v2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from tensorflow.keras.layers import (
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    Lambda,
    MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing

tf.config.run_functions_eagerly(True)


@tf.function
def AlexNet(input_shape: typing.Tuple[int], classes: int = 1000) -> Model:
    """
    Implementation of the AlexNet architecture.

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras

    Note:
    When you read the paper, you will notice that the channels (filters) in the diagram are only
    half of what I have written below. That is because the diagram only shows the model for
    one GPU (I guess for simplicity). However, during ILSVRC they ran the network across
    two NVIDIA GTX 580 3GB GPUs.

    Also, the paper uses Local Response Normalization, which can be done in Keras with a Lambda layer.
    You can also use a BatchNormalization layer instead.
    """

    # convert input shape into tensor
    X_input = Input(input_shape)

    # NOTE: layers 1-5 are convolutional layers
    # layer 1
    X = Conv2D(
        filters = 96,
        kernel_size = (11, 11),
        strides = (4, 4),
        activation = "relu",
        padding = "same",
    )(X_input)
    X = MaxPooling2D(pool_size = (3, 3), strides = (2, 2))(X)
    X = Lambda(tf.nn.local_response_normalization)(X)

    # layer 2
    X = Conv2D(
        filters = 256,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)
    X = MaxPooling2D(pool_size = (3, 3), strides = (2, 2))(X)
    X = Lambda(tf.nn.local_response_normalization)(X)

    # layer 3
    X = Conv2D(
        filters = 384,
        kernel_size = (3, 3),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)

    # layer 4
    X = Conv2D(
        filters = 384,
        kernel_size = (3, 3),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)

    # layer 5
    X = Conv2D(
        filters = 256,
        kernel_size = (3, 3),
        strides = (1, 1),
        activation = "relu",
        padding = "same",
    )(X)
    X = MaxPooling2D(pool_size = (3, 3), strides = (2, 2))(X)
    X = Lambda(tf.nn.local_response_normalization)(X)

    # NOTE: layers 6-7 are fully-connected layers
    # layer 6
    X = Flatten()(X)
    X = Dense(units = 4096, activation = 'relu')(X)
    X = Dropout(0.5)(X)

    # layer 7
    X = Dense(units = 4096, activation = 'relu')(X)
    X = Dropout(0.5)(X)

    # layer 8 (classification layer)
    # use sigmoid for binary classification and softmax for multiclass classification
    X = Dense(units = classes, activation = "softmax")(X)

    model = Model(inputs = X_input, outputs = X, name = "AlexNet")
    return model
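The docstring above notes that BatchNormalization can stand in for the Lambda-wrapped LRN. As a minimal sketch (my addition, with the helper name `normalize` being hypothetical, not the author's code), the three normalization calls could be routed through one small function:

```python
# Hedged sketch, not part of the repository. BatchNormalization normalizes over the batch
# rather than over neighbouring channels, so results will differ from the paper's LRN.
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, Lambda

def normalize(X, use_batch_norm: bool = False):
    """Return X normalized with LRN (as in the paper) or with BatchNormalization."""
    if use_batch_norm:
        return BatchNormalization()(X)
    return Lambda(tf.nn.local_response_normalization)(X)

# usage inside AlexNet(), replacing:
#   X = Lambda(tf.nn.local_response_normalization)(X)
# with:
#   X = normalize(X, use_batch_norm = True)
```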
ML/TensorFlow/CNN_architectures/AlexNet/test.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from alexnet import AlexNet

if __name__ == "__main__":
    model = AlexNet(input_shape = (224, 224, 3), classes = 1000)
    model.summary()
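test.py only builds the model and prints a summary. For completeness, a hedged compile sketch (my addition): the paper trains with stochastic gradient descent with momentum 0.9, weight decay 0.0005 and a batch size of 128; the loss format below and the omission of weight decay are assumptions of this sketch.

```python
# Hypothetical compile step, not part of the repository.
import tensorflow as tf
from alexnet import AlexNet

model = AlexNet(input_shape = (224, 224, 3), classes = 1000)
model.compile(
    optimizer = tf.keras.optimizers.SGD(learning_rate = 0.01, momentum = 0.9),
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)
```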
ML/TensorFlow/CNN_architectures/GoogLeNet/README.md (new file, 14 lines)
@@ -0,0 +1,14 @@
[Original Paper - Going Deeper with Convolutions (2014)](https://arxiv.org/abs/1409.4842)
[Related Video](https://www.youtube.com/watch?v=uQc4Fs7yx5I)



- [Review: GoogLeNet (Inception v1)](https://medium.com/coinmonks/paper-review-of-googlenet-inception-v1-winner-of-ilsvlc-2014-image-classification-c2b3565a64e7)
- [Understanding GoogLeNet Model – CNN Architecture](https://www.geeksforgeeks.org/understanding-googlenet-model-cnn-architecture/)
- [Ensemble Methods in Machine Learning: What are They and Why Use Them?](https://towardsdatascience.com/ensemble-methods-in-machine-learning-what-are-they-and-why-use-them-68ec3f9fef5f)
- [Neural Networks Ensemble](https://towardsdatascience.com/neural-networks-ensemble-33f33bea7df3)
- [Multiscale Methods and Machine Learning](https://www.kdnuggets.com/2018/03/multiscale-methods-machine-learning.html)
- [What do the terms “dense” and “sparse” mean in the context of neural networks?](https://stats.stackexchange.com/questions/266996/what-do-the-terms-dense-and-sparse-mean-in-the-context-of-neural-networks)
- [The Sparse Future of Deep Learning](https://towardsdatascience.com/the-sparse-future-of-deep-learning-bce05e8e094a)
- [Understanding Auxiliary Loss](https://stats.stackexchange.com/a/436203)
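The multiscale and dense/sparse links above come down to one mechanism: each inception module runs 1x1, 3x3 and 5x5 convolutions plus a pooled 1x1 projection in parallel and concatenates the results along the channel axis. As a worked example from Table 1 of the paper, inception (3a) uses 64, 128, 32 and 32 filters across its four branches, so its output has 64 + 128 + 32 + 32 = 256 channels while the 28x28 spatial size is unchanged.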
ML/TensorFlow/CNN_architectures/GoogLeNet/block.py (new file, 163 lines)
@@ -0,0 +1,163 @@
# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from tensorflow.keras.layers import (
    Activation,
    AveragePooling2D,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling2D,
    concatenate,
)
import tensorflow as tf


@tf.function
def convolution_block(
    X: tf.Tensor,
    filters: int,
    kernel_size: int,
    stride: int = 1,
    padding: str = 'valid',
) -> tf.Tensor:
    """
    Convolution block for GoogLeNet.

    Arguments:
    X -- input tensor of shape (m, H, W, filters)
    filters -- number of filters in the CONV layer
    kernel_size -- integer, specifying the shape of the CONV window
    stride -- integer specifying the stride to be used
    padding -- padding type, same or valid. Default is valid

    Returns:
    X -- output of the convolution block, tensor of shape (H, W, filters)
    """

    X = Conv2D(
        filters = filters,
        kernel_size = (kernel_size, kernel_size),
        strides = (stride, stride),
        padding = padding,
    )(X)
    # batch normalization is not in the original paper because it had not been invented yet;
    # however I am using it here because it improves performance
    X = BatchNormalization()(X)
    X = Activation("relu")(X)

    return X


@tf.function
def inception_block(
    X: tf.Tensor,
    filters_1x1: int,
    filters_3x3_reduce: int,
    filters_3x3: int,
    filters_5x5_reduce: int,
    filters_5x5: int,
    pool_size: int,
) -> tf.Tensor:
    """
    Inception block for GoogLeNet.

    Arguments:
    X -- input tensor of shape (m, H, W, filters)
    filters_1x1 -- number of filters for the (1x1 conv) in the first branch
    filters_3x3_reduce -- number of filters for the (1x1 conv) dimensionality reduction before the (3x3 conv) in the second branch
    filters_3x3 -- number of filters for the (3x3 conv) in the second branch
    filters_5x5_reduce -- number of filters for the (1x1 conv) dimensionality reduction before the (5x5 conv) in the third branch
    filters_5x5 -- number of filters for the (5x5 conv) in the third branch
    pool_size -- number of filters for the (1x1 conv) after 3x3 max pooling in the fourth branch

    Returns:
    X -- output of the inception block, tensor of shape (H, W, filters)
    """

    # first branch
    conv_1x1 = convolution_block(
        X,
        filters = filters_1x1,
        kernel_size = 1,
        padding = "same"
    )

    # second branch
    conv_3x3 = convolution_block(
        X,
        filters = filters_3x3_reduce,
        kernel_size = 1,
        padding = "same"
    )
    conv_3x3 = convolution_block(
        conv_3x3,
        filters = filters_3x3,
        kernel_size = 3,
        padding = "same"
    )

    # third branch
    conv_5x5 = convolution_block(
        X,
        filters = filters_5x5_reduce,
        kernel_size = 1,
        padding = "same"
    )
    conv_5x5 = convolution_block(
        conv_5x5,
        filters = filters_5x5,
        kernel_size = 5,
        padding = "same"
    )

    # fourth branch (3x3 max pooling as described in the paper, followed by a 1x1 projection)
    pool_projection = MaxPooling2D(
        pool_size = (3, 3),
        strides = (1, 1),
        padding = "same",
    )(X)
    pool_projection = convolution_block(
        pool_projection,
        filters = pool_size,
        kernel_size = 1,
        padding = "same"
    )

    # concatenate by channel/filter axis
    return concatenate(inputs = [conv_1x1, conv_3x3, conv_5x5, pool_projection], axis = 3)


@tf.function
def auxiliary_block(
    X: tf.Tensor,
    classes: int,
) -> tf.Tensor:
    """
    Auxiliary classifier block for GoogLeNet.
    Refer to the original paper, page 8, for the auxiliary layer specification.

    Arguments:
    X -- input tensor of shape (m, H, W, filters)
    classes -- number of classes for classification

    Returns:
    X -- output of the auxiliary block, tensor of shape (m, classes)
    """

    X = AveragePooling2D(
        pool_size = (5, 5),
        padding = "same",
        strides = (3, 3),
    )(X)
    X = convolution_block(
        X,
        filters = 128,
        kernel_size = 1,
        stride = 1,
        padding = "same",
    )
    X = Flatten()(X)
    X = Dense(units = 1024, activation = "relu")(X)
    X = Dropout(rate = 0.7)(X)
    X = Dense(units = classes)(X)
    X = Activation("softmax")(X)

    return X
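As a small usage sketch (my addition, assuming the `run_functions_eagerly` setting that googlenet.py applies when calling these blocks), wiring one inception block with the (4a) filter sizes into a throwaway Model shows the channel concatenation directly:

```python
# Hedged usage sketch, not part of the repository.
import tensorflow as tf
from tensorflow.keras import Input, Model
from block import inception_block

tf.config.run_functions_eagerly(True)

inp = Input((14, 14, 480))                      # shape entering inception (4a)
out = inception_block(
    inp,
    filters_1x1 = 192,
    filters_3x3_reduce = 96,
    filters_3x3 = 208,
    filters_5x5_reduce = 16,
    filters_5x5 = 48,
    pool_size = 64,
)
print(Model(inp, out).output_shape)             # expected: (None, 14, 14, 512) = 192+208+48+64
```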
ML/TensorFlow/CNN_architectures/GoogLeNet/googlenet.py (new file, 219 lines)
@@ -0,0 +1,219 @@
# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from block import (
    auxiliary_block,
    convolution_block,
    inception_block,
)

from tensorflow.keras.layers import (
    AveragePooling2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing

tf.config.run_functions_eagerly(True)


@tf.function
def GoogLeNet(input_shape: typing.Tuple[int] = (224, 224, 3), classes: int = 1000) -> Model:
    """
    Implementation of the popular GoogLeNet aka Inception v1 architecture.
    Refer to the original paper, page 6 - table 1, for the inception block filter sizes.

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- number of classes for classification

    Returns:
    model -- a Model() instance in Keras
    """

    # convert input shape into tensor
    X_input = Input(input_shape)

    # NOTE: auxiliary classifiers are only used in the training phase to improve performance,
    # because they act as regularization and help prevent the vanishing gradient problem
    auxiliary1 = None  # to store the auxiliary classifiers' outputs
    auxiliary2 = None

    # layer 1 (convolution block)
    X = convolution_block(
        X = X_input,
        filters = 64,
        kernel_size = 7,
        stride = 2,
        padding = "same",
    )

    # layer 2 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 3 (convolution block)
    # 1x1 reduce
    X = convolution_block(
        X,
        filters = 64,
        kernel_size = 1,
        stride = 1,
        padding = "same",
    )
    X = convolution_block(
        X,
        filters = 192,
        kernel_size = 3,
        stride = 1,
        padding = "same",
    )

    # layer 4 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 5 (inception 3a)
    X = inception_block(
        X,
        filters_1x1 = 64,
        filters_3x3_reduce = 96,
        filters_3x3 = 128,
        filters_5x5_reduce = 16,
        filters_5x5 = 32,
        pool_size = 32,
    )

    # layer 6 (inception 3b)
    X = inception_block(
        X,
        filters_1x1 = 128,
        filters_3x3_reduce = 128,
        filters_3x3 = 192,
        filters_5x5_reduce = 32,
        filters_5x5 = 96,
        pool_size = 64,
    )

    # layer 7 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 8 (inception 4a)
    X = inception_block(
        X,
        filters_1x1 = 192,
        filters_3x3_reduce = 96,
        filters_3x3 = 208,
        filters_5x5_reduce = 16,
        filters_5x5 = 48,
        pool_size = 64,
    )

    # First Auxiliary Softmax Classifier
    auxiliary1 = auxiliary_block(X, classes = classes)

    # layer 9 (inception 4b)
    X = inception_block(
        X,
        filters_1x1 = 160,
        filters_3x3_reduce = 112,
        filters_3x3 = 224,
        filters_5x5_reduce = 24,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # layer 10 (inception 4c)
    X = inception_block(
        X,
        filters_1x1 = 128,
        filters_3x3_reduce = 128,
        filters_3x3 = 256,
        filters_5x5_reduce = 24,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # layer 11 (inception 4d)
    X = inception_block(
        X,
        filters_1x1 = 112,
        filters_3x3_reduce = 144,
        filters_3x3 = 288,
        filters_5x5_reduce = 32,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # Second Auxiliary Softmax Classifier
    auxiliary2 = auxiliary_block(X, classes = classes)

    # layer 12 (inception 4e)
    X = inception_block(
        X,
        filters_1x1 = 256,
        filters_3x3_reduce = 160,
        filters_3x3 = 320,
        filters_5x5_reduce = 32,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 13 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 14 (inception 5a)
    X = inception_block(
        X,
        filters_1x1 = 256,
        filters_3x3_reduce = 160,
        filters_3x3 = 320,
        filters_5x5_reduce = 32,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 15 (inception 5b)
    X = inception_block(
        X,
        filters_1x1 = 384,
        filters_3x3_reduce = 192,
        filters_3x3 = 384,
        filters_5x5_reduce = 48,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 16 (average pool, 7x7 -> 1x1 as in the paper, so valid padding)
    X = AveragePooling2D(
        pool_size = (7, 7),
        padding = "valid",
        strides = (1, 1),
    )(X)

    # layer 17 (dropout 40%)
    X = Dropout(rate = 0.4)(X)

    # layer 18 (fully-connected layer with softmax activation)
    X = Flatten()(X)
    X = Dense(units = classes, activation = 'softmax')(X)

    model = Model(inputs = X_input, outputs = [X, auxiliary1, auxiliary2], name = 'GoogLeNet/Inception-v1')
    return model
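Because the returned Model has three outputs (the main classifier plus the two auxiliary classifiers), it needs three losses at training time. The paper weights the auxiliary losses by 0.3. A hedged compile sketch (my addition; the optimizer choice is an assumption):

```python
# Hedged sketch, not part of the repository: compiling the 3-output model.
# The 0.3 auxiliary-loss weighting comes from the paper; the optimizer is an assumption.
import tensorflow as tf
from googlenet import GoogLeNet

model = GoogLeNet(input_shape = (224, 224, 3), classes = 1000)
model.compile(
    optimizer = tf.keras.optimizers.SGD(momentum = 0.9),
    loss = ["categorical_crossentropy"] * 3,    # main output + 2 auxiliary outputs
    loss_weights = [1.0, 0.3, 0.3],
    metrics = ["accuracy"],
)
# at fit time the same one-hot labels are passed for all three outputs:
# model.fit(x, [y, y, y], ...)
```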
ML/TensorFlow/CNN_architectures/GoogLeNet/test.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from googlenet import GoogLeNet

if __name__ == "__main__":
    model = GoogLeNet(input_shape = (224, 224, 3))
    model.summary()
ML/TensorFlow/CNN_architectures/LeNet5/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
[Original Paper - Gradient-Based Learning Applied to Document Recognition (1998)](http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf)
[Related Video](https://www.youtube.com/watch?v=fcOW-Zyb5Bo)

Some other useful links
- [Understanding and Implementing LeNet-5 CNN Architecture](https://towardsdatascience.com/understanding-and-implementing-lenet-5-cnn-architecture-deep-learning-a2d531ebc342)
ML/TensorFlow/CNN_architectures/LeNet5/lenet5.py (new file, 78 lines)
@@ -0,0 +1,78 @@
# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from tensorflow.keras.layers import (
    AveragePooling2D,
    Conv2D,
    Dense,
    Flatten,
    Input,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing

tf.config.run_functions_eagerly(True)


@tf.function
def LeNet5(input_shape: typing.Tuple[int], classes: int = 1000) -> Model:
    """
    Implementation of the classic LeNet-5 architecture.

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras

    Note:
    Because I want to keep it original, I used tanh activation instead of ReLU activation.
    However, based on newer papers, the rectified linear unit (ReLU) trains much faster than
    tanh activation.
    """

    # convert input shape into tensor
    X_input = Input(input_shape)

    # layer 1
    X = Conv2D(
        filters = 6,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "tanh",
        padding = "valid",
    )(X_input)
    X = AveragePooling2D(pool_size = (2, 2), strides = (2, 2), padding = "valid")(X)

    # layer 2
    X = Conv2D(
        filters = 16,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "tanh",
        padding = "valid",
    )(X)
    X = AveragePooling2D(pool_size = (2, 2), strides = (2, 2), padding = "valid")(X)

    # layer 3
    X = Conv2D(
        filters = 120,
        kernel_size = (5, 5),
        strides = (1, 1),
        activation = "tanh",
        padding = "valid",
    )(X)

    # layer 4
    X = Flatten()(X)
    X = Dense(units = 84, activation = "tanh")(X)

    # layer 5 (classification layer)
    X = Dense(units = classes, activation = "softmax")(X)

    model = Model(inputs = X_input, outputs = X, name = "LeNet5")
    return model
ML/TensorFlow/CNN_architectures/LeNet5/test.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from lenet5 import LeNet5

if __name__ == "__main__":
    model = LeNet5(input_shape = (32, 32, 1), classes = 10)
    model.summary()
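Extending the test above into actual training is straightforward because LeNet-5 was designed for 32x32 grayscale digits. A hedged sketch (my addition; the optimizer, batch size and epoch count are assumptions) that pads MNIST's 28x28 images to 32x32 and fits the model:

```python
# Hedged MNIST training sketch, not part of the repository.
import numpy as np
import tensorflow as tf
from lenet5 import LeNet5

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# pad 28x28 digits to the 32x32 input LeNet-5 expects, add a channel axis, scale to [0, 1]
x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2)))[..., np.newaxis] / 255.0
x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2)))[..., np.newaxis] / 255.0

model = LeNet5(input_shape = (32, 32, 1), classes = 10)
model.compile(optimizer = "adam",
              loss = "sparse_categorical_crossentropy",
              metrics = ["accuracy"])
model.fit(x_train, y_train, batch_size = 128, epochs = 2,
          validation_data = (x_test, y_test))
```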
ML/TensorFlow/CNN_architectures/ResNet/README.md (new file, 7 lines)
@@ -0,0 +1,7 @@
[Original Paper - Deep Residual Learning for Image Recognition (2015)](https://arxiv.org/abs/1512.03385)
[Related Video](https://www.youtube.com/watch?v=DkNIBBBvcPs&ab_channel=AladdinPersson)

Some questions that came to my mind when I was reading the paper

- [How do bottleneck architectures work in neural networks?](https://stats.stackexchange.com/questions/205150/how-do-bottleneck-architectures-work-in-neural-networks)
- [What does dotted line mean in ResNet?](https://stats.stackexchange.com/questions/457787/what-does-dotted-line-mean-in-resnet) `referring to Figure 3 (34-layer residual) in the paper`
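On the bottleneck question above, a worked example (my addition): a [64, 64, 256] bottleneck first projects 256 input channels down to 64 with a 1x1 convolution, applies the expensive 3x3 convolution at 64 channels, then projects back up to 256 with another 1x1 convolution. Counting weights, that is 256·64 + 3·3·64·64 + 64·256 ≈ 69.6k parameters, versus 3·3·256·256 ≈ 590k for a single 3x3 convolution at full width, which is what makes very deep ResNets affordable. The dotted lines in Figure 3 are the shortcuts where the shape changes; in block.py below this is handled by the `is_conv_layer` projection.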
ML/TensorFlow/CNN_architectures/ResNet/block.py (new file, 105 lines)
@@ -0,0 +1,105 @@
# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from tensorflow.keras.layers import (
    Activation,
    Add,
    BatchNormalization,
    Conv2D,
)
import tensorflow as tf
import typing


@tf.function
def block(
    X: tf.Tensor,
    kernel_size: int,
    filters: typing.List[int],
    stage_no: int,
    block_name: str,
    is_conv_layer: bool = False,
    stride: int = 2
) -> tf.Tensor:
    """
    Block for residual network.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    kernel_size -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage_no -- integer, used to name the layers, depending on their position in the network
    block_name -- string/character, used to name the layers, depending on their position in the network
    is_conv_layer -- whether the shortcut needs a downsampling (projection) convolution
    stride -- integer specifying the stride to be used

    Returns:
    X -- output of the block, tensor of shape (n_H, n_W, n_C)
    """

    # names
    conv_name_base = "res" + str(stage_no) + block_name + "_branch"
    bn_name_base = "bn" + str(stage_no) + block_name + "_branch"

    # filters
    F1, F2, F3 = filters

    # save the input value for the shortcut
    X_shortcut = X

    # First component
    # NOTE: if is_conv_layer, downsample with the given stride
    X = Conv2D(
        filters = F1,
        kernel_size = (1, 1),
        strides = (stride, stride) if is_conv_layer else (1, 1),
        padding = "valid",
        name = conv_name_base + "2a",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + "2a")(X)
    X = Activation("relu")(X)

    # Second component
    X = Conv2D(
        filters = F2,
        kernel_size = (kernel_size, kernel_size),
        strides = (1, 1),
        padding = "same",
        name = conv_name_base + "2b",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + "2b")(X)
    X = Activation("relu")(X)

    # Third component
    X = Conv2D(
        filters = F3,
        kernel_size = (1, 1),
        strides = (1, 1),
        padding = "valid",
        name = conv_name_base + "2c",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + "2c")(X)

    # NOTE: if is_conv_layer, downsample and project X_shortcut to match the output (X) channels,
    # so the two tensors can be added together
    if is_conv_layer:
        X_shortcut = Conv2D(
            filters = F3,
            kernel_size = (1, 1),
            strides = (stride, stride),
            padding = "valid",
            name = conv_name_base + "1",
            kernel_initializer = "glorot_uniform",
        )(X_shortcut)
        X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + "1")(X_shortcut)

    # Add the shortcut to the main path and apply the final activation
    X = Add()([X, X_shortcut])
    X = Activation("relu")(X)

    return X
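A small usage sketch (my addition, assuming the eager-run setting that resnet.py configures): the first block of a stage uses `is_conv_layer = True` because the channel count changes and the shortcut must be projected, while the remaining identity blocks can add the shortcut directly.

```python
# Hedged sketch, not part of the repository: one projection block followed by one identity block.
import tensorflow as tf
from tensorflow.keras import Input, Model
from block import block

tf.config.run_functions_eagerly(True)

inp = Input((56, 56, 64))
# projection block: 64 -> 256 channels, so the shortcut needs a 1x1 conv (is_conv_layer = True)
x = block(inp, kernel_size = 3, filters = [64, 64, 256], stage_no = 2, block_name = "a",
          is_conv_layer = True, stride = 1)
# identity block: input and output are both 256 channels, shortcut is added as-is
x = block(x, kernel_size = 3, filters = [64, 64, 256], stage_no = 2, block_name = "b")
print(Model(inp, x).output_shape)   # expected: (None, 56, 56, 256)
```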
ML/TensorFlow/CNN_architectures/ResNet/resnet.py (new file, 157 lines)
@@ -0,0 +1,157 @@
# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from block import block

from tensorflow.keras.layers import (
    Activation,
    AveragePooling2D,
    BatchNormalization,
    Conv2D,
    Dense,
    Flatten,
    Input,
    MaxPooling2D,
    ZeroPadding2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing

tf.config.run_functions_eagerly(True)


@tf.function
def ResNet(name: str, layers: typing.List[int], input_shape: typing.Tuple[int] = (64, 64, 3), classes: int = 6) -> Model:
    """
    Implementation of the popular ResNet architecture.

    Arguments:
    name -- name of the architecture
    layers -- number of blocks per stage
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras

    Model Architecture:
    Resnet50:
        CONV2D -> BATCHNORM -> RELU -> MAXPOOL    // conv1
        -> CONVBLOCK -> IDBLOCK * 2               // conv2_x
        -> CONVBLOCK -> IDBLOCK * 3               // conv3_x
        -> CONVBLOCK -> IDBLOCK * 5               // conv4_x
        -> CONVBLOCK -> IDBLOCK * 2               // conv5_x
        -> AVGPOOL
        -> TOPLAYER

    Resnet101:
        CONV2D -> BATCHNORM -> RELU -> MAXPOOL    // conv1
        -> CONVBLOCK -> IDBLOCK * 2               // conv2_x
        -> CONVBLOCK -> IDBLOCK * 3               // conv3_x
        -> CONVBLOCK -> IDBLOCK * 22              // conv4_x
        -> CONVBLOCK -> IDBLOCK * 2               // conv5_x
        -> AVGPOOL
        -> TOPLAYER

    Resnet152:
        CONV2D -> BATCHNORM -> RELU -> MAXPOOL    // conv1
        -> CONVBLOCK -> IDBLOCK * 2               // conv2_x
        -> CONVBLOCK -> IDBLOCK * 7               // conv3_x
        -> CONVBLOCK -> IDBLOCK * 35              // conv4_x
        -> CONVBLOCK -> IDBLOCK * 2               // conv5_x
        -> AVGPOOL
        -> TOPLAYER
    """

    # get layers (conv1 is always the same, so it is not provided)
    layer2, layer3, layer4, layer5 = layers

    # convert input shape into tensor
    X_input = Input(input_shape)

    # zero-padding
    X = ZeroPadding2D((3, 3))(X_input)

    # conv1
    X = Conv2D(
        filters = 64,
        kernel_size = (7, 7),
        strides = (2, 2),
        name = "conv1",
        kernel_initializer = "glorot_uniform",
    )(X)
    X = BatchNormalization(axis = 3, name = "bn_conv1")(X)
    X = Activation("relu")(X)
    X = MaxPooling2D((3, 3), strides = (2, 2))(X)

    # conv2_x
    X = make_layer(X, layers = layer2, kernel_size = 3, filters = [64, 64, 256], stride = 1, stage_no = 2)

    # conv3_x
    X = make_layer(X, layers = layer3, kernel_size = 3, filters = [128, 128, 512], stride = 2, stage_no = 3)

    # conv4_x
    X = make_layer(X, layers = layer4, kernel_size = 3, filters = [256, 256, 1024], stride = 2, stage_no = 4)

    # conv5_x
    X = make_layer(X, layers = layer5, kernel_size = 3, filters = [512, 512, 2048], stride = 1, stage_no = 5)

    # average pooling
    X = AveragePooling2D((2, 2), name = "avg_pool")(X)

    # output layer
    X = Flatten()(X)
    X = Dense(
        classes,
        activation = "softmax",
        name = "fc" + str(classes),
        kernel_initializer = "glorot_uniform"
    )(X)

    model = Model(inputs = X_input, outputs = X, name = name)
    return model


def make_layer(X: tf.Tensor, layers: int, kernel_size: int, filters: typing.List[int], stride: int, stage_no: int) -> tf.Tensor:
    """
    Method to create one conv-identity stage for ResNet.

    Arguments:
    X -- input tensor
    layers -- number of blocks in the stage
    kernel_size -- size of the kernel for the blocks
    filters -- number of filters/channels
    stride -- stride used to downsample the input in the first (convolution) block
    stage_no -- stage number, used only to name the layers

    Returns:
    X -- output tensor
    """

    # create the convolution (projection) block
    X = block(
        X,
        kernel_size = kernel_size,
        filters = filters,
        stage_no = stage_no,
        block_name = "a",
        is_conv_layer = True,
        stride = stride
    )

    # create the identity blocks
    block_name_ordinal = ord("b")
    for _ in range(layers - 1):
        X = block(
            X,
            kernel_size = kernel_size,
            filters = filters,
            stage_no = stage_no,
            block_name = chr(block_name_ordinal)
        )
        block_name_ordinal += 1

    return X
ML/TensorFlow/CNN_architectures/ResNet/test.py (new file, 10 lines)
@@ -0,0 +1,10 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from resnet import ResNet

if __name__ == "__main__":
    # test ResNet50
    model = ResNet(name = "Resnet50", layers = [3, 4, 6, 3], input_shape = (64, 64, 3), classes = 6)
    model.summary()
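The `layers` argument maps directly onto the per-stage block counts in the resnet.py docstring, so the deeper variants are just different lists. A hedged sketch of the corresponding calls (my addition, reusing the same toy input shape and class count as the test above):

```python
# Hedged sketch, not part of the repository: block counts for the deeper variants,
# matching the conv2_x..conv5_x counts described in resnet.py's docstring.
from resnet import ResNet

resnet101 = ResNet(name = "Resnet101", layers = [3, 4, 23, 3], input_shape = (64, 64, 3), classes = 6)
resnet152 = ResNet(name = "Resnet152", layers = [3, 8, 36, 3], input_shape = (64, 64, 3), classes = 6)
```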
ML/TensorFlow/CNN_architectures/VGGNet/README.md (new file, 13 lines)
@@ -0,0 +1,13 @@
[Original Paper - Very Deep Convolutional Networks for Large-Scale Image Recognition (2014)](https://arxiv.org/abs/1409.1556)
[Related Video](https://www.youtube.com/watch?v=ACmuBbuXn20)

Some questions I had when I was reading the paper
- [What does 1x1 convolution mean in a neural network?](https://stats.stackexchange.com/questions/194142/what-does-1x1-convolution-mean-in-a-neural-network)
- [A guide to receptive field arithmetic for Convolutional Neural Networks](https://medium.com/mlreview/a-guide-to-receptive-field-arithmetic-for-convolutional-neural-networks-e0f514068807)

Some other useful links
- [VGGNet summary](https://medium.com/coinmonks/paper-review-of-vggnet-1st-runner-up-of-ilsvlc-2014-image-classification-d02355543a11)
- [VGGNet in Keras](https://towardsdatascience.com/step-by-step-vgg16-implementation-in-keras-for-beginners-a833c686ae6c)
- [VGGNet with Batch Normalization](https://gist.github.com/jjangsangy/38d644606130f05b806a4261c493a820)

This code is inspired by [VGGNet implemented from scratch in PyTorch by aladdinpersson](https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/CNN_architectures/pytorch_vgg_implementation.py).
ML/TensorFlow/CNN_architectures/VGGNet/test.py (new file, 21 lines)
@@ -0,0 +1,21 @@
# disable tensorflow debugging messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from vggnet import VGGNet

# Integer values represent the output channels of a convolution layer
# 'M' represents a max pooling layer
# After the convolution blocks, flatten the output and use 4096x4096x1000 linear layers
# with softmax at the end
VGG_types = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

if __name__ == "__main__":
    # test VGGNet16
    model = VGGNet(name = "VGGNet16", architecture = VGG_types["VGG16"], input_shape = (224, 224, 3), classes = 1000)
    model.summary()
ML/TensorFlow/CNN_architectures/VGGNet/vggnet.py (new file, 126 lines)
@@ -0,0 +1,126 @@
# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from tensorflow.keras.layers import (
    Activation,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing

tf.config.run_functions_eagerly(True)


@tf.function
def VGGNet(
    name: str,
    architecture: typing.List[ typing.Union[int, str] ],
    input_shape: typing.Tuple[int],
    classes: int = 1000
) -> Model:
    """
    Implementation of the VGGNet architecture.

    Arguments:
    name -- name of the architecture
    architecture -- output channels per convolution layer in VGGNet ('M' marks a max pooling layer)
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras
    """

    # convert input shape into tensor
    X_input = Input(input_shape)

    # make convolution layers
    X = make_conv_layer(X_input, architecture)

    # flatten the output and make fully connected layers
    X = Flatten()(X)
    X = make_dense_layer(X, 4096)
    X = make_dense_layer(X, 4096)

    # classification layer
    X = Dense(units = classes, activation = "softmax")(X)

    model = Model(inputs = X_input, outputs = X, name = name)
    return model


def make_conv_layer(
    X: tf.Tensor,
    architecture: typing.List[ typing.Union[int, str] ],
    activation: str = 'relu'
) -> tf.Tensor:
    """
    Method to create the convolution layers for VGGNet.
    In VGGNet
    - the kernel is always 3x3 for conv layers, with padding 1 and stride 1.
    - max pooling uses a 2x2 kernel with a stride of 2.

    Arguments:
    X -- input tensor
    architecture -- output channels per convolution layer in VGGNet ('M' marks a max pooling layer)
    activation -- type of activation method

    Returns:
    X -- output tensor
    """

    for output in architecture:

        # convolution layer
        if type(output) == int:
            out_channels = output

            X = Conv2D(
                filters = out_channels,
                kernel_size = (3, 3),
                strides = (1, 1),
                padding = "same"
            )(X)
            X = BatchNormalization()(X)
            # relu activation (the default) is applied so that
            # negative values are not passed to the next layer
            X = Activation(activation)(X)

        # max-pooling layer
        else:
            X = MaxPooling2D(
                pool_size = (2, 2),
                strides = (2, 2)
            )(X)

    return X


def make_dense_layer(X: tf.Tensor, output_units: int, dropout = 0.5, activation = 'relu') -> tf.Tensor:
    """
    Method to create a dense layer for VGGNet.

    Arguments:
    X -- input tensor
    output_units -- output tensor size
    dropout -- dropout rate for regularization
    activation -- type of activation method

    Returns:
    X -- output tensor
    """

    X = Dense(units = output_units)(X)
    X = BatchNormalization()(X)
    X = Activation(activation)(X)
    X = Dropout(dropout)(X)

    return X
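One detail worth working out (my addition): every VGG configuration in test.py contains exactly five 'M' entries, and since the padding="same" convolutions preserve the spatial size, only the poolings shrink it. A 224x224 input therefore reaches the flatten step at 224 / 2^5 = 7, i.e. a 7x7x512 tensor (25,088 features) feeding the first 4096-unit dense layer.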