DeepLearningForVisionSystems Ch5 ResNet
DeepLearningForVisionSystems Ch5 ResNet
ipynb - Colaboratory
This model uses residual skip connections that allow a network to go deeper without suffering
from the vanishing/exploding gradient problem.
# Importing Tensorflow
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, MaxPool2D, Ave
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import numpy as np
# import random, os for data loading
import random, os
import matplotlib.pyplot as plt
# pandas for displaying confusion matrix
import pandas as pd
print(tf.__version__)
# Display GPU availability if any
from tensorflow.python.client import device_lib
def get_available_gpus():
    '''
    Return the names of the local devices whose device_type is 'GPU'.
    Devices are enumerated with device_lib.list_local_devices(); the result is
    an empty list when none of them is reported as a GPU.
    '''
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names
print("devices =" , tf.config.list_physical_devices())
print(get_available_gpus())
2.5.0
devices = [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), Phy
['/device:GPU:0']
# Set the seed value for consistent results
def set_seed(seed=31415):
    '''
    Seed the random number generators used by this notebook.
    Parameters:
    seed: int (default 31415), value handed to Python's random module, NumPy
          and TensorFlow, and exported as PYTHONHASHSEED.
    TF_DETERMINISTIC_OPS is also set to '1' in the environment.
    '''
    # The crop helpers pick their offsets with random.sample, so the built-in
    # generator has to be seeded along with NumPy and TensorFlow
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only reaches child processes - confirm
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
set_seed()
Parameter Initializations
# Input image dimensions: the 256x256 images are cropped to 224x224 with 3 channels
input_shape = (224, 224, 3)
# Number of images to process in a batch
batch_size = 30
# Number of categories in the dataset
num_classes = 5
# File the ModelCheckpoint callback writes the model to
checkpoint_filePath = '/content/drive/MyDrive/MachineLearning/ResNet_2.h5'
# Root folder of the extracted dataset; its 'train' and 'test' subfolders are read from it
path='/content/Linnaeus 5 256X256'
# Check if the folder with the dataset already exists, if not copy it from the saved
if not os.path.isdir(path):
!cp '/content/drive/MyDrive/MachineLearning/Linnaeus 5 256X256.rar' '/content/'
get_ipython().system_raw("unrar x '/content/Linnaeus 5 256X256.rar'")
categories = os.listdir(os.path.join(path, 'train'))
print(len(categories), " categories found =", categories)
# Training portion (subset='training') of the images under <path>/train, with
# 10% held out for validation. Labels are inferred from the sub-folder names
# listed in `categories`.
train_image_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(path, 'train'),
    labels='inferred',
    label_mode='categorical',
    class_names=categories,
    batch_size=batch_size,
    image_size=(256, 256),
    shuffle=True,
    seed=2,
    validation_split=0.1,
    subset='training',
)
# Validation portion (subset='validation') of the images under <path>/train.
# The seed and validation_split match the training call so both calls use the
# same 90/10 partition.
validation_image_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(path, 'train'),
    labels='inferred',
    label_mode='categorical',
    class_names=categories,
    batch_size=batch_size,
    image_size=(256, 256),
    shuffle=True,
    seed=2,
    validation_split=0.1,
    subset='validation',
)
print("Training class names found =" , train_image_dataset.class_names)
def crop_images(images, labels):
'''
Expecting categories to be names of subfolders and the images belonging to each
of the subfolders be stored inside them. While reading the images, they are resized
and then cropped to 224x224x3 based on the way the paper describes (randomly betwee
diagnostics: bool (default False), If True it will print a lot of debug information
'''
# In order to clip the image in either from top-left, top-right, bottom-left, botto
# we create an array of possible start positions
corners_list = [0, (256-input_shape[0])//2, 256-input_shape[0]]
# Sampling one number from the list of start positions
offset_height = offset_width = random.sample(corners_list, 1)[0]
images = tf.image.per_image_standardization(images-127)
images = images/tf.math.reduce_max(tf.math.abs(images))
# Since there is an auxillary arm of the model, we have to concatenate two labels w
return tf.image.crop_to_bounding_box(images, offset_height, offset_width, input_sh
validation_datasource = validation_image_dataset.map(crop_images)
validation_datasource = validation_datasource.cache().prefetch(buffer_size=tf.data.AU
training datasource train image dataset map(crop images)
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 3/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
training_datasource = train_image_dataset.map(crop_images)
training_datasource = training_datasource.cache().prefetch(buffer_size=tf.data.AUTOTU
for images, labels in training_datasource:
print("images =", images.shape)
print("labels =", type(labels))
break
training_datasource = train_image_dataset.map(crop_images)
training_datasource = training_datasource.cache().prefetch(buffer_size=tf.data.AUTOTU
Test Data
# Test images are read from <path>/test with the same class order, batch size
# and 256x256 image size as the training data; no validation split is applied.
test_image_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(path, 'test'),
    labels='inferred',
    label_mode='categorical',
    class_names=categories,
    batch_size=batch_size,
    image_size=(256, 256),
    seed=2,
)
def test_data_crop_images(images, labels):
'''
Defining a separate function for test data because labels do not have to be
concatenated during testing and the map function does not allow multiple function
Expecting categories to be names of subfolders and the images belonging to each
of the subfolders be stored inside them. While reading the images, they are resized
and then cropped to 224x224x3 based on the way the paper describes (randomly betwee
diagnostics: bool (default False), If True it will print a lot of debug information
'''
# In order to clip the image in either from top-left, top-right, bottom-left, botto
# we create an array of possible start positions
corners_list = [0, (256-input_shape[0])//2, 256-input_shape[0]]
# Sampling one number from the list of start positions
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 4/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
# Sampling one number from the list of start positions
offset_height = offset_width = random.sample(corners_list, 1)[0]
images = tf.image.per_image_standardization(images-127)
images = images/tf.math.reduce_max(tf.math.abs(images))
# Since there is an auxillary arm of the model, we have to concatenate two labels w
return tf.image.crop_to_bounding_box(images, offset_height, offset_width, input_sh
test_datasource = test_image_dataset.map(test_data_crop_images)
test_datasource = test_datasource.cache().prefetch(buffer_size=tf.data.AUTOTUNE).shuf
Model Architecture
def residual_block(input, filter_configs, shortcut_filter_configs, name_prefix=''):
'''
This function is to define the residual block that is specified in the paper.
Based on the shortcut_filter_configs parameter, the function can either
directly join the input with the output of the conv filters on the main path,
or,
can add conv filters in the shortcut and then add to the main path.
After addition of the shortcut and the main path, the output is provided
through an activation layer
Parameters:
input: input Tensor
filter_configs: list of dictionary with convolution filter configurations in the ma
Each item in the list is a layer inside the residual block. The
dictionary should be of the form -
{'filters': number of filters for the
, 'kernel_size': kernel size of the filter
, 'strides': strides of the filter
, 'padding': padding of the filter
, 'activation': Activation of the convolution filter
}
shortcut_filter_configs: list of dictionaries with convolution filter
configurations for the filters in the shortcut path.
Structure of the dictionary is the same as filter_configs
name_prefix = String that will be added as a prefix to all layers of the block
'''
shortcut_path = input # This is for the shortcut path
i th i t # Thi i f th i th
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 5/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
main_path = input # This is for the main path
for i, config in enumerate(filter_configs):
# Going through the main path conv filter creations
main_path = Conv2D(filters = config['filters']
, kernel_size = config['kernel_size']
, strides=config['strides']
, padding=config['padding']
, activation=config['activation']
, use_bias=True
, kernel_initializer='glorot_uniform'
, name=name_prefix + '_main_'+ str(i+1)
)(main_path)
# Check if the shortcut filter configs has been defined
if shortcut_filter_configs is not None:
# We need to add filters in the shortcut path
# As per the paper, there only needs to be 1x1 kernel filter with the required de
# but the code here gives the "unwanted/ unwarranted" freedom to define multiple
for i, config in enumerate(shortcut_filter_configs):
# Go through the list of filters and create the conv filters
shortcut_path = Conv2D(filters = config['filters']
, kernel_size = config['kernel_size']
, strides=config['strides']
, padding=config['padding']
, activation=config['activation']
, use_bias=True
, kernel_initializer='glorot_uniform'
, name = name_prefix + '_shortcut_' + str(i+1)
)(shortcut_path)
# Going through the Add layer to add the input with the output of
combined_path = Add(name=name_prefix + '_add_junction')([shortcut_path, main_path])
# The output of the Add has to go through an activation layer
residual_block_output = Activation('elu', name=name_prefix + '_activation_output')(
# TODO: Make the activation method here as a configurable parameter
return residual_block_output
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 6/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
def get_resnet50_config():
# First 3 residual blocks have in their main path, 1x1x64, 3x3x64, 1x1x256
# Since the output of the maxpool_1 is of depth 64, only the first residual
# block will require a 1x1x256 filter in its shortcut path
conv2_1_2_3_filter_configs= [
{'filters': 64, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activatio
{'filters': 64, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activatio
{'filters': 256, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
]
conv2_1_shortcut_filter_configs = [
{'filters': 256, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activation':
]
# Next 4 residual blocks in Conv3_x have filters 1x1x128, 3x3x128, 1x1x512
# The output of the previous block has depth of 256, so the first of the 4 residual
# will require a 1x1x512 conv filter in its shortcut path, the others will have ide
# Conv3_1 will have stride = 2, so that will be defined separately
# stride 2 will reduce the width and height from 56x56 to 28x28
conv3_1_filter_configs= [
{'filters': 128, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 128, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 512, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activati
]
conv3_1_shortcut_filter_configs = [
{'filters': 512, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activation':
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 7/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
]
# Rest of the 3 blocks of conv3 are defined below
conv3_2_3_4_filter_configs= [
{'filters': 128, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 128, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 512, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
]
# There will be 6 blocks in conv4_x
# conv4_1 will have stride = 2
# widthxheight will be reduced from 28x28 to 14x14
conv4_1_filter_configs= [
{'filters': 256, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 256, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 1024, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activat
]
conv4_1_shortcut_filter_configs = [
{'filters': 1024, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activation'
]
# Rest of the conv4_x blocks:
conv4_2_to_6_filter_configs= [
{'filters': 256, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 256, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 1024, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activat
]
# conv5_x has 3 blocks
# conv5_1 will have stride =2
# widthxheight will reduce from 14x14 to 7x7
conv5_1_filter_configs= [
{'filters': 512, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 512, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 2048, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activat
]
conv5_1_shortcut_filter_configs = [
{'filters': 2048, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activation'
]
conv5_2_3_filter_configs= [
{'filters': 512, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 512, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 2048, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activat
]
resNet50_residual_block_config = [
(conv2_1_2_3_filter_configs, conv2_1_shortcut_filter_configs, "conv2_1"), # Con
(conv2_1_2_3_filter_configs, None, "conv2_2"), # Con
(conv2_1_2_3_filter_configs, None, "conv2_3"), # Con
(conv3_1_filter_configs, conv3_1_shortcut_filter_configs, "conv3_1"), # Con
(conv3_2_3_4_filter_configs, None, "conv3_2"), # Con
(conv3_2_3_4_filter_configs, None, "conv3_3"), # Con
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 8/24
7/25/2021 _ _ _ _ _ DeepLearningForVisionSystems-Ch5-ResNet.ipynb
_ - Colaboratory
(conv3_2_3_4_filter_configs, None, "conv3_4"), # Con
(conv4_1_filter_configs, conv4_1_shortcut_filter_configs, "conv4_1"), # Con
(conv4_2_to_6_filter_configs, None, "conv4_2"), # Con
(conv4_2_to_6_filter_configs, None, "conv4_3"), # con
(conv4_2_to_6_filter_configs, None, "conv4_4"), # con
(conv4_2_to_6_filter_configs, None, "conv4_5"), # con
(conv4_2_to_6_filter_configs, None, "conv4_6"), # con
(conv5_1_filter_configs, conv5_1_shortcut_filter_configs, "conv5_1"), # con
(conv5_2_3_filter_configs, None, "conv5_2"), # con
(conv5_2_3_filter_configs, None, "conv5_3") # con
]
return resNet50_residual_block_config
def get_resnet34_config():
# First 2 residual blocks have in their main path, 3x3x64
# Since the output of the maxpool_1 is of depth 64, shortcut paths can just be iden
conv2_1_2_3_filter_configs= [
{'filters': 64, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activatio
{'filters': 64, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activatio
]
# Next 4 residual blocks in Conv3_x have filters 3x3x128, 3x3x128
# The output of the previous block has depth of 64, so the first of the 4 residual
# will require a 1x1x128 conv filter in its shortcut path, the others will have ide
# Conv3_1 will have stride = 2, so that will be defined separately
# stride 2 will reduce the width and height from 56x56 to 28x28
conv3_1_filter_configs= [
{'filters': 128, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 128, 'kernel_size': 3, 'strides': 2, 'padding': 'same', 'activati
]
conv3_1_shortcut_filter_configs = [
{'filters': 128, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activation':
]
# Rest of the 3 blocks of conv3 are defined below with stride 1
conv3_2_3_4_filter_configs= [
{'filters': 128, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 128, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
]
# There will be 6 blocks in conv4_x
# conv4_1 will have stride = 2
# widthxheight will be reduced from 28x28 to 14x14
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 9/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
conv4_1_filter_configs= [
{'filters': 256, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 256, 'kernel_size': 3, 'strides': 2, 'padding': 'same', 'activati
]
conv4_1_shortcut_filter_configs = [
{'filters': 256, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activation':
]
# Rest of the conv4_x blocks:
conv4_2_to_6_filter_configs= [
{'filters': 256, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 256, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
]
# conv5_x has 3 blocks
# conv5_1 will have stride =2
# widthxheight will reduce from 14x14 to 7x7
conv5_1_filter_configs= [
{'filters': 512, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
{'filters': 512, 'kernel_size': 3, 'strides': 2, 'padding': 'same', 'activati
]
conv5_1_shortcut_filter_configs = [
{'filters': 512, 'kernel_size': 1, 'strides': 2, 'padding': 'same', 'activation':
]
# block 2 and 3 will have stride = 1
conv5_2_3_filter_configs= [
{'filters': 512, 'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activati
{'filters': 512, 'kernel_size': 3, 'strides': 1, 'padding': 'same', 'activati
]
resNet34_residual_block_config = [
(conv2_1_2_3_filter_configs, None, "conv2_1"), # Conv 2_1
(conv2_1_2_3_filter_configs, None, "conv2_2"), # Con
(conv2_1_2_3_filter_configs, None, "conv2_3"), # Con
(conv3_1_filter_configs, conv3_1_shortcut_filter_configs, "conv3_1"), # Con
(conv3_2_3_4_filter_configs, None, "conv3_2"), # Con
(conv3_2_3_4_filter_configs, None, "conv3_3"), # Con
(conv3_2_3_4_filter_configs, None, "conv3_4"), # Con
(conv4_1_filter_configs, conv4_1_shortcut_filter_configs, "conv4_1"), # Con
(conv4_2_to_6_filter_configs, None, "conv4_2"), # Con
(conv4_2_to_6_filter_configs, None, "conv4_3"), # con
(conv4_2_to_6_filter_configs, None, "conv4_4"), # con
(conv4_2_to_6_filter_configs, None, "conv4_5"), # con
(conv4_2_to_6_filter_configs, None, "conv4_6"), # con
(conv5_1_filter_configs, conv5_1_shortcut_filter_configs, "conv5_1"), # con
(conv5_2_3_filter_configs, None, "conv5_2"), # con
(conv5_2_3_filter_configs, None, "conv5_3") # con
]
return resNet34_residual_block_config
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 10/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
def build_model(residual_block_config):
# input layer definition, input shape = 224x224x3
input = Input (shape=input_shape, batch_size = batch_size, name='main_input')
#First layer is a conv filter as per the paper, output = 112x112x64
main_path = Conv2D(filters=64, kernel_size= 7, strides= 2, padding='same'
, activation='elu', use_bias = True, name='conv1')(input)
# MaxPool layer output = 56x56x64
main_path = MaxPool2D(pool_size=3, strides=2, padding='same'
, name='maxpool_1')(main_path)
# Create the chain of residual blocks using the below for loop
for filter_configs, shortcut_filter_configs, name_prefix in residual_block_config:
main_path = residual_block(main_path
, filter_configs
, shortcut_filter_configs
, name_prefix= name_prefix
)
#AveragePool layer and getting ready to create the output classification layer,
main_path = AveragePooling2D(pool_size = 7, strides = 7, padding='same'
, name='avg_pool')(main_path)
# Flatten the data to get it ready for FC layer
main_path = Flatten(name="flatten")(main_path)
#Adding final FC layer to classify the output
main_path = Dense(num_classes, activation='softmax', name='fc_final_output')(main_p
model= Model(inputs=input, outputs = main_path)
return model
resNet50_residual_block_config = get_resnet50_config()
model = build_model(resNet50_residual_block_config)
model.summary()
Model: "model"
________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 11/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
================================================================================
main_input (InputLayer) [(30, 224, 224, 3)] 0
________________________________________________________________________________
conv1 (Conv2D) (30, 112, 112, 64) 9472 main_input[0][0
________________________________________________________________________________
maxpool_1 (MaxPooling2D) (30, 56, 56, 64) 0 conv1[0][0]
________________________________________________________________________________
conv2_1_main_1 (Conv2D) (30, 56, 56, 64) 4160 maxpool_1[0][0]
________________________________________________________________________________
conv2_1_main_2 (Conv2D) (30, 56, 56, 64) 36928 conv2_1_main_1[
________________________________________________________________________________
conv2_1_shortcut_1 (Conv2D) (30, 56, 56, 256) 16640 maxpool_1[0][0]
________________________________________________________________________________
conv2_1_main_3 (Conv2D) (30, 56, 56, 256) 16640 conv2_1_main_2[
________________________________________________________________________________
conv2_1_add_junction (Add) (30, 56, 56, 256) 0 conv2_1_shortcu
conv2_1_main_3[
________________________________________________________________________________
conv2_1_activation_output (Acti (30, 56, 56, 256) 0 conv2_1_add_jun
________________________________________________________________________________
conv2_2_main_1 (Conv2D) (30, 56, 56, 64) 16448 conv2_1_activat
________________________________________________________________________________
conv2_2_main_2 (Conv2D) (30, 56, 56, 64) 36928 conv2_2_main_1[
________________________________________________________________________________
conv2_2_main_3 (Conv2D) (30, 56, 56, 256) 16640 conv2_2_main_2[
________________________________________________________________________________
conv2_2_add_junction (Add) (30, 56, 56, 256) 0 conv2_1_activat
conv2_2_main_3[
________________________________________________________________________________
conv2_2_activation_output (Acti (30, 56, 56, 256) 0 conv2_2_add_jun
________________________________________________________________________________
conv2_3_main_1 (Conv2D) (30, 56, 56, 64) 16448 conv2_2_activat
________________________________________________________________________________
conv2_3_main_2 (Conv2D) (30, 56, 56, 64) 36928 conv2_3_main_1[
________________________________________________________________________________
conv2_3_main_3 (Conv2D) (30, 56, 56, 256) 16640 conv2_3_main_2[
________________________________________________________________________________
conv2_3_add_junction (Add) (30, 56, 56, 256) 0 conv2_2_activat
conv2_3_main_3[
________________________________________________________________________________
conv2_3_activation_output (Acti (30, 56, 56, 256) 0 conv2_3_add_jun
________________________________________________________________________________
conv3_1_main_1 (Conv2D) (30, 56, 56, 128) 32896 conv2_3_activat
________________________________________________________________________________
conv3_1_main_2 (Conv2D) (30, 56, 56, 128) 147584 conv3_1_main_1[
________________________________________________________________________________
conv3_1_shortcut_1 (Conv2D) (30, 28, 28, 512) 131584 conv2_3_activat
________________________________________________________________________________
conv3_1_main_3 (Conv2D) (30, 28, 28, 512) 66048 conv3_1_main_2[
________________________________________________________________________________
conv3_1_add_junction (Add) (30, 28, 28, 512) 0 conv3_1_shortcu
conv3_1_main_3[
________________________________________________________________________________
conv3_1_activation_output (Acti (30, 28, 28, 512) 0 conv3_1_add_jun
________________________________________________________________________________
conv3 2 main 1 (Conv2D) (30, 28, 28, 128) 65664 conv3 1 activat
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 12/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
tf.keras.utils.plot_model(model)
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 13/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
resNet34_residual_block_config = get_resnet34_config()
model = build_model(resNet34_residual_block_config)
model.summary()
Model: "model_1"
________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
================================================================================
main_input (InputLayer) [(30, 224, 224, 3)] 0
________________________________________________________________________________
conv1 (Conv2D) (30, 112, 112, 64) 9472 main_input[0][0
________________________________________________________________________________
maxpool_1 (MaxPooling2D) (30, 56, 56, 64) 0 conv1[0][0]
________________________________________________________________________________
conv2_1_main_1 (Conv2D) (30, 56, 56, 64) 36928 maxpool_1[0][0]
________________________________________________________________________________
conv2_1_main_2 (Conv2D) (30, 56, 56, 64) 36928 conv2_1_main_1[
________________________________________________________________________________
conv2_1_add_junction (Add) (30, 56, 56, 64) 0 maxpool_1[0][0]
conv2_1_main_2[
________________________________________________________________________________
conv2_1_activation_output (Acti (30, 56, 56, 64) 0 conv2_1_add_jun
________________________________________________________________________________
conv2_2_main_1 (Conv2D) (30, 56, 56, 64) 36928 conv2_1_activat
________________________________________________________________________________
conv2_2_main_2 (Conv2D) (30, 56, 56, 64) 36928 conv2_2_main_1[
________________________________________________________________________________
conv2_2_add_junction (Add) (30, 56, 56, 64) 0 conv2_1_activat
conv2_2_main_2[
________________________________________________________________________________
conv2_2_activation_output (Acti (30, 56, 56, 64) 0 conv2_2_add_jun
________________________________________________________________________________
conv2_3_main_1 (Conv2D) (30, 56, 56, 64) 36928 conv2_2_activat
________________________________________________________________________________
conv2_3_main_2 (Conv2D) (30, 56, 56, 64) 36928 conv2_3_main_1[
________________________________________________________________________________
conv2_3_add_junction (Add) (30, 56, 56, 64) 0 conv2_2_activat
conv2_3_main_2[
________________________________________________________________________________
conv2_3_activation_output (Acti (30, 56, 56, 64) 0 conv2_3_add_jun
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 14/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
________________________________________________________________________________
conv3_1_main_1 (Conv2D) (30, 56, 56, 128) 73856 conv2_3_activat
________________________________________________________________________________
conv3_1_shortcut_1 (Conv2D) (30, 28, 28, 128) 8320 conv2_3_activat
________________________________________________________________________________
conv3_1_main_2 (Conv2D) (30, 28, 28, 128) 147584 conv3_1_main_1[
________________________________________________________________________________
conv3_1_add_junction (Add) (30, 28, 28, 128) 0 conv3_1_shortcu
conv3_1_main_2[
________________________________________________________________________________
conv3_1_activation_output (Acti (30, 28, 28, 128) 0 conv3_1_add_jun
________________________________________________________________________________
conv3_2_main_1 (Conv2D) (30, 28, 28, 128) 147584 conv3_1_activat
________________________________________________________________________________
conv3_2_main_2 (Conv2D) (30, 28, 28, 128) 147584 conv3_2_main_1[
________________________________________________________________________________
conv3_2_add_junction (Add) (30, 28, 28, 128) 0 conv3_1_activat
conv3_2_main_2[
________________________________________________________________________________
conv3_2_activation_output (Acti (30, 28, 28, 128) 0 conv3_2_add_jun
________________________________________________________________________________
conv3_3_main_1 (Conv2D) (30, 28, 28, 128) 147584 conv3_2_activat
tf.keras.utils.plot_model(model)
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 15/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
Training Prep
Callbacks Declaration
def lr_control(epoch, learning_rate):
    '''
    Schedule function handed to the LearningRateScheduler callback.
    Prints the epoch and the learning rate it receives, then returns the
    learning rate multiplied by 0.9 when epoch is a multiple of 8 greater
    than 7 (8, 16, 24, ...), and the unchanged learning rate otherwise.
    Note: the earlier comment cited "4% every 8 epochs" from the paper; the
    code applies a 10% reduction.
    Parameters:
    epoch: index of the epoch about to start
    learning_rate: learning rate currently in use
    '''
    tf.print("inside lr_control, epoch =", epoch, " lr = ", learning_rate)
    # A full group of 8 epochs has completed -> keep 90% of the current rate
    decay_due = epoch % 8 == 0 and epoch > 7
    return learning_rate * 0.9 if decay_due else learning_rate
lrScheduler = tf.keras.callbacks.LearningRateScheduler(schedule=lr_control, verbose=1
Model Checkpoint
# Write the full model (not only the weights) to checkpoint_filePath, keeping
# only the best one as measured by val_loss
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filePath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)
Early Stopper
# Stop training once val_loss has failed to improve by at least 0.0001 for
# 9 epochs, and restore the weights of the best epoch
earlyStopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=9,
    verbose=1,
    restore_best_weights=True,
)
Model Compilation
# SGD with momentum 0.9 and a learning rate of 0.00001; the labels are in
# 'categorical' mode, hence the categorical cross-entropy loss
optimizer = SGD(learning_rate=0.00001, momentum=0.9)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
Train Model
Training ResNet34 to avoid heavy computation on Colab
# Start the training process and collect the metrics data for plotting
# training_datasource is built from a dataset that already yields batches of
# batch_size images, so batch_size is not passed to fit(): the Keras
# documentation says not to specify it for dataset inputs.
# The returned History object is kept in `metrics` for plotting.
metrics = model.fit(
    training_datasource,
    epochs=50,
    validation_data=validation_datasource,
    callbacks=[lrScheduler, checkpoint, earlyStopper],
)
Epoch 1/50
inside lr_control, epoch = 0 lr = 9.999999747378752e-06
Epoch 00001: val_loss improved from inf to 1.50194, saving model to /content/dri
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/utils/generic_uti
category=CustomMaskWarning)
Epoch 2/50
inside lr_control, epoch = 1 lr = 9.999999747378752e-06
Epoch 00002: val_loss improved from 1.50194 to 1.47287, saving model to /content
Epoch 3/50
inside lr_control, epoch = 2 lr = 9.999999747378752e-06
Epoch 00003: val_loss improved from 1.47287 to 1.44669, saving model to /content
Epoch 4/50
inside lr_control, epoch = 3 lr = 9.999999747378752e-06
Epoch 00004: val_loss improved from 1.44669 to 1.42079, saving model to /content
Epoch 5/50
inside lr_control, epoch = 4 lr = 9.999999747378752e-06
Epoch 00005: val_loss improved from 1.42079 to 1.38507, saving model to /content
Epoch 6/50
inside lr_control, epoch = 5 lr = 9.999999747378752e-06
Epoch 00006: val_loss improved from 1.38507 to 1.35605, saving model to /content
Epoch 7/50
inside lr_control, epoch = 6 lr = 9.999999747378752e-06
Epoch 00007: val_loss improved from 1.35605 to 1.31727, saving model to /content
Epoch 8/50
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 18/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
inside lr_control, epoch = 7 lr = 9.999999747378752e-06
Epoch 00008: val_loss improved from 1.31727 to 1.29456, saving model to /content
Epoch 9/50
import matplotlib.pyplot as plt
# Pull the per-epoch curves out of the history recorded by model.fit().
history = metrics.history
acc = history['accuracy']
val_acc = history['val_accuracy']
loss = history['loss']
val_loss = history['val_loss']
epochs_range = range(len(acc))

# One row, two panels: accuracy on the left, loss on the right.
# Each entry: (subplot slot, training curve, its label, validation curve,
#              its label, legend location, panel title).
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 4))
for slot, train_curve, train_label, val_curve, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, slot)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 19/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
# Run the trained model over the test set and collect one label and one
# prediction entry per sample, for the confusion matrix.
predictions = []
actuals = []
for images, labels in test_datasource:
    # Forward pass in inference mode.
    pred = model(images, training=False)
    # Iterating a batch tensor yields one entry per sample.
    actuals.extend(labels)
    predictions.extend(pred)

# Print a few labels and predictions from the last batch to ensure that there
# are no dead ReLUs. The count is capped at the batch length so that a final
# batch with fewer than 10 samples cannot raise an index error.
for j in range(min(10, len(labels))):
    print(labels[j].numpy(), "\t", pred[j].numpy())
Confusion Matrix
# Confusion matrix over the test set. argmax over axis 1 picks the class index
# from each per-sample vector; true labels are passed first and predictions
# second. Rows and columns are labelled with the dataset's class names.
pd.DataFrame(
    tf.math.confusion_matrix(
        np.argmax(actuals, axis=1),
        np.argmax(predictions, axis=1),
        num_classes=num_classes,
        dtype=tf.dtypes.int32,
    ).numpy(),
    columns=test_image_dataset.class_names,
    index=test_image_dataset.class_names,
)
dog 278 44 16 44 18
bird 76 166 37 59 62
flower 12 21 283 13 71
other 45 54 37 237 27
berry 21 28 43 16 292
# Launch another training run; the returned object is kept in `metrics` so
# that its `.history` can be plotted afterwards.
metrics = model.fit(
    training_datasource,
    epochs=50,
    batch_size=batch_size,
    validation_data=validation_datasource,
    callbacks=[lrScheduler, checkpoint, earlyStopper],
)
Epoch 1/50
inside lr_control, epoch = 0 lr = 5.314409918355523e-06
Epoch 00001: val_loss improved from inf to 0.98180, saving model to /content/dri
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/utils/generic_uti
category=CustomMaskWarning)
Epoch 2/50
inside lr_control, epoch = 1 lr = 5.314409918355523e-06
Epoch 00003: val_loss improved from 0.98180 to 0.95182, saving model to /content
Epoch 4/50
inside lr_control, epoch = 3 lr = 5.314409918355523e-06
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 21/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
Epoch 00008: val_loss improved from 0.95182 to 0.92298, saving model to /content
Epoch 9/50
import matplotlib.pyplot as plt
# Pull the per-epoch curves out of the history recorded by model.fit().
history = metrics.history
acc = history['accuracy']
val_acc = history['val_accuracy']
loss = history['loss']
val_loss = history['val_loss']
epochs_range = range(len(acc))

# One row, two panels: accuracy on the left, loss on the right.
# Each entry: (subplot slot, training curve, its label, validation curve,
#              its label, legend location, panel title).
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 4))
for slot, train_curve, train_label, val_curve, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, slot)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 22/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
# Run the trained model over the test set and collect one label and one
# prediction entry per sample, for the confusion matrix.
predictions = []
actuals = []
for images, labels in test_datasource:
    # Forward pass in inference mode.
    pred = model(images, training=False)
    # Iterating a batch tensor yields one entry per sample.
    actuals.extend(labels)
    predictions.extend(pred)

# Print a few labels and predictions from the last batch to ensure that there
# are no dead ReLUs. The count is capped at the batch length so that a final
# batch with fewer than 10 samples cannot raise an index error.
for j in range(min(10, len(labels))):
    print(labels[j].numpy(), "\t", pred[j].numpy())
# Reduce each per-sample vector to a class index (argmax over axis 1).
true_classes = np.argmax(actuals, axis=1)
predicted_classes = np.argmax(predictions, axis=1)

# Count (true class, predicted class) pairs over the test set.
confusion = tf.math.confusion_matrix(
    true_classes,
    predicted_classes,
    num_classes=num_classes,
    dtype=tf.dtypes.int32,
)

# Label rows and columns with the class names; as the last expression of the
# cell the table is displayed by the notebook.
class_names = test_image_dataset.class_names
pd.DataFrame(confusion.numpy(), columns=class_names, index=class_names)
dog 281 44 4 67 4
bird 46 234 21 75 24
flower 4 38 266 43 49
other 20 57 22 285 16
berry 12 54 39 19 276
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 23/24
7/25/2021 DeepLearningForVisionSystems-Ch5-ResNet.ipynb - Colaboratory
https://colab.research.google.com/drive/1TbmgZbXTvHQhxgiwaS2lK9B5_8s-bfm5#printMode=true 24/24