HW4 Machine Learning Homework

In [3]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

In [4]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 200s 1us/step

In [30]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i])
    # The CIFAR labels happen to be arrays,
    # which is why you need the extra index
    plt.xlabel(class_names[train_labels[i][0]])
plt.show()
In [31]:
model = models.Sequential()

# First Convolutional Layer
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Second Convolutional Layer
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Third Convolutional Layer
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Flatten the results to feed into a dense layer
model.add(layers.Flatten())

# Dense Layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

In [32]:

model.summary()

Model: "sequential_6"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_22 (Conv2D) │ (None, 30, 30, 32) │ 896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_16 (MaxPooling2D) │ (None, 15, 15, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_23 (Conv2D) │ (None, 13, 13, 64) │ 18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_17 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_24 (Conv2D) │ (None, 4, 4, 64) │ 36,928 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_6 (Flatten) │ (None, 1024) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_12 (Dense) │ (None, 64) │ 65,600 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_13 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 122,570 (478.79 KB)

Trainable params: 122,570 (478.79 KB)

Non-trainable params: 0 (0.00 B)


Parameters We Can Change and Their Effects


1. Number of Convolutional Layers
2. Number of Filters in Each Convolutional Layer
3. Filter Size (Kernel Size)
4. Activation Functions
5. Pooling Layers
6. Number of Neurons in Dense Layers
7. Batch Size
8. Epochs
9. Learning Rate

A change was made for each of these parameters, and an example is given below to show how it affects
the model.
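
Most of the variant models below are only built and summarized; to actually compare them, each one would need to be compiled and trained under the same settings as the main model. A minimal helper for that is sketched here (the compare_model function is illustrative and not part of the original runs):

def compare_model(m, epochs=10, batch_size=32):
    # Compile and train a candidate model with the same settings as the main model
    m.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
    return m.fit(train_images, train_labels, epochs=epochs, batch_size=batch_size,
                 validation_data=(test_images, test_labels))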

In [42]:
# Increasing Convolutional Layers (3 to 4)
model2 = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

In [43]:
model2.summary()

Model: "sequential_12"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_45 (Conv2D) │ (None, 30, 30, 32) │ 896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_33 (MaxPooling2D) │ (None, 15, 15, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_46 (Conv2D) │ (None, 13, 13, 64) │ 18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_34 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_47 (Conv2D) │ (None, 4, 4, 128) │ 73,856 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_35 (MaxPooling2D) │ (None, 2, 2, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_48 (Conv2D) │ (None, 0, 0, 128) │ 147,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_12 (Flatten) │ (None, 0) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_24 (Dense) │ (None, 64) │ 64 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_25 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 241,546 (943.54 KB)

Trainable params: 241,546 (943.54 KB)

Non-trainable params: 0 (0.00 B)

Adding more convolutional layers (e.g., from 3 to 4) can help the model learn more complex features, but
may increase the risk of overfitting and computational cost.
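
The shapes in the summary above also show the practical limit of stacking layers on 32x32 inputs: each unpadded 3x3 convolution trims 2 pixels from each spatial dimension and each 2x2 pooling halves it, so the fourth convolution is left with no spatial resolution (the (None, 0, 0, 128) row). A quick sketch of the arithmetic (illustrative only):

def conv_out(size, kernel=3):
    # 'valid' convolution: output = input - kernel + 1
    return size - kernel + 1

def pool_out(size, pool=2):
    # 2x2 max pooling with stride 2 halves the size (floor division)
    return size // pool

s = 32
for op in [conv_out, pool_out, conv_out, pool_out, conv_out, pool_out, conv_out]:
    s = op(s)
    print(op.__name__, s)   # 30, 15, 13, 6, 4, 2, 0

In practice, padding='same' or one fewer pooling stage would keep the fourth convolution usable.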

In [56]:
# Increasing Number of Filters in Each Convolutional Layer
model3 = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

In [57]:
model3.summary()

Model: "sequential_19"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_67 (Conv2D) │ (None, 30, 30, 64) │ 1,792 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_46 (MaxPooling2D) │ (None, 15, 15, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_68 (Conv2D) │ (None, 13, 13, 128) │ 73,856 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_47 (MaxPooling2D) │ (None, 6, 6, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_69 (Conv2D) │ (None, 4, 4, 128) │ 147,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_19 (Flatten) │ (None, 2048) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_38 (Dense) │ (None, 64) │ 131,136 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_39 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 355,018 (1.35 MB)

Trainable params: 355,018 (1.35 MB)

Non-trainable params: 0 (0.00 B)



Increasing the number of filters helps capture more features but also increases the model size and
computational demands.
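
The parameter counts in the summary follow directly from the Conv2D formula (kernel_height x kernel_width x input_channels + 1 bias) x filters, so more filters in one layer also inflate the next layer's count. A quick check against the figures above (illustrative arithmetic):

def conv2d_params(kernel, in_ch, filters):
    # Weights per filter plus one bias per filter
    return (kernel * kernel * in_ch + 1) * filters

print(conv2d_params(3, 3, 64))     # 1,792   (first layer, RGB input)
print(conv2d_params(3, 64, 128))   # 73,856
print(conv2d_params(3, 128, 128))  # 147,584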

In [58]:
# Increasing Filter Size (3x3 to 5x5)
model4 = models.Sequential([
    layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

In [59]:
model4.summary()

Model: "sequential_20"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_70 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_48 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_71 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_49 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_72 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_20 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_40 (Dense) │ (None, 64) │ 4,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_41 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 160,970 (628.79 KB)

Trainable params: 160,970 (628.79 KB)

Non-trainable params: 0 (0.00 B)

Larger filters (5x5) can capture broader patterns but may miss finer details that smaller filters (3x3) can
catch.
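
A related trade-off: two stacked 3x3 convolutions cover the same 5x5 receptive field as a single 5x5 convolution while using fewer parameters and adding an extra non-linearity, which is one reason small filters are usually preferred. A rough comparison, assuming 64 input and 64 output channels (illustrative arithmetic):

in_ch = out_ch = 64
single_5x5 = (5 * 5 * in_ch + 1) * out_ch          # 102,464 (matches conv2d_72 above)
stacked_3x3 = 2 * ((3 * 3 * in_ch + 1) * out_ch)   # 73,856
print(single_5x5, stacked_3x3)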

In [60]:
# Changing activation functions (relu to tanh)
model5 = models.Sequential([
    layers.Conv2D(32, (5, 5), activation='tanh', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='tanh'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='tanh'),
    layers.Flatten(),
    layers.Dense(64, activation='tanh'),
    layers.Dense(10, activation='tanh')
])

In [61]:
model5.summary()

Model: "sequential_21"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_73 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_50 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_74 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_51 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_75 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_21 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_42 (Dense) │ (None, 64) │ 4,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_43 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 160,970 (628.79 KB)

Trainable params: 160,970 (628.79 KB)

Non-trainable params: 0 (0.00 B)

Different activation functions can affect the ability of the model to learn complex patterns. ReLU is
commonly used, but tanh can also be effective depending on the problem.
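
One convenient way to run this comparison is to make the hidden activation a parameter while keeping a softmax output, so the network still produces class probabilities. A minimal sketch (the build_model helper is illustrative, not part of the original notebook):

def build_model(hidden_activation='relu'):
    return models.Sequential([
        layers.Conv2D(32, (3, 3), activation=hidden_activation, input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation=hidden_activation),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation=hidden_activation),
        layers.Flatten(),
        layers.Dense(64, activation=hidden_activation),
        layers.Dense(10, activation='softmax')
    ])

relu_model = build_model('relu')
tanh_model = build_model('tanh')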

In [66]:
# Remove pooling layers
model6 = models.Sequential([
    layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='relu')
])

In [67]:
model6.summary()

Model: "sequential_24"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_82 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_83 (Conv2D) │ (None, 24, 24, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_84 (Conv2D) │ (None, 20, 20, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_24 (Flatten) │ (None, 25600) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_48 (Dense) │ (None, 64) │ 1,638,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_49 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 1,795,274 (6.85 MB)

Trainable params: 1,795,274 (6.85 MB)

Non-trainable params: 0 (0.00 B)

Pooling layers reduce the spatial dimensions and help prevent overfitting. Without pooling, the model might
capture too much detail, leading to overfitting.
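
The cost of dropping pooling is visible in the first Dense layer of the summary: the flattened feature map is 20 x 20 x 64 values instead of 64, so the dense weight matrix dominates the parameter count (illustrative arithmetic):

flattened = 20 * 20 * 64              # 25,600 features without pooling
dense_params = flattened * 64 + 64    # weights + biases of the 64-unit layer
print(flattened, dense_params)        # 25600, 1638464 (matches the summary above)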

In [68]:
# Increasing Number of Neurons in Dense Layers
model7 = models.Sequential([
    layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='relu')
])

In [69]:
model7.summary()

Model: "sequential_25"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_85 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_54 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_86 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_55 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_87 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_25 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_50 (Dense) │ (None, 128) │ 8,320 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_51 (Dense) │ (None, 10) │ 1,290 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

Total params: 165,770 (647.54 KB)

Trainable params: 165,770 (647.54 KB)

Non-trainable params: 0 (0.00 B)

More neurons can help the model learn more complex patterns but also increase the risk of overfitting and
computational cost.

In [75]:
# Main model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

history = model.fit(train_images, train_labels, epochs=10, batch_size=32,
                    validation_data=(test_images, test_labels))

Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9235 - loss: 0.2167 - val_accur
acy: 0.6948 - val_loss: 1.5697
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9302 - loss: 0.1978 - val_accur
acy: 0.6915 - val_loss: 1.6489
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9336 - loss: 0.1839 - val_accur
acy: 0.6915 - val_loss: 1.6949
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9376 - loss: 0.1721 - val_accur
acy: 0.6889 - val_loss: 1.7276
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9420 - loss: 0.1607 - val_accur
acy: 0.6890 - val_loss: 1.7500
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9444 - loss: 0.1591 - val_accur
acy: 0.6897 - val_loss: 1.8458
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9413 - loss: 0.1585 - val_accur
acy: 0.6941 - val_loss: 1.8822
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9470 - loss: 0.1469 - val_accur
acy: 0.6868 - val_loss: 2.0302
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9491 - loss: 0.1458 - val_accu
racy: 0.6926 - val_loss: 1.9869
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9533 - loss: 0.1328 - val_accur
acy: 0.6862 - val_loss: 2.0189

In [76]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

313/313 - 1s - 3ms/step - accuracy: 0.6862 - loss: 2.0189


In [77]:
print(test_acc)

0.6862000226974487

In [78]:
# Increasing batch size
history2 = model.fit(train_images, train_labels, epochs=10, batch_size=64,
                     validation_data=(test_images, test_labels))

Epoch 1/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9721 - loss: 0.0830 - val_accura
cy: 0.6997 - val_loss: 2.2044
Epoch 2/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 16ms/step - accuracy: 0.9914 - loss: 0.0342 - val_accura
cy: 0.6977 - val_loss: 2.2941
Epoch 3/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9837 - loss: 0.0496 - val_accura
cy: 0.6930 - val_loss: 2.3404
Epoch 4/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9755 - loss: 0.0696 - val_accura
cy: 0.6911 - val_loss: 2.4313
Epoch 5/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9776 - loss: 0.0660 - val_accura
cy: 0.6899 - val_loss: 2.5113
Epoch 6/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9747 - loss: 0.0721 - val_accura
cy: 0.6919 - val_loss: 2.5785
Epoch 7/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9789 - loss: 0.0605 - val_accura
cy: 0.6881 - val_loss: 2.6383
Epoch 8/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9786 - loss: 0.0608 - val_accura
cy: 0.6819 - val_loss: 2.7417
Epoch 9/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9732 - loss: 0.0755 - val_accura
cy: 0.6898 - val_loss: 2.8422
Epoch 10/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9790 - loss: 0.0590 - val_accura
cy: 0.6864 - val_loss: 2.9127

In [79]:
plt.plot(history2.history['accuracy'], label='accuracy')
plt.plot(history2.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

313/313 - 1s - 3ms/step - accuracy: 0.6864 - loss: 2.9127


In [80]:
print(test_acc)

0.6863999962806702

Larger batch sizes can speed up training but might reduce the generalization ability. Smaller batch sizes
usually offer better generalization.
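
The batch size also sets how many gradient updates happen per epoch, which is exactly what the step counts in the two training logs show (illustrative arithmetic, assuming CIFAR-10's 50,000 training images):

import math

train_size = 50_000
print(math.ceil(train_size / 32))   # 1563 steps per epoch at batch_size=32
print(math.ceil(train_size / 64))   # 782 steps per epoch at batch_size=64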

In [81]:
# Increasing the number of epochs
history3 = model.fit(train_images, train_labels, epochs=20,
                     validation_data=(test_images, test_labels))

Epoch 1/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9264 - loss: 0.2437 - val_accur
acy: 0.6814 - val_loss: 2.5280
Epoch 2/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9628 - loss: 0.1067 - val_accu
racy: 0.6946 - val_loss: 2.5628
Epoch 3/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 0.9572 - loss: 0.1222 - val_accu
racy: 0.6743 - val_loss: 2.7162
Epoch 4/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9606 - loss: 0.1153 - val_accu
racy: 0.6823 - val_loss: 2.7181
Epoch 5/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9628 - loss: 0.1038 - val_accu
racy: 0.6873 - val_loss: 2.6969
Epoch 6/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9617 - loss: 0.1090 - val_accu
racy: 0.6854 - val_loss: 2.6625
Epoch 7/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 0.9641 - loss: 0.1058 - val_accu
racy: 0.6900 - val_loss: 2.6380
Epoch 8/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9649 - loss: 0.1016 - val_accu
racy: 0.6850 - val_loss: 2.6855
Epoch 9/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9657 - loss: 0.1038 - val_accu
racy: 0.6824 - val_loss: 2.7350
Epoch 10/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9667 - loss: 0.0965 - val_accur
acy: 0.6781 - val_loss: 2.7364
Epoch 11/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9645 - loss: 0.1027 - val_accur
acy: 0.6857 - val_loss: 2.6774
Epoch 12/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9660 - loss: 0.0973 - val_accur
acy: 0.6903 - val_loss: 2.7796
Epoch 13/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9663 - loss: 0.1006 - val_accur
acy: 0.6790 - val_loss: 3.0427
Epoch 14/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9628 - loss: 0.1122 - val_accur
acy: 0.6880 - val_loss: 2.8659
Epoch 15/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9695 - loss: 0.0924 - val_accur
acy: 0.6828 - val_loss: 2.8592
Epoch 16/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9667 - loss: 0.0948 - val_accur
acy: 0.6825 - val_loss: 2.7363
Epoch 17/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9695 - loss: 0.0881 - val_accur
acy: 0.6870 - val_loss: 2.8467
Epoch 18/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9693 - loss: 0.0877 - val_accur
acy: 0.6827 - val_loss: 2.9090
Epoch 19/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9724 - loss: 0.0858 - val_accur
acy: 0.6836 - val_loss: 2.9486
Epoch 20/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9689 - loss: 0.0924 - val_accur
acy: 0.6797 - val_loss: 2.8446

In [84]:
plt.plot(history3.history['accuracy'], label='accuracy')
plt.plot(history3.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

313/313 - 1s - 3ms/step - accuracy: 0.7017 - loss: 4.0073

In [88]:

print(test_acc)

0.70169997215271

More epochs allow for more training iterations, which can improve accuracy but also increase the risk of
overfitting.
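
A common way to keep a large epoch budget without over-training is to stop once the validation metric stops improving. A minimal sketch using Keras' built-in EarlyStopping callback (illustrative; not part of the original runs):

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',      # watch validation accuracy
    patience=3,                  # allow 3 epochs without improvement
    restore_best_weights=True    # roll back to the best epoch seen
)

history_es = model.fit(train_images, train_labels, epochs=20, batch_size=32,
                       validation_data=(test_images, test_labels),
                       callbacks=[early_stop])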

In [83]:
# Reducing learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_images, train_labels, epochs=10, batch_size=32,
                    validation_data=(test_images, test_labels))

Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9905 - loss: 0.0298 - val_accur
acy: 0.6990 - val_loss: 2.9388
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9991 - loss: 0.0073 - val_accur
acy: 0.6992 - val_loss: 3.0310
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9999 - loss: 0.0039 - val_accu
racy: 0.6999 - val_loss: 3.1500
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 25s 16ms/step - accuracy: 1.0000 - loss: 0.0027 - val_accu
racy: 0.6998 - val_loss: 3.2499
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 18s 11ms/step - accuracy: 1.0000 - loss: 0.0018 - val_accu
racy: 0.6997 - val_loss: 3.3865
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accu
racy: 0.7018 - val_loss: 3.4865
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 1.0000 - loss: 8.0826e-04 - val_a
ccuracy: 0.7022 - val_loss: 3.6365
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 1.0000 - loss: 5.5039e-04 - val_
accuracy: 0.7027 - val_loss: 3.7390
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 1.0000 - loss: 4.0029e-04 - val_a
ccuracy: 0.7015 - val_loss: 3.8755
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 1.0000 - loss: 2.8834e-04 - val_a
ccuracy: 0.7017 - val_loss: 4.0073

In [89]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

313/313 - 1s - 3ms/step - accuracy: 0.7017 - loss: 4.0073


In [90]:

print(test_acc)

0.70169997215271

A lower learning rate ensures a more thorough learning process but requires more training time. A higher
learning rate can speed up training but may lead to suboptimal convergence.
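
A middle ground is to start with a larger learning rate and reduce it when the validation loss plateaus, for example with Keras' built-in ReduceLROnPlateau callback. A minimal sketch (illustrative; not part of the original runs):

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',   # watch validation loss
    factor=0.1,           # multiply the learning rate by 0.1 on a plateau
    patience=2,           # wait 2 stagnant epochs before reducing
    min_lr=1e-5           # lower bound on the learning rate
)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history_lr = model.fit(train_images, train_labels, epochs=10, batch_size=32,
                       validation_data=(test_images, test_labels),
                       callbacks=[reduce_lr])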
