HW4 Machine Learning Homework
HW4 Machine Learning Homework
import tensorflow as tf
In [4]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
In [30]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck']
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(train_images[i])
# The CIFAR labels happen to be arrays,
# which is why you need the extra index
plt.xlabel(class_names[train_labels[i][0]])
plt.show()
In [31]:
model = models.Sequential()
# Dense Layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
In [32]:
model.summary()
Model: "sequential_6"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_22 (Conv2D) │ (None, 30, 30, 32) │ 896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_16 (MaxPooling2D) │ (None, 15, 15, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_23 (Conv2D) │ (None, 13, 13, 64) │ 18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_17 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_24 (Conv2D) │ (None, 4, 4, 64) │ 36,928 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_6 (Flatten) │ (None, 1024) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_12 (Dense) │ (None, 64) │ 65,600 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_13 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
=================================================================
Total params: 56320 (220.00 KB)
Trainable params: 56320 (220.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Changes were made for each parameter and examples were given to show how each change affected the
model.
In [42]:
model2 = models.Sequential([
layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
In [43]:
model2.summary()
Model: "sequential_12"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_45 (Conv2D) │ (None, 30, 30, 32) │ 896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_33 (MaxPooling2D) │ (None, 15, 15, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_46 (Conv2D) │ (None, 13, 13, 64) │ 18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_34 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_47 (Conv2D) │ (None, 4, 4, 128) │ 73,856 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_35 (MaxPooling2D) │ (None, 2, 2, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_48 (Conv2D) │ (None, 0, 0, 128) │ 147,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_12 (Flatten) │ (None, 0) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_24 (Dense) │ (None, 64) │ 64 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_25 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Adding more convolutional layers (e.g., from 3 to 4) can help the model learn more complex features, but
may increase the risk of overfitting and computational cost.
In [56]:
# Increasing Number of Filters in Each Convolutional Layer
model3 = models.Sequential([
layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
In [57]:
model3.summary()
Model: "sequential_19"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_67 (Conv2D) │ (None, 30, 30, 64) │ 1,792 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_46 (MaxPooling2D) │ (None, 15, 15, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_68 (Conv2D) │ (None, 13, 13, 128) │ 73,856 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_47 (MaxPooling2D) │ (None, 6, 6, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_69 (Conv2D) │ (None, 4, 4, 128) │ 147,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_19 (Flatten) │ (None, 2048) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_38 (Dense) │ (None, 64) │ 131,136 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_39 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Increasing the number of filters helps capture more features but also increases the model size and
computational demands.
In [58]:
model4 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
In [59]:
model4.summary()
Model: "sequential_20"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_70 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_48 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_71 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_49 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_72 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_20 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_40 (Dense) │ (None, 64) │ 4,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_41 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Larger filters (5x5) can capture broader patterns but may miss finer details that smaller filters (3x3) can
catch.
In [60]:
# Changing activation functions ( relu to tanh )
model5 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='tanh', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='tanh'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='tanh'),
layers.Flatten(),
layers.Dense(64, activation='tanh'),
layers.Dense(10, activation='tanh')
])
In [61]:
model5.summary()
Model: "sequential_21"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_73 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_50 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_74 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_51 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_75 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_21 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_42 (Dense) │ (None, 64) │ 4,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_43 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Different activation functions can affect the ability of the model to learn complex patterns. ReLU is
commonly used, but tanh can also be effective depending on the problem.
In [66]:
# Remove pooling layers
model6 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='relu')
])
In [67]:
model6.summary()
Model: "sequential_24"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_82 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_83 (Conv2D) │ (None, 24, 24, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_84 (Conv2D) │ (None, 20, 20, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_24 (Flatten) │ (None, 25600) │ 0 │
│ flatten_24 (Flatten) │ (None, 25600) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_48 (Dense) │ (None, 64) │ 1,638,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_49 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Pooling layers reduce the spatial dimensions and help prevent overfitting. Without pooling, the model might
capture too much detail, leading to overfitting.
In [68]:
# Increasing Number of Neurons in Dense Layers
model7 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(10, activation='relu')
])
In [69]:
model7.summary()
Model: "sequential_25"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_85 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_54 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_86 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_55 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_87 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_25 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_50 (Dense) │ (None, 128) │ 8,320 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_51 (Dense) │ (None, 10) │ 1,290 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
More neurons can help the model learn more complex patterns but also increase the risk of overfitting and
computational cost.
In [75]:
In [75]:
# Main model
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
metrics=['accuracy'])
Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9235 - loss: 0.2167 - val_accur
acy: 0.6948 - val_loss: 1.5697
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9302 - loss: 0.1978 - val_accur
acy: 0.6915 - val_loss: 1.6489
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9336 - loss: 0.1839 - val_accur
acy: 0.6915 - val_loss: 1.6949
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9376 - loss: 0.1721 - val_accur
acy: 0.6889 - val_loss: 1.7276
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9420 - loss: 0.1607 - val_accur
acy: 0.6890 - val_loss: 1.7500
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9444 - loss: 0.1591 - val_accur
acy: 0.6897 - val_loss: 1.8458
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9413 - loss: 0.1585 - val_accur
acy: 0.6941 - val_loss: 1.8822
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9470 - loss: 0.1469 - val_accur
acy: 0.6868 - val_loss: 2.0302
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9491 - loss: 0.1458 - val_accu
racy: 0.6926 - val_loss: 1.9869
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9533 - loss: 0.1328 - val_accur
acy: 0.6862 - val_loss: 2.0189
In [76]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
0.6862000226974487
In [78]:
# Increasing batch size
Epoch 1/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9721 - loss: 0.0830 - val_accura
cy: 0.6997 - val_loss: 2.2044
Epoch 2/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 16ms/step - accuracy: 0.9914 - loss: 0.0342 - val_accura
cy: 0.6977 - val_loss: 2.2941
Epoch 3/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9837 - loss: 0.0496 - val_accura
cy: 0.6930 - val_loss: 2.3404
Epoch 4/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9755 - loss: 0.0696 - val_accura
cy: 0.6911 - val_loss: 2.4313
Epoch 5/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9776 - loss: 0.0660 - val_accura
cy: 0.6899 - val_loss: 2.5113
Epoch 6/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9747 - loss: 0.0721 - val_accura
cy: 0.6919 - val_loss: 2.5785
Epoch 7/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9789 - loss: 0.0605 - val_accura
cy: 0.6881 - val_loss: 2.6383
Epoch 8/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9786 - loss: 0.0608 - val_accura
cy: 0.6819 - val_loss: 2.7417
Epoch 9/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9732 - loss: 0.0755 - val_accura
cy: 0.6898 - val_loss: 2.8422
Epoch 10/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9790 - loss: 0.0590 - val_accura
cy: 0.6864 - val_loss: 2.9127
In [79]:
plt.plot(history2.history['accuracy'], label='accuracy')
plt.plot(history2.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
0.6863999962806702
Larger batch sizes can speed up training but might reduce the generalization ability. Smaller batch sizes
usually offer better generalization.
In [81]:
# Increasing batch size
Epoch 1/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9264 - loss: 0.2437 - val_accur
acy: 0.6814 - val_loss: 2.5280
Epoch 2/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9628 - loss: 0.1067 - val_accu
racy: 0.6946 - val_loss: 2.5628
Epoch 3/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 0.9572 - loss: 0.1222 - val_accu
racy: 0.6743 - val_loss: 2.7162
Epoch 4/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9606 - loss: 0.1153 - val_accu
racy: 0.6823 - val_loss: 2.7181
Epoch 5/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9628 - loss: 0.1038 - val_accu
racy: 0.6873 - val_loss: 2.6969
Epoch 6/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9617 - loss: 0.1090 - val_accu
racy: 0.6854 - val_loss: 2.6625
Epoch 7/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 0.9641 - loss: 0.1058 - val_accu
racy: 0.6900 - val_loss: 2.6380
Epoch 8/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9649 - loss: 0.1016 - val_accu
racy: 0.6850 - val_loss: 2.6855
Epoch 9/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9657 - loss: 0.1038 - val_accu
racy: 0.6824 - val_loss: 2.7350
Epoch 10/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9667 - loss: 0.0965 - val_accur
acy: 0.6781 - val_loss: 2.7364
Epoch 11/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9645 - loss: 0.1027 - val_accur
acy: 0.6857 - val_loss: 2.6774
Epoch 12/20
Epoch 12/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9660 - loss: 0.0973 - val_accur
acy: 0.6903 - val_loss: 2.7796
Epoch 13/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9663 - loss: 0.1006 - val_accur
acy: 0.6790 - val_loss: 3.0427
Epoch 14/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9628 - loss: 0.1122 - val_accur
acy: 0.6880 - val_loss: 2.8659
Epoch 15/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9695 - loss: 0.0924 - val_accur
acy: 0.6828 - val_loss: 2.8592
Epoch 16/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9667 - loss: 0.0948 - val_accur
acy: 0.6825 - val_loss: 2.7363
Epoch 17/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9695 - loss: 0.0881 - val_accur
acy: 0.6870 - val_loss: 2.8467
Epoch 18/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9693 - loss: 0.0877 - val_accur
acy: 0.6827 - val_loss: 2.9090
Epoch 19/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9724 - loss: 0.0858 - val_accur
acy: 0.6836 - val_loss: 2.9486
Epoch 20/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9689 - loss: 0.0924 - val_accur
acy: 0.6797 - val_loss: 2.8446
In [84]:
plt.plot(history3.history['accuracy'], label='accuracy')
plt.plot(history3.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
In [88]:
print(test_acc)
0.70169997215271
0.70169997215271
More epochs allow for more training iterations, which can improve accuracy but also increase the risk of
overfitting.
In [83]:
# Reducing learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='sparse_cat
egorical_crossentropy', metrics=['accuracy'])
Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9905 - loss: 0.0298 - val_accur
acy: 0.6990 - val_loss: 2.9388
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9991 - loss: 0.0073 - val_accur
acy: 0.6992 - val_loss: 3.0310
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9999 - loss: 0.0039 - val_accu
racy: 0.6999 - val_loss: 3.1500
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 25s 16ms/step - accuracy: 1.0000 - loss: 0.0027 - val_accu
racy: 0.6998 - val_loss: 3.2499
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 18s 11ms/step - accuracy: 1.0000 - loss: 0.0018 - val_accu
racy: 0.6997 - val_loss: 3.3865
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accu
racy: 0.7018 - val_loss: 3.4865
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 1.0000 - loss: 8.0826e-04 - val_a
ccuracy: 0.7022 - val_loss: 3.6365
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 1.0000 - loss: 5.5039e-04 - val_
accuracy: 0.7027 - val_loss: 3.7390
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 1.0000 - loss: 4.0029e-04 - val_a
ccuracy: 0.7015 - val_loss: 3.8755
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 1.0000 - loss: 2.8834e-04 - val_a
ccuracy: 0.7017 - val_loss: 4.0073
In [89]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
print(test_acc)
0.70169997215271
A lower learning rate ensures a more thorough learning process but requires more training time. A higher
learning rate can speed up training but may lead to suboptimal convergence.