0% found this document useful (0 votes)
4 views

sentimentanalysislab

Uploaded by

mevow74125
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views

sentimentanalysislab

Uploaded by

mevow74125
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

sentimentanalysislab file:///D:/ACADEMICS/BS/V%20BSc(H)/MLP/MLPCODE/MLP%20LAB%20Unit%203%...

Sentiment Analysis Lab – Movie review classification (positive or negative) using Natural
Language Processing.
In [10]: import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences

Load and Explore the Dataset


Load the IMDB dataset

In [14]: # Load the IMDB dataset from TensorFlow


# Only consider the top 10,000 most frequent words
vocab_size = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

In [15]: X_train

1 of 5 04-12-2024, 12:24
sentimentanalysislab file:///D:/ACADEMICS/BS/V%20BSc(H)/MLP/MLPCODE/MLP%20LAB%20Unit%203%...

Out[15]: array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838,
112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 44
7, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 124
7, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33,
4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82,
2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13,
104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30
, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15,
16, 5345, 19, 178, 32]),
list([1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 1
3, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5,
647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455
, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 2, 4, 1153, 9, 194, 775, 7, 8255, 2, 349, 263
7, 148, 605, 2, 8003, 15, 123, 125, 68, 2, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 2, 1157, 15, 299, 120, 5,
120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 2, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 2, 32, 746
4, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462,
33, 89, 78, 285, 16, 145, 95]),
list([1, 14, 47, 8, 30, 31, 7, 4, 249, 108, 7, 4, 5974, 54, 61, 369, 13, 71, 149, 14, 22, 112, 4, 2401, 311,
12, 16, 3711, 33, 75, 43, 1829, 296, 4, 86, 320, 35, 534, 19, 263, 4821, 1301, 4, 1873, 33, 89, 78, 12, 66, 16, 4, 3
60, 7, 4, 58, 316, 334, 11, 4, 1716, 43, 645, 662, 8, 257, 85, 1200, 42, 1228, 2578, 83, 68, 3912, 15, 36, 165, 1539
, 278, 36, 69, 2, 780, 8, 106, 14, 6905, 1338, 18, 6, 22, 12, 215, 28, 610, 40, 6, 87, 326, 23, 2300, 21, 23, 22, 12
, 272, 40, 57, 31, 11, 4, 22, 47, 6, 2307, 51, 9, 170, 23, 595, 116, 595, 1352, 13, 191, 79, 638, 89, 2, 14, 9, 8, 1
06, 607, 624, 35, 534, 6, 227, 7, 129, 113]),
...,
list([1, 11, 6, 230, 245, 6401, 9, 6, 1225, 446, 2, 45, 2174, 84, 8322, 4007, 21, 4, 912, 84, 2, 325, 725, 13
4, 2, 1715, 84, 5, 36, 28, 57, 1099, 21, 8, 140, 8, 703, 5, 2, 84, 56, 18, 1644, 14, 9, 31, 7, 4, 9406, 1209, 2295,
2, 1008, 18, 6, 20, 207, 110, 563, 12, 8, 2901, 2, 8, 97, 6, 20, 53, 4767, 74, 4, 460, 364, 1273, 29, 270, 11, 960,
108, 45, 40, 29, 2961, 395, 11, 6, 4065, 500, 7, 2, 89, 364, 70, 29, 140, 4, 64, 4780, 11, 4, 2678, 26, 178, 4, 529,
443, 2, 5, 27, 710, 117, 2, 8123, 165, 47, 84, 37, 131, 818, 14, 595, 10, 10, 61, 1242, 1209, 10, 10, 288, 2260, 170
2, 34, 2901, 2, 4, 65, 496, 4, 231, 7, 790, 5, 6, 320, 234, 2766, 234, 1119, 1574, 7, 496, 4, 139, 929, 2901, 2, 775
0, 5, 4241, 18, 4, 8497, 2, 250, 11, 1818, 7561, 4, 4217, 5408, 747, 1115, 372, 1890, 1006, 541, 9303, 7, 4, 59, 2,
4, 3586, 2]),
list([1, 1446, 7079, 69, 72, 3305, 13, 610, 930, 8, 12, 582, 23, 5, 16, 484, 685, 54, 349, 11, 4120, 2959, 45
, 58, 1466, 13, 197, 12, 16, 43, 23, 2, 5, 62, 30, 145, 402, 11, 4131, 51, 575, 32, 61, 369, 71, 66, 770, 12, 1054,
75, 100, 2198, 8, 4, 105, 37, 69, 147, 712, 75, 3543, 44, 257, 390, 5, 69, 263, 514, 105, 50, 286, 1814, 23, 4, 123,
13, 161, 40, 5, 421, 4, 116, 16, 897, 13, 2, 40, 319, 5872, 112, 6700, 11, 4803, 121, 25, 70, 3468, 4, 719, 3798, 13
, 18, 31, 62, 40, 8, 7200, 4, 2, 7, 14, 123, 5, 942, 25, 8, 721, 12, 145, 5, 202, 12, 160, 580, 202, 12, 6, 52, 58,
2, 92, 401, 728, 12, 39, 14, 251, 8, 15, 251, 5, 2, 12, 38, 84, 80, 124, 12, 9, 23]),
list([1, 17, 6, 194, 337, 7, 4, 204, 22, 45, 254, 8, 106, 14, 123, 4, 2, 270, 2, 5, 2, 2, 732, 2098, 101, 405
, 39, 14, 1034, 4, 1310, 9, 115, 50, 305, 12, 47, 4, 168, 5, 235, 7, 38, 111, 699, 102, 7, 4, 4039, 9245, 9, 24, 6,
78, 1099, 17, 2345, 2, 21, 27, 9685, 6139, 5, 2, 1603, 92, 1183, 4, 1310, 7, 4, 204, 42, 97, 90, 35, 221, 109, 29, 1
27, 27, 118, 8, 97, 12, 157, 21, 6789, 2, 9, 6, 66, 78, 1099, 4, 631, 1191, 5, 2642, 272, 191, 1070, 6, 7585, 8, 219

2 of 5 04-12-2024, 12:24
sentimentanalysislab file:///D:/ACADEMICS/BS/V%20BSc(H)/MLP/MLPCODE/MLP%20LAB%20Unit%203%...

7, 2, 2, 544, 5, 383, 1271, 848, 1468, 2, 497, 2, 8, 1597, 8778, 2, 21, 60, 27, 239, 9, 43, 8368, 209, 405, 10, 10,
12, 764, 40, 4, 248, 20, 12, 16, 5, 174, 1791, 72, 7, 51, 6, 1739, 22, 4, 204, 131, 9])],
dtype=object)

In [16]: # Decode a sample review for inspection (optional)


word_index = imdb.get_word_index()
reverse_word_index = {value: key for (key, value) in word_index.items()}

def decode_review(encoded_review):
return " ".join([reverse_word_index.get(i - 3, "?") for i in encoded_review])

print("Sample review:", decode_review(X_train[0]))


print("Label (0 = negative, 1 = positive):", y_train[0])

Sample review: ? this film was just brilliant casting location scenery story direction everyone's really suited the p
art they played and you could just imagine being there robert ? is an amazing actor and now the same being director ?
father came from the same scottish island as myself so i loved the fact there was a real connection with this film th
e witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was
released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it
was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to t
he two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the
? list i think because the stars that play them all grown up are such a big profile for the whole film but these chil
dren are amazing and should be praised for what they have done don't you think the whole story was so lovely because
it was true and was someone's life after all that was shared with us all
Label (0 = negative, 1 = positive): 1

2. Preprocessing:
Sequences are padded to a uniform length of 200 words using pad_sequences.

In [17]: # Preprocess the dataset


# Pad sequences to the same length
max_length = 200
X_train = pad_sequences(X_train, maxlen=max_length)
X_test = pad_sequences(X_test, maxlen=max_length)

3. Model Architecture:
The model includes an Embedding Layer for word vector representation.
An LSTM layer is used for capturing sequence dependencies.

3 of 5 04-12-2024, 12:24
sentimentanalysislab file:///D:/ACADEMICS/BS/V%20BSc(H)/MLP/MLPCODE/MLP%20LAB%20Unit%203%...

A Dense layer with sigmoid activation predicts the sentiment.

In [18]: # Build the model


model = Sequential([
Embedding(input_dim=vocab_size, output_dim=128, input_length=max_length),
LSTM(64, return_sequences=False),
Dropout(0.5),
Dense(1, activation='sigmoid')
])

d:\ACADEMICS\BS\V BSc(H)\MLP\CODE\vbs\lib\site-packages\keras\src\layers\core\embedding.py:90: UserWarning: Argument


`input_length` is deprecated. Just remove it.
warnings.warn(

In [19]: # Compile the model


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [20]: # Train the model


history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

Epoch 1/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 46s 62ms/step - accuracy: 0.7030 - loss: 0.5605 - val_accuracy: 0.8556 - val_loss: 0.340
3
Epoch 2/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 38s 61ms/step - accuracy: 0.8804 - loss: 0.2988 - val_accuracy: 0.8576 - val_loss: 0.335
1
Epoch 3/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 44s 70ms/step - accuracy: 0.9312 - loss: 0.1952 - val_accuracy: 0.8604 - val_loss: 0.355
7
Epoch 4/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 49s 78ms/step - accuracy: 0.9511 - loss: 0.1379 - val_accuracy: 0.8724 - val_loss: 0.370
1
Epoch 5/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 50s 79ms/step - accuracy: 0.9687 - loss: 0.0997 - val_accuracy: 0.8764 - val_loss: 0.463
5

In [21]: # Evaluate the model


# model.evaluate returns the compiled loss followed by the compiled
# metrics — here binary cross-entropy loss and accuracy — computed on
# the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

782/782 ━━━━━━━━━━━━━━━━━━━━ 12s 15ms/step - accuracy: 0.8598 - loss: 0.5005


Test Accuracy: 0.86

4 of 5 04-12-2024, 12:24
sentimentanalysislab file:///D:/ACADEMICS/BS/V%20BSc(H)/MLP/MLPCODE/MLP%20LAB%20Unit%203%...

In [22]: # Predict on new data (optional)


sample_review = X_test[0].reshape(1, -1)
prediction = model.predict(sample_review)[0][0]
print("Predicted Sentiment:", "Positive" if prediction >= 0.5 else "Negative")

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 391ms/step


Predicted Sentiment: Negative

5 of 5 04-12-2024, 12:24

You might also like