
IK Gujral Punjab Technical University, Kapurthala

B. Tech., Computer Science & Engg.

LAB MANUAL
Machine Learning
BTCS619-18
Course Code: BTCS619-18

Course Title: Machine Learning Lab

List of Experiments:

1. Implement data pre-processing

2. Deploy Simple Linear Regression

3. Simulate Multiple Linear Regression

4. Implement Decision Tree

5. Deploy Random forest classification

6. Simulate Naïve Bayes algorithm

7. Implement K-Nearest Neighbors (K-NN), k-Means

8. Deploy Support Vector Machine, Apriori algorithm

9. Simulate Artificial Neural Network

10. Implement the Genetic Algorithm code


Task 1. Implement data pre-processing

import cv2
import numpy as np
import tensorflow as tf

# Load a previously trained Keras model
model = tf.keras.models.load_model('keras_model.h5')

# Open the default webcam
video = cv2.VideoCapture(0)

while True:
    check, frame = video.read()

    # Modify the input data by:
    # 1. Resizing the image to the model's expected input size
    img = cv2.resize(frame, (224, 224))

    # 2. Converting the image into a NumPy array and adding a batch dimension
    test_image = np.array(img, dtype=np.float32)
    test_image = np.expand_dims(test_image, axis=0)

    # 3. Normalising the pixel values to the range [0, 1]
    normalised_image = test_image / 255.0

    # Predict the result
    prediction = model.predict(normalised_image)
    print("Prediction : ", prediction)

    cv2.imshow("Result", frame)

    # Press the space bar (key code 32) to stop the loop
    key = cv2.waitKey(1)
    if key == 32:
        print("Closing")
        break

video.release()
cv2.destroyAllWindows()
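The listing above pre-processes camera frames for a Keras model. For tabular data, the same task is usually handled with scikit-learn; the following is a minimal sketch, assuming a hypothetical data.csv with numeric columns Age and Salary and a categorical column Country (the column names are illustrative, not part of the manual's dataset).

# Minimal tabular pre-processing sketch (assumed columns: Age, Salary, Country)
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

df = pd.read_csv('data.csv')

numeric_features = ['Age', 'Salary']   # assumed column names
categorical_features = ['Country']     # assumed column name

# Impute missing numeric values with the mean, then standardise them
numeric_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('scale', StandardScaler()),
])

# Impute missing categories with the most frequent value, then one-hot encode
categorical_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(handle_unknown='ignore')),
])

preprocess = ColumnTransformer([
    ('num', numeric_pipeline, numeric_features),
    ('cat', categorical_pipeline, categorical_features),
])

X_processed = preprocess.fit_transform(df)
print(X_processed[:5])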

Task 2. Deploy Simple Linear Regression

from google.colab import files
data_to_load = files.upload()

import pandas as pd
import plotly.express as px

df = pd.read_csv("data.csv")

height = df["Height"].tolist()
weight = df["Weight"].tolist()

# Scatter plot of the raw data
fig = px.scatter(x=height, y=weight)
fig.show()

# First guess for the line y = m*x + c
m = 1
c = 0
y = []
for x in height:
    y_value = m * x + c
    y.append(y_value)

# Plotting the points together with the guessed line
fig = px.scatter(x=height, y=weight)
fig.update_layout(shapes=[
    dict(
        type='line',
        y0=min(y), y1=max(y),
        x0=min(height), x1=max(height)
    )
])
fig.show()

# Improved guess for the slope and intercept
m = 0.95
c = -93
y = []
for x in height:
    y_value = m * x + c
    y.append(y_value)

# Plotting the points together with the improved line
fig = px.scatter(x=height, y=weight)
fig.update_layout(shapes=[
    dict(
        type='line',
        y0=min(y), y1=max(y),
        x0=min(height), x1=max(height)
    )
])
fig.show()

# Predicting the weight for a new height value
x = 250
y = m * x + c
print(f"Weight of someone with height {x} is {y}")

Task 3. Simulate Multiple Linear Regression

# Install scikit-learn if it is not already available:
# pip install scikit-learn

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Generating some example data
# Let's assume we have 3 features and 1 target variable
np.random.seed(0)
X = np.random.rand(100, 3)  # 100 samples, 3 features
y = X @ np.array([1.5, -2.0, 3.0]) + np.random.randn(100) * 0.5  # Linear relationship with noise

# Creating a DataFrame for better visualization
df = pd.DataFrame(X, columns=['Feature1', 'Feature2', 'Feature3'])
df['Target'] = y

# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating and training the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predicting on the test set
y_pred = model.predict(X_test)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

# If you want to see the first few predictions
predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(predictions_df.head())
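To use the trained model on fresh data, a single new observation can be passed to model.predict. A brief sketch (the feature values below are made up purely for illustration):

# Predict the target for one hypothetical new sample with 3 feature values
new_sample = np.array([[0.5, 0.2, 0.9]])
print("Predicted target:", model.predict(new_sample)[0])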

Task 4. Implement Decision Tree

from google.colab import files
data_to_load = files.upload()

import pandas as pd

# Column names
col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi',
             'pedigree', 'age', 'label']

df = pd.read_csv("diabetes.csv", names=col_names).iloc[1:]
print(df.head())

features = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = df[features]
y = df.label

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=1)

# Initialising the Decision Tree model
clf = DecisionTreeClassifier()

# Fitting the data into the model
clf = clf.fit(X_train, y_train)

# Calculating the accuracy of the model
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Importing the libraries for visualisation
from sklearn.tree import export_graphviz
from io import StringIO
from IPython.display import Image
import pydotplus

# StringIO object where the decision tree classifier is stored as text
dot_data = StringIO()

# Using export_graphviz to create a graph representation of the decision tree
# which can be written to an output file
export_graphviz(clf, out_file=dot_data, filled=True, rounded=True,
                special_characters=True, feature_names=features, class_names=['0', '1'])

print(dot_data.getvalue())
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('diabetes.png')
Image(graph.create_png())

# Pruned tree: limiting the depth to 3 levels
clf = DecisionTreeClassifier(max_depth=3)
clf = clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# StringIO object where the pruned decision tree is stored as text
dot_data = StringIO()

# Using export_graphviz to create a graphviz representation of the pruned tree
export_graphviz(clf, out_file=dot_data, filled=True, rounded=True,
                special_characters=True, feature_names=features, class_names=['0', '1'])

graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('diabetes.png')
Image(graph.create_png())
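If pydotplus or Graphviz is not installed, the same tree can be drawn with scikit-learn's built-in plot_tree function; a minimal sketch, assuming clf and features are already defined as above:

import matplotlib.pyplot as plt
from sklearn import tree

# Draw the fitted decision tree directly with matplotlib
plt.figure(figsize=(12, 6))
tree.plot_tree(clf, feature_names=features, class_names=['0', '1'], filled=True, rounded=True)
plt.show()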

Task 5. Deploy Random forest classification


# Random Forest Classifier

# Importing the libraries

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

# Importing the datasets

datasets = pd.read_csv('Social_Network_Ads.csv')

X = datasets.iloc[:, [2,3]].values

Y = datasets.iloc[:, 4].values

# Splitting the dataset into the Training set and Test set

from sklearn.model_selection import train_test_split

X_Train, X_Test, Y_Train, Y_Test = train_test_split(X, Y, test_size = 0.25, random_state = 0)

# Feature Scaling

from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()

X_Train = sc_X.fit_transform(X_Train)

X_Test = sc_X.transform(X_Test)

# Fitting the classifier into the Training set


from sklearn.ensemble import RandomForestClassifier

classifier = RandomForestClassifier(n_estimators = 200, criterion = 'entropy', random_state = 0)

classifier.fit(X_Train,Y_Train)

# Predicting the test set results

Y_Pred = classifier.predict(X_Test)

# Making the Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(Y_Test, Y_Pred)

# Visualising the Training set results

from matplotlib.colors import ListedColormap

X_Set, Y_Set = X_Train, Y_Train

X1, X2 = np.meshgrid(np.arange(start = X_Set[:, 0].min() - 1, stop = X_Set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_Set[:, 1].min() - 1, stop = X_Set[:, 1].max() + 1, step = 0.01))

plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))

plt.xlim(X1.min(), X1.max())

plt.ylim(X2.min(), X2.max())

for i, j in enumerate(np.unique(Y_Set)):
    plt.scatter(X_Set[Y_Set == j, 0], X_Set[Y_Set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)


plt.title('Random Forest Classifier (Training set)')

plt.xlabel('Age')

plt.ylabel('Estimated Salary')

plt.legend()

plt.show()

# Visualising the Test set results

from matplotlib.colors import ListedColormap

X_Set, Y_Set = X_Test, Y_Test

X1, X2 = np.meshgrid(np.arange(start = X_Set[:, 0].min() - 1, stop = X_Set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_Set[:, 1].min() - 1, stop = X_Set[:, 1].max() + 1, step = 0.01))

plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))

plt.xlim(X1.min(), X1.max())

plt.ylim(X2.min(), X2.max())

for i, j in enumerate(np.unique(Y_Set)):
    plt.scatter(X_Set[Y_Set == j, 0], X_Set[Y_Set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)

plt.title('Random Forest Classifier (Test set)')

plt.xlabel('Age')

plt.ylabel('Estimated Salary')

plt.legend()

plt.show()
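The confusion matrix cm computed above is never displayed; a short addition reports it together with the overall accuracy (using cm, Y_Test and Y_Pred from the listing above):

from sklearn.metrics import accuracy_score

print("Confusion Matrix:\n", cm)
print("Accuracy:", accuracy_score(Y_Test, Y_Pred))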

Task 6. Simulate Naïve Bayes algorithm

#Uploading the csv


from google.colab import files
data_to_load = files.upload()
#Code to read the file.
import pandas as pd

df = pd.read_csv('diabetes.csv')
print(df.head())
from sklearn.model_selection import train_test_split

X = df[["glucose", "bloodpressure"]]
y = df["diabetes"]

x_train_1, x_test_1, y_train_1, y_test_1 = train_test_split(X, y, test_size=0.25, random_state=42)
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score


from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

x_train_1 = sc.fit_transform(x_train_1)
x_test_1 = sc.transform(x_test_1)  # use the scaler fitted on the training set

model_1 = GaussianNB()
model_1.fit(x_train_1, y_train_1)

y_pred_1 = model_1.predict(x_test_1)

accuracy = accuracy_score(y_test_1, y_pred_1)


print(accuracy)
from sklearn.model_selection import train_test_split

X = df[["glucose", "bloodpressure"]]
y = df["diabetes"]

x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(X, y, test_size=0.25, random_state=42)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

x_train_2 = sc.fit_transform(x_train_2)
x_test_2 = sc.transform(x_test_2)  # use the scaler fitted on the training set

model_2 = LogisticRegression(random_state = 0)
model_2.fit(x_train_2, y_train_2)

y_pred_2 = model_2.predict(x_test_2)
accuracy = accuracy_score(y_test_2, y_pred_2)
print(accuracy)
#Uploading the csv
from google.colab import files
data_to_load = files.upload()

import pandas as pd

df = pd.read_csv('income.csv')

print(df.head())
print(df.describe())
from sklearn.model_selection import train_test_split

X = df[["age", "hours-per-week", "education-num", "capital-gain",


"capital-loss"]]
y = df["income"]

x_train_1, x_test_1, y_train_1, y_test_1 = train_test_split(X, y,


test_size=0.25, random_state=42)
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

x_train_1 = sc.fit_transform(x_train_1)
x_test_1 = sc.transform(x_test_1)  # use the scaler fitted on the training set

model_1 = GaussianNB()
model_1.fit(x_train_1, y_train_1)

y_pred_1 = model_1.predict(x_test_1)

accuracy = accuracy_score(y_test_1, y_pred_1)


print(accuracy)

from sklearn.model_selection import train_test_split

X = df[["age", "hours-per-week", "education-num", "capital-gain",


"capital-loss"]]
y = df["income"]

x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(X, y,


test_size=0.25, random_state=42)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

x_train_2 = sc.fit_transform(x_train_2)
x_test_2 = sc.transform(x_test_2)  # use the scaler fitted on the training set

model_2 = LogisticRegression(random_state = 0)
model_2.fit(x_train_2, y_train_2)

y_pred_2 = model_2.predict(x_test_2)

accuracy = accuracy_score(y_test_2, y_pred_2)


print(accuracy)
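Because the accuracy variable is overwritten between runs, it can help to keep the two scores separately and print them side by side; a brief sketch using the predictions from the two income-dataset models above (the variable names acc_nb and acc_lr are illustrative):

# Side-by-side comparison of the two classifiers on the same split
acc_nb = accuracy_score(y_test_1, y_pred_1)
acc_lr = accuracy_score(y_test_2, y_pred_2)
print(f"Gaussian Naive Bayes accuracy: {acc_nb:.4f}")
print(f"Logistic Regression accuracy:  {acc_lr:.4f}")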

Task 7. Implement K-Nearest Neighbors (K-NN), k-Means

from google.colab import files


data_to_load = files.upload()
import pandas as pd
import plotly.express as px

df = pd.read_csv("petals_sepals.csv")

print(df.head())

fig = px.scatter(df, x="petal_size", y="sepal_size")


fig.show()

from sklearn.cluster import KMeans

# Pandas provides the DataFrame.iloc[] method to retrieve rows by position.
# It is used when the index label of a DataFrame is something other than the
# numeric series 0, 1, 2, ..., n, or when the index label is not known.
X = df.iloc[:, [0, 1]].values
print(X)

wcss = []

# The range goes up to 11 because we only need 10 candidate cluster counts.
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(X)
    # The inertia_ attribute returns the WCSS for that model
    wcss.append(kmeans.inertia_)
import matplotlib.pyplot as plt
import seaborn as sns

#plotting a figure to show an elbow like structure in the graph

plt.figure(figsize=(10,5))
sns.lineplot(wcss, marker='o', color='red')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
kmeans = KMeans(n_clusters=3, init='k-means++', random_state=42)
y_kmeans = kmeans.fit_predict(X)

plt.figure(figsize=(15, 7))
sns.scatterplot(x=X[y_kmeans == 0, 0], y=X[y_kmeans == 0, 1], color='yellow', label='Cluster 1')
sns.scatterplot(x=X[y_kmeans == 1, 0], y=X[y_kmeans == 1, 1], color='blue', label='Cluster 2')
sns.scatterplot(x=X[y_kmeans == 2, 0], y=X[y_kmeans == 2, 1], color='green', label='Cluster 3')
sns.scatterplot(x=kmeans.cluster_centers_[:, 0], y=kmeans.cluster_centers_[:, 1],
                color='red', label='Centroids', s=100, marker=',')
plt.grid(False)
plt.title('Clusters of Flowers')
plt.xlabel('Petal Size')
plt.ylabel('Sepal Size')
plt.legend()
plt.show()
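The task title also lists K-Nearest Neighbors, which the listing above does not cover. A minimal supervised K-NN sketch on scikit-learn's built-in iris dataset follows (the dataset and k = 5 are illustrative choices, not part of the original manual):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load a small labelled dataset for illustration
iris = load_iris()
X_knn, y_knn = iris.data, iris.target

X_tr, X_te, y_tr, y_te = train_test_split(X_knn, y_knn, test_size=0.25, random_state=42)

# K-NN classifier with k = 5 neighbours
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_tr, y_tr)

y_hat = knn.predict(X_te)
print("K-NN accuracy:", accuracy_score(y_te, y_hat))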

Task 8. Deploy Support Vector Machine, Apriori algorithm

https://github.com/earlyann/grocery_market_basket_analysis/blob/main/groc_v2.ipynb
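The linked notebook walks through a full market-basket analysis. For reference, minimal local sketches of both techniques are given below: the SVM part uses scikit-learn's built-in iris dataset and the Apriori part uses the mlxtend library with a tiny made-up transaction list (both datasets are illustrative assumptions, not the notebook's data).

# --- Support Vector Machine (illustrative sketch) ---
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

svm = SVC(kernel='rbf', random_state=0)
svm.fit(X_train, y_train)
print("SVM accuracy:", accuracy_score(y_test, svm.predict(X_test)))

# --- Apriori algorithm (illustrative sketch; requires: pip install mlxtend) ---
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Tiny made-up transaction list
transactions = [
    ['milk', 'bread', 'butter'],
    ['bread', 'butter'],
    ['milk', 'bread'],
    ['milk', 'butter'],
]

# One-hot encode the transactions into a boolean basket DataFrame
te = TransactionEncoder()
basket = pd.DataFrame(te.fit(transactions).transform(transactions), columns=te.columns_)

# Mine frequent itemsets and derive association rules
frequent_itemsets = apriori(basket, min_support=0.5, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.6)
print(frequent_itemsets)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])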

Task 9. Simulate Artificial Neural Network

# Import python libraries required in this example:

from keras.models import Sequential

from keras.layers import Dense, Activation

import numpy as np

# Use numpy arrays to store inputs (x) and outputs (y):

x = np.array([[0,0], [0,1], [1,0], [1,1]])


y = np.array([[0], [1], [1], [0]])

# Define the network model and its arguments.

# Set the number of neurons/nodes for each layer:

model = Sequential()

model.add(Dense(2, input_shape=(2,)))

model.add(Activation('sigmoid'))

model.add(Dense(1))

model.add(Activation('sigmoid'))

# Compile the model and calculate its accuracy:

model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])

# Print a summary of the Keras model:

model.summary()
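The listing above only defines and compiles the XOR network; to actually train it and inspect the outputs, a fit/predict step like the following can be appended (the epoch count is an arbitrary choice for illustration, and convergence is not guaranteed with this small network):

# Train on the XOR inputs and inspect the predictions
model.fit(x, y, epochs=1000, verbose=0)
print(model.predict(x))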

Task 10. Implement the Genetic Algorithm code

import numpy as np
import yaml
import datetime
from github import Github
from terminaltables import AsciiTable
from terminaltables import GithubFlavoredMarkdownTable
import pickle
import codecs

# Insert your username and password in parameters.yml
with open('parameters.yml', 'r') as input:
    try:
        p = yaml.safe_load(input)
        # Make sure it is reading ok
        # print("user: %s" % p["user"])
        # print("password: %s" % p["password"])
    except yaml.YAMLError as error:
        print(error)
        exit(1)

g = Github(p["user"], p["password"])

number_of_reps = p["items"]
names_of_props = ["Id", "Name", "Description", "Language", "Stars", "Forks"]
github_server_link = "https://github.com/"
last_tables_file_name = 'last_table_data.pickle'
md_file_name = 'readme.md'

# Main query
search_query = g.search_repositories(p["search"], sort="stars", order="desc")

results = []
for index, rep in enumerate(search_query):
    # print(rep.url)  # Everything is here as a JSON file (you can use it instead of the API)
    rep_prop = [index + 1]
    link = github_server_link + rep.full_name
    rep_prop.append("[{}]({})".format(rep.name, link))
    rep_prop.append(rep.description)
    rep_prop.append(rep.language)
    rep_prop.append(rep.stargazers_count)
    rep_prop.append(rep.forks)
    results.append(rep_prop)
    if index > number_of_reps - 2:
        break

# Creating the table
table_data = [["" for x in range(len(names_of_props))] for y in range(number_of_reps + 1)]
for i in range(len(names_of_props)):
    table_data[0][i] = names_of_props[i]
for i in range(number_of_reps):
    for j in range(len(names_of_props)):
        table_data[i + 1][j] = results[i][j]

# Saving the table data (for further analysis)
with open(last_tables_file_name, 'wb') as handle:
    pickle.dump(table_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Generating the ascii table
table = GithubFlavoredMarkdownTable(table_data)
table_str = table.table

# Writing the md file
with codecs.open(md_file_name, "w", "utf-8") as f:
    now = datetime.datetime.now()
    f.write("# Top %s Github repositories\n" % p["search"])
    f.write("Based on [Top Deep Learning](http://github.com/mbadry1/Top-Deep-Learning)<br /><br />\n")
    f.write("Here is a list of the top-%s %s Github repositories sorted by the number of stars.\n" %
            (p["items"], p["search"]))
    f.write("The query that has been used for the GitHub search API is \"%s" % p["search"] + "\".\n")
    f.write("<br /><br />\n")
    f.write("Date: %s\n" % now.strftime("%m/%d/%Y"))
    f.write("<br /><br />\n")
    f.write("Note: This listing will be updated regularly.\n")
    f.write("<br /><br />\n\n")
    f.write(table_str)
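Note that the listing above queries and tabulates GitHub repositories rather than implementing a genetic algorithm itself. For reference, a minimal self-contained genetic algorithm sketch follows; it evolves a bit string toward all 1s, and the population size, mutation rate, and fitness function are illustrative choices rather than part of the original manual.

import random

# Illustrative genetic algorithm: evolve a bit string of all 1s
GENOME_LENGTH = 20
POPULATION_SIZE = 30
MUTATION_RATE = 0.02
GENERATIONS = 50

def fitness(genome):
    # Fitness is simply the number of 1s in the genome
    return sum(genome)

def random_genome():
    return [random.randint(0, 1) for _ in range(GENOME_LENGTH)]

def select(population):
    # Tournament selection: keep the fitter of two random individuals
    a, b = random.sample(population, 2)
    return a if fitness(a) >= fitness(b) else b

def crossover(parent1, parent2):
    # Single-point crossover
    point = random.randint(1, GENOME_LENGTH - 1)
    return parent1[:point] + parent2[point:]

def mutate(genome):
    # Flip each bit with a small probability
    return [1 - bit if random.random() < MUTATION_RATE else bit for bit in genome]

population = [random_genome() for _ in range(POPULATION_SIZE)]
for generation in range(GENERATIONS):
    population = [mutate(crossover(select(population), select(population)))
                  for _ in range(POPULATION_SIZE)]
    best = max(population, key=fitness)
    print(f"Generation {generation + 1}: best fitness = {fitness(best)}")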
