
INDEX

Sr. No.   Particular
1.   To implement basic Python machine learning libraries such as NumPy, pandas, SciPy, scikit-learn, matplotlib, etc.
2.   To implement Principal Component Analysis.
3.   To implement the FIND-S algorithm.
4.   To analyse test data using the K-Means clustering algorithm.
5.   To implement the k-nearest neighbor algorithm.
6.   To implement linear regression.
7.   To implement logistic regression.
8.   To implement the Naïve Bayes Algorithm.
9.   To implement the Decision Tree Algorithm.
10.  To implement the Support Vector Machine Algorithm.

PROGRAM :- 1

AIM: To implement basic Python machine learning libraries such as NumPy, pandas, SciPy, scikit-learn, matplotlib, etc.

1. NumPy

import numpy as nup

# Create two arrays of rank 2
K = nup.array([[2, 4], [6, 8]])
R = nup.array([[1, 3], [5, 7]])

# Create two arrays of rank 1
P = nup.array([10, 12])
S = nup.array([9, 11])

# Print the inner product of the two rank-1 vectors
print("Inner product of vectors: ", nup.dot(P, S), "\n")

# Print the matrix-vector product
print("Matrix and Vector product: ", nup.dot(K, P), "\n")

# Print the matrix-matrix product
print("Matrix and matrix product: ", nup.dot(K, R))

OUTPUT:-
2. SciPy

import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate

x = np.arange(5, 20)
y = np.exp(x / 3.0)

# build an interpolation function from the sample points
f = interpolate.interp1d(x, y)

x1 = np.arange(6, 12)
y1 = f(x1)  # use the interpolation function returned by `interp1d`

plt.plot(x, y, 'o', x1, y1, '--')
plt.show()

OUTPUT:-
3. scikit-learn

from sklearn import datasets as ds
from sklearn import metrics as mt
from sklearn.tree import DecisionTreeClassifier as dtc

# load the iris dataset
dataset_1 = ds.load_iris()

# fit a CART model to the data
model_1 = dtc()
model_1.fit(dataset_1.data, dataset_1.target)
print(model_1)

# make predictions
expected_1 = dataset_1.target
predicted_1 = model_1.predict(dataset_1.data)

# summarize the fit of the model
print(mt.classification_report(expected_1, predicted_1))
print(mt.confusion_matrix(expected_1, predicted_1))

OUTPUT:-
4. pandas

import pandas as pad

data_1 = {"Countries": ["Bhutan", "Cape Verde", "Chad", "Estonia", "Guinea", "Kenya", "Libya", "Mexico"],
          "capital": ["Thimphu", "Praia", "N'Djamena", "Tallinn", "Conakry", "Nairobi", "Tripoli", "Mexico City"],
          "Currency": ["Ngultrum", "Cape Verdean escudo", "CFA Franc", "Estonia Kroon; Euro", "Guinean franc", "Kenya shilling", "Libyan dinar", "Mexican peso"],
          "population": [20.4, 143.5, 12.52, 135.7, 52.98, 76.21, 34.28, 54.32]}

data_1_table = pad.DataFrame(data_1)
print(data_1_table)

OUTPUT:-
5. Matplotlib

import matplotlib.pyplot as plot
import numpy as nup

# Prepare the data
K = nup.linspace(2, 4, 8)
R = nup.linspace(5, 7, 9)
Q = nup.linspace(0, 1, 3)

# Plot each array against itself, giving three straight-line segments
plot.plot(K, K, label='K')
plot.plot(R, R, label='R')
plot.plot(Q, Q, label='Q')

# Add a legend
plot.legend()

# Show the plot
plot.show()

OUTPUT:-
PROGRAM :- 2

AIM: To implement Principal Component Analysis.

import numpy as nmp
import matplotlib.pyplot as mpltl
import pandas as pnd

DS = pnd.read_csv('Wine.csv')

# Distribute the dataset into features "X" and target "Y"
X = DS.iloc[:, 0:13].values
Y = DS.iloc[:, 13].values

# Split into training and test sets
from sklearn.model_selection import train_test_split as tts
X_train, X_test, Y_train, Y_test = tts(X, Y, test_size=0.2, random_state=0)

# Standardize the features
from sklearn.preprocessing import StandardScaler as SS
SC = SS()
X_train = SC.fit_transform(X_train)
X_test = SC.transform(X_test)

# Reduce to two principal components (two are required for the 2-D plot below)
from sklearn.decomposition import PCA
PCa = PCA(n_components=2)
X_train = PCa.fit_transform(X_train)
X_test = PCa.transform(X_test)
explained_variance = PCa.explained_variance_ratio_

# Fit a logistic regression classifier on the reduced features
from sklearn.linear_model import LogisticRegression as LR
classifier_1 = LR(random_state=0)
classifier_1.fit(X_train, Y_train)
Y_pred = classifier_1.predict(X_test)

from sklearn.metrics import confusion_matrix as CM
c_m = CM(Y_test, Y_pred)

# Plot the decision regions over the training set
from matplotlib.colors import ListedColormap as LCM
X_set, Y_set = X_train, Y_train
X_1, X_2 = nmp.meshgrid(nmp.arange(start=X_set[:, 0].min() - 1,
                                   stop=X_set[:, 0].max() + 1, step=0.01),
                        nmp.arange(start=X_set[:, 1].min() - 1,
                                   stop=X_set[:, 1].max() + 1, step=0.01))
mpltl.contourf(X_1, X_2, classifier_1.predict(nmp.array([X_1.ravel(),
               X_2.ravel()]).T).reshape(X_1.shape), alpha=0.75,
               cmap=LCM(('yellow', 'grey', 'green')))
mpltl.xlim(X_1.min(), X_1.max())
mpltl.ylim(X_2.min(), X_2.max())
for s, t in enumerate(nmp.unique(Y_set)):
    mpltl.scatter(X_set[Y_set == t, 0], X_set[Y_set == t, 1],
                  c=LCM(('red', 'green', 'blue'))(s), label=t)
mpltl.title('Logistic Regression for Training set')
mpltl.xlabel('PC_1')  # X label
mpltl.ylabel('PC_2')  # Y label
mpltl.legend()        # show the legend

# show the scatter plot
mpltl.show()
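The explained_variance array computed above is stored but never displayed. A minimal follow-up, using only the PCa object already fitted, prints how much of the variance the two retained components capture:

print("Explained variance ratio per component:", explained_variance)
print("Total variance retained:", explained_variance.sum())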

OUTPUT:-
PROGRAM :- 3

AIM: To implement the FIND-S algorithm.

import pandas as pd
import numpy as np

# read the training data from the CSV file
data = pd.read_csv("data.csv")
print(data, "\n")

# make an array of all the attributes
d = np.array(data)[:, :-1]
print("\n The attributes are: ", d)

# segregate the target column, which holds the positive and negative examples
target = np.array(data)[:, -1]
print("\n The target is: ", target)

# training function implementing the FIND-S algorithm
def train(c, t):
    # start from the first positive example
    for i, val in enumerate(t):
        if val == "Yes":
            specific_hypothesis = c[i].copy()
            break

    # generalize the hypothesis over the remaining positive examples
    for i, val in enumerate(c):
        if t[i] == "Yes":
            for x in range(len(specific_hypothesis)):
                if val[x] != specific_hypothesis[x]:
                    specific_hypothesis[x] = '?'

    return specific_hypothesis

# obtain the final hypothesis
print("\n The final hypothesis is:", train(d, target))

OUTPUT:-
PROGRAM :- 4

AIM: To analyse test data using the K-Means clustering algorithm.

# import the required tools
import numpy as np
import cv2
from matplotlib import pyplot as plt

# create two clusters of test data
X = np.random.randint(10, 35, (25, 2))
Y = np.random.randint(55, 70, (25, 2))
Z = np.vstack((X, Y))
Z = Z.reshape((50, 2))

# convert to np.float32, as required by cv2.kmeans
Z = np.float32(Z)

# plot a histogram of the raw samples
plt.xlabel('Test Data')
plt.ylabel('Z samples')
plt.hist(Z, 256, [0, 256])
plt.show()

# regenerate the test data
X = np.random.randint(10, 45, (25, 2))
Y = np.random.randint(55, 70, (25, 2))
Z = np.vstack((X, Y))

# convert to np.float32
Z = np.float32(Z)

# define the termination criteria and apply kmeans() with K = 2
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv2.kmeans(Z, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

# now separate the data by cluster label
A = Z[label.ravel() == 0]
B = Z[label.ravel() == 1]

# plot the two clusters and their centres
plt.scatter(A[:, 0], A[:, 1])
plt.scatter(B[:, 0], B[:, 1], c='r')
plt.scatter(center[:, 0], center[:, 1], s=80, c='y', marker='s')
plt.xlabel('Test Data')
plt.ylabel('Z samples')
plt.show()

OUTPUT:-
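For comparison, the same two-cluster fit can be obtained without OpenCV. A minimal sketch using scikit-learn's KMeans on the same Z array (an alternative API, not part of the original program):

from sklearn.cluster import KMeans

km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(Z)
print("Cluster centres:\n", km.cluster_centers_)
print("First ten labels:", km.labels_[:10])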
PROGRAM :- 5

AIM: To implement the k-nearest neighbor algorithm.

# Import the necessary modules
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt

irisData = load_iris()

X = irisData.data
y = irisData.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Loop over the K values
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Compute training and test accuracy
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Generate the plot
plt.plot(neighbors, test_accuracy, label='Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label='Training dataset Accuracy')

plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()
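To turn the accuracy curves into a concrete choice of k, a small optional follow-up (using only the arrays already computed above) selects the neighbor count with the highest test accuracy:

best_k = neighbors[np.argmax(test_accuracy)]
print("Best k by test accuracy:", best_k)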

OUTPUT:-
PROGRAM :- 6

AIM: To implement linear regression.

import numpy as nmp
import matplotlib.pyplot as mtplt

def estimate_coeff(p, q):
    # Total number of observations
    n1 = nmp.size(p)

    # Mean of the p and q vectors
    m_p = nmp.mean(p)
    m_q = nmp.mean(q)

    # Cross-deviation and deviation about p
    SS_pq = nmp.sum(q * p) - n1 * m_q * m_p
    SS_pp = nmp.sum(p * p) - n1 * m_p * m_p

    # Regression coefficients
    b_1 = SS_pq / SS_pp
    b_0 = m_q - b_1 * m_p

    return (b_0, b_1)

def plot_regression_line(p, q, b):
    # Plot the actual observations as a scatter plot
    mtplt.scatter(p, q, color="m", marker="o", s=30)

    # Predicted response vector
    q_pred = b[0] + b[1] * p

    # Plot the regression line
    mtplt.plot(p, q_pred, color="g")

    # Label the axes and show the plot
    mtplt.xlabel('p')
    mtplt.ylabel('q')
    mtplt.show()

def main():
    # The observation points
    p = nmp.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
    q = nmp.array([11, 13, 12, 15, 17, 18, 18, 19, 20, 22])

    # Estimate the coefficients
    b = estimate_coeff(p, q)
    print("Estimated coefficients are :\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))

    # Plot the regression line
    plot_regression_line(p, q, b)

if __name__ == "__main__":
    main()
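As a quick cross-check of estimate_coeff, NumPy's built-in least-squares fit should return the same slope and intercept. This is a sanity check on the same data, not part of the original program:

import numpy as nmp

p = nmp.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
q = nmp.array([11, 13, 12, 15, 17, 18, 18, 19, 20, 22])
b_1, b_0 = nmp.polyfit(p, q, 1)  # polyfit returns [slope, intercept] for degree 1
print("b_0 =", b_0, "b_1 =", b_1)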

OUTPUT:-
PROGRAM :- 7

AIM: To implement logistic regression.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

dataset = pd.read_csv("User_Data.csv")

x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

# Splitting the dataset into training and test sets
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)

# Standardizing the features
sc_x = StandardScaler()
xtrain = sc_x.fit_transform(xtrain)
xtest = sc_x.transform(xtest)

print(xtrain[0:10, :])

OUTPUT:-
[[ 0.58164944 -0.88670699]
[-0.60673761 1.46173768]
[-0.01254409 -0.5677824 ]
[-0.60673761 1.89663484]
[ 1.37390747 -1.40858358]
[ 1.47293972 0.99784738]
[ 0.08648817 -0.79972756]
[-0.01254409 -0.24885782]
[-0.21060859 -0.5677824 ]
[-0.21060859 -0.19087153]]

# Train the model
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=0)
classifier.fit(xtrain, ytrain)

# Prediction
y_pred = classifier.predict(xtest)

# Test the performance of the model
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(ytest, y_pred)
print("Confusion Matrix : \n", cm)

# Accuracy
from sklearn.metrics import accuracy_score

print("Accuracy : ", accuracy_score(ytest, y_pred))

OUTPUT 2 :-
Confusion Matrix :
[[65 3]
[8 24]]

Out of 100 test samples, true positives + true negatives = 65 + 24 = 89, while false positives + false negatives = 3 + 8 = 11. The performance measure here is accuracy:
Accuracy: 0.89
# Visualizing the performance of the model on the test set
from matplotlib.colors import ListedColormap

X_set, y_set = xtest, ytest
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1,
                               stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1,
                               stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(
    np.array([X1.ravel(), X2.ravel()]).T).reshape(
    X1.shape), alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('Classifier (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

OUTPUT:-
PROGRAM :- 8

AIM: To implement the Naïve Bayes Algorithm.

# Importing libraries
import math
import random
import csv

# The categorical class names are changed to numeric data,
# e.g. "yes" and "no" are encoded to 1 and 0
def encode_class(mydata):
    classes = []
    for i in range(len(mydata)):
        if mydata[i][-1] not in classes:
            classes.append(mydata[i][-1])
    for i in range(len(classes)):
        for j in range(len(mydata)):
            if mydata[j][-1] == classes[i]:
                mydata[j][-1] = i
    return mydata

# Splitting the data
def splitting(mydata, ratio):
    train_num = int(len(mydata) * ratio)
    train = []
    # initially the test set holds the whole dataset
    test = list(mydata)
    while len(train) < train_num:
        # index generated randomly from range 0 to length of test set
        index = random.randrange(len(test))
        # pop data rows from the test set and put them in train
        train.append(test.pop(index))
    return train, test

# Group the data rows under each class, yes or no,
# in a dictionary, e.g. dict[yes] and dict[no]
def groupUnderClass(mydata):
    dict = {}
    for i in range(len(mydata)):
        if (mydata[i][-1] not in dict):
            dict[mydata[i][-1]] = []
        dict[mydata[i][-1]].append(mydata[i])
    return dict

# Calculating the mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculating the standard deviation
def std_dev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def MeanAndStdDev(mydata):
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    # e.g. for rows [[a, b, c], [m, n, o], [x, y, z]],
    # the mean of the 1st attribute is (a + m + x) / 3 and of the 2nd is (b + n + y) / 3
    # delete the summary of the last column (the class label)
    del info[-1]
    return info

# Find the mean and standard deviation under each class
def MeanAndStdDevForClass(mydata):
    info = {}
    dict = groupUnderClass(mydata)
    for classValue, instances in dict.items():
        info[classValue] = MeanAndStdDev(instances)
    return info

# Calculate the Gaussian probability density function
def calculateGaussianProbability(x, mean, stdev):
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo

# Calculate the class probabilities
def calculateClassProbabilities(info, test):
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, std_dev = classSummaries[i]
            x = test[i]
            probabilities[classValue] *= calculateGaussianProbability(x, mean, std_dev)
    return probabilities

# Make a prediction - the highest probability is the prediction
def predict(info, test):
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

# Return predictions for a set of examples
def getPredictions(info, test):
    predictions = []
    for i in range(len(test)):
        result = predict(info, test[i])
        predictions.append(result)
    return predictions

# Accuracy score
def accuracy_rate(test, predictions):
    correct = 0
    for i in range(len(test)):
        if test[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0

# Driver code
# add the data path on your system
filename = r'E:\user\MACHINE LEARNING\machine learning algos\Naive bayes\filedata.csv'

# load the file and store it in the mydata list
mydata = csv.reader(open(filename, "rt"))
mydata = list(mydata)
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]

# split ratio = 0.7:
# 70% of the data is training data and 30% is test data
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)
print('Total number of examples are: ', len(mydata))
print('Out of these, training examples are: ', len(train_data))
print("Test examples are: ", len(test_data))

# prepare the model
info = MeanAndStdDevForClass(train_data)

# test the model
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print("Accuracy of your model is: ", accuracy)

OUTPUT:-

Total number of examples are: 200


Out of these, training examples are: 140
Test examples are: 60
Accuracy of your model is: 71.237678
PROGRAM :- 9

AIM: To implement the Decision Tree Algorithm.

# Run this program on your local Python interpreter,
# provided you have installed the required libraries.

# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Function importing the dataset
def importdata():
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)

    # Printing the dataset shape
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)

    # Printing the dataset observations
    print("Dataset: ", balance_data.head())
    return balance_data

# Function to split the dataset
def splitdataset(balance_data):
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)

    return X, Y, X_train, X_test, y_train, y_test

# Function to perform training with the Gini index
def train_using_gini(X_train, X_test, y_train):
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini",
            random_state=100, max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini

# Function to perform training with entropy
def train_using_entropy(X_train, X_test, y_train):
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy", random_state=100,
        max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

# Function to make predictions
def prediction(X_test, clf_object):
    # Prediction on the test set
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    print("Confusion Matrix: ",
          confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))

# Driver code
def main():
    # Building phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = train_using_entropy(X_train, X_test, y_train)

    # Operational phase
    print("Results Using Gini Index:")

    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    print("Results Using Entropy:")

    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

# Calling the main function
if __name__ == "__main__":
    main()
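To inspect the learned rules rather than only the scores, an optional addition (not in the original program) prints the fitted gini tree with sklearn.tree.export_text. The feature names here are assumptions based on the balance-scale attribute descriptions, and clf_gini is assumed to be in scope, e.g. returned from train_using_gini():

from sklearn.tree import export_text

print(export_text(clf_gini, feature_names=[
    'Left-Weight', 'Left-Distance', 'Right-Weight', 'Right-Distance']))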

OUTPUT:-
Dataset Length: 625
Dataset Shape: (625, 5)
Dataset: 0 1 2 3 4
0 B 1 1 1 1
1 R 1 1 1 2
2 R 1 1 1 3
3 R 1 1 1 4
4 R 1 1 1 5
Results Using Gini Index:

Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']

Confusion Matrix: [[ 0 6 7]
[ 0 67 18]
[ 0 19 71]]

Accuracy : 73.4042553191

Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.73 0.79 0.76 85
R 0.74 0.79 0.76 90
avg/total 0.68 0.73 0.71 188

Results Using Entropy:

Predicted values:
['R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L'
'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L'
'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'L' 'R'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R']

Confusion Matrix: [[ 0 6 7]
[ 0 63 22]
[ 0 20 70]]

Accuracy : 70.7446808511

Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.71 0.74 0.72 85
R 0.71 0.78 0.74 90

avg / total 0.66 0.71 0.68 188


PROGRAM :- 10

AIM: To implement the Support Vector Machine Algorithm.

Importing the dataset

import pandas as pd
data = pd.read_csv("apples_and_oranges.csv")

Splitting the dataset into training and test samples

from sklearn.model_selection import train_test_split
training_set, test_set = train_test_split(data, test_size=0.2, random_state=1)

Classifying the predictors and target

X_train = training_set.iloc[:, 0:2].values
Y_train = training_set.iloc[:, 2].values
X_test = test_set.iloc[:, 0:2].values
Y_test = test_set.iloc[:, 2].values
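The file apples_and_oranges.csv is not reproduced here; the indexing above implies two numeric feature columns followed by a class column. A hypothetical first few rows, matching the axis labels used in the plots below, might be:

Weight,Size,Class
69,4.39,orange
70,4.21,orange
65,4.09,apple
72,5.85,orange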

Initializing the Support Vector Machine and fitting the training data

from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=1)
classifier.fit(X_train, Y_train)

Predicting the classes for the test set

Y_pred = classifier.predict(X_test)

Attaching the predictions to the test set for comparing the actual classes and predictions

test_set["Predictions"] = Y_pred

Calculating the accuracy of the predictions

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, Y_pred)
accuracy = float(cm.diagonal().sum()) / len(Y_test)
print("\nAccuracy Of SVM For The Given Dataset : ", accuracy)

OUTPUT:-

Accuracy Of SVM For The Given Dataset : 0.875

Visualizing the classifier

First, encode the class labels:

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y_train = le.fit_transform(Y_train)

After encoding, fit the encoded data to the SVM:

from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=1)
classifier.fit(X_train, Y_train)

Let's visualize!

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

plt.figure(figsize=(7, 7))
X_set, y_set = X_train, Y_train
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'orange'))(i), label=j)
plt.title('Apples Vs Oranges')
plt.xlabel('Weight In Grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()

OUTPUT:-

Visualizing the predictions

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

plt.figure(figsize=(7, 7))
X_set, y_set = X_test, Y_test
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'orange'))(i), label=j)
plt.title('Apples Vs Oranges Predictions')
plt.xlabel('Weight In Grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()

OUTPUT:-
A Practical File

On

Machine Learning
Submitted in partial fulfillment of the requirements

for the

Award of Bachelor of Technology Degree

In

Computer Science Engineering

2019-2023

Submitted By:
Ravi Parkash (4811)
Semester 8TH

Under the Guidance of

Mrs. Jyoti Ahlawat


DEPARTMENT OF COMPUTER SCIENCE ENGINEERING
MATU RAM INSTITUTE OF ENGINEERING & MANAGEMENT
Rohtak, Haryana
