Program 1
Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from
a .CSV file.
Code:
import csv
with open('[Link]', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# Most specific hypothesis: one '0' (maximally specific value) per attribute.
h = [['0', '0', '0', '0', '0', '0']]

for i in your_list:
    print(i)
    if i[-1] == "Y":              # use only positive examples
        j = 0
        for x in i:
            if x != "Y":          # skip the class label
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x   # first positive example: adopt its value
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?' # conflicting values: generalise this attribute
                else:
                    pass
            j = j + 1

print("Most specific hypothesis is")
print(h)
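Note: the script assumes each CSV row holds six attribute values followed by the class label in the last column ("Y" for positive, anything else negative). A hypothetical layout consistent with the code (attribute names and values are only an illustration, not the original data file) would be:

Sunny,Warm,Normal,Strong,Warm,Same,Y
Rainy,Cold,High,Strong,Warm,Change,N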
-------------------------------------------------------------------------------------------------------------------------------
Program 2
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set
of all hypotheses consistent with the training examples.
Code:
import numpy as np
import csv

def candidateElimination():
    data = []
    csvFile = open('[Link]', 'r')
    reader = csv.reader(csvFile, delimiter=',')
    for row in reader:
        data.append(list(row))
    # Convert to a NumPy array
    data = np.array(data, dtype='object')
    X = data[:, :-1]
    Y = data[:, -1].reshape(X.shape[0], 1)
    print("\nTraining Data :")
    print(X)
    print("\nLabels :")
    print(Y)
    print("\nShape Of X :")
    print(X.shape)
    print("\nShape Of Y :")
    print(Y.shape)

    specificH = [" % " for _ in range(X.shape[1])]
    specificH = np.array(specificH, dtype='object')
    generalH = [[" ? " for _ in range(X.shape[1])] for _ in range(X.shape[1])]
    generalH = np.array(generalH, dtype='object')
    print("\nInitial Hypothesis :")
    print(specificH)
    print("\nInitial General Hypothesis :")
    print(generalH)

    # Set the first positive example as the initial specific hypothesis
    if Y[0] == "P":
        specificH = X[0]
    else:
        for i in range(X.shape[0]):
            if Y[i] == "P":
                specificH = X[i]
                break

    print("\nCandidate Elimination : ")
    # For each training example
    for i in range(X.shape[0]):
        # Positive example: generalise the specific boundary
        if Y[i] == "P":
            for j in range(X.shape[1]):
                if X[i][j] != specificH[j]:
                    specificH[j] = '?'
                    if specificH[j] != generalH[j][j] and generalH[j][j] != "?":
                        generalH[j][j] = "?"
            print("\n---------Step " + str(i + 1) + "---------\n")
            print("\nSpecific Set : ")
            print(specificH)
            print("\nGeneral Set : ")
            print(generalH)
            print("\n------------------------\n")
        # Negative example: specialise the general boundary
        else:
            for j in range(X.shape[1]):
                if X[i][j] != specificH[j]:
                    generalH[j][j] = specificH[j]
            print("\n---------Step " + str(i + 1) + "---------\n")
            print("\nSpecific Set : ")
            print(specificH)
            print("\nGeneral Set : ")
            print(generalH)
            print("\n------------------------\n")

    print("\nFinal Specific Hypothesis : ")
    print(specificH)
    print("\nFinal General Hypothesis : ")
    print(generalH)
    print("\n")

candidateElimination()
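In this simplified version the class labels are expected to be "P" for positive (anything else is treated as negative), and the general boundary is kept as an attribute-by-attribute matrix, most of whose rows stay entirely unconstrained. A small illustrative addition (not part of the original listing) that can be appended at the end of candidateElimination(), after the final print statements, to show only the rows that carry an actual constraint:

    # Illustrative only: hide rows of the general boundary that are still all '?'.
    informative = [list(row) for row in generalH if any(v not in (" ? ", "?") for v in row)]
    print("\nInformative General Hypotheses : ")
    print(informative)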
-------------------------------------------------------------------------------------------------------------------------------
Program 3
Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply this
knowledge to classify a new sample.
Code:
import pandas as pd
import numpy as np
import math

class Node:
    def __init__(self, l):
        self.label = l      # attribute tested at this node, or the class at a leaf
        self.branch = {}    # attribute value -> child subtree

def entropy(data):
    total_ex = len(data)
    positive_ex = len(data.loc[data["Play Tennis"] == 'Y'])
    negative_ex = len(data.loc[data["Play Tennis"] == 'N'])
    entropy = 0
    if positive_ex > 0:
        entropy = (-1) * (positive_ex / float(total_ex)) * (math.log(positive_ex, 2) - math.log(total_ex, 2))
    if negative_ex > 0:
        entropy += (-1) * (negative_ex / float(total_ex)) * (math.log(negative_ex, 2) - math.log(total_ex, 2))
    return entropy

def gain(s, data, attrib):
    values = set(data[attrib])
    print(values)
    gain = s
    for val in values:
        gain -= len(data.loc[data[attrib] == val]) / float(len(data)) * entropy(data.loc[data[attrib] == val])
    return gain

def get_attrib(data):
    entropy_s = entropy(data)
    attribute = ""
    max_gain = 0
    for attr in data.columns[:len(data.columns) - 1]:
        g = gain(entropy_s, data, attr)
        if g > max_gain:
            max_gain = g
            attribute = attr
    return attribute

def decision_tree(data):
    root = Node("NULL")
    if entropy(data) == 0:                 # pure node: make it a leaf
        if len(data.loc[data[data.columns[-1]] == 'Y']) == len(data):
            root.label = "Y"
            return root
        else:
            root.label = "N"
            return root
    if len(data.columns) == 1:             # no attributes left to split on
        return
    else:
        attrib = get_attrib(data)          # attribute with the highest information gain
        root.label = attrib
        values = set(data[attrib])
        for val in values:
            root.branch[val] = decision_tree(data.loc[data[attrib] == val].drop(attrib, axis=1))
        return root

def get_rules(root, rule, rules):
    if not root.branch:                    # leaf: emit the accumulated rule
        rules.append(rule[:-2] + " => " + root.label)
        return rules
    for i in root.branch:
        get_rules(root.branch[i], rule + root.label + "=" + i + " ^ ", rules)
    return rules

def test(tree, test_str):
    if not tree.branch:
        return tree.label
    return test(tree.branch[test_str[tree.label]], test_str)

data = pd.read_csv('[Link]')
entropy_s = entropy(data)
attrib_count = 0
cols = len(data.columns) - 1
tree = decision_tree(data)
rules = get_rules(tree, "", [])
print(rules)
test_str = {}
print("Enter test case input")
for i in data.columns[:-1]:
    test_str[i] = input(i + ": ")
print(test_str)
print(test(tree, test_str))
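The interactive prompt above asks for one value per attribute column of the CSV. A non-interactive sketch of the same call, with made-up attribute names and values (they must match the columns and values of the data set actually used), would be:

# Illustrative only: the attribute names and values below are hypothetical.
# sample = {"Outlook": "Sunny", "Temperature": "Cool", "Humidity": "Normal", "Wind": "Strong"}
# print(test(tree, sample))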
-------------------------------------------------------------------------------------------------------------------------------
Program 4 (Mam)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.
Code:
#!/usr/bin/env python
# coding: utf-8

# In[1]:
import random
from math import exp
from random import seed

# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    # For each hidden node, one weight per input plus 1 for the bias
    hidden_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    i = 1
    print("\n The initialised Neural Network:\n")
    for layer in network:
        j = 1
        for sub in layer:          # each layer is a list of weight dictionaries, one per node
            print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)   # weight array for the node
            j = j + 1
        i = i + 1
    return network

# Calculate neuron activation (net) for an input
def activate(weights, inputs):
    activation = weights[-1]            # initialize the induced local field with the bias term
    for i in range(len(weights) - 1):   # weighted summation over all inputs
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation through the sigmoid function
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    print("inside the forward")
    for layer in network:              # traverse the layers
        new_inputs = []                # outputs of this layer become inputs to the next
        for neuron in layer:           # each neuron holds its own weight vector
            activation = activate(neuron['weights'], inputs)
            print("activation", activation)
            neuron['output'] = transfer(activation)
            print(neuron['output'])
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of a neuron output
def transfer_derivative(output):
    return output * (1.0 - output)

# Backpropagate error and store it in the neurons
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network) - 1:                  # hidden layer
            for j in range(len(layer)):            # each neuron in the current layer
                error = 0.0
                for neuron in network[i + 1]:      # neurons in the downstream layer
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:                                      # output layer
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with error
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]                          # all columns except the class label
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]   # previous layer's output feeds the next
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]   # j-th link weight of the neuron
            neuron['weights'][-1] += l_rate * neuron['delta']                  # update the bias link

# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    print("\n Network Training Begins:\n")
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1                  # one-hot encode the class label
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
    print("\n Network Training Ends:\n")

# Test training the backprop algorithm
seed(2)
dataset = [[2.7810836, 2.550537003, 0],
           [1.465489372, 2.362125076, 0],
           [3.396561688, 4.400293529, 0],
           [1.38807019, 1.850220317, 0],
           [3.06407232, 3.005305973, 0],
           [7.627531214, 2.759262235, 1],
           [5.332441248, 2.088626775, 1],
           [6.922596716, 1.77106367, 1],
           [8.675418651, -0.242068655, 1],
           [7.673756466, 3.508563011, 1]]
print("\n The input Data Set :\n", dataset)
n_inputs = len(dataset[0]) - 1
print("\n Number of Inputs :\n", n_inputs)
n_outputs = len(set([row[-1] for row in dataset]))
print("\n Number of Outputs :\n", n_outputs)

# Network initialization
network = initialize_network(n_inputs, 2, n_outputs)
# Training the network
train_network(network, dataset, 0.5, 20, n_outputs)

print("\n Final Neural Network :")
i = 1
for layer in network:
    j = 1
    for sub in layer:
        print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
        j = j + 1
    i = i + 1
# In[5]:
# Prediction
from math import exp

# Calculate neuron activation for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights) - 1):   # the last weight is the bias; the last input column is the label
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Make a prediction with a network
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

# Test making predictions with the network
dataset = [[2.7810836, 2.550537003, 0],
           [1.465489372, 2.362125076, 0],
           [3.396561688, 4.400293529, 0],
           [1.38807019, 1.850220317, 0],
           [3.06407232, 3.005305973, 0],
           [7.627531214, 2.759262235, 1],
           [5.332441248, 2.088626775, 1],
           [6.922596716, 1.77106367, 1],
           [8.675418651, -0.242068655, 1],
           [7.673756466, 3.508563011, 1]]
# A hard-coded network can be used instead of the one trained above:
#network = [[{'weights': [-1.482313569067226, 1.8308790073202204, 1.078381922048799]}, {'weights': [0.23244990332399884, 0.3621998343835864, 0.40289821191094327]}],
#           [{'weights': [2.5001872433501404, 0.7887233511355132, -1.1026649757805829]}, {'weights': [-2.429350576245497, 0.8357651039198697, 1.0699217181280656]}]]
for row in dataset:
    prediction = predict(network, row)
    print('Expected=%d, Got=%d' % (row[-1], prediction))
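A compact way to summarise the Expected/Got listing above (a small illustrative addition, not part of the original listing) is to count the matches:

correct = sum(1 for row in dataset if predict(network, row) == row[-1])
print('Training-set accuracy: %.1f%%' % (100.0 * correct / len(dataset)))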
-------------------------------------------------------------------------------------------------------------------------------
Program 4 (Alt)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.
Code:
import numpy as np
import csv

filename = '[Link]'
lines = csv.reader(open(filename, "r"))
lines2 = csv.reader(open(filename, "r"))
data = list(lines)
data2 = list(lines2)
for i in range(len(data)):
    data[i] = [float(x) for x in data[i][:-1]]      # feature columns
for i in range(len(data2)):
    data2[i] = [float(data2[i][-1])]                # target column (last field of each row)
X = np.array(data, dtype=float)
y = np.array(data2, dtype=float)
print(X)
print(y)
X = X / np.amax(X, axis=0)    # scale each feature by its column maximum
print(X)
y = y / 100
print(y)

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid function
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 1500                  # number of training iterations
lr = 0.1                      # learning rate
inputlayer_neurons = 2        # number of features in the data set
hiddenlayer_neurons = 3       # number of hidden layer neurons
output_neurons = 1            # number of neurons at the output layer

# Weight and bias initialization
# np.random.uniform draws numbers uniformly at random with the given shape
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    print("epoch", i + 1)
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)    # how much the hidden layer weights contributed to the error
    d_hiddenlayer = EH * hiddengrad
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
-------------------------------------------------------------------------------------------------------------------------------
Program 5
Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test
data sets.
Code:
import csv
import math
import random

# Handle data
def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

# Split dataset with ratio
def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

# Separate by class
def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

# Calculate mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculate standard deviation
def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

# Summarize dataset
def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

# Summarize attributes by class
def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

# Calculate Gaussian probability density function
def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

# Calculate class probabilities
def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

# Make a prediction
def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

# Get predictions
def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

# Get accuracy
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = '[Link]'
    splitRatio = 0.68
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train = {1} and test = {2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    # Prepare the model
    summaries = summarizeByClass(trainingSet)
    # Test the model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))

main()
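A small illustrative addition (not in the original listing) showing how one held-out record is classified with the same helpers:

def classifyOne():
    # Illustrative only: classify one held-out record and compare with its actual class.
    dataset = loadCsv('[Link]')
    trainingSet, testSet = splitDataset(dataset, 0.68)
    summaries = summarizeByClass(trainingSet)
    record = testSet[0]
    print('Record    :', record)
    print('Predicted :', predict(summaries, record))
    print('Actual    :', record[-1])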
-------------------------------------------------------------------------------------------------------------------------------
Program 6
Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Calculate the accuracy, precision, and recall for
your data set.
Code:
from sklearn.datasets import fetch_20newsgroups        # load filenames and data from the 20 newsgroups dataset
from sklearn.metrics import confusion_matrix           # used to evaluate the classification
from sklearn.metrics import classification_report      # text report with the main classification metrics
import numpy as np
import os

#categories=['[Link]','[Link]','[Link]','[Link]']
#twenty_train=fetch_20newsgroups(subset='train',categories=categories,shuffle=True)
#twenty_test=fetch_20newsgroups(subset='test',categories=categories,shuffle=True)
twenty_train=fetch_20newsgroups(data_home='./scikit_learn_data',subset='train',shuffle=True)
#print(twenty_train)
twenty_test=fetch_20newsgroups(data_home='./scikit_learn_data',subset='test',shuffle=True)
#print(twenty_test)
print("Number of Training Examples: ",len(twenty_train.data))
print("Number of Test Examples: ",len(twenty_test.data))
print(twenty_train.target_names)

from sklearn.feature_extraction.text import CountVectorizer
count_vect=CountVectorizer()
X_train_tf=count_vect.fit_transform(twenty_train.data)

from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer=TfidfTransformer()
X_train_tfidf=tfidf_transformer.fit_transform(X_train_tf)
print(X_train_tfidf.shape)

from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn import metrics
mod=MultinomialNB()
mod.fit(X_train_tfidf,twenty_train.target)
X_test_tf=count_vect.transform(twenty_test.data)
X_test_tfidf=tfidf_transformer.transform(X_test_tf)
predicted=mod.predict(X_test_tfidf)
print("Accuracy: ",accuracy_score(twenty_test.target,predicted))
print(classification_report(twenty_test.target,predicted,target_names=twenty_test.target_names))
print("Confusion matrix \n",metrics.confusion_matrix(twenty_test.target,predicted))
-------------------------------------------------------------------------------------------------------------------------------
Program 7
Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using standard Heart Disease
Data Set.
Code:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Read the attribute names
lines = list(csv.reader(open('Data7_Names.csv', 'r')))
attributes = lines[0]
#attributes = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','heartdisease']

# Read the Cleveland heart disease data
heartDisease = pd.read_csv('[Link]')
#for row in heartDisease:
#    print(row)
heartDisease = heartDisease.replace("?", np.nan)   # treat '?' as a missing value

# Display the data
print("Few examples from the dataset are given below")
print(heartDisease.head())
print("Attributes and data types")
print(heartDisease.dtypes)

# Model the Bayesian network
model = BayesianModel([('age','trestbps'),('age','fbs'),('sex','trestbps'),
                       ('sex','trestbps'),('exang','trestbps'),('trestbps','heartdisease'),
                       ('fbs','heartdisease'),('heartdisease','restecg'),('heartdisease','thalach'),
                       ('heartdisease','chol')])

# Learn the CPDs using maximum likelihood estimation
print("Learning CPDs using maximum likelihood estimators...")
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inference with the Bayesian network
print("\nInferencing with the Bayesian network:")
HeartDisease_infer = VariableElimination(model)

# Probability of heart disease given the observed evidence
print("\n1. Probability of heart disease given age=28")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])

print("\n2. Probability of heart disease given chol (cholesterol)=100")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q['heartdisease'])
-------------------------------------------------------------------------------------------------------------------------------
Program 8
Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data
set for clustering using k-Means algorithm. Compare the results of these two
algorithms and comment on the quality of clustering.
Code:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from sklearn import preprocessing
#from sklearn.mixture import GMM               # used for older versions of sklearn
from sklearn.mixture import GaussianMixture

iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
X_norm = preprocessing.normalize(X)
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# K-Means model
model = KMeans(n_clusters = 3)
model.fit(X_norm)

# EM model (Gaussian mixture)
#gmm = GMM(n_components = 3)                   # used for older versions of sklearn
gmm = GaussianMixture(n_components = 3)
gmm.fit(X_norm)
gmm_y = gmm.predict(X_norm)

plt.figure(figsize = (14, 14))
colormap = np.array(['red', 'lime', 'black'])

# Real clusters
plt.subplot(2, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[y.Targets], s = 40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# K-Means output
plt.subplot(2, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[model.labels_], s = 40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# EM output
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[gmm_y], s = 40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
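The exercise asks for a comparison of the two clusterings. A small illustrative addition (not in the original listing) that quantifies the visual comparison with sklearn's adjusted Rand index, which is invariant to cluster relabelling and equals 1.0 for perfect agreement with the true species:

from sklearn.metrics import adjusted_rand_score
print('K-Means vs. true labels :', adjusted_rand_score(y.Targets, model.labels_))
print('GMM/EM  vs. true labels :', adjusted_rand_score(y.Targets, gmm_y))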
-------------------------------------------------------------------------------------------------------------------------------
Program 9
Write a program to implement k-Nearest Neighbour algorithm to classify the iris data
set. Print both correct and wrong predictions.
Code:
# Imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split       # use this for sklearn >= 0.20
#from sklearn.cross_validation import train_test_split     # used for older versions of sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Input data
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv("Data_8_9.csv", names = names)
print(dataset.head())

# Preprocessing
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Model creation
classifier = KNeighborsClassifier(n_neighbors = 5)
classifier.fit(x_train, y_train)

# Prediction: print both correct and wrong predictions
y_pred = classifier.predict(x_test)
for i in range(len(y_pred)):
    print("Test Example : ")
    print(x_test[i])
    print("Actual Label : ")
    print(y_test[i])
    print("Predicted Label : ")
    print(y_pred[i])
    print("Result : Correct" if y_test[i] == y_pred[i] else "Result : Wrong")
    print("--------------------------------------------")

print("Confusion Matrix : ")
print(confusion_matrix(y_test, y_pred))
print("")
print("Classification Report : ")
print(classification_report(y_test, y_pred))
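A single accuracy figure can be printed alongside the confusion matrix (a small illustrative addition, not in the original listing):

from sklearn.metrics import accuracy_score
print("Accuracy : ", accuracy_score(y_test, y_pred))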
-------------------------------------------------------------------------------------------------------------------------------
Program 10
Implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points. Select appropriate data set for your experiment and draw graphs.
Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))                      # eye - identity matrix
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

def graphPlot(X, ypred):
    sortindex = X[:, 1].argsort(0)                   # argsort - indices that sort the bill amounts
    xsort = X[sortindex][:, 0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(bill, tip, color='green')
    ax.plot(xsort[:, 1], ypred[sortindex], color='red', linewidth=5)
    plt.xlabel('Total bill')
    plt.ylabel('Tip')
    plt.show()

# Load data points
data = pd.read_csv('[Link]')
bill = np.array(data.total_bill)                     # we use only the bill amount and tip columns
tip = np.array(data.tip)
mbill = np.mat(bill)                                 # np.mat converts the 1-D array into a 2-D matrix
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))                      # 244 rows, 2 cols (bias column of ones + bill)
ypred = localWeightRegression(X, mtip, 0.5)          # increase k to get a smoother curve
graphPlot(X, ypred)
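The bandwidth k controls how local the fit is: each query point weights the training points by exp(-(x - xj)^2 / (2k^2)) and solves its own weighted least-squares problem, so a small k follows the data closely while a large k approaches an ordinary straight-line fit. A small illustrative loop (the k values below are chosen arbitrarily, not taken from the original listing) that redraws the plot for several bandwidths:

# Illustrative only: compare different bandwidths on the same data.
for k in (0.1, 0.5, 1.0, 5.0):
    graphPlot(X, localWeightRegression(X, mtip, k))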