1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis
based on a given set of training data samples. Read the training data from a .CSV file.
import csv
# FIND-S: start from the most specific hypothesis and generalise it just
# enough to cover every positive ('yes') training example.
# NOTE(review): "[Link]" in the path below is an extraction placeholder for the
# CSV file name, which cannot be recovered from this listing -- substitute it.
a = []
# newline='' is what the csv module expects for files handed to csv.reader.
with open('/home/cit/Downloads/[Link]', 'r', newline='') as csvfile:
    # Every row is kept, including the header row when the file has one.
    for row in csv.reader(csvfile):
        a.append(row)
print(a)
print("\n The total number of training instances are : ",len(a))
# The last column is the class label; every column before it is an attribute.
num_attribute = len(a[0])-1
print("\n The initial hypothesis is : ")
# '0' means "no value accepted yet" (the most specific constraint).
hypothesis = ['0']*num_attribute
print(hypothesis)
for i, row in enumerate(a):
    # Only rows labelled 'yes' change the hypothesis; any other row
    # (negative examples, a header line) leaves it untouched.
    if row[num_attribute] == 'yes':
        for j in range(num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == row[j]:
                # First positive example, or the value agrees: keep it.
                hypothesis[j] = row[j]
            else:
                # Values disagree: generalise this attribute to "any value".
                hypothesis[j] = '?'
    print("\n The hypothesis for the training instance {} is : \n" .format(i+1),hypothesis)
print("\n The Maximally specific hypothesis for the training instance is ")
print(hypothesis)
output
[['sky', 'airtemp', 'humidity', 'wind', 'water', 'forcast', 'enjoysport'], ['sunny', 'warm', 'normal', 'strong', 'warm', 'same',
'yes'], ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'], ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'],
['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']]
The total number of training instances are : 5
The initial hypothesis is :
['0', '0', '0', '0', '0', '0']
The hypothesis for the training instance 1 is :
['0', '0', '0', '0', '0', '0']
The hypothesis for the training instance 2 is :
['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
The hypothesis for the training instance 3 is :
['sunny', 'warm', '?', 'strong', 'warm', 'same']
The hypothesis for the training instance 4 is :
['sunny', 'warm', '?', 'strong', 'warm', 'same']
The hypothesis for the training instance 5 is :
['sunny', 'warm', '?', 'strong', '?', '?']
The Maximally specific hypothesis for the training instance is
['sunny', 'warm', '?', 'strong', '?', '?']
Training data (contents of the CSV file):
sky airtemp humidity wind water forcast enjoysport
sunny warm normal strong warm same yes
sunny warm high strong warm same yes
rainy cold high strong warm change no
sunny warm high strong cool change yes
2. For a given set of training data examples stored in a .CSV file, implement and demonstrate
the Candidate-Elimination algorithm to output a description of the set of all hypotheses
consistent with the training examples
import csv
# Candidate-Elimination: maintain the specific boundary (one hypothesis) and a
# general boundary (one candidate hypothesis per attribute) while scanning the
# training rows.
# NOTE(review): "[Link]" is an extraction placeholder for the CSV file name,
# which cannot be recovered from this listing -- substitute it.
with open("[Link]", newline="") as f:
    csv_file = csv.reader(f)
    data = list(csv_file)

# The specific boundary starts as the attribute values of row 1 (row 0 is
# stepped over like any other row but matches neither "Yes" nor "No").
specific = data[1][:-1]
# general[j] is a hypothesis that may constrain attribute j only.
general = [['?' for i in range(len(specific))] for j in range(len(specific))]

# enumerate() numbers the steps; data.index(row) would report the first
# matching row and so repeat a step number for duplicate rows.
for step, row in enumerate(data, start=1):
    if row[-1] == "Yes":
        # Positive example: drop every constraint it violates.
        for j in range(len(specific)):
            if row[j] != specific[j]:
                specific[j] = "?"
                general[j][j] = "?"
    elif row[-1] == "No":
        # Negative example: specialise the general boundary on the attributes
        # where the example differs from the specific boundary.
        for j in range(len(specific)):
            if row[j] != specific[j]:
                general[j][j] = specific[j]
            else:
                general[j][j] = "?"
    print("\nStep " + str(step) + " of Candidate Elimination Algorithm")
    print(specific)
    print(general)

gh = [] # gh = general Hypothesis
# Keep only the general hypotheses that still constrain some attribute.
for candidate in general:
    if any(value != '?' for value in candidate):
        gh.append(candidate)
print("\nFinal Specific hypothesis:\n", specific)
print("\nFinal General hypothesis:\n", gh)
output
Step 1 of Candidate Elimination Algorithm
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Step 2 of Candidate Elimination Algorithm
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Step 3 of Candidate Elimination Algorithm
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Step 4 of Candidate Elimination Algorithm
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', 'Same']]
Step 5 of Candidate Elimination Algorithm
['Sunny', 'Warm', '?', 'Strong', '?', '?']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?']]
Final Specific hypothesis:
['Sunny', 'Warm', '?', 'Strong', '?', '?']
Final General hypothesis:
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
Training data (contents of the CSV file):
sky airtemp humidity wind water forcast enjoysport
Sunny Warm Normal Strong Warm Same Yes
Sunny Warm High Strong Warm Same Yes
Rainy Cold High Strong Warm Change No
Sunny Warm High Strong Cool Change Yes
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use
an appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.
import math
import csv
def load_csv(filename):
    """Read a CSV file and separate its first row from the remaining rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(dataset, headers)`` tuple: ``headers`` is the first row of the
        file and ``dataset`` is the list of all following rows.

    Raises:
        IndexError: If the file contains no rows.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename, "r", newline="") as csvfile:
        dataset = list(csv.reader(csvfile))
    headers = dataset.pop(0)
    return dataset, headers
class Node:
    """A node of the decision tree."""

    def __init__(self,attribute):
        # Name of the attribute tested at this node.
        self.attribute=attribute
        # (attribute value, child Node) pairs, one per branch.
        self.children=[]
        # Class label when the node is a leaf; "" otherwise.
        self.answer=""
def subtables(data,col,delete):
    """Partition the rows of *data* by their value in column *col*.

    Args:
        data: List of rows (each row a list of values).
        col: Index of the column to split on.
        delete: When true, the rows placed in the result omit column *col*.

    Returns:
        A ``(attr, dic)`` tuple: ``attr`` lists the distinct values of the
        column in first-seen order and ``dic`` maps each value to the list of
        rows carrying it.
    """
    dic={}
    for row in data:
        value=row[col]
        if delete:
            # Work on a copy: deleting from the original row would shift its
            # columns, so later comparisons against row[col] would read a
            # different attribute, and the caller's data would be altered.
            kept=list(row)
            del kept[col]
        else:
            kept=row
        dic.setdefault(value,[]).append(kept)
    attr=list(dic)
    return attr,dic
def entropy(S):
    """Return the base-2 Shannon entropy of the class labels in *S*.

    Args:
        S: Sized collection of hashable class labels.

    Returns:
        ``0`` when *S* is empty or holds a single distinct label, otherwise
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of every
        distinct label (any number of classes, not only two).
    """
    labels=set(S)
    if len(labels)<=1:
        return 0
    total=len(S)*1.0
    sums=0
    for label in labels:
        p=sum(1 for x in S if x==label)/total
        sums-=p*math.log2(p)
    return sums
def compute_gain(data,col):
    """Return the information gain obtained by splitting *data* on *col*.

    The gain is the entropy of the label column (last column of each row)
    minus the size-weighted entropy of the labels in each partition that
    ``subtables`` produces for column *col*.
    """
    attr,dic = subtables(data,col,delete=False)
    total_size=len(data)
    total_entropy=entropy([row[-1] for row in data])
    for value in attr:
        subset=dic[value]
        ratio=len(subset)/(total_size*1.0)
        total_entropy-=ratio*entropy([row[-1] for row in subset])
    return total_entropy
def build_tree(data,features):
    """Recursively build an ID3 decision tree.

    Args:
        data: List of rows; the last element of each row is the class label.
        features: Column names aligned with the columns of *data*.

    Returns:
        The root ``Node`` of the (sub)tree for *data*.
    """
    lastcol=[row[-1] for row in data]
    # All examples agree: emit a leaf carrying that label.
    if(len(set(lastcol)))==1:
        node=Node("")
        node.answer=lastcol[0]
        return node
    n=len(data[0])-1
    if n==0:
        # No attribute left to split on although the labels still disagree:
        # fall back to the most common label instead of failing on max([]).
        node=Node("")
        node.answer=max(lastcol,key=lastcol.count)
        return node
    gains=[compute_gain(data,col) for col in range(n)]
    # Split on the attribute with the highest information gain.
    split=gains.index(max(gains))
    node=Node(features[split])
    # The chosen attribute is removed from both the names and the rows.
    fea = features[:split]+features[split+1:]
    attr,dic=subtables(data,split,delete=True)
    for value in attr:
        child=build_tree(dic[value],fea)
        node.children.append((value,child))
    return node
def print_tree(node,level):
    """Print the tree rooted at *node*, indenting by *level* spaces.

    A leaf prints its answer. An internal node prints its attribute, then each
    branch value one level deeper followed by that branch's subtree two levels
    deeper.
    """
    if node.answer!="":
        print(" "*level,node.answer)
        return
    print(" "*level,node.attribute)
    for value,n in node.children:
        print(" "*(level+1),value)
        print_tree(n,level+2)
def classify(node,x_test,features):
    """Print the label the tree rooted at *node* assigns to *x_test*.

    Args:
        node: Root of the (sub)tree to evaluate.
        x_test: One instance, with values in the same order as *features*.
        features: Column names used to locate each tested attribute.

    Nothing is printed when the instance's value for a tested attribute
    matches none of the node's branches.
    """
    if node.answer!="":
        print(node.answer)
        return
    pos=features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos]==value:
            classify(n,x_test,features)
# Main program: build the tree from the training file, print it, then label
# every row of the test file.
# NOTE(review): both "[Link]" file names are extraction placeholders whose
# real values cannot be recovered from this listing -- substitute them.
dataset,features=load_csv("[Link]")
node1=build_tree(dataset,features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1,0)
testdata,features=load_csv("[Link]")
for xtest in testdata:
    print("The test instance:",xtest)
    # end=" " keeps the label printed by classify() on the same line.
    print("The label for test instance:",end=" ")
    classify(node1,xtest,features)
output
The decision tree for the dataset using ID3 algorithm is
Outlook
overcast
yes
sunny
Humidity
high
no
normal
yes
rain
Wind
strong
no
weak
yes
The test instance: ['sunny', 'hot', 'high', 'weak', 'no']
The label for test instance: no
The test instance: ['sunny', 'hot', 'high', 'strong', 'no']
The label for test instance: no
The test instance: ['overcast', 'hot', 'high', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'mild', 'high', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'cool', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'cool', 'normal', 'strong', 'no']
The label for test instance: no
The test instance: ['overcast', 'cool', 'normal', 'strong', 'yes']
The label for test instance: yes
The test instance: ['sunny', 'mild', 'high', 'weak', 'no']
The label for test instance: no
The test instance: ['sunny', 'cool', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'mild', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['sunny', 'mild', 'normal', 'strong', 'yes']
The label for test instance: yes
The test instance: ['overcast', 'mild', 'high', 'strong', 'yes']
The label for test instance: yes
The test instance: ['overcast', 'hot', 'normal', 'weak', 'yes']
The label for test instance: yes
The test instance: ['rain', 'mild', 'high', 'strong', 'no']
The label for test instance: no
Training data (contents of the CSV file):
Outlook Temperature Humidity Wind Answer
sunny hot high weak no
sunny hot high strong no
overcast hot high weak yes
rain mild high weak yes
rain cool normal weak yes
rain cool normal strong no
overcast cool normal strong yes
sunny mild high weak no
sunny cool normal weak yes
rain mild normal weak yes
sunny mild normal strong yes
overcast mild high strong yes
overcast hot normal weak yes
rain mild high strong no
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test
the same using appropriate data sets.
import numpy as np
# Training set: three samples with two input features each and one target.
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
# Scale every input column by its own maximum so all inputs lie in (0, 1].
X = X/np.amax(X,axis=0)
# Targets are divided by 100 so they fall inside the sigmoid's output range.
y = y/100
#Sigmoid Function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise."""
    return 1/(1 + np.exp(-x))
#Derivative of Sigmoid Function
def derivatives_sigmoid(x):
    """Return ``x * (1 - x)``.

    This equals the sigmoid's derivative when *x* is already a sigmoid
    output (an activation), which is how the training loop calls it.
    """
    return x * (1 - x)
#Variable initialization
epoch=5000 #Setting training iterations
lr=0.1 #Setting learning rate
inputlayer_neurons = 2 #number of features in data set
hiddenlayer_neurons = 3 #number of hidden layers neurons
output_neurons = 1 #number of neurons at output layer
#weight and bias initialization: uniform random values of the given shapes
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bout=np.random.uniform(size=(1,output_neurons))
for i in range(epoch):
    #Forward Propagation
    hinp1=np.dot(X,wh)
    hinp=hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1=np.dot(hlayer_act,wout)
    outinp= outinp1+ bout
    output = sigmoid(outinp)
    #Backpropagation
    EO = y-output
    outgrad = derivatives_sigmoid(output)
    d_output = EO* outgrad
    #how much hidden layer wts contributed to error
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad
    # dotproduct of nextlayererror and currentlayerop
    wout += hlayer_act.T.dot(d_output) *lr
    wh += X.T.dot(d_hiddenlayer) *lr
    # The biases receive the same error signal summed over the samples;
    # without these two updates they would stay at their random start values.
    bout += np.sum(d_output,axis=0,keepdims=True) *lr
    bh += np.sum(d_hiddenlayer,axis=0,keepdims=True) *lr
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,output)
output
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.89571283]
[0.88239245]
[0.89153673]]
5. Write a program to implement the naive Bayesian classifier for a sample training data set
stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
# import necessary libraries
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load Data from CSV
# NOTE(review): "[Link]" is an extraction placeholder for the CSV file name,
# which cannot be recovered from this listing -- substitute it.
data = pd.read_csv('[Link]')
print("The first 5 Values of data is :\n", data.head())
# obtain train data and train output
# .copy() gives X its own storage so the column assignments below do not
# write into a slice of `data`.
X = data.iloc[:, :-1].copy()
print("\nThe First 5 values of the train data is\n", X.head())
y = data.iloc[:, -1]
print("\nThe First 5 values of train output is\n", y.head())
# convert them in numbers: each categorical column gets its own encoder
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)
# NOTE(review): this message says "output" but the value printed is the
# encoded train data; the text is kept as in the recorded sample run.
print("\nNow the Train output is\n", X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n",y)
# Hold out 20% of the rows for testing; the split is random on every run.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.20)
classifier = GaussianNB()
classifier.fit(X_train, y_train)
# accuracy_score takes the true labels first, then the predictions.
print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))
output
The first 5 Values of data is :
Outlook Temperature Humidity Windy PlayTennis
0 Sunny Hot High Weak No
1 Sunny Hot High Strong No
2 Overcast Hot High Weak Yes
3 Rain Mild High Weak Yes
4 Rain Cool Normal Weak Yes
The First 5 values of the train data is
Outlook Temperature Humidity Windy
0 Sunny Hot High Weak
1 Sunny Hot High Strong
2 Overcast Hot High Weak
3 Rain Mild High Weak
4 Rain Cool Normal Weak
The First 5 values of train output is
0 No
1 No
2 Yes
3 Yes
4 Yes
Name: PlayTennis, dtype: object
Now the Train output is
Outlook Temperature Humidity Windy
0 2 1 0 1
1 2 1 0 0
2 0 1 0 1
3 1 2 0 1
4 1 0 1 1
Now the Train output is
[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 0.3333333333333333
Training data (contents of the CSV file):
Outlook Temperature Humidity Windy PlayTennis
Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No
6. Write a program to construct a Bayesian network considering medical data. Use this model
to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can
use Python ML library classes/API.
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
# NOTE(review): "[Link]" is an extraction placeholder for the CSV file name,
# which cannot be recovered from this listing -- substitute it.
heartDisease = pd.read_csv('[Link]')
# '?' entries in the data are turned into NaN.
heartDisease = heartDisease.replace('?',np.nan)
print('Sample instances from the dataset are given below')
print(heartDisease.head())
print('\n Attributes and datatypes')
print(heartDisease.dtypes)
# Network structure: age, sex, exang and cp are parents of heartdisease,
# which in turn is a parent of restecg and chol.
# NOTE(review): newer pgmpy releases rename BayesianModel -- confirm against
# the installed version.
model=BayesianModel([
    ('age','heartdisease'),('sex','heartdisease'),
    ('exang','heartdisease'),('cp','heartdisease'),
    ('heartdisease','restecg'),('heartdisease','chol'),
])
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)
print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)
print('\n 1. Probability of HeartDisease given evidence= restecg')
q1=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'restecg':1})
print(q1)
print('\n 2. Probability of HeartDisease given evidence= cp ')
q2=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'cp':2})
print(q2)
output
Sample instances from the dataset are given below
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \
0 63 1 1 145 233 1 2 150 0 2.3 3
1 67 1 4 160 286 0 2 108 1 1.5 2
2 67 1 4 120 229 0 2 129 1 2.6 2
3 37 1 3 130 250 0 0 187 0 3.5 3
4 41 0 2 130 204 0 2 172 0 1.4 1
ca thal heartdisease
0 0 6 0
1 3 3 2
2 2 7 1
3 0 3 0
4 0 3 0
Attributes and datatypes
age int64
sex int64
cp int64
trestbps int64
chol int64
fbs int64
restecg int64
thalach int64
exang int64
oldpeak float64
slope int64
ca object
thal object
heartdisease int64
dtype: object
Learning CPD using Maximum likelihood estimators
Inferencing with Bayesian Network:
1. Probability of HeartDisease given evidence= restecg
+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.1016 |
+-----------------+---------------------+
| heartdisease(1) | 0.0000 |
+-----------------+---------------------+
| heartdisease(2) | 0.2361 |
+-----------------+---------------------+
| heartdisease(3) | 0.2017 |
+-----------------+---------------------+
| heartdisease(4) | 0.4605 |
+-----------------+---------------------+
2. Probability of HeartDisease given evidence= cp
+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.3742 |
+-----------------+---------------------+
| heartdisease(1) | 0.2018 |
+-----------------+---------------------+
| heartdisease(2) | 0.1375 |
+-----------------+---------------------+
| heartdisease(3) | 0.1541 |
+-----------------+---------------------+
| heartdisease(4) | 0.1323 |
+-----------------+---------------------+
7. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for
clustering using k-Means algorithm. Compare the results of these two algorithms and comment
on the quality of clustering. You can add Python ML library classes/API in the program.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import sklearn.metrics as sm
import pandas as pd
import numpy as np


def _align_clusters(true_labels, cluster_ids):
    """Relabel every cluster with the most frequent true class of its members.

    Cluster ids are arbitrary, so comparing them to class labels directly
    scores a good clustering as wrong. Two clusters may map to the same class
    when they share a majority class.
    """
    true_labels = np.asarray(true_labels)
    cluster_ids = np.asarray(cluster_ids)
    aligned = np.empty_like(cluster_ids)
    for cluster in np.unique(cluster_ids):
        members = cluster_ids == cluster
        aligned[members] = np.bincount(true_labels[members]).argmax()
    return aligned


iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
model = KMeans(n_clusters=3)
model.fit(X)
kmeans_labels = _align_clusters(y.Targets, model.labels_)
plt.figure(figsize=(14,7))
colormap = np.array(['red', 'lime', 'black'])
# Plot the Original Classifications
# The three panels share a 1x3 grid so that none of them overlaps another.
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# Plot the Models Classifications
plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[kmeans_labels], s=40)
plt.title('K Mean Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of K-Mean: ',sm.accuracy_score(y.Targets, kmeans_labels))
print('The Confusion matrixof K-Mean: ',sm.confusion_matrix(y.Targets, kmeans_labels))
# Standardise the features before fitting the Gaussian mixture (EM).
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = _align_clusters(y.Targets, gmm.predict(xs))
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of EM: ',sm.accuracy_score(y.Targets, y_gmm))
print('The Confusion matrix of EM: ',sm.confusion_matrix(y.Targets, y_gmm))
plt.show()
output
The accuracy score of K-Mean: 0.24
The Confusion matrixof K-Mean: [[ 0 50 0]
[48 0 2]
[14 0 36]]
The accuracy score of EM: 0.3333333333333333
The Confusion matrix of EM: [[ 0 50 0]
[45 0 5]
[ 0 0 50]]
8. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set.
Print both correct and wrong predictions. Java/Python ML library classes can be used for this
problem.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
iris_dataset=load_iris()
# random_state=0 makes the train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset["data"], iris_dataset["target"], random_state=0)
kn = KNeighborsClassifier()
kn.fit(X_train, y_train)
prediction = kn.predict(X_test)
print("ACCURACY:"+str(kn.score(X_test, y_test)))
target_names = iris_dataset.target_names
# One line per test sample: the predicted class next to the true class.
for pred,actual in zip(prediction,y_test):
    print("prediction is "+str(target_names[pred])+",actual is"+str(target_names[actual]))
output
ACCURACY:0.9736842105263158
prediction is virginica,actual isvirginica
prediction is versicolor,actual isversicolor
prediction is setosa,actual issetosa
prediction is virginica,actual isvirginica
prediction is setosa,actual issetosa
prediction is virginica,actual isvirginica
.....................................
9. Implement and demonstrate the working of SVM algorithm for classification.
# Step 1: Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
# Step 2: Prepare the data
# Generate synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 3: Train the SVM model
svm_model = SVC(kernel='linear', random_state=42) # Linear SVM
svm_model.fit(X_train, y_train)
# Step 4: Evaluate the model
# Make predictions on the test set
y_pred = svm_model.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))
output
Accuracy: 0.87
Classification Report:
precision recall f1-score support
0 0.83 0.91 0.87 93
1 0.92 0.83 0.87 107
accuracy 0.87 200
macro avg 0.87 0.87 0.87 200
weighted avg 0.87 0.87 0.87 200
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix of *point* against *xmat*.

    Args:
        point: The query row (1 x n).
        xmat: Training inputs, one row per sample (m x n).
        k: Bandwidth; smaller values make the weights fall off faster.

    Returns:
        An m x m ``numpy.matrix`` whose j-th diagonal entry is
        ``exp(-||point - xmat[j]||^2 / (2 * k**2))``; all other entries are 0.
    """
    # Work on the xmat argument (not a module-level X) and use np.asmatrix,
    # which is available on NumPy releases that no longer provide np.mat.
    xmat = np.asmatrix(xmat)
    point = np.asmatrix(point)
    m,n = np.shape(xmat)
    weights = np.asmatrix(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        # diff*diff.T is a 1x1 matrix; [0,0] extracts the squared distance.
        weights[j,j] = np.exp((diff*diff.T)[0,0]/(-2.0*k**2))
    return weights
def localWeight(point, xmat, ymat, k):
    """Return the locally weighted least-squares coefficients at *point*.

    Solves ``(X^T W X)^-1 X^T W y`` where ``W`` is the weight matrix that
    ``kernel`` returns for *point*.

    Args:
        point: The query row (1 x n).
        xmat: Training inputs (m x n).
        ymat: Training targets as a single row (1 x m).
        k: Kernel bandwidth.

    Returns:
        The n x 1 coefficient matrix.
    """
    # Use the xmat argument rather than a module-level X so the function
    # works for whatever data it is handed.
    xmat = np.asmatrix(xmat)
    ymat = np.asmatrix(ymat)
    wei = kernel(point,xmat,k)
    W = (xmat.T*(wei*xmat)).I*(xmat.T*(wei*ymat.T))
    return W
def localWeightRegression(xmat, ymat, k):
    """Predict a value for every row of *xmat* with locally weighted regression.

    Args:
        xmat: Training inputs (m x n); each row is also used as a query point.
        ymat: Training targets as a single row (1 x m).
        k: Kernel bandwidth passed on to ``localWeight``.

    Returns:
        A 1-D array of m predictions, in the row order of *xmat*.
    """
    xmat = np.asmatrix(xmat)
    m,n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        # The product is a 1x1 matrix; take its single element explicitly
        # instead of relying on implicit conversion to a scalar.
        ypred[i] = (xmat[i]*localWeight(xmat[i],xmat,ymat,k))[0,0]
    return ypred
# NOTE(review): "[Link]" is an extraction placeholder for the CSV file name,
# which cannot be recovered from this listing -- substitute it.
data = pd.read_csv('[Link]')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
# Row matrices (1 x m); np.asmatrix replaces np.mat, which newer NumPy
# releases no longer provide.
mbill = np.asmatrix(bill)
mtip = np.asmatrix(tip)
m= np.shape(mbill)[1]
# Prepend a column of ones so the model has an intercept term.
one = np.asmatrix(np.ones(m))
X = np.hstack((one.T,mbill.T))
# NOTE(review): with a bandwidth of 0.5 and widely spaced bill values the
# weighted normal matrix may be close to singular -- confirm on the real data.
ypred = localWeightRegression(X,mtip,0.5)
# Sort by bill amount so the fitted curve is drawn left to right.
SortIndex = X[:,1].argsort(0)
xsort = X[SortIndex][:,0]
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(bill,tip, color='green')
ax.plot(xsort[:,1],ypred[SortIndex], color = 'red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
csv
total_bill tip
50 12
30 7.5
60 13
40 8.5
65 15
20 6
80 18