
DATA MINING & WEB ALGORITHMS LAB

Assignment: Week 5

Ans 1.
import math
import pandas as pd

def distance(u1, u2):
    # Euclidean distance between two equal-length vectors
    total = 0
    for i in range(len(u1)):
        total += (u1[i] - u2[i]) ** 2
    return math.sqrt(total)

def fun(data):
    # Pairwise distance matrix over all rows of the DataFrame
    # (loops start at 0 so the first row is included)
    m = [[0] * len(data) for _ in range(len(data))]
    for i in range(len(data)):
        for j in range(len(data)):
            m[i][j] = distance(data.iloc[i], data.iloc[j])
    return m

data = pd.read_csv('1.csv')
m = fun(data)
print(m)

Output:
-
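
As a quick cross-check, SciPy can produce the same pairwise distance matrix in a single vectorised call. This is only a sanity-check sketch, assuming (as the loop above does) that 1.csv contains purely numeric columns:

import pandas as pd
from scipy.spatial.distance import cdist

data = pd.read_csv('1.csv')
# cdist with the 'euclidean' metric returns the full n x n distance matrix
m_vec = cdist(data.values, data.values, metric='euclidean')
print(m_vec)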
Ans 2.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

class KNN:
    def __init__(self, k):
        self.k = k

    def fit(self, X_train, y_train):
        # Lazy learner: store the training set as-is
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        y_pred = []
        for x in X_test:
            # Euclidean distance from x to every training point
            distances = [np.linalg.norm(x - x_train) for x_train in self.X_train]
            # Indices of the k nearest neighbours, then a majority vote
            k_neighbors = np.argsort(distances)[:self.k]
            k_labels = [self.y_train[i] for i in k_neighbors]
            y_pred.append(max(set(k_labels), key=k_labels.count))
        return np.array(y_pred)

    def score(self, X_test, y_test):
        predictions = self.predict(X_test)
        return (predictions == y_test).sum() / len(y_test)

class NaiveBayes:
    def fit(self, X, y):
        # Class priors and per-feature Gaussian parameters for each class
        self.classes = np.unique(y)
        self.priors = {c: np.mean(y == c) for c in self.classes}
        self.means = {c: np.mean(X[y == c], axis=0) for c in self.classes}
        # The small constant keeps variances strictly positive
        self.variances = {c: np.var(X[y == c], axis=0) + 1e-6 for c in self.classes}

    def predict(self, X_test):
        y_pred = []
        for x in X_test:
            p = {}
            for c in self.classes:
                # Product of univariate Gaussian densities (naive independence assumption)
                likelihood = np.prod(
                    1 / np.sqrt(2 * np.pi * self.variances[c])
                    * np.exp(-(x - self.means[c]) ** 2 / (2 * self.variances[c])))
                p[c] = self.priors[c] * likelihood
            y_pred.append(max(p, key=p.get))
        return np.array(y_pred)

    def score(self, X_test, y_test):
        predictions = self.predict(X_test)
        return (predictions == y_test).sum() / len(y_test)

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=42, test_size=0.2)

# KNN
knn = KNN(3)
knn.fit(X_train, y_train)
knn_r = knn.predict(X_test)
accuracyknn = knn.score(X_test, y_test)
print(f"KNN Accuracy: {accuracyknn:.4f}")

# Naive Bayes
nb = NaiveBayes()
nb.fit(X_train, y_train)
nb_r = nb.predict(X_test)
accuracynb = nb.score(X_test, y_test)
print(f"Naive Bayes Accuracy: {accuracynb:.4f}")

Output:
-
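
As a sanity check (not part of the assignment), the two classifiers above can be compared against scikit-learn's built-in equivalents on the same train/test split; the accuracies should roughly agree:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Same k = 3 and the same split produced by train_test_split above
sk_knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
sk_nb = GaussianNB().fit(X_train, y_train)
print(f"sklearn KNN Accuracy: {sk_knn.score(X_test, y_test):.4f}")
print(f"sklearn NB Accuracy: {sk_nb.score(X_test, y_test):.4f}")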
Ans 3.
class DecisionTreeID3:
    def __init__(self, depth=3):
        self.depth = depth

    def _entropy(self, y):
        # H(y) = -sum(p * log2(p)); the epsilon guards against log2(0)
        values, counts = np.unique(y, return_counts=True)
        prob = counts / len(y)
        return -np.sum(prob * np.log2(prob + 1e-6))

    def _info_gain(self, X, y, feature_idx):
        # Gain = H(y) - sum over values v of P(v) * H(y | feature == v)
        total_entropy = self._entropy(y)
        values, counts = np.unique(X[:, feature_idx], return_counts=True)
        weighted_entropy = np.sum([
            (counts[i] / len(X)) * self._entropy(y[X[:, feature_idx] == values[i]])
            for i in range(len(values))])
        return total_entropy - weighted_entropy

    def _best_split(self, X, y):
        # Split on the feature with the highest information gain
        gains = [self._info_gain(X, y, i) for i in range(X.shape[1])]
        return np.argmax(gains)

    def _build_tree(self, X, y, depth=0):
        # Stop when the node is pure or the depth limit is reached
        if len(set(y)) == 1 or depth == self.depth:
            # Return the majority label at this node (y[0] would be arbitrary
            # when a depth-limited leaf still contains mixed classes)
            values, counts = np.unique(y, return_counts=True)
            return values[np.argmax(counts)]
        feature_idx = self._best_split(X, y)
        tree = {feature_idx: {}}
        for value in np.unique(X[:, feature_idx]):
            mask = X[:, feature_idx] == value
            tree[feature_idx][value] = self._build_tree(X[mask], y[mask], depth + 1)
        return tree

    def fit(self, X, y):
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        return np.array([self._predict_one(x, self.tree) for x in X])

    def _predict_one(self, x, tree):
        # Leaves are labels; internal nodes are {feature_idx: {value: subtree}}
        if not isinstance(tree, dict):
            return tree
        feature_idx = list(tree.keys())[0]
        # Feature values never seen during training fall back to a default of -1
        return self._predict_one(x, tree[feature_idx].get(x[feature_idx], -1))

    def score(self, X_test, y_test):
        predictions = self.predict(X_test)
        return (predictions == y_test).mean()

# X and y are assumed to hold the training features and labels of the
# "Committed?" dataset for this question (its loading code is not shown here)
tree = DecisionTreeID3(depth=3)
tree.fit(X, y)

test_samples = [
    {'Branch': 'CSE', 'CGPA': 'Low', 'Gamer': 'Yes', 'Movie Fanatic': 'No', 'Committed?': 'Yes'},
    {'Branch': 'ECE', 'CGPA': 'High', 'Gamer': 'Yes', 'Movie Fanatic': 'No', 'Committed?': 'No'},
    {'Branch': 'MECH', 'CGPA': 'Low', 'Gamer': 'No', 'Movie Fanatic': 'Yes', 'Committed?': 'No'}
]
test_df = pd.DataFrame(test_samples)
X_test = test_df.drop(columns='Committed?').values
y_test = test_df['Committed?'].values

predictions = tree.predict(X_test)
print(f"Predictions: {predictions}")
print(f"True Labels: {y_test}")
accuracy = tree.score(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

Output:
-
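
To make the entropy and information-gain computations concrete, here is a small hand-checkable sketch using the class above with a made-up toy feature: four labels split 50/50 have an entropy of about 1 bit, and a feature that separates the two classes perfectly has an information gain of about 1:

import numpy as np

y_toy = np.array(['Yes', 'Yes', 'No', 'No'])
X_toy = np.array([['A'], ['A'], ['B'], ['B']])  # toy feature that separates the classes

t = DecisionTreeID3(depth=1)
print(t._entropy(y_toy))              # ~1.0 bit for a 50/50 label split
print(t._info_gain(X_toy, y_toy, 0))  # ~1.0: the split removes all uncertainty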

----------------------FINISHED-----------------------

Submitted By:
Name: PRAKHAR MADNANI
Enrol. No.: 22104057
