DATA MINING & WEB ALGORITHMS LAB
Assignment: Week 5
Ans 1.
import math
import pandas as pd
def distance(u1, u2):
    """Return the Euclidean distance between two equal-length numeric vectors.

    Coordinates are obtained by iterating over ``u1`` and ``u2`` instead of
    subscripting them with integer positions, so label-indexed containers
    (for example a pandas row whose index holds column names) are handled
    the same way as plain lists and tuples.

    Args:
        u1: Sized iterable of numbers (first point).
        u2: Sized iterable of numbers (second point).

    Returns:
        The square root of the summed squared coordinate differences;
        ``0.0`` when both vectors are empty.

    Raises:
        ValueError: If the two vectors do not have the same length.
    """
    if len(u1) != len(u2):
        raise ValueError(
            f"vectors must have the same length, got {len(u1)} and {len(u2)}"
        )
    # A separate accumulator name avoids shadowing the function itself.
    squared_sum = sum((a - b) ** 2 for a, b in zip(u1, u2))
    return math.sqrt(squared_sum)
def fun(data):
    """Build the pairwise Euclidean distance matrix of the rows of ``data``.

    Rows are addressed by position, so the result does not depend on the
    labels stored in the frame's index, and every row (including the first
    one) takes part in the computation.

    Args:
        data: pandas DataFrame whose rows are the points to compare.
            Assumes every column is numeric -- TODO confirm against the
            CSV that is actually loaded.

    Returns:
        A list of ``len(data)`` lists where ``m[i][j]`` is the distance
        between the i-th and j-th rows. The diagonal is ``0.0``.
    """
    # Plain Python lists keep the rows integer-subscriptable for `distance`.
    rows = data.values.tolist()
    n = len(rows)
    m = [[0.0] * n for _ in range(n)]
    for i in range(n):
        # The distance is symmetric, so each unordered pair is computed once.
        for j in range(i + 1, n):
            m[i][j] = m[j][i] = distance(rows[i], rows[j])
    return m
# Ans 1 driver: read the dataset from '1.csv' in the working directory,
# compute the pairwise distance matrix of its rows and print it.
data = pd.read_csv('1.csv')
m = fun(data)
print(m)
Output:
-
Ans 2.
import pandas as pd
import numpy as np
import math
import scipy.spatial.distance
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote)."""

    def __init__(self, k):
        """Store the neighbourhood size.

        Args:
            k: Number of nearest training samples that vote on a prediction.

        Raises:
            ValueError: If ``k`` is smaller than 1, because an empty
                neighbourhood cannot produce a vote.
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        self.k = k

    def fit(self, X_train, y_train):
        """Memorise the training set.

        Args:
            X_train: Training samples, expected as a 2-D array-like of
                shape (n_samples, n_features).
            y_train: Labels, one per training sample.
        """
        # Arrays allow vectorised distances and fancy indexing in predict().
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Args:
            X_test: Iterable of samples with the same number of features
                as the training data.

        Returns:
            numpy array holding one predicted label per test sample.
        """
        y_pred = []
        for x in np.asarray(X_test):
            distances = np.linalg.norm(self.X_train - x, axis=1)
            # A stable sort keeps equidistant neighbours in training order.
            nearest = np.argsort(distances, kind="stable")[:self.k]
            votes = Counter(self.y_train[nearest].tolist())
            # Counter keeps insertion order, so a tied vote is resolved in
            # favour of the label of the closest neighbour.
            y_pred.append(votes.most_common(1)[0][0])
        return np.array(y_pred)

    def score(self, X_test, y_test):
        """Return the fraction of test samples that are classified correctly.

        Args:
            X_test: Samples to classify.
            y_test: True labels, one per sample.

        Returns:
            Accuracy in the range [0, 1].
        """
        predictions = self.predict(X_test)
        return (predictions == np.asarray(y_test)).sum() / len(y_test)
class NaiveBayes:
    """Gaussian naive Bayes classifier.

    Each feature is modelled per class by an independent normal
    distribution. Prediction compares log-posteriors, so the score does not
    underflow to zero when many small densities are combined.
    """

    def fit(self, X, y):
        """Estimate the class priors and the per-class feature statistics.

        Args:
            X: Training samples, expected as a 2-D array-like of shape
                (n_samples, n_features).
            y: Class labels, one per training sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.classes = np.unique(y)
        self.priors = {c: np.mean(y == c) for c in self.classes}
        self.means = {c: np.mean(X[y == c], axis=0) for c in self.classes}
        # The small constant keeps the variance of a constant feature
        # strictly positive so it can be used as a divisor.
        self.variances = {
            c: np.var(X[y == c], axis=0) + 1e-6 for c in self.classes
        }

    def _joint_log_likelihood(self, X, c):
        """Return log(prior) + log(likelihood) of class ``c`` for each row."""
        variance = self.variances[c]
        log_density = (
            -0.5 * np.log(2 * np.pi * variance)
            - (X - self.means[c]) ** 2 / (2 * variance)
        )
        return np.log(self.priors[c]) + log_density.sum(axis=-1)

    def predict(self, X_test):
        """Predict the most probable class for every row of ``X_test``.

        Args:
            X_test: Samples to classify, shape (n_samples, n_features).

        Returns:
            numpy array holding one predicted class label per sample. When
            several classes score equally, the first one in
            ``self.classes`` is returned.
        """
        X_test = np.asarray(X_test, dtype=float)
        if X_test.size == 0:
            return np.array([])
        scores = np.column_stack(
            [self._joint_log_likelihood(X_test, c) for c in self.classes]
        )
        return self.classes[np.argmax(scores, axis=1)]

    def score(self, X_test, y_test):
        """Return the fraction of test samples that are classified correctly.

        Args:
            X_test: Samples to classify.
            y_test: True labels, one per sample.

        Returns:
            Accuracy in the range [0, 1].
        """
        predictions = self.predict(X_test)
        return (predictions == np.asarray(y_test)).sum() / len(y_test)
# Ans 2 driver: evaluate both classifiers on a held-out part of the iris data.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=42, test_size=0.2
)

# KNN
knn = KNN(3)
knn.fit(X_train, y_train)
knn_r = knn.predict(X_test)
accuracyknn = knn.score(X_test, y_test)
print(f"KNN Accuracy: {accuracyknn:.4f}")

# Naive Bayes
nb = NaiveBayes()
nb.fit(X_train, y_train)
nb_r = nb.predict(X_test)
accuracynb = nb.score(X_test, y_test)
print(f"Naive Bayes Accuracy: {accuracynb:.4f}")
Output:
-
# Ans 3.
class DecisionTreeID3:
    """ID3-style decision tree for categorical features.

    The fitted tree is stored in ``self.tree`` either as a bare label (a
    leaf) or as a nested dict of the form
    ``{feature_index: {feature_value: subtree_or_label}}``.
    """

    def __init__(self, depth=3):
        """Create an unfitted tree.

        Args:
            depth: Maximum number of splits along any root-to-leaf path.
        """
        self.depth = depth
        # Majority training label; filled in by fit() and used as the
        # answer for feature values that were never seen during training.
        self.default_label = None

    def _entropy(self, y):
        """Return the Shannon entropy (in bits) of the labels ``y``."""
        _, counts = np.unique(y, return_counts=True)
        # Counts reported by np.unique are always positive, so the
        # logarithm needs no smoothing term.
        prob = counts / len(y)
        return -np.sum(prob * np.log2(prob))

    def _info_gain(self, X, y, feature_idx):
        """Return the entropy reduction from splitting on ``feature_idx``."""
        column = X[:, feature_idx]
        values, counts = np.unique(column, return_counts=True)
        weighted_entropy = sum(
            (count / len(X)) * self._entropy(y[column == value])
            for value, count in zip(values, counts)
        )
        return self._entropy(y) - weighted_entropy

    def _best_split(self, X, y):
        """Return the index of the feature with the highest information gain."""
        gains = [self._info_gain(X, y, i) for i in range(X.shape[1])]
        return int(np.argmax(gains))

    def _majority_label(self, y):
        """Return the most frequent label in ``y`` (first in sorted order on ties)."""
        values, counts = np.unique(y, return_counts=True)
        return values[np.argmax(counts)]

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``X`` / ``y``.

        Args:
            X: 2-D numpy array of feature values for the current node.
            y: 1-D numpy array of the matching labels.
            depth: Number of splits already made above this node.

        Returns:
            A label when the node is pure or the depth limit is reached,
            otherwise a nested dict describing the split.
        """
        if np.unique(y).size == 1:
            return y[0]
        if depth >= self.depth:
            # An impure node at the depth limit is labelled by majority
            # vote rather than by whichever sample happens to come first.
            return self._majority_label(y)
        feature_idx = self._best_split(X, y)
        column = X[:, feature_idx]
        branches = {}
        for value in np.unique(column):
            mask = column == value
            branches[value] = self._build_tree(X[mask], y[mask], depth + 1)
        return {feature_idx: branches}

    def fit(self, X, y):
        """Learn the tree from training data.

        Args:
            X: 2-D array-like of categorical feature values, shape
                (n_samples, n_features).
            y: Labels, one per training sample.

        Raises:
            ValueError: If no training samples are supplied.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        self.default_label = self._majority_label(y)
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        """Return a numpy array with one predicted label per row of ``X``."""
        return np.array([self._predict_one(x, self.tree) for x in np.asarray(X)])

    def _predict_one(self, x, tree):
        """Walk ``tree`` for the single sample ``x`` and return its label.

        A feature value with no matching branch yields
        ``self.default_label`` (the majority training label once ``fit``
        has run), so the result has the same type as the real labels.
        """
        while isinstance(tree, dict):
            feature_idx = next(iter(tree))
            branches = tree[feature_idx]
            value = x[feature_idx]
            if value not in branches:
                return self.default_label
            tree = branches[value]
        return tree

    def score(self, X_test, y_test):
        """Return the fraction of rows of ``X_test`` predicted as ``y_test``."""
        predictions = self.predict(X_test)
        return (predictions == np.asarray(y_test)).mean()
# Ans 3 driver: fit the tree and evaluate it on three hand-written samples.
tree = DecisionTreeID3(depth=3)
# NOTE(review): X and y (the training features and labels) are not defined
# in the visible part of this file; they must be loaded before this line.
tree.fit(X, y)

test_samples = [
    {'Branch': 'CSE', 'CGPA': 'Low', 'Gamer': 'Yes',
     'Movie Fanatic': 'No', 'Committed?': 'Yes'},
    {'Branch': 'ECE', 'CGPA': 'High', 'Gamer': 'Yes',
     'Movie Fanatic': 'No', 'Committed?': 'No'},
    {'Branch': 'MECH', 'CGPA': 'Low', 'Gamer': 'No',
     'Movie Fanatic': 'Yes', 'Committed?': 'No'},
]
test_df = pd.DataFrame(test_samples)
# Presumably the remaining columns are in the same order as the columns of
# the training X -- verify, because the tree addresses features by position.
X_test = test_df.drop(columns='Committed?').values
y_test = test_df['Committed?'].values

predictions = tree.predict(X_test)
print(f"Predictions: {predictions}")
print(f"True Labels: {y_test}")
accuracy = tree.score(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")
Output:
-
----------------------FINISHED-----------------------
Submitted By-:
Name: PRAKHAR MADNANI
Enrol. No.-: 22104057