The document contains multiple Python scripts implementing various machine learning algorithms, including the Candidate Elimination algorithm, ID3 decision tree, neural networks, Naive Bayes classifier, and a Bayesian network for heart disease prediction. Each script includes data loading, preprocessing, model training, and evaluation steps. The scripts demonstrate practical applications of machine learning techniques on different datasets.


1)

import numpy as np
import pandas as pd

# Load the data
data = pd.read_csv('finds1.csv')
concepts = np.array(data.iloc[:, :-1])  # All columns except the last
target = np.array(data.iloc[:, -1])     # Last column as target

def learn(concepts, target):
    # Initialize specific and general hypotheses
    specific_h = concepts[0].copy()
    print("Initialization of specific_h and general_h:")
    print(f"Specific_h: {specific_h}")

    # Initialize general_h with '?' placeholders
    general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    print(f"General_h: {general_h}")

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Positive example: generalize specific_h and relax general_h
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            # Negative example: specialize general_h
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print(f"Step {i + 1}")
        print(f"Specific_h: {specific_h}")
        print(f"General_h: {general_h}")

    # Remove any general hypotheses that are completely '?'
    indices = [i for i, val in enumerate(general_h) if val == ['?' for _ in range(len(specific_h))]]
    for i in indices:
        general_h.remove(['?' for _ in range(len(specific_h))])

    return specific_h, general_h

# Run the learning algorithm
specific_h, general_h_final = learn(concepts, target)

# Print final results
print("\nFinal Specific_h:")
print(specific_h)
print("\nFinal General_h:")
print(general_h_final)
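The script reads 'finds1.csv' with pandas' default header handling, so the first row is treated as column names, every remaining row is a training example, and the last column is the yes/no target. A hypothetical file in the spirit of the classic EnjoySport data (the real file's columns and values may differ) would look like:

sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes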
2)

import pandas as pd
import numpy as np

# Load dataset
dataset = pd.read_csv('playtennis.csv', names=['outlook', 'temperature', 'humidity', 'wind', 'class'])

# Entropy function
def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy_value = np.sum([-(counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                            for i in range(len(elements))])
    return entropy_value

# Information Gain function
def InfoGain(data, split_attribute_name, target_name="class"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_Entropy = np.sum([(counts[i] / np.sum(counts)) *
                               entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
                               for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain

# ID3 Algorithm
def ID3(data, Originaldata, features, target_attribute_name="class", parent_node_class=None):
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    elif len(data) == 0:
        return np.unique(Originaldata[target_attribute_name])[
            np.argmax(np.unique(Originaldata[target_attribute_name], return_counts=True)[1])]
    elif len(features) == 0:
        return parent_node_class
    else:
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, Originaldata, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

# Run ID3
tree = ID3(dataset, dataset, dataset.columns[:-1].tolist())
print("\nDecision Tree:\n", tree)

3)
import numpy as np

# Input and Output Data
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)

# Normalize Input and Output Data
X = X / np.amax(X, axis=0)  # Feature scaling
y = y / 100                 # Normalizing output values

# Sigmoid Activation Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of Sigmoid Function
def derivative_sigmoid(x):
    return x * (1 - x)

# Initialize Neural Network Parameters
epochs = 7000       # Number of training iterations
lr = 0.1            # Learning rate
input_neurons = 2   # Input layer neurons
hidden_neurons = 3  # Hidden layer neurons
output_neurons = 1  # Output layer neurons

# Initialize Weights and Biases
wh = np.random.uniform(size=(input_neurons, hidden_neurons))     # Weights from Input to Hidden layer
bh = np.random.uniform(size=(1, hidden_neurons))                 # Bias for Hidden layer
wout = np.random.uniform(size=(hidden_neurons, output_neurons))  # Weights from Hidden to Output layer
bout = np.random.uniform(size=(1, output_neurons))               # Bias for Output layer

# Training the Neural Network
for i in range(epochs):
    # Forward Propagation
    hinp = np.dot(X, wh) + bh                 # Hidden layer weighted sum
    hlayer_act = sigmoid(hinp)                # Activation function at hidden layer
    outinp = np.dot(hlayer_act, wout) + bout  # Output layer weighted sum
    output = sigmoid(outinp)                  # Activation function at output layer

    # Backpropagation
    EO = y - output                              # Error at output layer
    outgrad = derivative_sigmoid(output)         # Output layer gradient
    d_output = EO * outgrad                      # Delta output
    EH = d_output.dot(wout.T)                    # Error at hidden layer
    hiddengrad = derivative_sigmoid(hlayer_act)  # Hidden layer gradient
    d_hiddenlayer = EH * hiddengrad              # Delta hidden layer

    # Updating Weights and Biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

# Print Final Results
print("Input:\n", X)
print("Actual Output:\n", y)
print("Predicted Output:\n", output)

4)

import numpy as np

# Input and Output Data
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)

# Normalize Input and Output Data
X = X / np.amax(X, axis=0)  # Feature scaling
y = y / 100                 # Normalizing output values

# Sigmoid Activation Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of Sigmoid Function
def derivative_sigmoid(x):
    return x * (1 - x)

# Initialize Neural Network Parameters
epochs = 7000       # Number of training iterations
lr = 0.1            # Learning rate
input_neurons = 2   # Input layer neurons
hidden_neurons = 3  # Hidden layer neurons
output_neurons = 1  # Output layer neurons

# Initialize Weights and Biases
wh = np.random.uniform(size=(input_neurons, hidden_neurons))     # Weights from Input to Hidden layer
bh = np.random.uniform(size=(1, hidden_neurons))                 # Bias for Hidden layer
wout = np.random.uniform(size=(hidden_neurons, output_neurons))  # Weights from Hidden to Output layer
bout = np.random.uniform(size=(1, output_neurons))               # Bias for Output layer

# Training the Neural Network
for i in range(epochs):
    # Forward Propagation
    hinp = np.dot(X, wh) + bh                 # Hidden layer weighted sum
    hlayer_act = sigmoid(hinp)                # Activation function at hidden layer
    outinp = np.dot(hlayer_act, wout) + bout  # Output layer weighted sum
    output = sigmoid(outinp)                  # Activation function at output layer

    # Backpropagation
    EO = y - output                              # Error at output layer
    outgrad = derivative_sigmoid(output)         # Output layer gradient
    d_output = EO * outgrad                      # Delta output
    EH = d_output.dot(wout.T)                    # Error at hidden layer
    hiddengrad = derivative_sigmoid(hlayer_act)  # Hidden layer gradient
    d_hiddenlayer = EH * hiddengrad              # Delta hidden layer

    # Updating Weights and Biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

# Print Final Results
print("Input:\n", X)
print("Actual Output:\n", y)
print("Predicted Output:\n", output)

5)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load dataset
msg = pd.read_csv('naivetext1.csv', names=['message', 'label'])

# Print dataset dimensions
print('The dimensions of the dataset:', msg.shape)

# Map labels to numerical values
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})

# Split into input (X) and output (y)
X = msg.message
y = msg.labelnum

# Train-test split (with fixed random state for reproducibility)
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

# Print shapes of splits
print("xtest.shape:", xtest.shape)
print("xtrain.shape:", xtrain.shape)
print("ytest.shape:", ytest.shape)
print("ytrain.shape:", ytrain.shape)

# Convert text to numerical features using CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)

# Train Naïve Bayes model
clf = MultinomialNB().fit(xtrain_dtm, ytrain)

# Predictions on test data
predicted_test = clf.predict(xtest_dtm)

# Accuracy metrics
print("Accuracy of the classifier:", metrics.accuracy_score(ytest, predicted_test))
print("Confusion Matrix:\n", metrics.confusion_matrix(ytest, predicted_test))
print("Recall:", metrics.recall_score(ytest, predicted_test))
print("Precision:", metrics.precision_score(ytest, predicted_test))

6)

import csv
import random
import math

# Load the CSV file into a list of float-valued rows
def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

# Split the dataset into train and test sets according to splitRatio
def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

# Group the instances belonging to each class (the class is the last attribute)
def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

# Compute (mean, stdev) for every attribute, dropping the class column
def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

# Gaussian (normal distribution) probability density for a single attribute value
def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

# Multiply the per-attribute probabilities for each class
def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

# Predict the class with the highest probability
def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = '5data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    summaries = summarizeByClass(trainingSet)
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy of the classifier is: {0}%'.format(accuracy))

main()
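As a quick sanity check of the Gaussian density helper, the likelihood of an attribute value of 71.5 under a class whose attribute has mean 73 and standard deviation 6.2 comes out to roughly 0.062; these numbers are illustrative, not taken from 5data.csv:

print(calculateProbability(71.5, 73.0, 6.2))  # prints ≈ 0.0624, the per-attribute factor multiplied inside calculateClassProbabilities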

7)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load dataset
msg = pd.read_csv('naivetext1.csv', names=['message', 'label'])
print('The dimensions of the dataset:', msg.shape)

# Convert labels to numerical values
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum

# Split into training and testing sets
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data to numerical features using CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)

# Train Naive Bayes model
clf = MultinomialNB().fit(xtrain_dtm, ytrain)

# Predict on test data
predicted = clf.predict(xtest_dtm)

# Accuracy metrics
print('Accuracy of the classifier is:', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix:')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall:', metrics.recall_score(ytest, predicted))
print('Precision:', metrics.precision_score(ytest, predicted))
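Once the classifier is trained, the same fitted CountVectorizer can turn new text into features for prediction. The sentence below is made up purely for illustration:

new_msg = ["This was an amazing experience"]  # hypothetical unseen message
new_dtm = count_vect.transform(new_msg)       # reuse the vocabulary learned from xtrain
print("Prediction (1 = pos, 0 = neg):", clf.predict(new_dtm)[0])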

8)

import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Read Cleveland Heart Disease data
heartDisease = pd.read_csv('heart.csv')
heartDisease = heartDisease.replace('?', np.nan)

# Display the data
print('Few examples from the dataset are given below:')
print(heartDisease.head())

# Define Bayesian Network Model
model = BayesianNetwork([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol')
])

print('\nLearning CPD using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

print('\nInference with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Query 1: Probability of Heart Disease given Age=30
print('\n1. Probability of HeartDisease given Age=30')
q1 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 30})
print(q1)

# Query 2: Probability of Heart Disease given Cholesterol=100
print('\n2. Probability of HeartDisease given Cholesterol=100')
q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q2)
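After fitting, pgmpy can also show the conditional probability table learned for any node via get_cpds; printing the target node's CPD is a quick way to inspect what Maximum Likelihood estimation produced (the table may be large, depending on how many states the parent variables in heart.csv have):

print(model.get_cpds('heartdisease'))  # learned CPD of the target node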

9)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

# Load Data
X = pd.read_csv("kmeansdata.csv")

# Extract features
x1 = X['Distance_Feature'].values
x2 = X['Speeding_Feature'].values
X = np.column_stack((x1, x2))

# Plot dataset
plt.figure()
plt.xlim([0, 100])
plt.ylim([1, 50])
plt.title('Dataset')
plt.scatter(x1, x2)
plt.show()

# Gaussian Mixture Model (GMM)
gmm = GaussianMixture(n_components=3)
gmm.fit(X)
em_predictions = gmm.predict(X)
print("EM Predictions:\n", em_predictions)
print("Means:\n", gmm.means_)
print("\nCovariances:\n", gmm.covariances_)

# Plot GMM Clusters
plt.title('Expectation Maximization')
plt.scatter(X[:, 0], X[:, 1], c=em_predictions, s=50)
plt.show()

# K-Means Clustering
kmeans = KMeans(n_clusters=3, n_init=10)
kmeans.fit(X)
print("Cluster Centers:\n", kmeans.cluster_centers_)
print("Cluster Labels:\n", kmeans.labels_)

# Plot K-Means Clusters
plt.title('K-Means Clustering')
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, s=50)
plt.show()
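One simple way to compare the two clusterings numerically, beyond eyeballing the plots, is the silhouette score from scikit-learn, where values nearer 1 indicate tighter, better-separated clusters. This comparison is an optional addition to the original script:

from sklearn.metrics import silhouette_score

print("Silhouette (GMM):    ", silhouette_score(X, em_predictions))
print("Silhouette (K-Means):", silhouette_score(X, kmeans.labels_))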

10)

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.datasets import load_iris

# Load dataset
iris = load_iris()
df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
X = df
y = iris['target']

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train KNN classifier
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)

# Print predictions alongside actual values
print("\n-----------------------------------")
print(f"{'Actual Label':<25}{'Predicted Label':<25}{'Result':<25}")
print("-----------------------------------")
for i in range(len(y_test)):
    result = "Correct" if y_test[i] == y_pred[i] else "Wrong"
    print(f"{y_test[i]:<25}{y_pred[i]:<25}{result:<25}")
print("--------------------------------------")

# Confusion Matrix
print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))
print("---------------------------------------")

# Classification Report
print("\nClassification Report:\n", metrics.classification_report(y_test, y_pred))
print("---------------------------------------")

# Accuracy of the classifier (accuracy_score returns a fraction, so scale to a percentage)
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f'Accuracy of the Classifier is {accuracy * 100:.2f}%')
print("------------------------------")

11)

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Kernel function: diagonal weight matrix for one query point
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2 * k**2))
    return weights

# Locally weighted regression coefficients for one query point
def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

# Predict every point with its own locally weighted model
def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

# Load dataset
data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# Build the design matrix X with a bias column of ones
mbill = np.mat(bill)
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))

# Apply locally weighted regression
ypred = localWeightRegression(X, mtip, 0.5)

# Sort by bill amount so the fitted curve plots smoothly
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('tip')
plt.show()

12)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Function to calculate kernel weights
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))  # Identity matrix
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2 * k**2))
    return weights

# Function to compute locally weighted regression coefficients
def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    # ymat is already a column vector here, so no transpose is needed
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat))
    return W

# Function to perform local weighted regression
def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

# Load dataset
data = pd.read_csv("10-dataset.csv")  # Ensure this file exists in the working directory
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# Convert to column matrices
mbill = np.mat(bill).T
mtip = np.mat(tip).T

# Construct X matrix with an additional ones column
m = np.shape(mbill)[0]
one = np.mat(np.ones(m)).T
X = np.hstack((one, mbill))

# Apply locally weighted regression
ypred = localWeightRegression(X, mtip, 0.5)

# Sorting for visualization
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

# Plot the data points and regression line
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green', label="Data Points")
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=2, label="LOWESS Fit")
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()
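The bandwidth k controls how local the fit is: a small k lets the curve follow individual points, while a larger k smooths it toward an ordinary least-squares line. An optional comparison, reusing the variables defined above:

ypred_wide = localWeightRegression(X, mtip, 2.0)  # same data, wider bandwidth
plt.scatter(bill, tip, color='green')
plt.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=2, label='k = 0.5')
plt.plot(xsort[:, 1], ypred_wide[SortIndex], color='blue', linewidth=2, label='k = 2.0')
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()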
