1)
import numpy as np
import pandas as pd

data = pd.read_csv('finds1.csv')
concepts = np.array(data.iloc[:, :-1])
target = np.array(data.iloc[:, -1])

# Start with the first positive example as the specific boundary
# and the all-'?' hypothesis as the general boundary.
specific_h = concepts[0].copy()
general_h = [['?' for _ in range(len(specific_h))] for _ in range(len(specific_h))]
print(f"Specific_h: {specific_h}")
print(f"General_h: {general_h}")

for i, h in enumerate(concepts):
    if target[i] == "yes":
        # Positive example: generalize specific_h wherever it disagrees
        for x in range(len(specific_h)):
            if h[x] != specific_h[x]:
                specific_h[x] = '?'
                general_h[x][x] = '?'
    if target[i] == "no":
        # Negative example: specialize general_h on the attributes
        # where specific_h still constrains the concept
        for x in range(len(specific_h)):
            if h[x] != specific_h[x]:
                general_h[x][x] = specific_h[x]
            else:
                general_h[x][x] = '?'
    print(f"Step {i + 1}")
    print(f"Specific_h: {specific_h}")
    print(f"General_h: {general_h}")

# Discard the rows of general_h that stayed fully general
indices = [i for i, val in enumerate(general_h) if val == ['?'] * len(specific_h)]
general_h_final = [g for i, g in enumerate(general_h) if i not in indices]
print("\nFinal Specific_h:")
print(specific_h)
print("\nFinal General_h:")
print(general_h_final)
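The script assumes finds1.csv holds one training example per row, attribute values first and the yes/no target in the last column. A minimal sketch of that layout, using the classic EnjoySport attributes as an assumed stand-in for the real file:

sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes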
2)
import pandas as pd
import numpy as np

# Load dataset (filename assumed)
data = pd.read_csv('id3.csv')

# Entropy function
def entropy(target_col):
    values, counts = np.unique(target_col, return_counts=True)
    probs = counts / counts.sum()
    entropy_value = -np.sum(probs * np.log2(probs))
    return entropy_value

# Information gain of splitting `data` on `split_attribute`
def InfoGain(data, split_attribute, target_name):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute], return_counts=True)
    weighted_entropy = sum(
        (counts[i] / counts.sum()) * entropy(data[data[split_attribute] == vals[i]][target_name])
        for i in range(len(vals)))
    Information_Gain = total_entropy - weighted_entropy
    return Information_Gain

# ID3 Algorithm
def ID3(data, Originaldata, features, target_attribute_name, parent_node_class=None):
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    elif len(data) == 0:
        return np.unique(Originaldata[target_attribute_name])[
            np.argmax(np.unique(Originaldata[target_attribute_name], return_counts=True)[1])]
    elif len(features) == 0:
        return parent_node_class
    else:
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        remaining_features = [f for f in features if f != best_feature]
        for value in np.unique(data[best_feature]):
            sub_data = data[data[best_feature] == value]
            subtree = ID3(sub_data, Originaldata, remaining_features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

# Run ID3
tree = ID3(data, data, list(data.columns[:-1]), data.columns[-1])
print(tree)
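Because ID3 returns the tree as nested dicts of the form {feature: {value: subtree}}, a small helper (sketched here, not part of the original listing; it assumes every attribute value seen at prediction time also appeared in training) can classify an example by walking those dicts:

def classify(sample, tree):
    # Descend through {feature: {value: subtree}} nodes until a leaf label remains
    while isinstance(tree, dict):
        feature = next(iter(tree))
        tree = tree[feature][sample[feature]]
    return tree

# e.g. re-classify the first training row (with its class column dropped)
print(classify(data.iloc[0].drop(data.columns[-1]).to_dict(), tree))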
3)
import numpy as np

# Toy training data: two input features, one target, both normalized
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float) / 100
X = X / np.amax(X, axis=0)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivative_sigmoid(x):  # x is already a sigmoid output
    return x * (1 - x)

epochs, lr = 5000, 0.1
wh = np.random.uniform(size=(2, 3))    # input-to-hidden weights
wout = np.random.uniform(size=(3, 1))  # hidden-to-output weights
for i in range(epochs):
    # Forward Propagation
    hlayer_act = sigmoid(X.dot(wh))
    output = sigmoid(hlayer_act.dot(wout))
    # Backpropagation
    d_output = (y - output) * derivative_sigmoid(output)
    d_hiddenlayer = d_output.dot(wout.T) * derivative_sigmoid(hlayer_act)
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
print("Input:\n", X)
print("Actual Output:\n", y)
print("Predicted Output:\n", output)
5)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load dataset (filename and pos/neg label scheme assumed)
msg = pd.read_csv('naivetext.csv', names=['message', 'label'])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
print("xtest.shape:", xtest.shape)
print("xtrain.shape:", xtrain.shape)
print("ytest.shape:", ytest.shape)
print("ytrain.shape:", ytrain.shape)

# Bag-of-words document-term matrices
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted_test = clf.predict(xtest_dtm)

# Accuracy metrics
print('Accuracy:', metrics.accuracy_score(ytest, predicted_test))
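To inspect which tokens the bag-of-words model actually learned, the fitted vectorizer exposes its vocabulary (get_feature_names_out is the scikit-learn 1.x API; older releases used get_feature_names):

print(count_vect.get_feature_names_out()[:20])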
6)
import csv
import random
import math

def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # Every field is assumed numeric; the class label sits in the last column
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # (mean, stdev) per attribute column; drop the entry for the class column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    # Gaussian (normal) density of x under N(mean, stdev^2)
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            # Naive independence assumption: multiply per-attribute densities
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        # Keep the class with the highest probability
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = '5data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    summaries = summarizeByClass(trainingSet)
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))

main()
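loadCsv assumes 5data.csv is entirely numeric with the class label in the last column (this exercise is commonly run on the Pima Indians diabetes CSV), so a header row or any string field would make the float() conversion fail.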
7)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Load dataset (filename and labels assumed, as in program 5)
msg = pd.read_csv('naivetext.csv', names=['message', 'label'])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# Accuracy metrics
print('Accuracy:', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix:')
print(metrics.confusion_matrix(ytest, predicted))
8)
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

heartDisease = pd.read_csv('heart.csv')
print(heartDisease.head())

# Structure of the network over the heart-disease attributes
model = BayesianNetwork([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol')
])

# Learn the CPDs from the data, then set up exact inference
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
HeartDisease_infer = VariableElimination(model)

# Query 1: Probability of Heart Disease given age=37 (evidence value assumed here)
q1 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 37})
print(q1)

# Query 2: Probability of Heart Disease given Cholesterol=100
q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q2)
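These imports assume a recent pgmpy release: the model class was called BayesianModel before it was renamed BayesianNetwork, so older installations need the old import instead. The evidence values in the two queries must also be values that actually occur in the heart.csv columns.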
9)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans

# Load Data
X = pd.read_csv("kmeansdata.csv")

# Extract features
x1 = X['Distance_Feature'].values
x2 = X['Speeding_Feature'].values
X = np.column_stack((x1, x2))

# Plot dataset
plt.figure()
plt.xlim([0, 100])
plt.ylim([1, 50])
plt.title('Dataset')
plt.scatter(x1, x2)
plt.show()

# EM clustering via a Gaussian mixture model
gmm = GaussianMixture(n_components=3)
gmm.fit(X)
em_predictions = gmm.predict(X)
print("Means:\n", gmm.means_)
print("\nCovariances:\n", gmm.covariances_)
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=em_predictions)
plt.title('Expectation Maximization')
plt.show()

# K-Means Clustering
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_)
plt.title('K-Means')
plt.show()
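A quick numeric comparison of the two clusterings (an optional addition on top of the listing above, using scikit-learn's silhouette score) can be printed alongside the plots:

from sklearn.metrics import silhouette_score
print('EM silhouette:', silhouette_score(X, em_predictions))
print('K-Means silhouette:', silhouette_score(X, kmeans.labels_))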
10)
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Load dataset
iris = load_iris()
df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
X = df
y = iris['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)
print("\n-----------------------------------")
print(f"{'Actual':<25}{'Predicted':<25}{'Result':<25}")
print("-----------------------------------")
for i in range(len(y_test)):
    result = "Correct" if y_test[i] == y_pred[i] else "Wrong"
    print(f"{y_test[i]:<25}{y_pred[i]:<25}{result:<25}")
print("--------------------------------------")

# Confusion Matrix
print("Confusion Matrix:")
print(metrics.confusion_matrix(y_test, y_pred))
print("---------------------------------------")

# Classification Report
print("Classification Report:")
print(metrics.classification_report(y_test, y_pred))
print("---------------------------------------")
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("------------------------------")
11)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def kernel(point, xmat, k):
    # Diagonal weight matrix: points near the query get weights close to 1
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = float(np.exp(diff * diff.T / (-2.0 * k**2)))
    return weights

def localWeight(point, xmat, ymat, k):
    # Weighted least-squares solution around the query point
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = float(xmat[i] * localWeight(xmat[i], xmat, ymat, k))
    return ypred

data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
mbill = np.mat(bill)
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))

ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('tip')
plt.show()
12)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = float(np.exp(diff * diff.T / (-2.0 * k**2)))
    return weights

def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = float(xmat[i] * localWeight(xmat[i], xmat, ymat, k))
    return ypred

# Load dataset
data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# Convert to matrices (column vectors here, so localWeight uses ymat directly)
mbill = np.mat(bill).T
mtip = np.mat(tip).T
m = np.shape(mbill)[0]
one = np.mat(np.ones(m)).T
X = np.hstack((one, mbill))

ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green', label='data')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5, label='LWR fit')
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()
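The bandwidth k is the only tuning knob: small values weight only very close neighbours and give a wiggly curve, while large values make every point count almost equally, approaching plain linear regression. A quick comparison loop (an illustrative addition reusing the variables above):

for k in [0.1, 0.5, 1.0, 5.0]:
    ypred_k = localWeightRegression(X, mtip, k)
    plt.plot(xsort[:, 1], ypred_k[SortIndex], label=f'k={k}')
plt.scatter(bill, tip, color='green')
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()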