Program-1
Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis
based on a given set of training data samples. Read the training data from a .CSV file.
import csv

print("Mohit Singh\n2101921530093")

# Start with the most specific hypothesis: every attribute constrained
hypo = ['%', '%', '%', '%', '%', '%']

data = []
with open('trainingdata.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        # keep only the positive examples (last column == "Yes")
        if row[-1].upper() == "YES":
            data.append(row)

print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

TotalExamples = len(data)
d = len(data[0]) - 1  # number of attributes (the last column is the target)

print("The steps of the Find-S algorithm are:\n", hypo)

# Initialise the hypothesis with the first positive example
hypo = data[0][:d]

# Generalise each attribute that disagrees with a later positive example
for i in range(TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

print("\nThe maximally specific Find-S hypothesis for the given training examples is:")
print(hypo)
CSV File:
sky,airTemp,humidity,wind,water,forecast,enjoySport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes
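For the file above, the hypothesis is initialised with the first positive example and generalised over the remaining two, so the final printed hypothesis should be:

['Sunny', 'Warm', '?', 'Strong', '?', '?']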
Program-2
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate Elimination algorithm to output a description of
the set of all hypotheses consistent with the training examples.
import numpy as np
import pandas as pd

print("Mohit Singh\n2101921530093")

# Loading data from a CSV file
data = pd.DataFrame(data=pd.read_csv('trainingdata.csv'))
print(data)

# Separating the concept features from the target
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)

# Copying the last column into a separate target array
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    '''
    learn() implements the learning step of the Candidate Elimination
    algorithm.
    Arguments:
        concepts - an array with all the feature values
        target   - an array with the corresponding output values
    '''
    # Initialise specific_h with the first instance from concepts.
    # .copy() makes sure a new array is created instead of just
    # pointing to the same memory location.
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for i in range(len(specific_h))]
                 for i in range(len(specific_h))]
    print(general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # Positive example: generalise specific_h where values disagree
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # Negative example: specialise general_h, leaving specific_h alone
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Find the rows that are still all '?', i.e. were never specialised
    indices = [i for i, val in enumerate(general_h)
               if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        # remove those rows from general_h
        general_h.remove(['?', '?', '?', '?', '?', '?'])

    # Return the final boundary sets
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")
Program-3
Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and
apply this knowledge to classify a new sample.
import numpy as np
import math
import csv

print("Mohit Singh\n2101921530093")

def read_data(filename):
    # read the header row as metadata and the remaining rows as training data
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute
def subtables(data, col, delete):
    # split `data` into one subtable per distinct value of column `col`
    subdict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    for x in range(items.shape[0]):
        subdict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                subdict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            # drop the split column from the subtable
            subdict[items[x]] = np.delete(subdict[items[x]], col, 1)
    return items, subdict
def entropy(S):
    items = np.unique(S)
    if items.size == 1:  # a pure set has zero entropy
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums
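# For example, on the labels ["Yes", "Yes", "No"] the entropy is
#   -(2/3)*log2(2/3) - (1/3)*log2(1/3) ~= 0.918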
def gain_ratio(data, col):
    items, subdict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = subdict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(subdict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)
    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)  # intrinsic value (split information)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv  # information gain divided by intrinsic value
def create_node(data, metadata):
    # if all examples share one label, return a leaf node with that answer
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node
    # otherwise split on the attribute with the highest gain ratio
    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, subdict = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(subdict[items[x]], metadata)
        node.children.append((items[x], child))
    return node
def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
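The original does not reproduce tennisdata.csv. The classic PlayTennis data set from Mitchell fits the code's assumptions (a header row followed by categorical attributes and a Yes/No target) and is a typical choice:

CSV File:
Outlook,Temperature,Humidity,Wind,PlayTennis
Sunny,Hot,High,Weak,No
Sunny,Hot,High,Strong,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Weak,Yes
Rain,Cool,Normal,Strong,No
Overcast,Cool,Normal,Strong,Yes
Sunny,Mild,High,Weak,No
Sunny,Cool,Normal,Weak,Yes
Rain,Mild,Normal,Weak,Yes
Sunny,Mild,Normal,Strong,Yes
Overcast,Mild,High,Strong,Yes
Overcast,Hot,Normal,Weak,Yes
Rain,Mild,High,Strong,No

The task also asks to classify a new sample, which print_tree does not do. A minimal sketch of such a helper, assuming the Node structure above and a sample supplied as a dict keyed by attribute name (branch values in deeper subtables are stored as bytes by subtables, hence the decode; the returned label may itself be bytes, e.g. b'Yes'):

def classify(node, sample):
    # walk the tree until a leaf (answer) node is reached
    if node.answer != "":
        return node.answer
    for value, child in node.children:
        # branch labels may be numpy bytes at deeper levels; normalise to str
        v = value.decode() if isinstance(value, bytes) else str(value)
        if v == sample[node.attribute]:
            return classify(child, sample)
    return None  # attribute value never seen during training

# e.g. classify(node, {"Outlook": "Sunny", "Temperature": "Cool",
#                      "Humidity": "High", "Wind": "Strong"})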
Program-4
Build an Artificial Neural Network by implementing the Backpropagation
algorithm and test the same using appropriate data sets.
import numpy as np
print("Mohit Singh\n2101921530093")
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # X = (hours sleeping, hours studying)
y = np.array(([92], [86], [89]), dtype=float)        # y = score on test

# scale units
X = X / np.amax(X, axis=0)  # divide each column of X by its maximum
y = y / 100                 # max test score is 100

class Neural_Network(object):
    def __init__(self):
        # Parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
        # Weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)   # (2x3) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)  # (3x1) weight matrix from hidden to output layer

    def forward(self, X):
        # forward propagation through the network
        self.z = np.dot(X, self.W1)         # dot product of X (input) and the first set of 2x3 weights
        self.z2 = self.sigmoid(self.z)      # activation function
        self.z3 = np.dot(self.z2, self.W2)  # dot product of hidden layer (z2) and the second set of 3x1 weights
        o = self.sigmoid(self.z3)           # final activation function
        return o

    def sigmoid(self, s):
        return 1 / (1 + np.exp(-s))  # activation function

    def sigmoidPrime(self, s):
        return s * (1 - s)  # derivative of sigmoid (s is already a sigmoid output)

    def backward(self, X, y, o):
        # backward propagation through the network
        self.o_error = y - o                                        # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)          # apply derivative of sigmoid to the output error
        self.z2_error = self.o_delta.dot(self.W2.T)                 # z2 error: how much the hidden layer contributed to the output error
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)  # apply derivative of sigmoid to the z2 error
        self.W1 += X.T.dot(self.z2_delta)       # adjust first set (input --> hidden) of weights
        self.W2 += self.z2.T.dot(self.o_delta)  # adjust second set (hidden --> output) of weights

    def train(self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)

NN = Neural_Network()
print("\nInput: \n" + str(X))
print("\nActual Output: \n" + str(y))
print("\nPredicted Output: \n" + str(NN.forward(X)))
print("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))  # mean squared loss
NN.train(X, y)
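As written, the program performs only a single gradient step. A minimal sketch of a full training loop (the iteration count of 1000 is an arbitrary choice, not from the original):

for i in range(1000):
    NN.train(X, y)
print("\nPredicted Output after training: \n" + str(NN.forward(X)))
print("\nLoss after training: \n" + str(np.mean(np.square(y - NN.forward(X)))))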