ML 5

import pandas as pd
import numpy as np

# Load the dataset
dataset = pd.read_csv("/content/drive/MyDrive/id3dataset.csv - Sheet1.csv",
                      names=['age', 'income', 'student', 'credit_rating',
                             'buys_computer'])
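
# Quick sanity check (a sketch, not part of the original listing): the file is
# assumed to hold the classic 14-row "buys_computer" table with no header row,
# one training example per line, e.g. "youth,high,no,fair,no".
print(dataset.head())
print(dataset.shape)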

# Function to calculate entropy
def entropy(target_col):
    # Count how often each class label occurs in the target column
    elements, counts = np.unique(target_col, return_counts=True)
    # Shannon entropy: -sum over classes of p * log2(p)
    entropy = np.sum([(-counts[i] / np.sum(counts)) *
                      np.log2(counts[i] / np.sum(counts))
                      for i in range(len(elements))])
    return entropy
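
# Worked illustration of entropy() (values chosen here for the example, not
# read from the dataset): with two 'yes' and one 'no', p = [2/3, 1/3] and the
# entropy is -(2/3)*log2(2/3) - (1/3)*log2(1/3) ≈ 0.918 bits.
print(entropy(pd.Series(['yes', 'yes', 'no'])))   # ~0.918
print(entropy(pd.Series(['yes', 'yes', 'yes'])))  # 0 (may print as -0.0) for a pure column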

# Function to calculate Information Gain
def InfoGain(data, split_attribute_name, target_name="buys_computer"):
    # Calculate the entropy of the total dataset
    total_entropy = entropy(data[target_name])

    # Calculate the values and the corresponding counts for the split attribute
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)

    # Calculate the weighted entropy of the subsets produced by the split
    Weighted_Entropy = np.sum([(counts[i] / np.sum(counts)) *
                               entropy(data.where(data[split_attribute_name] == vals[i])
                                       .dropna()[target_name])
                               for i in range(len(vals))])

    # Calculate the information gain
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain
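
# Example: information gain of each candidate attribute (a sketch; the exact
# numbers depend on the CSV that was actually loaded). On Quinlan's classic
# 14-row table, 'age' scores highest (roughly 0.25 bits), which is why it
# ends up at the root of the tree built below.
for col in ['age', 'income', 'student', 'credit_rating']:
    print(col, InfoGain(dataset, col))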

# Function to build the ID3 Decision Tree
def ID3(data, originaldata, features, target_attribute_name="buys_computer",
        parent_node_class=None):
    # If all target values have the same value, return this value (pure node)
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]

    # If the dataset is empty, return the mode target feature value in the
    # original dataset
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name],
                                return_counts=True)[1])]

    # If there are no more features to split on, return the parent node's class
    elif len(features) == 0:
        return parent_node_class

    else:
        # Set the default value for this node: the mode of the target column
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name],
                                return_counts=True)[1])]

        # Calculate the Information Gain for each remaining feature
        item_values = [InfoGain(data, feature, target_attribute_name)
                       for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]

        # Create the tree structure rooted at the best feature
        tree = {best_feature: {}}

        # Remove the best feature from the feature space
        features = [i for i in features if i != best_feature]

        # Grow the tree recursively for each unique value of the best feature
        for value in np.unique(data[best_feature]):
            # Split the dataset along the value of the feature with the
            # largest information gain
            sub_data = data.where(data[best_feature] == value).dropna()

            # Call the ID3 algorithm recursively, keeping a reference to the
            # original (unsplit) dataset for the empty-subset case
            subtree = ID3(sub_data, originaldata, features,
                          target_attribute_name, parent_node_class)

            # Add the subtree to the tree under this attribute value
            tree[best_feature][value] = subtree

        return tree
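
# For the classic 14-row table, the call below typically produces a nested
# dictionary of this general shape (the concrete values are an assumption,
# shown only to illustrate the structure ID3 returns):
#   {'age': {'middle_aged': 'yes',
#            'youth': {'student': {'no': 'no', 'yes': 'yes'}},
#            'senior': {'credit_rating': {'excellent': 'no', 'fair': 'yes'}}}}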

# Build the ID3 Decision Tree
tree = ID3(dataset, dataset, list(dataset.columns[:-1]))

# Print the tree structure
print('\nDisplay Tree:\n', tree)
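
# A small helper to classify a new example by walking the nested dictionary
# returned by ID3. This is a sketch, not part of the original listing: the
# function name, the fallback label and the sample values are illustrative
# assumptions.
def predict(query, tree, default='yes'):
    # Descend until a leaf (a plain class label) is reached
    while isinstance(tree, dict):
        attribute = next(iter(tree))      # attribute tested at this node
        branches = tree[attribute]
        value = query.get(attribute)
        if value not in branches:         # unseen attribute value: fall back
            return default
        tree = branches[value]
    return tree


sample = {'age': 'youth', 'income': 'medium', 'student': 'yes',
          'credit_rating': 'fair'}
print('\nPrediction for', sample, ':', predict(sample, tree))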
