ML 5
ML 5
import numpy as np
# Calculate the values and the corresponding counts for the split attribute
vals, counts = np.unique(data[split_attribute_name], return_counts=True)
# If the dataset is empty, return the mode target feature value in the original dataset
elif len(data) == 0:
return np.unique(originaldata[target_attribute_name])
[np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
# If there are no more features to split on, return the class value of the parent node
elif len(features) == 0:
return parent_node_class
else:
# Set the default value for this node
parent_node_class = np.unique(data[target_attribute_name])
[np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
# Grow the tree recursively for each unique value of the best feature
for value in np.unique(data[best_feature]):
# Split the dataset along the value of the feature with the largest information gain
sub_data = data.where(data[best_feature] == value).dropna()
return tree