import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score
# Load the Iris dataset and separate the feature matrix from the class labels.
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate max_depth values (1 through 15) used by the depth sweeps.
depths = range(1, 16)
# Method 1: for every candidate depth, estimate accuracy with 5-fold
# cross-validation run on the training split only.
# The scores collected here are cross-validation means, not training
# accuracy, so the accumulator is named accordingly.
cv_accuracy = []
print("Method 1: Cross-Validation")
for depth in depths:
    clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
    scores = cross_val_score(clf, X_train, y_train, cv=5)
    # Keep the mean of the five fold scores for this depth.
    cv_accuracy.append(scores.mean())

# Plot mean cross-validated accuracy against tree depth.
plt.figure(figsize=(10, 6))
plt.plot(depths, cv_accuracy, marker='o', label="Cross-Validation Accuracy")
plt.xlabel('Tree Depth')
plt.ylabel('Cross-Validated Accuracy')
plt.title('Tree Depth vs. Cross-Validated Accuracy')
plt.legend()
plt.grid(True)
plt.show()
# Method 2: train a single tree with a hand-picked depth limit and compare
# its accuracy on the training split against the held-out test split.
max_depth = 7
clf = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
clf.fit(X_train, y_train)

train_predictions = clf.predict(X_train)
train_accuracy_fixed_depth = accuracy_score(y_train, train_predictions)

test_predictions = clf.predict(X_test)
test_accuracy_fixed_depth = accuracy_score(y_test, test_predictions)

print(f"Method 2: Set Max Depth to {max_depth}")
print(f"Training Accuracy (Max Depth {max_depth}): {train_accuracy_fixed_depth}")
print(f"Test Accuracy (Max Depth {max_depth}): {test_accuracy_fixed_depth}")
# Method 3: for every candidate depth, compare accuracy on the training
# split itself with the mean 5-fold cross-validated accuracy, so the gap
# between the two curves can be read off the plot.
print("Method 3: Training vs. Validation Accuracy")
train_accuracy = []
validation_accuracy = []
for depth in depths:
    clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
    clf.fit(X_train, y_train)
    # Accuracy on the same samples the tree was fitted on.
    train_accuracy.append(clf.score(X_train, y_train))
    # cross_val_score fits clones of the estimator on each fold, so the
    # model fitted above is not altered by this call.
    validation_scores = cross_val_score(clf, X_train, y_train, cv=5)
    validation_accuracy.append(validation_scores.mean())

# Plot both curves on one figure for direct comparison.
plt.figure(figsize=(10, 6))
plt.plot(depths, train_accuracy, marker='o', label="Training Accuracy")
plt.plot(depths, validation_accuracy, marker='o', label="Validation Accuracy")
plt.xlabel('Tree Depth')
plt.ylabel('Accuracy')
plt.title('Tree Depth vs. Training and Validation Accuracy')
plt.legend()
plt.grid(True)
plt.show()
# Method 4: let GridSearchCV try every max_depth from 1 to 15 with 5-fold
# cross-validation on the training split and report the best one.
param_grid = dict(max_depth=range(1, 16))
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

best_depth = grid_search.best_params_['max_depth']
print(f"Method 4: Best Depth found by Grid Search is {best_depth}")
# Method 5: pre-pruning. Reuse the depth chosen by the grid search and
# additionally constrain node splitting and leaf size via
# min_samples_split=4 and min_samples_leaf=2.
clf_pruned = DecisionTreeClassifier(
    max_depth=best_depth,
    min_samples_split=4,
    min_samples_leaf=2,
    random_state=42,
).fit(X_train, y_train)

pruned_train_predictions = clf_pruned.predict(X_train)
pruned_test_predictions = clf_pruned.predict(X_test)
train_accuracy_pruned = accuracy_score(y_train, pruned_train_predictions)
test_accuracy_pruned = accuracy_score(y_test, pruned_test_predictions)

print("Method 5: Pruning with min_samples_split=4, min_samples_leaf=2")
print(f"Training Accuracy (Pruned): {train_accuracy_pruned}")
print(f"Test Accuracy (Pruned): {test_accuracy_pruned}")