ML programs 7, 8, 9 and 10
ML programs 7, 8, 9 and 10
OUTPUT:
Demonstrating Linear Regression and Polynomial Regression
Linear Regression - California Housing Dataset
Mean Squared Error: 1.2923314440807299
R^2 Score: 0.013795337532284901
Polynomial Regression - Auto MPG Dataset
Mean Squared Error: 0.743149055720586
R^2 Score: 0.7505650609469626
8. Develop a program to demonstrate the working of the decision tree algorithm. Use the
Breast Cancer dataset to build the decision tree, and apply the trained model to
classify a new sample.
# Program 8: train a decision tree on the Breast Cancer dataset, report its
# accuracy on a held-out split, classify one sample, and plot the fitted tree.

# Importing necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree

# Load the dataset and separate the feature matrix from the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the tree on the training split only.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Score the model on the rows it has not seen.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Use the first test row as the "new" sample; wrapping it in a list keeps the
# 2-D (one row, many features) shape that predict() expects.
new_sample = np.array([X_test[0]])
prediction = clf.predict(new_sample)

# predict() returns an array even for a single row, so index the one result
# instead of comparing the whole array to a scalar.
prediction_class = "Benign" if prediction[0] == 1 else "Malignant"
print(f"Predicted Class for the new sample: {prediction_class}")

# Draw the fitted tree. The names are passed as plain lists because some
# scikit-learn releases appear to reject NumPy arrays for these parameters.
plt.figure(figsize=(12, 8))
tree.plot_tree(
    clf,
    filled=True,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
)
plt.title("Decision Tree - Breast Cancer Dataset")
plt.show()
OUTPUT:
9. Develop a program to implement the Naive Bayesian classifier using the Olivetti
Faces dataset for training. Compute the accuracy of the classifier on a held-out
test set.
# Program 9: train a Gaussian Naive Bayes classifier on the Olivetti faces,
# report hold-out and cross-validated accuracy, and show sample predictions.

import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Fetch the dataset (shuffled with a fixed seed) and split features from labels.
data = fetch_olivetti_faces(shuffle=True, random_state=42)
X = data.data
y = data.target

# Hold out 30% of the images for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the classifier on the training split only.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Evaluate on the held-out images.
y_pred = gnb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

print("\nClassification Report:")
# zero_division=1 reports 1.0 (instead of warning) for classes whose
# precision or recall would otherwise divide by zero.
print(classification_report(y_test, y_pred, zero_division=1))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# 5-fold cross-validation over the full dataset.
cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
print(f'\nCross-validation accuracy: {cross_val_accuracy.mean() * 100:.2f}%')

# Show a 3x5 grid of test images with true and predicted labels. zip() stops
# at the shortest input, so only as many images as there are axes are drawn.
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, image, label, prediction in zip(axes.ravel(), X_test, y_test, y_pred):
    # Each sample is a flat vector; reshape it back to a 64x64 image for display.
    ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
    ax.set_title(f"True: {label}, Pred: {prediction}")
    ax.axis('off')
plt.show()
Output:
Accuracy: 80.83%
Classification Report:
precision recall f1-score support
0 0.67 1.00 0.80 2
1 1.00 1.00 1.00 2
2 0.33 0.67 0.44 3
3 1.00 0.00 0.00 5
4 1.00 0.50 0.67 4
5 1.00 1.00 1.00 2
7 1.00 0.75 0.86 4
8 1.00 0.67 0.80 3
9 1.00 0.75 0.86 4
10 1.00 1.00 1.00 3
11 1.00 1.00 1.00 1
12 0.40 1.00 0.57 4
13 1.00 0.80 0.89 5
14 1.00 0.40 0.57 5
15 0.67 1.00 0.80 2
16 1.00 0.67 0.80 3
17 1.00 1.00 1.00 3
18 1.00 1.00 1.00 3
19 0.67 1.00 0.80 2
20 1.00 1.00 1.00 3
21 1.00 0.67 0.80 3
22 1.00 0.60 0.75 5
23 1.00 0.75 0.86 4
24 1.00 1.00 1.00 3
25 1.00 0.75 0.86 4
26 1.00 1.00 1.00 2
27 1.00 1.00 1.00 5
28 0.50 1.00 0.67 2
29 1.00 1.00 1.00 2
30 1.00 1.00 1.00 2
31 1.00 0.75 0.86 4
32 1.00 1.00 1.00 2
34 0.25 1.00 0.40 1
35 1.00 1.00 1.00 5
36 1.00 1.00 1.00 3
37 1.00 1.00 1.00 1
38 1.00 0.75 0.86 4
39 0.50 1.00 0.67 5
accuracy 0.81 120
macro avg 0.89 0.85 0.83 120
weighted avg 0.91 0.81 0.81 120
Confusion Matrix:
[[2 0 0 ... 0 0 0]
[0 2 0 ... 0 0 0]
[0 0 2 ... 0 0 1]
...
[0 0 0 ... 1 0 0]
[0 0 0 ... 0 3 0]
[0 0 0 ... 0 0 5]]
Cross-validation accuracy: 87.25%
10. Develop a program to implement k-means clustering using Wisconsin Breast Cancer data
set and visualize the clustering result.
PROGRAM:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
# Program 10: cluster the Breast Cancer dataset with k-means (k=2), compare the
# clusters against the true labels, and visualise the result in 2-D PCA space.

# Load the dataset and separate the feature matrix from the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Standardise the features before clustering and PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Cluster into two groups; the fixed seed makes the run repeatable.
kmeans = KMeans(n_clusters=2, random_state=42)
y_kmeans = kmeans.fit_predict(X_scaled)

# K-means cluster ids are arbitrary: cluster 0 is not guaranteed to correspond
# to class 0. If the raw ids agree with fewer than half of the true labels,
# swap 0 and 1 so the metrics below compare like with like. Only the metrics
# use the aligned ids; the plots keep the raw cluster ids.
if np.mean(y_kmeans == y) < 0.5:
    y_aligned = 1 - y_kmeans
else:
    y_aligned = y_kmeans

print("Confusion Matrix:")
print(confusion_matrix(y, y_aligned))
print("\nClassification Report:")
print(classification_report(y, y_aligned))

# Project the scaled features onto the first two principal components for plotting.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = y_kmeans
df['True Label'] = y

# Plot 1: points coloured by k-means cluster.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100,
                edgecolor='black', alpha=0.7)
plt.title('K-Means Clustering of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()

# Plot 2: the same points coloured by their true label, for comparison.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm', s=100,
                edgecolor='black', alpha=0.7)
plt.title('True Labels of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="True Label")
plt.show()

# Plot 3: clusters again, with the cluster centres overlaid.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100,
                edgecolor='black', alpha=0.7)
# The centres live in scaled feature space; apply the same PCA projection so
# they land in the plotted coordinates.
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title('K-Means Clustering with Centroids')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()
Output:
Confusion Matrix:
[[175 37]
[ 13 344]]
Classification Report:
precision recall f1-score support