
Naushin Quereshi

22105A2021
AIML
Practical 2: Load breast cancer data and apply classification
Load the breast cancer data from sklearn.datasets and apply different classification methods. Compare the accuracy, precision and recall, analyse the metrics, and state your preferred classification method and why. Submit neatly labelled code in a Jupyter notebook, which you can create in Google Colab. Ensure that the code runs and prints the required output correctly. Choose an appropriate visualization technique to depict the data/results.
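
As a preliminary illustration (a minimal sketch, not part of the submitted solution below), the data itself can be depicted by plotting the class balance, assuming the same scikit-learn/pandas/matplotlib stack used in the solution; in sklearn's encoding, target 0 is malignant and 1 is benign:

# Sketch: visualize the class distribution of the breast cancer dataset
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
counts = pd.Series(data.target).value_counts().sort_index()
counts.index = data.target_names  # ['malignant', 'benign']

counts.plot(kind='bar', color=['red', 'green'])
plt.title("Class Distribution in the Breast Cancer Dataset")
plt.xlabel("Class")
plt.ylabel("Number of samples")
plt.tight_layout()
plt.show()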

Code:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             confusion_matrix, classification_report)
import matplotlib.pyplot as plt
import seaborn as sns

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features for better model performance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the classification models
models = {
    "Logistic Regression": LogisticRegression(random_state=42),
    "SVM": SVC(kernel='linear', random_state=42),
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42)
}

# Initialize lists to store evaluation metrics
accuracy_scores = []
precision_scores = []
recall_scores = []

# Fit each model and calculate metrics
for model_name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)

    # Print classification report for each model
    print(f"Classification Report for {model_name}:\n")
    print(classification_report(y_test, y_pred))
    print("="*60)

# Visualization: compare accuracy, precision, and recall
labels = list(models.keys())

# Plot accuracy, precision, and recall as horizontal bar charts
fig, ax = plt.subplots(1, 3, figsize=(18, 5))

ax[0].barh(labels, accuracy_scores, color='blue')
ax[0].set_title("Accuracy Comparison")
ax[0].set_xlabel("Accuracy")

ax[1].barh(labels, precision_scores, color='green')
ax[1].set_title("Precision Comparison")
ax[1].set_xlabel("Precision")

ax[2].barh(labels, recall_scores, color='red')
ax[2].set_title("Recall Comparison")
ax[2].set_xlabel("Recall")

plt.tight_layout()
plt.show()

# Summarize metrics in a DataFrame
metrics_df = pd.DataFrame({
    'Model': labels,
    'Accuracy': accuracy_scores,
    'Precision': precision_scores,
    'Recall': recall_scores
})

print(metrics_df)
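
Note that confusion_matrix and seaborn are imported above but not used in the code. A minimal sketch (an optional extension, assuming the variables defined above) of how they could be applied to depict the results, here for the Random Forest model already fitted in the loop:

# Sketch: confusion-matrix heatmap for one of the fitted models
rf_model = models["Random Forest"]        # fitted in the loop above
y_pred_rf = rf_model.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred_rf)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=data.target_names, yticklabels=data.target_names)
plt.title("Confusion Matrix - Random Forest")
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.tight_layout()
plt.show()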
Output:
