Codes for Project

# Required imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

# Assumes a pandas DataFrame `data` with the target column 'y' is already loaded.

# Step 1: Encode categorical features
data_encoded = data.copy()
label_encoders = {}
for col in data.select_dtypes(include='object').columns:
    label_encoders[col] = LabelEncoder()
    data_encoded[col] = label_encoders[col].fit_transform(data[col])

# Step 2: Split features and target
X = data_encoded.drop(columns=['y'])
y = data_encoded['y']

# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42, stratify=y)

# Step 4: Apply SMOTE to balance the training set
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Step 5: Scale features
scaler = StandardScaler()
X_train_smote = scaler.fit_transform(X_train_smote)
X_test = scaler.transform(X_test)

# Step 6: Train and evaluate models

# KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_smote, y_train_smote)
y_pred_knn = knn.predict(X_test)
print("KNN Classifier:")
print(classification_report(y_test, y_pred_knn))
print(f"Accuracy: {accuracy_score(y_test, y_pred_knn)}")

# Logistic Regression
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train_smote, y_train_smote)
y_pred_logreg = logreg.predict(X_test)
print("\nLogistic Regression:")
print(classification_report(y_test, y_pred_logreg))
print(f"Accuracy: {accuracy_score(y_test, y_pred_logreg)}")

# Support Vector Machine
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train_smote, y_train_smote)
y_pred_svm = svm.predict(X_test)
print("\nSupport Vector Machine:")
print(classification_report(y_test, y_pred_svm))
print(f"Accuracy: {accuracy_score(y_test, y_pred_svm)}")


Here's the Python code that compares the accuracy scores of KNN,
Logistic Regression, and SVM, then displays the best accuracy score and
its corresponding algorithm:

# Required imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE

# Step 1: Encode categorical features
data_encoded = data.copy()
label_encoders = {}
for col in data.select_dtypes(include='object').columns:
    label_encoders[col] = LabelEncoder()
    data_encoded[col] = label_encoders[col].fit_transform(data[col])

# Step 2: Split features and target
X = data_encoded.drop(columns=['y'])
y = data_encoded['y']

# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42, stratify=y)

# Step 4: Apply SMOTE
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Step 5: Scale features
scaler = StandardScaler()
X_train_smote = scaler.fit_transform(X_train_smote)
X_test = scaler.transform(X_test)

# Step 6: Train and evaluate models
accuracy_scores = {}

# KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_smote, y_train_smote)
y_pred_knn = knn.predict(X_test)
accuracy_scores['KNN'] = accuracy_score(y_test, y_pred_knn)

# Logistic Regression
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train_smote, y_train_smote)
y_pred_logreg = logreg.predict(X_test)
accuracy_scores['Logistic Regression'] = accuracy_score(y_test, y_pred_logreg)

# Support Vector Machine
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train_smote, y_train_smote)
y_pred_svm = svm.predict(X_test)
accuracy_scores['SVM'] = accuracy_score(y_test, y_pred_svm)

# Step 7: Find and display the best accuracy score
best_algorithm = max(accuracy_scores, key=accuracy_scores.get)
best_accuracy = accuracy_scores[best_algorithm]

print("Accuracy Scores for Each Algorithm:")
for algorithm, score in accuracy_scores.items():
    print(f"{algorithm}: {score:.4f}")

print(f"\nBest Algorithm: {best_algorithm}")
print(f"Best Accuracy Score: {best_accuracy:.4f}")

Explanation:

1. Accuracy Calculation: Each classifier's accuracy score is calculated using accuracy_score.

2. Storing Results: Accuracy scores are stored in a dictionary with algorithm names as keys.

3. Finding the Best Algorithm: The max function, with the dictionary's get method as its key, returns the algorithm with the highest accuracy score (see the short sketch below).

4. Display: The scores of all algorithms and the best algorithm are displayed.
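For step 3, here is a minimal, self-contained sketch of how max with a dictionary's get method picks the best entry. The scores below are placeholder values, not results from the models above:

scores = {'KNN': 0.88, 'Logistic Regression': 0.91, 'SVM': 0.90}  # placeholder values

# max() iterates over the dictionary's keys; key=scores.get ranks each key
# by its associated value, so the key with the highest score is returned.
best = max(scores, key=scores.get)
print(best, scores[best])  # Logistic Regression 0.91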

Run this code locally to compare the performance of the classifiers and
identify the best one for your dataset.

There are many other algorithms you can use to classify your dataset. Here are additional popular classifiers, each with Python code that plugs into the pipeline above (they reuse X_train_smote, y_train_smote, X_test, y_test, and the accuracy_scores dictionary):

1. Decision Tree

from sklearn.tree import DecisionTreeClassifier

# Decision Tree Classifier
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_smote, y_train_smote)
y_pred_dt = dt.predict(X_test)
accuracy_scores['Decision Tree'] = accuracy_score(y_test, y_pred_dt)

2. Random Forest

from sklearn.ensemble import RandomForestClassifier

# Random Forest Classifier
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf.fit(X_train_smote, y_train_smote)
y_pred_rf = rf.predict(X_test)
accuracy_scores['Random Forest'] = accuracy_score(y_test, y_pred_rf)

3. Gradient Boosting (e.g., XGBoost)

from xgboost import XGBClassifier

# XGBoost Classifier
# Note: use_label_encoder was deprecated and later removed in newer XGBoost
# releases; drop the argument if your version warns about it.
xgb = XGBClassifier(random_state=42, use_label_encoder=False,
                    eval_metric='logloss')
xgb.fit(X_train_smote, y_train_smote)
y_pred_xgb = xgb.predict(X_test)
accuracy_scores['XGBoost'] = accuracy_score(y_test, y_pred_xgb)

4. Naive Bayes

from sklearn.naive_bayes import GaussianNB

# Naive Bayes Classifier
nb = GaussianNB()
nb.fit(X_train_smote, y_train_smote)
y_pred_nb = nb.predict(X_test)
accuracy_scores['Naive Bayes'] = accuracy_score(y_test, y_pred_nb)

5. Neural Networks (MLPClassifier)

from sklearn.neural_network import MLPClassifier

# Neural Network Classifier
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500,
                    random_state=42)
mlp.fit(X_train_smote, y_train_smote)
y_pred_mlp = mlp.predict(X_test)
accuracy_scores['Neural Network'] = accuracy_score(y_test, y_pred_mlp)

6. AdaBoost

from sklearn.ensemble import AdaBoostClassifier

# AdaBoost Classifier
ada = AdaBoostClassifier(random_state=42, n_estimators=100)
ada.fit(X_train_smote, y_train_smote)
y_pred_ada = ada.predict(X_test)
accuracy_scores['AdaBoost'] = accuracy_score(y_test, y_pred_ada)

7. LightGBM

from lightgbm import LGBMClassifier

# LightGBM Classifier
lgbm = LGBMClassifier(random_state=42)
lgbm.fit(X_train_smote, y_train_smote)
y_pred_lgbm = lgbm.predict(X_test)
accuracy_scores['LightGBM'] = accuracy_score(y_test, y_pred_lgbm)

Final Comparison of All Algorithms

After adding the algorithms above, update the comparison and best score
display:

# Display all accuracy scores and the best algorithm
print("\nAccuracy Scores for Each Algorithm:")
for algorithm, score in accuracy_scores.items():
    print(f"{algorithm}: {score:.4f}")

# Identify the best algorithm
best_algorithm = max(accuracy_scores, key=accuracy_scores.get)
best_accuracy = accuracy_scores[best_algorithm]

print(f"\nBest Algorithm: {best_algorithm}")
print(f"Best Accuracy Score: {best_accuracy:.4f}")

Steps to Use:

1. Copy and paste the desired algorithm's code into your script.

2. Run the script to compare the results.

3. Use the final comparison code to find the best-performing algorithm. If you prefer a single script, see the consolidated sketch below.
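If you would rather not paste each snippet separately, here is a consolidated, loop-based sketch. It assumes the same preprocessed variables as the scripts above (X_train_smote, y_train_smote, X_test, y_test) are already in scope; the model list is illustrative, so trim it to the libraries you have installed:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Map of display name -> unfitted classifier; add or remove entries as needed.
models = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'SVM': SVC(kernel='linear', random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42, n_estimators=100),
    'Naive Bayes': GaussianNB(),
    'AdaBoost': AdaBoostClassifier(random_state=42, n_estimators=100),
}

accuracy_scores = {}
for name, model in models.items():
    model.fit(X_train_smote, y_train_smote)   # train on the SMOTE-balanced data
    y_pred = model.predict(X_test)            # evaluate on the untouched test set
    accuracy_scores[name] = accuracy_score(y_test, y_pred)
    print(f"{name}: {accuracy_scores[name]:.4f}")

best_algorithm = max(accuracy_scores, key=accuracy_scores.get)
print(f"\nBest Algorithm: {best_algorithm}")
print(f"Best Accuracy Score: {accuracy_scores[best_algorithm]:.4f}")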
