Python Code For Machine Learning

d4sfkkorg

July 2, 2024

[43]: import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

[44]: df=pd.read_csv("/kaggle/input/breast-cancer-wisconsin-data/data.csv")

[45]: df.head()

[45]: id diagnosis radius_mean texture_mean perimeter_mean area_mean \


0 842302 M 17.99 10.38 122.80 1001.0
1 842517 M 20.57 17.77 132.90 1326.0
2 84300903 M 19.69 21.25 130.00 1203.0
3 84348301 M 11.42 20.38 77.58 386.1
4 84358402 M 20.29 14.34 135.10 1297.0

smoothness_mean compactness_mean concavity_mean concave points_mean \


0 0.11840 0.27760 0.3001 0.14710
1 0.08474 0.07864 0.0869 0.07017
2 0.10960 0.15990 0.1974 0.12790
3 0.14250 0.28390 0.2414 0.10520
4 0.10030 0.13280 0.1980 0.10430

… texture_worst perimeter_worst area_worst smoothness_worst \


0 … 17.33 184.60 2019.0 0.1622
1 … 23.41 158.80 1956.0 0.1238
2 … 25.53 152.50 1709.0 0.1444
3 … 26.50 98.87 567.7 0.2098
4 … 16.67 152.20 1575.0 0.1374

compactness_worst concavity_worst concave points_worst symmetry_worst \


0 0.6656 0.7119 0.2654 0.4601
1 0.1866 0.2416 0.1860 0.2750
2 0.4245 0.4504 0.2430 0.3613
3 0.8663 0.6869 0.2575 0.6638
4 0.2050 0.4000 0.1625 0.2364

fractal_dimension_worst Unnamed: 32
0 0.11890 NaN
1 0.08902 NaN
2 0.08758 NaN
3 0.17300 NaN
4 0.07678 NaN

[5 rows x 33 columns]

[46]: df.isnull().sum()

[46]: id 0
diagnosis 0
radius_mean 0
texture_mean 0
perimeter_mean 0
area_mean 0
smoothness_mean 0
compactness_mean 0
concavity_mean 0
concave points_mean 0
symmetry_mean 0
fractal_dimension_mean 0
radius_se 0
texture_se 0
perimeter_se 0
area_se 0
smoothness_se 0
compactness_se 0
concavity_se 0
concave points_se 0
symmetry_se 0
fractal_dimension_se 0
radius_worst 0
texture_worst 0
perimeter_worst 0
area_worst 0
smoothness_worst 0
compactness_worst 0
concavity_worst 0
concave points_worst 0
symmetry_worst 0
fractal_dimension_worst 0
Unnamed: 32 569
dtype: int64

[47]: df.columns

[47]: Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
'fractal_dimension_se', 'radius_worst', 'texture_worst',
'perimeter_worst', 'area_worst', 'smoothness_worst',
'compactness_worst', 'concavity_worst', 'concave points_worst',
'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
dtype='object')

[48]: df.describe()

[48]: id radius_mean texture_mean perimeter_mean area_mean \


count 5.690000e+02 569.000000 569.000000 569.000000 569.000000
mean 3.037183e+07 14.127292 19.289649 91.969033 654.889104
std 1.250206e+08 3.524049 4.301036 24.298981 351.914129
min 8.670000e+03 6.981000 9.710000 43.790000 143.500000
25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000
50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000
75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000
max 9.113205e+08 28.110000 39.280000 188.500000 2501.000000

smoothness_mean compactness_mean concavity_mean concave points_mean \


count 569.000000 569.000000 569.000000 569.000000
mean 0.096360 0.104341 0.088799 0.048919
std 0.014064 0.052813 0.079720 0.038803
min 0.052630 0.019380 0.000000 0.000000
25% 0.086370 0.064920 0.029560 0.020310
50% 0.095870 0.092630 0.061540 0.033500
75% 0.105300 0.130400 0.130700 0.074000
max 0.163400 0.345400 0.426800 0.201200

symmetry_mean … texture_worst perimeter_worst area_worst \


count 569.000000 … 569.000000 569.000000 569.000000
mean 0.181162 … 25.677223 107.261213 880.583128
std 0.027414 … 6.146258 33.602542 569.356993
min 0.106000 … 12.020000 50.410000 185.200000
25% 0.161900 … 21.080000 84.110000 515.300000
50% 0.179200 … 25.410000 97.660000 686.500000
75% 0.195700 … 29.720000 125.400000 1084.000000
max 0.304000 … 49.540000 251.200000 4254.000000

smoothness_worst compactness_worst concavity_worst \


count 569.000000 569.000000 569.000000
mean 0.132369 0.254265 0.272188
std 0.022832 0.157336 0.208624

min 0.071170 0.027290 0.000000
25% 0.116600 0.147200 0.114500
50% 0.131300 0.211900 0.226700
75% 0.146000 0.339100 0.382900
max 0.222600 1.058000 1.252000

concave points_worst symmetry_worst fractal_dimension_worst \


count 569.000000 569.000000 569.000000
mean 0.114606 0.290076 0.083946
std 0.065732 0.061867 0.018061
min 0.000000 0.156500 0.055040
25% 0.064930 0.250400 0.071460
50% 0.099930 0.282200 0.080040
75% 0.161400 0.317900 0.092080
max 0.291000 0.663800 0.207500

Unnamed: 32
count 0.0
mean NaN
std NaN
min NaN
25% NaN
50% NaN
75% NaN
max NaN

[8 rows x 32 columns]

[49]: df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

[50]: df.head()

[50]: diagnosis radius_mean texture_mean perimeter_mean area_mean \


0 M 17.99 10.38 122.80 1001.0
1 M 20.57 17.77 132.90 1326.0
2 M 19.69 21.25 130.00 1203.0
3 M 11.42 20.38 77.58 386.1
4 M 20.29 14.34 135.10 1297.0

smoothness_mean compactness_mean concavity_mean concave points_mean \


0 0.11840 0.27760 0.3001 0.14710
1 0.08474 0.07864 0.0869 0.07017
2 0.10960 0.15990 0.1974 0.12790
3 0.14250 0.28390 0.2414 0.10520
4 0.10030 0.13280 0.1980 0.10430

symmetry_mean … radius_worst texture_worst perimeter_worst \

0 0.2419 … 25.38 17.33 184.60
1 0.1812 … 24.99 23.41 158.80
2 0.2069 … 23.57 25.53 152.50
3 0.2597 … 14.91 26.50 98.87
4 0.1809 … 22.54 16.67 152.20

area_worst smoothness_worst compactness_worst concavity_worst \


0 2019.0 0.1622 0.6656 0.7119
1 1956.0 0.1238 0.1866 0.2416
2 1709.0 0.1444 0.4245 0.4504
3 567.7 0.2098 0.8663 0.6869
4 1575.0 0.1374 0.2050 0.4000

concave points_worst symmetry_worst fractal_dimension_worst


0 0.2654 0.4601 0.11890
1 0.1860 0.2750 0.08902
2 0.2430 0.3613 0.08758
3 0.2575 0.6638 0.17300
4 0.1625 0.2364 0.07678

[5 rows x 31 columns]

[51]: df["diagnosis"].value_counts()

[51]: diagnosis
B 357
M 212
Name: count, dtype: int64

[52]: sns.countplot(x="diagnosis",data=df)
plt.show()

[53]: df["diagnosis"]=df["diagnosis"].replace({"M":1,"B":0})

/tmp/ipykernel_33/4040419218.py:1: FutureWarning: Downcasting behavior in
`replace` is deprecated and will be removed in a future version. To retain the
old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to
the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  df["diagnosis"]=df["diagnosis"].replace({"M":1,"B":0})

[54]: corr = df.corr()
corr.style.background_gradient(cmap='cool')

[54]: <pandas.io.formats.style.Styler at 0x7d43a9ae43a0>
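The Styler object renders only inside a live notebook, so nothing is visible in a static export like this one. A small sketch that draws the same matrix as a seaborn heatmap instead, reusing `corr` from the cell above:

# static rendering of the correlation matrix
plt.figure(figsize=(12, 10))
sns.heatmap(corr, cmap='cool')
plt.title('Feature correlation matrix')
plt.show()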

[55]: x = ["radius_mean", "texture_mean", "perimeter_mean", "area_mean",
      "radius_worst", "texture_worst", "perimeter_worst"]

[56]: from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[x] = scaler.fit_transform(df[x])
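One caveat: the scaler is fit on the full dataset before the train/test split below, so test-set statistics leak into the transform (the effect is minor for the tree-based models used here, which are insensitive to monotone rescaling). A leakage-free sketch, replacing the cell above rather than adding to it, and assuming the x_train/x_test split created in cell [63] below:

# fit scaling statistics on the training split only, then apply to both splits
scaler = StandardScaler()
x_train.loc[:, x] = scaler.fit_transform(x_train[x])
x_test.loc[:, x] = scaler.transform(x_test[x])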

[57]: df.head()

[57]: diagnosis radius_mean texture_mean perimeter_mean area_mean \
0 1 1.097064 -2.073335 1.269934 0.984375
1 1 1.829821 -0.353632 1.685955 1.908708
2 1 1.579888 0.456187 1.566503 1.558884
3 1 -0.768909 0.253732 -0.592687 -0.764464
4 1 1.750297 -1.151816 1.776573 1.826229

smoothness_mean compactness_mean concavity_mean concave points_mean \


0 0.11840 0.27760 0.3001 0.14710
1 0.08474 0.07864 0.0869 0.07017
2 0.10960 0.15990 0.1974 0.12790
3 0.14250 0.28390 0.2414 0.10520
4 0.10030 0.13280 0.1980 0.10430

symmetry_mean … radius_worst texture_worst perimeter_worst \


0 0.2419 … 1.886690 -1.359293 2.303601
1 0.1812 … 1.805927 -0.369203 1.535126
2 0.2069 … 1.511870 -0.023974 1.347475
3 0.2597 … -0.281464 0.133984 -0.249939
4 0.1809 … 1.298575 -1.466770 1.338539

area_worst smoothness_worst compactness_worst concavity_worst \


0 2019.0 0.1622 0.6656 0.7119
1 1956.0 0.1238 0.1866 0.2416
2 1709.0 0.1444 0.4245 0.4504
3 567.7 0.2098 0.8663 0.6869
4 1575.0 0.1374 0.2050 0.4000

concave points_worst symmetry_worst fractal_dimension_worst


0 0.2654 0.4601 0.11890
1 0.1860 0.2750 0.08902
2 0.2430 0.3613 0.08758
3 0.2575 0.6638 0.17300
4 0.1625 0.2364 0.07678

[5 rows x 31 columns]

[58]: sns.histplot(df["diagnosis"], bins=10, kde=False, color='skyblue')
plt.title('Histogram of diagnosis (0 = benign, 1 = malignant)')
plt.xlabel('diagnosis')
plt.ylabel('Frequency')
plt.show()

/opt/conda/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning:
use_inf_as_na option is deprecated and will be removed in a future version.
Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):

[59]: sns.histplot(df["texture_mean"], bins=10, kde=False, color='blue')
plt.title('Histogram of texture_mean')
plt.xlabel('texture_mean')
plt.ylabel('Frequency')
plt.show()

/opt/conda/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning:
use_inf_as_na option is deprecated and will be removed in a future version.
Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):

[60]: sns.histplot(df["perimeter_mean"], bins=10, kde=False, color='brown')
plt.title('Histogram of perimeter_mean')
plt.xlabel('perimeter_mean')
plt.ylabel('Frequency')
plt.show()

/opt/conda/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning:
use_inf_as_na option is deprecated and will be removed in a future version.
Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):

[61]: sns.histplot(df["area_mean"], bins=10, kde=False, color='red')
plt.title('Histogram of area_mean')
plt.xlabel('area_mean')
plt.ylabel('Frequency')
plt.show()

/opt/conda/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning:
use_inf_as_na option is deprecated and will be removed in a future version.
Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):

[62]: X=df.drop(columns="diagnosis",axis=1)
Y=df["diagnosis"]

[63]: from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, roc_auc_score)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)
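Since the classes are moderately imbalanced (357 benign vs. 212 malignant), a stratified split keeps the class ratio identical in both folds; a one-line variation on the call above, not what the notebook ran:

# stratify=Y preserves the benign/malignant proportions in train and test
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y)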

1 Decision tree model


[64]: model=DecisionTreeClassifier(random_state=42)
model.fit(x_train,y_train)
pred1=model.predict(x_test)
acc=accuracy_score(y_test,pred1)
acc

[64]: 0.9473684210526315

2 Confusion matrix
[65]: cf=confusion_matrix(y_test,pred1)
sns.heatmap(cf,annot=True,fmt="d",cmap="spring")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

3 Classification report
[66]: print(classification_report(y_test,pred1))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96        71
           1       0.93      0.93      0.93        43

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

4 ROC AUC score


[67]: roc_auc = roc_auc_score(y_test, pred1)
print("ROC AUC Score:", roc_auc)

ROC AUC Score: 0.9439895185063871
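This score is computed from hard 0/1 predictions, which collapses the ROC curve to a single operating point; the curve cell below uses predict_proba scores, the more informative input for AUC. The probability-based score, as a sketch:

# AUC from predicted probabilities rather than hard class labels
probs = model.predict_proba(x_test)[:, 1]
print("ROC AUC (probabilities):", roc_auc_score(y_test, probs))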

5 ROC curve
[68]: from sklearn.metrics import roc_curve, roc_auc_score

probs = model.predict_proba(x_test)
preds = probs[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, preds)

roc_auc = roc_auc_score(y_test, preds)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (AUC = {:.2f})'.format(roc_auc))

plt.plot([0, 1], [0, 1], color='gray', linestyle='--')


plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

6 Random forest
[69]: from sklearn.ensemble import RandomForestClassifier
model2=RandomForestClassifier()
model2.fit(x_train,y_train)
pred2=model2.predict(x_test)
acc1=accuracy_score(y_test,pred2)
acc1

[69]: 0.9649122807017544

7 Confusion matrix
[70]: cf=confusion_matrix(y_test,pred2)
sns.heatmap(cf,annot=True,fmt="d",cmap="winter")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.show()

8 Classification report
[71]: print(classification_report(y_test,pred2))

              precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.98      0.93      0.95        43

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

9 ROC AUC score
[72]: roc_auc = roc_auc_score(y_test, pred2)
print("ROC AUC Score:", roc_auc)

ROC AUC Score: 0.9580740255486406

10 ROC curve
[73]: probs = model2.predict_proba(x_test)
preds = probs[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, preds)

roc_auc = roc_auc_score(y_test, preds)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='green', lw=4, label='ROC curve (AUC = {:.2f})'.format(roc_auc))

plt.plot([0, 1], [0, 1], color='gray', linestyle='--')


plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

11 LightGBM
[74]: from lightgbm import LGBMClassifier
model3 = LGBMClassifier(random_state=42)
model3.fit(x_train, y_train)
pred3=model3.predict(x_test)
acc3=accuracy_score(y_test,pred3)
acc3

[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines


[LightGBM] [Info] Number of positive: 169, number of negative: 286
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of
testing was 0.000408 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4544
[LightGBM] [Info] Number of data points in the train set: 455, number of used
features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.371429 -> initscore=-0.526093
[LightGBM] [Info] Start training from score -0.526093

[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[... the same warning repeats for the remaining boosting iterations ...]

[74]: 0.9649122807017544
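The repeated warning just means some trees ran out of useful splits on this small training set; it is harmless here. If the log noise is unwanted, one common option (an aside, not part of the original run) is to lower LightGBM's verbosity when constructing the model:

# verbose=-1 silences LightGBM's info and warning output
model3 = LGBMClassifier(random_state=42, verbose=-1)
model3.fit(x_train, y_train)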

12 Confusion matrix
[75]: cf=confusion_matrix(y_test,pred3)
sns.heatmap(cf,annot=True,fmt="d",cmap="hot")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

13 Classification report
[76]: print(classification_report(y_test,pred3))

              precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.98      0.93      0.95        43

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

14 ROC AUC score


[77]: roc_auc = roc_auc_score(y_test, pred3)
print("ROC AUC Score:", roc_auc)

ROC AUC Score: 0.9580740255486406

15 ROC curve
[78]: probs = model3.predict_proba(x_test)
preds = probs[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, preds)

roc_auc = roc_auc_score(y_test, preds)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='crimson', lw=4, label='ROC curve (AUC = {:.2f})'.format(roc_auc))

plt.plot([0, 1], [0, 1], color='gray', linestyle='--')


plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

16 GradientBoostingClassifier
[79]: from sklearn.ensemble import GradientBoostingClassifier
model4 = GradientBoostingClassifier(random_state=42)
model4.fit(x_train, y_train)
pred4=model4.predict(x_test)
acc4=accuracy_score(y_test,pred4)
acc4

[79]: 0.956140350877193

17 Confusion matrix
[80]: cf=confusion_matrix(y_test,pred4)
sns.heatmap(cf,annot=True,fmt="d",cmap="pink")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.show()

18 Classification report
[81]: print(classification_report(y_test,pred4))

              precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

19 ROC AUC score
[82]: roc_auc = roc_auc_score(y_test, pred4)
print("ROC AUC Score:", roc_auc)

ROC AUC Score: 0.9510317720275139

20 ROC curve
[83]: probs = model4.predict_proba(x_test)
preds = probs[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, preds)

roc_auc = roc_auc_score(y_test, preds)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='red', lw=4, label='ROC curve (AUC = {:.2f})'.format(roc_auc))

plt.plot([0, 1], [0, 1], color='gray', linestyle='--')


plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

[84]: accuracies = [acc, acc1, acc3, acc4]
classifiers = ['Decision Tree', 'Random Forest', 'LGBM', 'Gradient Boosting']

plt.figure(figsize=(10, 6))
bars = plt.bar(classifiers, accuracies, color='skyblue')

# annotate each bar with its accuracy score
for bar, score in zip(bars, accuracies):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
             f'{score:.2f}', ha='center', va='bottom')

plt.title('Accuracy Scores of Different Classifiers')
plt.xlabel('Classifiers')
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.show()
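All four accuracies come from the same single 80/20 split, where one test sample is worth almost 0.9 percentage points, so the ranking may not be stable. A sketch of a sturdier comparison with 5-fold cross-validation (an addition to the notebook; cross_val_score refits clones of each estimator):

from sklearn.model_selection import cross_val_score

# mean accuracy over 5 folds is less dependent on a single split
for name, est in [('Decision Tree', model), ('Random Forest', model2),
                  ('LGBM', model3), ('Gradient Boosting', model4)]:
    scores = cross_val_score(est, X, Y, cv=5, scoring='accuracy')
    print(f'{name}: {scores.mean():.3f} +/- {scores.std():.3f}')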
