Python Code For Machine Learning
July 2, 2024
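The cells below call pd, sns, and plt without importing them, so the imports presumably live in earlier cells that are not included in this export. A minimal import block covering what is actually used here:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns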
[44]: df=pd.read_csv("/kaggle/input/breast-cancer-wisconsin-data/data.csv")
[45]: df.head()
   …  fractal_dimension_worst  Unnamed: 32
0  …                  0.11890          NaN
1  …                  0.08902          NaN
2  …                  0.08758          NaN
3  …                  0.17300          NaN
4  …                  0.07678          NaN

[5 rows x 33 columns]
[46]: df.isnull().sum()
[46]: id 0
diagnosis 0
radius_mean 0
texture_mean 0
perimeter_mean 0
area_mean 0
smoothness_mean 0
compactness_mean 0
concavity_mean 0
concave points_mean 0
symmetry_mean 0
fractal_dimension_mean 0
radius_se 0
texture_se 0
perimeter_se 0
area_se 0
smoothness_se 0
compactness_se 0
concavity_se 0
concave points_se 0
symmetry_se 0
fractal_dimension_se 0
radius_worst 0
texture_worst 0
perimeter_worst 0
area_worst 0
smoothness_worst 0
compactness_worst 0
concavity_worst 0
concave points_worst 0
symmetry_worst 0
fractal_dimension_worst 0
Unnamed: 32 569
dtype: int64
[47]: df.columns
[47]: Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
'fractal_dimension_se', 'radius_worst', 'texture_worst',
'perimeter_worst', 'area_worst', 'smoothness_worst',
'compactness_worst', 'concavity_worst', 'concave points_worst',
'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
dtype='object')
[48]: df.describe()
(describe output truncated in this export; only the trailing column survives)
       Unnamed: 32
count          0.0
mean           NaN
std            NaN
min            NaN
25%            NaN
50%            NaN
75%            NaN
max            NaN

[8 rows x 32 columns]
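The df.head() call in the next cell returns 31 columns instead of 33, so the omitted cell presumably dropped the all-NaN Unnamed: 32 column along with the non-predictive id column. A minimal sketch of that cleanup step (assumed, not shown in the export):

df.drop(columns=["Unnamed: 32", "id"], inplace=True)   # assumed: Unnamed: 32 is entirely NaN and id is only an identifier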
[50]: df.head()
(head output truncated in this export; only a few trailing columns survive)

[5 rows x 31 columns]
[51]: df["diagnosis"].value_counts()
[51]: diagnosis
B 357
M 212
Name: count, dtype: int64
[52]: sns.countplot(x="diagnosis",data=df)
plt.show()
[53]: df["diagnosis"]=df["diagnosis"].replace({"M":1,"B":0})
[55]: x=["radius_mean","texture_mean","perimeter_mean","area_mean","radius_worst","texture_worst","p
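The standardized values in the df.head() output below (zero-centred, with negative entries) indicate that the selected columns were scaled in an omitted cell. A minimal sketch, assuming scikit-learn's StandardScaler applied to the column list x defined above (the original scaling cell is not shown in this export):

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()                 # z-score standardization assumed
df[x] = scaler.fit_transform(df[x])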
[57]: df.head()
[57]: diagnosis radius_mean texture_mean perimeter_mean area_mean \
0 1 1.097064 -2.073335 1.269934 0.984375
1 1 1.829821 -0.353632 1.685955 1.908708
2 1 1.579888 0.456187 1.566503 1.558884
3 1 -0.768909 0.253732 -0.592687 -0.764464
4 1 1.750297 -1.151816 1.776573 1.826229
[5 rows x 31 columns]
[59]: sns.histplot(df["texture_mean"], bins=10, kde=False, color='blue')
plt.title('Histogram of texture_mean')
plt.xlabel('texture_mean')
plt.ylabel('Frequency')
plt.show()
[60]: sns.histplot(df["perimeter_mean"], bins=10, kde=False, color='brown')
plt.title('Histogram of perimeter_mean')
plt.xlabel('perimeter_mean')
plt.ylabel('Frequency')
plt.show()
[61]: sns.histplot(df["area_mean"], bins=10, kde=False, color='red')
plt.title('Histogram of area_mean')
plt.xlabel('area_mean')
plt.ylabel('Frequency')
plt.show()
[62]: X=df.drop(columns="diagnosis",axis=1)
Y=df["diagnosis"]
[64]: 0.9473684210526315
2 confusion matrix
[65]: cf=confusion_matrix(y_test,pred1)
sns.heatmap(cf,annot=True,fmt="d",cmap="spring")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
3 classification report
[66]: print(classification_report(y_test,pred1))
              precision    recall  f1-score   support

   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114
5 roc curve
[68]: from sklearn.metrics import roc_curve, roc_auc_score
probs = model.predict_proba(x_test)
preds = probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='grey', lw=1, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()
6 random forest
[69]: from sklearn.ensemble import RandomForestClassifier
model2=RandomForestClassifier()
model2.fit(x_train,y_train)
pred2=model2.predict(x_test)
acc1=accuracy_score(y_test,pred2)
acc1
[69]: 0.9649122807017544
7 confusion matrix
[70]: cf=confusion_matrix(y_test,pred2)
sns.heatmap(cf,annot=True,fmt="d",cmap="winter")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
8 classification report
[71]: print(classification_report(y_test,pred2))
9 roc auc score
[72]: probs2 = model2.predict_proba(x_test)[:, 1]   # score the predicted probabilities rather than hard labels
roc_auc = roc_auc_score(y_test, probs2)
print("ROC AUC Score:", roc_auc)
10 roc curve
[73]: probs = model2.predict_proba(x_test)
preds = probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='green', lw=4, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()
11 LGB
[74]: from lightgbm import LGBMClassifier
model3 = LGBMClassifier(random_state=42)
model3.fit(x_train, y_train)
pred3=model3.predict(x_test)
acc3=accuracy_score(y_test,pred3)
acc3
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
(the warning above is repeated many times while the LightGBM trees are grown)
[74]: 0.9649122807017544
12 Confusion matrix
[75]: cf=confusion_matrix(y_test,pred3)
sns.heatmap(cf,annot=True,fmt="d",cmap="hot")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
13 Classification report
[76]: print(classification_report(y_test,pred3))
              precision    recall  f1-score   support

   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114
15 roc curve
[78]: probs = model3.predict_proba(x_test)
preds = probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='crimson', lw=4, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()
16 GradientBoostingClassifier
[79]: from sklearn.ensemble import GradientBoostingClassifier
model4 = GradientBoostingClassifier(random_state=42)
model4.fit(x_train, y_train)
pred4=model4.predict(x_test)
acc4=accuracy_score(y_test,pred4)
acc4
[79]: 0.956140350877193
17 Confusion matrix
[80]: cf=confusion_matrix(y_test,pred4)
sns.heatmap(cf,annot=True,fmt="d",cmap="pink")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
18 classification report
[81]: print(classification_report(y_test,pred4))
19 roc auc score
[82]: probs4 = model4.predict_proba(x_test)[:, 1]   # pred3 belongs to the LightGBM model; score model4's probabilities here
roc_auc = roc_auc_score(y_test, probs4)
print("ROC AUC Score:", roc_auc)
20 roc curve
[83]: probs = model4.predict_proba(x_test)
preds = probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, preds)
roc_auc = roc_auc_score(y_test, preds)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='red', lw=4, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()
[84]: classifiers = ["Baseline", "Random Forest", "LightGBM", "Gradient Boosting"]   # labels assumed; the baseline model cell is not shown in this export
accuracies = [acc, acc1, acc3, acc4]
plt.figure(figsize=(10, 6))
bars = plt.bar(classifiers, accuracies, color='skyblue')
plt.ylabel('Accuracy')
plt.show()