import matplotlib.pyplot as plt
from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import CalibrationDisplay
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Load scikit-learn's bundled breast cancer dataset and pull out the feature
# matrix and the target vector.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# --- Base estimators --------------------------------------------------------
svc = SVC()

# Seeded so the fitted tree (and therefore its calibration curve) is
# reproducible, consistent with the seeded train/test split.
tree = DecisionTreeClassifier(random_state=42)

# The features are used unscaled, so the default solver needs far more than
# its default 100 iterations to converge; raise the cap to avoid fitting a
# half-optimised model (and the accompanying ConvergenceWarning).
log = LogisticRegression(C=0.5, max_iter=10000)

gnb = GaussianNB()

# --- Calibrated wrappers ----------------------------------------------------
# Each wrapper uses 3-fold cross-validation and keeps the per-fold ensemble.
svc_sigmoid = CalibratedClassifierCV(
    svc,
    cv=3,
    method="sigmoid",
    ensemble=True,
)
tree_isotonic = CalibratedClassifierCV(
    tree,
    cv=3,
    method="isotonic",
    ensemble=True,
)
gnb_sigmoid = CalibratedClassifierCV(
    gnb,
    cv=3,
    method="sigmoid",
    ensemble=True,
)

# Display name -> estimator. Insertion order is the order in which the models
# are iterated when they are fitted and plotted.
classifiers = {
    "Logistic": log,
    "Naive Bayes": gnb,
    "SVM + sigmoid": svc_sigmoid,
    "Decision Tree + Isotonic": tree_isotonic,
    "Naive Bayes + Sigmoid": gnb_sigmoid,
}
# One shared axes so every model's calibration curve is overlaid on the same
# plot.
fig, ax = plt.subplots(figsize=(7, 5), dpi=150)

# Dashed gray diagonal from (0, 0) to (1, 1) as a visual reference.
ax.plot([0, 1], [0, 1], linestyle='--', color='gray')

# Fit each model on the training split, then draw its calibration curve from
# the held-out split using 10 bins, labelled with the registry key.
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    clf_disp = CalibrationDisplay.from_estimator(
        clf,
        X_test,
        y_test,
        n_bins=10,
        name=name,
        ax=ax,
    )

# `ax` is the only axes on the current figure, so the axes-level calls below
# target the same artist the pyplot helpers would.
ax.set_title('Probability Calibration Curve')
ax.legend(loc="best")
plt.show()