Import As Import As Import As: "Default - CSV"
Import As Import As Import As: "Default - CSV"
import pandas as pd
import seaborn as sns
In [36]: df = pd.read_csv("Default.csv")  # path is relative to the notebook's working directory
df.head(5)  # preview the first five rows
0 1 No No 729.53 44361.63
2 3 No No 1073.55 31767.14
3 4 No No 529.25 35704.49
4 5 No No 785.66 38463.50
In [38]: df.shape
Out[38]: (10000, 4)
Out[42]: default
No 9667
Yes 333
Name: count, dtype: int64
In [43]: df.student.value_counts()
Out[43]: student
No 7056
Yes 2944
Name: count, dtype: int64
In [46]: df.head()
0 No No 729.53 44361.63 0 0
2 No No 1073.55 31767.14 0 0
3 No No 529.25 35704.49 0 0
4 No No 785.66 38463.50 0 0
Out[48]: Student 0 1
Default
0 6850 2817
1 206 127
In [57]: X = df[['balance']]  # double brackets keep X a one-column DataFrame rather than a Series
y = df['default 1']  # int64 target; presumably the 0/1 encoding of `default` built in an earlier cell — confirm
In [50]:
In [58]: y
Out[58]: 0 0
1 0
2 0
3 0
4 0
..
9995 0
9996 0
9997 0
9998 0
9999 0
Name: default 1, Length: 10000, dtype: int64
In [59]: from sklearn.linear_model import LogisticRegression
from sklearn import metrics
logreg = LogisticRegression()
logreg.fit(X, y)
print(logreg.coef_)
print(logreg.intercept_)
y_pred = logreg.predict_proba(X)
plt.scatter(X.values, y_pred[:,1])
#plt.scatter(X.values, y)
plt.show()
[[0.00549892]]
[-10.65132824]
In [60]: y_pred
In [62]: X.head()
Out[62]: balance
0 729.53
1 817.18
2 1073.55
3 529.25
4 785.66
In [63]: # Split the data into train and test sets with a 70:30 ratio (test_size=0.30).
from sklearn.model_selection import train_test_split
# NOTE(review): the call below is cut off in this export (it ends at `random_st`), so the seed value is not visible here.
xTrain, xTest, yTrain, yTest = train_test_split(X, y, test_size=0.30, random_st
[[0.005602]]
[-7.42029855]
Out[65]: LogisticRegression(class_weight='balanced')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust
the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with
nbviewer.org.
In [66]: # Apply the fitted model to both the test and the train features.
# NOTE(review): logreg is presumably the class_weight='balanced' model shown in Out[65]; the cell that fits it is not visible here — confirm.
y_log_pred_test = logreg.predict(xTest)
y_log_pred_train = logreg.predict(xTrain)
In [67]: y_log_pred_test.shape
Out[67]: (3000,)
In [68]: y_log_pred_train.shape
Out[68]: (7000,)
In [69]: y_log_pred_test
[[2514 377]
[ 22 87]]
TN 2514
TP 87
FN 22
FP 377
In [79]: cmap = sns.cubehelix_palette(50, hue=0.05, rot=0, light=0.9, dark=0, as_cmap=T
sns.heatmap(conf,cmap = cmap,xticklabels=['predicted_default_yes=0','predicted_
Out[79]: <Axes: >
True [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
Pred [0 0 0 0 0 0 1 0 1 0 1 1 1 1 1]
In [81]: # Compare the predicted labels with the true labels of the test data.
# NOTE: the `default` counts shown earlier are 9667 "No" vs 333 "Yes", so accuracy alone is a weak summary for this imbalanced target.
print('Accuracy_Score:', metrics.accuracy_score(yTest, y_log_pred_test))
Accuracy_Score: 0.867
0.1875
In [ ]:
In [ ]:
In [ ]:
In [ ]: