AI in Healthcare - 2
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
%matplotlib inline
sns.set_style("whitegrid")
plt.style.use("fivethirtyeight")
df = pd.read_csv("heart.csv")
df.head()
Output:-
df.target.value_counts().plot(kind="bar", color=["salmon",
"lightblue"])
Output:-
We have 165 people with heart disease and 138 people without
heart disease, so the two classes are roughly balanced.
# Checking for messing values
df.isna().sum()
Output:-
age 0
sex 0
cp 0
trestbps 0
chol 0
fbs 0
restecg 0
thalach 0
exang 0
oldpeak 0
slope 0
ca 0
thal 0
target 0
dtype: int64
categorical_val = []
continous_val = []
for column in df.columns:
print('==============================')
print(f"{column} : {df[column].unique()}")
if len(df[column].unique()) <= 10:
categorical_val.append(column)
else:
continous_val.append(column)
Output:-
plt.figure(figsize=(15, 15))
for i, column in enumerate(categorical_val, 1):
plt.subplot(3, 3, i)
df[df["target"] == 0][column].hist(bins=35, color='blue',
label='Have Heart Disease = NO', alpha=0.6)
df[df["target"] == 1][column].hist(bins=35, color='red',
label='Have Heart Disease = YES', alpha=0.6)
plt.legend()
plt.xlabel(column)
Output:-
# Create another figure
plt.figure(figsize=(10, 8))
Output:-
Data Processing
categorical_val.remove('target')
dataset = pd.get_dummies(df, columns = categorical_val)
s_sc = StandardScaler()
col_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
dataset[col_to_scale] = s_sc.fit_transform(dataset[col_to_scale])
#Now let’s split the data into training and test sets. I will split
the data into 70% training and 30% testing:
X = dataset.drop('target', axis=1)
y = dataset.target
Now let’s train the logistic regression model and evaluate its
accuracy on the training and test sets:
from sklearn.linear_model import LogisticRegression
lr_clf = LogisticRegression(solver='liblinear')
lr_clf.fit(X_train, y_train)
Output:-
test_score = accuracy_score(y_test, lr_clf.predict(X_test)) * 100
train_score = accuracy_score(y_train, lr_clf.predict(X_train)) *
100
Output:-