3-LinearRegression Formula Based
3-LinearRegression Formula Based
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [24]: data = pd.read_csv('headbrain.csv')  # columns: Gender, Age Range, Head Size(cm^3), Brain Weight(grams)
# Report the (rows, columns) shape of the loaded frame.
print(data.shape)
# Last expression of the cell: shows the first five rows as the cell output.
data.head(n=5)
(237, 4)
Out[24]: Gender Age Range Head Size(cm^3) Brain Weight(grams)
0 1 1 4512 1530
1 1 1 3738 1297
2 1 1 4261 1335
3 1 1 3777 1282
4 1 1 4177 1590
print(b1,b0)  # b1 is the slope and b0 the intercept of the fitted line (prediction = b0 + b1*X); both are computed in a part of the notebook not shown here
0.26342933948939945 325.57342104944223
# Overlay the fitted line on the raw observations, drawing on the current axes.
# NOTE(review): x and y are presumably points sampled along the regression
# line; they are defined in a part of the notebook not shown here — confirm.
ax = plt.gca()
ax.plot(x, y, label='Regression Line')
ax.scatter(X, Y, label='Scatter Plot')
ax.set_xlabel('Head Size')
ax.set_ylabel('Brain Weight')
ax.legend()
plt.show()
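In [30]: # To see how well the model fits, let's calculate R-squared (the coefficient of determination): r2 = 1 - ss_r/ss_t
# ss_t is the total sum of squares: the sum of (Y - mean_y)**2
# ss_r is the residual sum of squares: the sum of (Y - y_pred)**2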
# Coefficient of determination, r2 = 1 - ss_r / ss_t, computed with vectorized
# NumPy operations instead of an element-by-element Python loop.
# NOTE(review): b0, b1, mean_y, n, X and Y come from earlier cells that are not
# shown here; this assumes X and Y are 1-D with at least n entries — confirm.
X_fit = np.asarray(X)[:n]
Y_fit = np.asarray(Y)[:n]

y_pred = b0 + b1 * X_fit                 # model prediction for every sample
ss_t = np.sum((Y_fit - mean_y) ** 2)     # total sum of squares
ss_r = np.sum((Y_fit - y_pred) ** 2)     # residual sum of squares

r2 = 1 - (ss_r / ss_t)
print(r2)
0.6393117199570003
In [32]: # Now let's see how the same regression can be implemented with the scikit-learn ML library
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# scikit-learn estimators expect a 2-D feature matrix of shape
# (n_samples, n_features). The manual R-squared loop indexes X[i] as a scalar,
# so X is 1-D there; turn it into a single-feature column here. An X that is
# already 2-D is passed through unchanged.
X_2d = np.asarray(X)
if X_2d.ndim == 1:
    X_2d = X_2d.reshape(-1, 1)

# Creating Model
reg = LinearRegression()
reg = reg.fit(X_2d, Y)          # fit() returns the fitted estimator itself
Y_pred = reg.predict(X_2d)      # predictions on the training data

# Error metrics on the training data.
mse = mean_squared_error(Y, Y_pred)
rmse = np.sqrt(mse)
# NOTE: the name `r2_score` would shadow sklearn.metrics.r2_score if that
# function were imported; it is kept so cells that read this variable still work.
r2_score = reg.score(X_2d, Y)   # for regressors, score() returns R-squared
print(rmse)
print(r2_score)
72.1206213783709
0.639311719957