Multivariate Linear Regression
Multivariate Linear Regression
ipynb - Colaboratory
Yt lec
import pandas as pd
import numpy as np
import math
from sklearn import linear_model
# Load the home-price data set from the working directory.
df = pd.read_csv("homeprices.csv")
df
# `med` was referenced below without being assigned in any visible cell, which
# raises NameError on a fresh run. Define it here as the median of the known
# bedroom counts, rounded down to a whole number with math.floor (the same
# rounding the exercise further down applies to its own fill value).
med = math.floor(df.bedrooms.median())
df.bedrooms = df.bedrooms.fillna(med) # filling all the NaN values with the median
df
https://colab.research.google.com/drive/1mWjym7fs-JIEhrgZV1pu0Ey6ErZKC5EL#scrollTo=79BoMxPt-T8E&printMode=true 1/5
3/10/24, 4:35 PM MlYtLec3.ipynb - Colaboratory
# Fit an ordinary least-squares model: price as a linear function of
# area, bedrooms and age.
reg = linear_model.LinearRegression()
reg.fit(X=df[["area", "bedrooms", "age"]], y=df["price"])
▾ LinearRegression
LinearRegression()
# Inspect the fitted coefficients (one per feature column given to fit()).
reg.coef_
# Inspect the fitted intercept of the linear model.
reg.intercept_
221323.00186540396
# Predict the price for area=3000, bedrooms=3, age=40. The query row is wrapped
# in a DataFrame carrying the same column names the model was fitted with;
# passing a bare list makes scikit-learn emit the
# "X does not have valid feature names" UserWarning.
reg.predict(pd.DataFrame([[3000, 3, 40]], columns=['area', 'bedrooms', 'age']))
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(
array([498408.25158031])
# Predict the price for area=2500, bedrooms=4, age=5. The query row is wrapped
# in a DataFrame carrying the same column names the model was fitted with;
# passing a bare list makes scikit-learn emit the
# "X does not have valid feature names" UserWarning.
reg.predict(pd.DataFrame([[2500, 4, 5]], columns=['area', 'bedrooms', 'age']))
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(
array([578876.03748933])
Exercise
import pandas as pd
import numpy as np
import math
from sklearn import linear_model
# Load the hiring data set for the exercise from the working directory.
df1 = pd.read_csv("hiring.csv")
df1
https://colab.research.google.com/drive/1mWjym7fs-JIEhrgZV1pu0Ey6ErZKC5EL#scrollTo=79BoMxPt-T8E&printMode=true 2/5
3/10/24, 4:35 PM MlYtLec3.ipynb - Colaboratory
# Missing 'experience' entries become the word "zero", so the whole column
# holds number words that can be converted to numeric values afterwards.
df1["experience"] = df1["experience"].fillna(value="zero")
df1
https://colab.research.google.com/drive/1mWjym7fs-JIEhrgZV1pu0Ey6ErZKC5EL#scrollTo=79BoMxPt-T8E&printMode=true 3/5
3/10/24, 4:35 PM MlYtLec3.ipynb - Colaboratory
# Convert every number word in 'experience' to its numeric value by running
# w2n.word_to_num over each entry of the column.
# NOTE(review): no import for `w2n` appears in the visible cells - presumably
# `from word2number import w2n`; confirm it is in scope before this cell runs.
df1["experience"] = df1["experience"].apply(func=w2n.word_to_num)
df1
experience test_score(out of 10) interview_score(out of 10) salary($)
0 0 8.0 9 50000
1 0 8.0 6 45000
2 5 6.0 7 60000
3 2 10.0 10 65000
4 7 9.0 6 70000
5 3 7.0 10 62000
6 10 NaN 7 72000
7 11 7.0 8 80000
# Fill value for missing test scores: the column mean, rounded down to an integer.
mean = math.floor(df1["test_score(out of 10)"].mean())
print("The mean is :", mean)
# Substitute that value wherever the test score is NaN.
df1["test_score(out of 10)"] = df1["test_score(out of 10)"].fillna(value=mean)
df1
The mean is : 7
experience test_score(out of 10) interview_score(out of 10) salary($)
0 0 8.0 9 50000
1 0 8.0 6 45000
2 5 6.0 7 60000
3 2 10.0 10 65000
4 7 9.0 6 70000
5 3 7.0 10 62000
6 10 7.0 7 72000
7 11 7.0 8 80000
# Fit an ordinary least-squares model: salary as a linear function of
# experience, test score and interview score.
reg1 = linear_model.LinearRegression()
reg1.fit(
    X=df1[["experience", "test_score(out of 10)", "interview_score(out of 10)"]],
    y=df1["salary($)"],
)
https://colab.research.google.com/drive/1mWjym7fs-JIEhrgZV1pu0Ey6ErZKC5EL#scrollTo=79BoMxPt-T8E&printMode=true 4/5
3/10/24, 4:35 PM MlYtLec3.ipynb - Colaboratory
▾ LinearRegression
LinearRegression()
# Inspect the fitted coefficients (one per feature column given to fit()).
reg1.coef_
# Inspect the fitted intercept of the linear model.
reg1.intercept_
14992.65144669314
# Predict the salary for experience=2, test score=9, interview score=6. The
# query row is wrapped in a DataFrame carrying the same column names the model
# was fitted with; passing a bare list makes scikit-learn emit the
# "X does not have valid feature names" UserWarning.
reg1.predict(pd.DataFrame(
    [[2, 9, 6]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(
array([53713.86677124])
# Predict the salary for experience=12, test score=10, interview score=10. The
# query row is wrapped in a DataFrame carrying the same column names the model
# was fitted with; passing a bare list makes scikit-learn emit the
# "X does not have valid feature names" UserWarning.
reg1.predict(pd.DataFrame(
    [[12, 10, 10]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(
array([93747.79628651])
https://colab.research.google.com/drive/1mWjym7fs-JIEhrgZV1pu0Ey6ErZKC5EL#scrollTo=79BoMxPt-T8E&printMode=true 5/5