ML Regression
ML Regression
Task 1
In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
In [2]:
df = pd.read_csv("homeprices.csv")
df.head()
Out[2]:
area price
0 2600 550000
1 3000 565000
2 3200 610000
3 3600 680000
4 4000 725000
In [3]:
inputs = df.drop("price",axis='columns')
inputs
Out[3]:
area
0 2600
1 3000
2 3200
3 3600
4 4000
In [4]:
target = df['price']
In [5]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
In [6]:
model.fit(inputs.values, target.values)
Out[6]:
▾ LinearRegression
LinearRegression()
In [7]:
model.predict([[3300]])
Out[7]:
array([628715.75342466])
In [8]:
model.predict([[5000]])
Out[8]:
array([859554.79452055])
Task 2
In [9]:
df1 = pd.read_csv("areas.csv")
df1.head()
Out[9]:
area
0 1000
1 1500
2 2300
3 3540
4 4120
In [10]:
p = model.predict(df1.values)
p
Out[10]:
array([ 316404.10958904, 384297.94520548, 492928.08219178,
661304.79452055, 740061.64383562, 799808.21917808,
926090.75342466, 650441.78082192, 825607.87671233,
492928.08219178, 1402705.47945205, 1348390.4109589 ,
1144708.90410959])
In [11]:
df1['prices'] = p
df.head()
Out[11]:
area price
0 2600 550000
1 3000 565000
2 3200 610000
3 3600 680000
4 4000 725000
In [12]:
df1.to_csv("Prices.csv",index=False)
In [13]:
df2 = pd.read_csv("Prices.csv")
df2.head()
Out[13]:
area prices
0 1000 316404.109589
1 1500 384297.945205
2 2300 492928.082192
3 3540 661304.794521
4 4120 740061.643836
Task 3
In [14]:
df3 = pd.read_csv("canada_pci.csv")
df3.head()
Out[14]:
0 1970 3399.299037
1 1971 3768.297935
2 1972 4251.175484
3 1973 4804.463248
4 1974 5576.514583
In [15]:
inputs1 = df3.drop("per capita income (US$)", axis='columns')
inputs1.head()
Out[15]:
year
0 1970
1 1971
2 1972
3 1973
4 1974
In [16]:
target1 = df3['per capita income (US$)']
target1.head()
Out[16]:
0 3399.299037
1 3768.297935
2 4251.175484
3 4804.463248
4 5576.514583
Name: per capita income (US$), dtype: float64
In [17]:
In [17]:
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()
In [18]:
model1.fit(inputs1.values,target1.values)
Out[18]:
▾ LinearRegression
LinearRegression()
In [19]:
model1.predict([[2020]])
Out[19]:
array([41288.69409442])
In [20]:
model1.score(inputs1.values, target1.values)
Out[20]:
0.890916917957032
In [22]:
df = pd.read_csv("homerates.csv")
df.head()
Out[22]:
In [23]:
mn = df.bedrooms.mean()
mn
Out[23]:
4.2
In [24]:
df.bedrooms = df.bedrooms.fillna(mn)
df.head()
Out[24]:
area bedrooms age price
In [25]:
inputs = df.drop('price',axis='columns')
inputs
Out[25]:
0 2600 3.0 20
1 3000 4.0 15
2 3200 4.2 18
3 3600 3.0 30
4 4000 5.0 8
5 4100 6.0 8
In [26]:
target = df['price']
target
Out[26]:
0 550000
1 565000
2 610000
3 595000
4 760000
5 810000
Name: price, dtype: int64
In [27]:
sns.pairplot(df)
Out[27]:
<seaborn.axisgrid.PairGrid at 0x22c1e3b25f0>
In [28]:
x = df.corr(numeric_only=True)
x
Out[28]:
In [29]:
sns.heatmap(x, annot=True, cmap='coolwarm')
Out[29]:
<Axes: >
In [30]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
In [31]:
model.fit(inputs.values,target.values)
Out[31]:
▾ LinearRegression
LinearRegression()
In [32]:
model.score(inputs.values, target.values)
Out[32]:
0.9540926625396438
In [33]:
model.predict([[300,3,40]])
Out[33]:
array([175825.67757211])
In [34]:
model.predict([[2500,4,5]])
Out[34]:
array([579906.16685223])
In [36]:
df1 = pd.read_csv("hiring.csv")
df1.head()
Out[36]:
test_score(out of interview_score(out of
experience salary($)
10) 10)
In [38]:
df1['test_score(out of 10)'] = df1['test_score(out of 10)'].fillna(df1['test_score(out of
10)'].mean())
In [39]:
df1.head()
Out[39]:
test_score(out of interview_score(out of
experience salary($)
10) 10)
In [40]:
df1.experience = df1.experience.apply(w2n.word_to_num)
In [41]:
df1.head()
Out[41]:
test_score(out of interview_score(out of
experience salary($)
10) 10)
0 0 8.0 9 50000
1 0 8.0 6 45000
2 5 6.0 7 60000
3 2 10.0 10 65000
4 7 9.0 6 70000
In [42]:
inputs1 = df1.drop("salary($)", axis = 'columns')
inputs1
Out[42]:
test_score(out of interview_score(out of
experience
10) 10)
0 0 8.000000 9
1 0 8.000000 6
2 5 6.000000 7
3 2 10.000000 10
4 7 9.000000 6
5 3 7.000000 10
6 10 7.857143 7
7 11 7.000000 8
In [43]:
target1 = df1['salary($)']
target1
Out[43]:
0 50000
1 45000
2 60000
3 65000
4 70000
5 62000
6 72000
7 80000
Name: salary($), dtype: int64
In [44]:
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()
In [45]:
model1.fit(inputs1.values, target1.values)
Out[45]:
▾ LinearRegression
LinearRegression()
In [46]:
model1.predict([[2,9,6]])
Out[46]:
array([53290.89255945])
In [47]:
model1.predict([[12,10,10]])
Out[47]:
array([92268.07227784])
In [ ]: