Skip to content

Commit 2263dfe

Browse files
committed
ENH Match book version
This is now exactly like the book version (2nd Edition)
1 parent 198bc22 commit 2263dfe

File tree

3 files changed

+44
-45
lines changed

3 files changed

+44
-45
lines changed

ch07/figure1_2.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
from sklearn.datasets import load_boston
1010
from sklearn.linear_model import LinearRegression
11+
from sklearn.metrics import mean_squared_error, r2_score
1112
from matplotlib import pyplot as plt
1213

1314
boston = load_boston()
@@ -32,8 +33,8 @@
3233
[0, lr.predict(boston.data[:, 5].max() + 1)], '-', lw=4)
3334
plt.savefig('Figure1.png', dpi=150)
3435

35-
# The instance member `residues_` contains the sum of the squared residues
36-
rmse = np.sqrt(lr.residues_ / len(x))
36+
mse = mean_squared_error(y, lr.predict(x))
37+
rmse = np.sqrt(mse)
3738
print('RMSE (no intercept): {}'.format(rmse))
3839

3940
# Repeat, but fitting an intercept this time:
@@ -45,6 +46,12 @@
4546
[0, lr.predict(boston.data[:, 5].max() + 1)], '-', lw=4)
4647
plt.savefig('Figure2.png', dpi=150)
4748

48-
# The instance member `residues_` contains the sum of the squared residues
49-
rmse = np.sqrt(lr.residues_ / len(x))
50-
print('RMSE intercept: {}'.format(rmse))
49+
mse = mean_squared_error(y, lr.predict(x))
50+
print("Mean squared error (of training data): {:.3}".format(mse))
51+
52+
rmse = np.sqrt(mse)
53+
print("Root mean squared error (of training data): {:.3}".format(rmse))  # report rmse (sqrt of mse), not mse itself
54+
55+
cod = r2_score(y, lr.predict(x))
56+
print('COD (on training data): {:.2}'.format(cod))
57+

ch07/lr10k.py

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,36 +6,33 @@
66
# It is made available under the MIT License
77

88
import numpy as np
9+
from sklearn.metrics import mean_squared_error, r2_score
910
from sklearn.datasets import load_svmlight_file
10-
from sklearn.linear_model import ElasticNet, LinearRegression
11+
from sklearn.linear_model import LinearRegression
1112
from sklearn.cross_validation import KFold
1213

13-
USE_EN = False
14+
# Ordinary linear regression is used here (see predict10k_en.py for the Elastic net version)
1415

16+
# Load data:
1517
data, target = load_svmlight_file('data/E2006.train')
16-
if USE_EN:
17-
lr = ElasticNet(fit_intercept=True)
18-
else:
19-
lr = LinearRegression(fit_intercept=True)
2018

21-
kf = KFold(len(target), n_folds=10)
22-
err = 0
23-
for train, test in kf:
24-
lr.fit(data[train], target[train])
25-
p = lr.predict(data[test])
26-
p = np.array(p).ravel()
27-
e = p - target[test]
28-
err += np.dot(e, e)
29-
30-
rmse_10cv = np.sqrt(err / len(target))
19+
lr = LinearRegression()
3120

21+
# Compute error on training data to demonstrate that we can obtain near perfect
22+
# scores:
3223

3324
lr.fit(data, target)
34-
p = lr.predict(data)
35-
p = p.ravel()
36-
e = p - target
37-
total_error = np.dot(e, e)
38-
rmse_train = np.sqrt(total_error / len(p))
39-
40-
print('RMSE on training: {}'.format(rmse_train))
41-
print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
25+
pred = lr.predict(data)
26+
27+
print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
28+
print('R2 on training, {:.2}'.format(r2_score(target, pred)))
29+
print('')
30+
31+
pred = np.zeros_like(target)
32+
kf = KFold(len(target), n_folds=5)
33+
for train, test in kf:
34+
lr.fit(data[train], target[train])
35+
pred[test] = lr.predict(data[test])
36+
37+
print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
38+
print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))

ch07/predict10k_en.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,33 +8,28 @@
88
import numpy as np
99
from sklearn.datasets import load_svmlight_file
1010
from sklearn.cross_validation import KFold
11-
from sklearn.linear_model import ElasticNet, LinearRegression
11+
from sklearn.linear_model import ElasticNet
12+
from sklearn.metrics import mean_squared_error, r2_score
1213

1314
data, target = load_svmlight_file('data/E2006.train')
1415

1516
# Edit the lines below if you want to switch method (LinearRegression must then also be imported from sklearn.linear_model):
1617
# met = LinearRegression(fit_intercept=True)
1718
met = ElasticNet(fit_intercept=True, alpha=.1)
1819

19-
kf = KFold(len(target), n_folds=10)
20-
err = 0
20+
kf = KFold(len(target), n_folds=5)
21+
pred = np.zeros_like(target)
2122
for train, test in kf:
2223
met.fit(data[train], target[train])
23-
p = met.predict(data[test])
24-
p = np.array(p).ravel()
25-
e = p - target[test]
26-
err += np.dot(e, e)
27-
28-
rmse_10cv = np.sqrt(err / len(target))
24+
pred[test] = met.predict(data[test])
2925

26+
print('[EN 0.1] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
27+
print('[EN 0.1] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
28+
print('')
3029

3130
met.fit(data, target)
32-
p = met.predict(data)
33-
p = p.ravel()
34-
e = p - target
35-
total_error = np.dot(e, e)
36-
rmse_train = np.sqrt(total_error / len(p))
31+
pred = met.predict(data)
32+
print('[EN 0.1] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
33+
print('[EN 0.1] R2 on training, {:.2}'.format(r2_score(target, pred)))
3734

3835

39-
print('RMSE on training: {}'.format(rmse_train))
40-
print('RMSE on 10-fold CV: {}'.format(rmse_10cv))

0 commit comments

Comments
 (0)