|
6 | 6 | # It is made available under the MIT License |
7 | 7 |
|
8 | 8 | import numpy as np |
| 9 | +from sklearn.metrics import mean_squared_error, r2_score |
9 | 10 | from sklearn.datasets import load_svmlight_file |
10 | | -from sklearn.linear_model import ElasticNet, LinearRegression |
| 11 | +from sklearn.linear_model import LinearRegression |
11 | 12 | from sklearn.cross_validation import KFold |
12 | 13 |
|
13 | | -USE_EN = False |
| 14 | +# Ordinary linear regression is used throughout (there is no Elastic net option) |
14 | 15 |
|
| 16 | +# Load data: |
15 | 17 | data, target = load_svmlight_file('data/E2006.train') |
16 | | -if USE_EN: |
17 | | - lr = ElasticNet(fit_intercept=True) |
18 | | -else: |
19 | | - lr = LinearRegression(fit_intercept=True) |
20 | 18 |
|
21 | | -kf = KFold(len(target), n_folds=10) |
22 | | -err = 0 |
23 | | -for train, test in kf: |
24 | | - lr.fit(data[train], target[train]) |
25 | | - p = lr.predict(data[test]) |
26 | | - p = np.array(p).ravel() |
27 | | - e = p - target[test] |
28 | | - err += np.dot(e, e) |
29 | | - |
30 | | -rmse_10cv = np.sqrt(err / len(target)) |
| 19 | +lr = LinearRegression() |
31 | 20 |
|
| 21 | +# Compute error on training data to demonstrate that we can obtain near-perfect |
| 22 | +# scores when the model is evaluated on the same data it was fit on: |
32 | 23 |
|
33 | 24 | lr.fit(data, target) |
34 | | -p = lr.predict(data) |
35 | | -p = p.ravel() |
36 | | -e = p - target |
37 | | -total_error = np.dot(e, e) |
38 | | -rmse_train = np.sqrt(total_error / len(p)) |
39 | | - |
40 | | -print('RMSE on training: {}'.format(rmse_train)) |
41 | | -print('RMSE on 10-fold CV: {}'.format(rmse_10cv)) |
| 25 | +pred = lr.predict(data) |
| 26 | + |
| 27 | +print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred)))) |
| 28 | +print('R2 on training, {:.2}'.format(r2_score(target, pred))) |
| 29 | +print('') |
| 30 | + |
| 31 | +pred = np.zeros_like(target) |
| 32 | +kf = KFold(len(target), n_folds=5) |
| 33 | +for train, test in kf: |
| 34 | + lr.fit(data[train], target[train]) |
| 35 | + pred[test] = lr.predict(data[test]) |
| 36 | + |
| 37 | +print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred)))) |
| 38 | +print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred))) |
0 commit comments