3.2 Grid Search
3.2.1) GridSearchCV
GridSearchCV is used to evaluate combinations of the hyperparameters used in any algorithm, including internal options such as a linear kernel versus other kernel types. We give it a set of candidate values, and it tries them all and selects the most accurate combination.
It is used through the module model_selection.GridSearchCV
The steps for applying it are as follows:
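As a quick first look before the full course template below, here is a minimal sketch (a hypothetical KNN-on-iris example, assumed purely for illustration and not part of the course material):
#Minimal sketch (assumed example)
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
iris = load_iris()
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}              # candidate hyperparameter values
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid.fit(iris.data, iris.target)                           # tries every combination with 5-fold CV
print(grid.best_params_, grid.best_score_)                 # winning value and its CV accuracy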
General form
#Import Libraries
from sklearn.model_selection import GridSearchCV
import pandas as pd
#----------------------------------------------------
'''
model_selection.GridSearchCV(estimator, param_grid, scoring=None, fit_params=None, n_jobs=None, iid='warn',
                             refit=True, cv='warn', verbose=0, pre_dispatch='2*n_jobs',
                             error_score='raise-deprecating', return_train_score='warn')
'''
#=======================================================================
#Example :
#from sklearn.svm import SVR
#SelectedModel = SVR(epsilon=0.1,gamma='auto')
#SelectedParameters = {'kernel':('linear', 'rbf'), 'C':[1,2,3,4,5]}
#=======================================================================
GridSearchModel = GridSearchCV(SelectedModel,SelectedParameters, cv = 2,return_train_score=True)
GridSearchModel.fit(X_train, y_train)
sorted(GridSearchModel.cv_results_.keys())
GridSearchResults = pd.DataFrame(GridSearchModel.cv_results_)[['mean_test_score', 'std_test_score', 'params' ,
'rank_test_score' , 'mean_fit_time']]
# Showing Results
print('All Results are :\n', GridSearchResults )
print('Best Score is :', GridSearchModel.best_score_)
print('Best Parameters are :', GridSearchModel.best_params_)
print('Best Estimator is :', GridSearchModel.best_estimator_)
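After fitting, the refit best estimator behaves like any fitted model; a short sketch of using it (X_test and y_test are assumed to come from an earlier train_test_split, which the template above does not show):
# Using the result of the search (sketch; X_test / y_test assumed from train_test_split)
BestModel = GridSearchModel.best_estimator_
y_pred = BestModel.predict(X_test)
print('Test Score is :', BestModel.score(X_test, y_test))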
Example
#Import Libraries
from sklearn.datasets import load_boston   # note: load_boston was removed in scikit-learn 1.2; this example needs an older version
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import pandas as pd
#----------------------------------------------------
BostonData = load_boston()
#X Data
X = BostonData.data
#print('X Data is \n' , X[:10])
#print('X shape is ' , X.shape)
#print('X Features are \n' , BostonData.feature_names)
#y Data
y = BostonData.target
#print('y Data is \n' , y[:10])
#print('y shape is ' , y.shape)
#----------------------------------------------------
#Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=44)  # split sizes are illustrative
#----------------------------------------------------
#Applying Grid Searching :
'''
model_selection.GridSearchCV(estimator, param_grid, scoring=None, fit_params=None, n_jobs=None, iid='warn',
                             refit=True, cv='warn', verbose=0, pre_dispatch='2*n_jobs',
                             error_score='raise-deprecating', return_train_score='warn')
'''
#Example :
from sklearn.svm import SVR
SelectedModel = SVR(epsilon=0.1,gamma='auto')
SelectedParameters = {'kernel':('linear', 'rbf'), 'C':[1,2,3,4,5]}
GridSearchModel = GridSearchCV(SelectedModel,SelectedParameters, cv = 2,return_train_score=True)
GridSearchModel.fit(X_train, y_train)
sorted(GridSearchModel.cv_results_.keys())
GridSearchResults = pd.DataFrame(GridSearchModel.cv_results_)[['mean_test_score', 'std_test_score', 'params' ,
'rank_test_score' , 'mean_fit_time']]
# Showing Results
print('All Results are :\n', GridSearchResults )
print('Best Score is :', GridSearchModel.best_score_)
print('Best Parameters are :', GridSearchModel.best_params_)
print('Best Estimator is :', GridSearchModel.best_estimator_)
Example
import pandas as pd
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
parameters = {'kernel':('linear', 'rbf'), 'C':[1,2,3,4,5]}
svc = svm.SVC(gamma="scale")
clf = GridSearchCV(svc, parameters, cv=5)
clf.fit(iris.data, iris.target)
sorted(clf.cv_results_.keys())
pd.DataFrame(clf.cv_results_)[['mean_test_score', 'std_test_score', 'params' , 'rank_test_score' , 'mean_fit_time']]
Example
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
iris = load_iris()
X = iris.data
y = iris.target
knn = KNeighborsClassifier(n_neighbors=5)
scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
print(scores)
# define the range of k values to search (this definition was missing from the extracted code)
k_range = list(range(1, 31))
param_grid = dict(n_neighbors=k_range)
print(param_grid)
# build and fit the grid search (this step was missing from the extracted code)
grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy')
grid.fit(X, y)
print(grid.cv_results_['params'])
print(grid.cv_results_['mean_test_score'])
grid_mean_scores = grid.cv_results_['mean_test_score']
print(grid_mean_scores)
plt.plot(k_range, grid_mean_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')
plt.show()
Example
import time
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.kernel_ridge import KernelRidge
rng = np.random.RandomState(0)
# #############################################################################
# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()
12
# #############################################################################
# Fit regression model
train_size = 100
svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5,
                   param_grid={"C": [1e0, 1e1, 1e2, 1e3],
                               "gamma": np.logspace(-2, 2, 5)})
# kernel ridge counterpart used below (its definition was missing from the extracted code;
# grid values follow the standard scikit-learn SVR-vs-KRR comparison example)
kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5,
                  param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
                              "gamma": np.logspace(-2, 2, 5)})
t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
% svr_fit)
t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
print("KRR complexity and bandwidth selected and model fitted in %.3f s"
% kr_fit)
sv_ratio = svr.best_estimator_.support_.shape[0] / train_size
print("Support vector ratio: %.3f" % sv_ratio)
t0 = time.time()
# evaluation points for prediction timing (X_plot was missing from the extracted code)
X_plot = np.linspace(0, 5, 100000)[:, None]
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0
print("SVR prediction for %d inputs in %.3f s"
% (X_plot.shape[0], svr_predict))
t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0
print("KRR prediction for %d inputs in %.3f s"
% (X_plot.shape[0], kr_predict))
print('==================================')
Example
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
iris = load_iris()
X = iris.data
y = iris.target
knn = KNeighborsClassifier(n_neighbors=5)
scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
print(scores)
# define the parameter values that should be searched (these definitions were missing from the extracted code)
k_range = list(range(1, 31))
weight_options = ['uniform', 'distance']
param_grid = dict(n_neighbors=k_range, weights=weight_options)
print(param_grid)
# build and fit the grid search (this step was missing from the extracted code)
grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy')
grid.fit(X, y)
print(grid.best_score_)
print(grid.best_params_)
# fit the standalone KNN on the full data before predicting a new sample
knn.fit(X, y)
knn.predict([[3, 5, 4, 2]])
3.2.2) RandomizedSearchCV
RandomizedSearchCV is similar in idea to grid search in looking for the optimal values. The difference is that it does not evaluate every possible combination, which would take an enormous amount of time; instead it samples random values and checks which of them performs best.
Its usage is similar to grid search, except that we also pass it the number of allowed trials n_iter, as well as the seed that controls the randomness, random_state.
It is used through the module model_selection.RandomizedSearchCV
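For illustration, here is a minimal sketch (a hypothetical SVC-on-iris example, assumed purely for illustration) showing n_iter and random_state in use, with C sampled from a continuous distribution rather than a fixed grid:
#Minimal sketch (assumed example)
from scipy.stats import uniform
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
iris = load_iris()
param_dist = {'C': uniform(0.1, 10), 'kernel': ['linear', 'rbf']}    # C sampled from a continuous range
search = RandomizedSearchCV(SVC(gamma='scale'), param_dist,
                            n_iter=10, random_state=42, cv=5)        # 10 random trials, reproducible sampling
search.fit(iris.data, iris.target)
print(search.best_params_, search.best_score_)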
General form
#Import Libraries
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
#----------------------------------------------------
'''
model_selection.RandomizedSearchCV(estimator, param_distributions, n_iter=10, scoring=None, fit_params=None,
                                   n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                                   pre_dispatch='2*n_jobs', random_state=None,
                                   error_score='raise-deprecating', return_train_score='warn')
'''
#=======================================================================
#Example :
#from sklearn.svm import SVR
#SelectedModel = SVR(epsilon=0.1,gamma='auto')
#SelectedParameters = {'kernel':('linear', 'rbf'), 'C':[1,2,3,4,5]}
#=======================================================================
RandomizedSearchModel = RandomizedSearchCV(SelectedModel,SelectedParameters, cv = 2,return_train_score=True)
RandomizedSearchModel.fit(X_train, y_train)
sorted(RandomizedSearchModel.cv_results_.keys())
RandomizedSearchResults = pd.DataFrame(RandomizedSearchModel.cv_results_)[['mean_test_score', 'std_test_score',
'params' , 'rank_test_score' , 'mean_fit_time']]
# Showing Results
print('All Results are :\n', RandomizedSearchResults )
print('Best Score is :', RandomizedSearchModel.best_score_)
print('Best Parameters are :', RandomizedSearchModel.best_params_)
print('Best Estimator is :', RandomizedSearchModel.best_estimator_)
Example
#Import Libraries
from sklearn.datasets import load_boston   # note: load_boston was removed in scikit-learn 1.2; this example needs an older version
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
#----------------------------------------------------
BostonData = load_boston()
#X Data
X = BostonData.data
#print('X Data is \n' , X[:10])
#print('X shape is ' , X.shape)
#print('X Features are \n' , BostonData.feature_names)
#y Data
y = BostonData.target
#print('y Data is \n' , y[:10])
#print('y shape is ' , y.shape)
#----------------------------------------------------
#Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=44)  # split sizes are illustrative
#----------------------------------------------------
#Applying Randomized Grid Searching :
'''
model_selection.RandomizedSearchCV(estimator, param_distributions, n_iter=10, scoring=None, fit_params=None,
                                   n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                                   pre_dispatch='2*n_jobs', random_state=None,
                                   error_score='raise-deprecating', return_train_score='warn')
'''
#=======================================================================
#Example :
from sklearn.svm import SVR
SelectedModel = SVR(epsilon=1,gamma='auto')
SelectedParameters = {'kernel':('linear', 'rbf'), 'C':[1,2]}
RandomizedSearchModel = RandomizedSearchCV(SelectedModel, SelectedParameters, n_iter=4, cv=2, return_train_score=True)  # n_iter must not exceed the 4 available parameter combinations in older scikit-learn versions
RandomizedSearchModel.fit(X_train, y_train)
sorted(RandomizedSearchModel.cv_results_.keys())
RandomizedSearchResults = pd.DataFrame(RandomizedSearchModel.cv_results_)[['mean_test_score', 'std_test_score',
'params' , 'rank_test_score' , 'mean_fit_time']]
# Showing Results
print('All Results are :\n', RandomizedSearchResults )
print('Best Score is :', RandomizedSearchModel.best_score_)
print('Best Parameters are :', RandomizedSearchModel.best_params_)
print('Best Estimator is :', RandomizedSearchModel.best_estimator_)
Example
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
# load the data and define the base estimator (these definitions were missing from the extracted code)
iris = load_iris()
X = iris.data
y = iris.target
knn = KNeighborsClassifier()
# define the parameter values that should be searched
k_range = list(range(1, 31))
weight_options = ['uniform', 'distance']
# specify the "parameter distributions" to sample from (param_dist was missing from the extracted code)
param_dist = dict(n_neighbors=k_range, weights=weight_options)
# run RandomizedSearchCV 20 times (with n_iter=10) and record the best score
best_scores = []
for _ in range(20):
    rand = RandomizedSearchCV(knn, param_dist, cv=10, scoring='accuracy', n_iter=10, return_train_score=False)
    rand.fit(X, y)
    best_scores.append(round(rand.best_score_, 3))
print(best_scores)
Example
# Load libraries
from scipy.stats import uniform
from sklearn import linear_model, datasets
from sklearn.model_selection import RandomizedSearchCV
iris = datasets.load_iris()
X = iris.data
y = iris.target
# liblinear supports both the 'l1' and 'l2' penalties searched below
logistic = linear_model.LogisticRegression(solver='liblinear')
# define the hyperparameter distributions to sample from (these definitions were missing from the extracted code)
penalty = ['l1', 'l2']
C = uniform(loc=0, scale=4)
hyperparameters = dict(C=C, penalty=penalty)
# Create randomized search 5-fold cross validation and 100 iterations
clf = RandomizedSearchCV(logistic, hyperparameters, random_state=1, n_iter=100, cv=5, verbose=0, n_jobs=-1)
best_model = clf.fit(X, y)
print('Best Parameters are :', best_model.best_params_)