0% found this document useful (1 vote)
772 views

Machine Learning Scikit Handson

The document covers hands-on exercises for various machine learning algorithms including preprocessing, classification, regression, clustering, and ensembles. It loads sample datasets, splits data into training and test sets, trains models, and evaluates performance. For each algorithm, it explores different parameters and evaluates their effect on model performance.
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (1 vote)
772 views

Machine Learning Scikit Handson

The document covers hands-on exercises for various machine learning algorithms including preprocessing, classification, regression, clustering, and ensembles. It loads sample datasets, splits data into training and test sets, trains models, and evaluates performance. For each algorithm, it explores different parameters and evaluates their effect on model performance.
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 4

1st hands-on

# 1st hands-on: preprocessing with scikit-learn (normalize, one-hot, impute).
import sklearn.datasets as skl
import sklearn.preprocessing as skp
from sklearn.impute import SimpleImputer
import numpy as np

# Load iris and L2-normalize each sample (row) to unit Euclidean norm.
iris = skl.load_iris()
iris_normalized = skp.normalize(iris.data, norm='l2')
print(iris_normalized.mean(axis=0))

# One-hot encode the integer class labels; the encoder needs a 2-D
# column input, hence the reshape(-1, 1).
enc = skp.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
print(iris_target_onehot.toarray()[[0, 50, 100]])

# Blank out the first 50 rows, then fill the holes with per-column means.
# NOTE: sklearn.preprocessing.Imputer was removed in scikit-learn 0.22;
# SimpleImputer is the drop-in replacement (column-wise by default, so
# the old axis=0 argument is no longer needed).
iris.data[:50, :] = np.nan
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
iris_imputed = imputer.fit(iris.data).transform(iris.data)
print(iris_imputed.mean(axis=0))

********************************************************************
2nd hands-on

# 2nd hands-on: k-nearest-neighbours classification on iris.
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
# Stratify so train and test keep the same class proportions.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)

print(X_train.shape)
print(X_test.shape)

# Baseline classifier with the default n_neighbors=5.
knn_clf = KNeighborsClassifier()
knn_clf = knn_clf.fit(X_train, Y_train)

print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))

# Sweep n_neighbors over 3..10 and keep the value with the best test
# accuracy (ties resolved in favour of the larger k, since <= updates).
# The loop body was unindented in the original paste and would not run;
# re-indented here.
prev_score = 0
n_val = 0
for i in range(3, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=i)
    knn_clf = knn_clf.fit(X_train, Y_train)
    score = knn_clf.score(X_test, Y_test)
    if prev_score <= score:
        prev_score = score
        n_val = i
print(n_val)

*****************************Decision Tree*****************************

# Decision-tree regression on the Boston housing data.
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import numpy as np

np.random.seed(100)
# NOTE(review): load_boston was removed in scikit-learn 1.2; this script
# requires scikit-learn < 1.2 to run as written.
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Unconstrained tree: fits the training set (near-)perfectly, so the
# train/test score gap below illustrates overfitting.
dt_reg = DecisionTreeRegressor()
dt_reg = dt_reg.fit(X_train, Y_train)
print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))
y_pred = dt_reg.predict(X_test[:2])
print(y_pred)

# Pick the max_depth in 2..4 with the best test R^2.  The loop body was
# unindented in the original paste and would not run; re-indented here.
depths = list(range(2, 5))
scores = []
for depth in depths:
    dt_reg = DecisionTreeRegressor(max_depth=depth)
    dt_reg.fit(X_train, Y_train)
    scores.append(dt_reg.score(X_test, Y_test))
print(depths[scores.index(max(scores))])

*****************************SVM*****************************

# SVM classification on the digits dataset, before and after feature scaling.
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing

digits = datasets.load_digits()
X = digits.data
y = digits.target

# Stratified split so both halves keep the 10-class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)
print(X_train.shape)
print(X_test.shape)

# Support-vector classifier on the raw pixel intensities.
svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test, y_test))

# Standardize every feature to zero mean / unit variance, re-split with
# the same seed, and retrain — scaling typically helps the RBF kernel.
standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(digits.data)
digits_standardized = standardizer.transform(digits.data)
X = digits_standardized
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)
svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf2.score(X_test, y_test))

*****************************Ensemble*****************************
# Random-forest regression on the Boston housing data.
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np

np.random.seed(100)
# NOTE(review): load_boston was removed in scikit-learn 1.2; this script
# requires scikit-learn < 1.2 to run as written.
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Forest with default hyperparameters as a baseline.
rf_reg = RandomForestRegressor()
rf_reg = rf_reg.fit(X_train, Y_train)
print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))
y_pred = rf_reg.predict(X_test[:2])
print(y_pred)

# Try three (max_depth, n_estimators) pairs and report the best test R^2.
# The loop body was unindented in the original paste and would not run;
# re-indented here.  (Unused `max_depth = range(3,5)` local dropped.)
a = [3, 4, 5]
b = [50, 100, 200]
score = []
for depth, n_estimators in zip(a, b):
    rf_reg = RandomForestRegressor(n_estimators=n_estimators, max_depth=depth)
    rf_reg = rf_reg.fit(X_train, Y_train)
    score.append(rf_reg.score(X_test, Y_test))
print(max(score))
print(tuple([5, 100]))

*****************************Clustering *****************************

# Clustering hands-on: score three clusterers against the true iris labels.
import sklearn.datasets as datasets
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import homogeneity_score
from sklearn.cluster import AffinityPropagation

iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)

# BUG FIX: homogeneity_score's signature is (labels_true, labels_pred).
# The original passed the predictions first; homogeneity is asymmetric
# (it is completeness of the swapped arguments), so the scores printed
# were wrong.  Ground truth (Y_test) now goes first.  The redundant
# duplicate predict/fit_predict calls are also removed.

# K-means: fit on the training split, assign clusters to the test split.
km_cls = KMeans(n_clusters=3)
km_cls = km_cls.fit(X_train)
print(metrics.homogeneity_score(Y_test, km_cls.predict(X_test)))

# Agglomerative clustering has no predict(); fit_predict re-clusters
# the test split directly.
agg_cls = AgglomerativeClustering(n_clusters=3)
agg_cls = agg_cls.fit(X_train)
print(metrics.homogeneity_score(Y_test, agg_cls.fit_predict(X_test)))

# Affinity propagation chooses the number of clusters itself.
af_cls = AffinityPropagation()
af_cls = af_cls.fit(X_train)
print(metrics.homogeneity_score(Y_test, af_cls.fit_predict(X_test)))

You might also like