6 in 1 PRGM
6 in 1 PRGM
ROHAN G A
In [2]: import sys
In [6]: url="https://raw.githubuserconte
names=['sepal-length','sepal-wid
dataset=read_csv(url,names=names
In [7]: print(dataset.shape)
(150, 5)
In [8]: dataset
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
145 6.7 3.0 5.2 2.3 Iris-virginica
146 6.3 2.5 5.0 1.9 Iris-virginica
147 6.5 3.0 5.2 2.0 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
149 5.9 3.0 5.1 1.8 Iris-virginica
In [9]: print(dataset.head(20))
    sepal-length  sepal-width  petal-length  petal-width        class
0            5.1          3.5           1.4          0.2  Iris-setosa
1            4.9          3.0           1.4          0.2  Iris-setosa
2            4.7          3.2           1.3          0.2  Iris-setosa
3            4.6          3.1           1.5          0.2  Iris-setosa
4            5.0          3.6           1.4          0.2  Iris-setosa
5            5.4          3.9           1.7          0.4  Iris-setosa
6            4.6          3.4           1.4          0.3  Iris-setosa
7            5.0          3.4           1.5          0.2  Iris-setosa
8            4.4          2.9           1.4          0.2  Iris-setosa
9            4.9          3.1           1.5          0.1  Iris-setosa
10           5.4          3.7           1.5          0.2  Iris-setosa
11           4.8          3.4           1.6          0.2  Iris-setosa
12           4.8          3.0           1.4          0.1  Iris-setosa
13           4.3          3.0           1.1          0.1  Iris-setosa
14           5.8          4.0           1.2          0.2  Iris-setosa
15           5.7          4.4           1.5          0.4  Iris-setosa
16           5.4          3.9           1.3          0.4  Iris-setosa
17           5.1          3.5           1.4          0.3  Iris-setosa
18           5.7          3.8           1.7          0.3  Iris-setosa
19           5.1          3.8           1.5          0.3  Iris-setosa
In [10]: print(dataset.describe())
       sepal-length  sepal-width  petal-length  petal-width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000
In [12]: print(dataset.groupby('class').s
class
Iris-setosa 50
Iris-versicolor 50
Iris-virginica 50
dtype: int64
In [14]: dataset.plot(kind='box',subplots
pyplot.show()
In [15]: dataset.hist()
pyplot.show()
In [16]: scatter_matrix(dataset)
pyplot.show()
In [17]: array=dataset.values
X=array[:,0:4]
y=array[:,4]
X_train,X_validation,Y_train,Y_v
In [ ]:
In [20]: models=[]
models.append(('LR',LogisticRegr
models.append(('LDA',LinearDiscr
models.append(('KNN',KNeighborsC
models.append(('CART',DecisionTr
models.append(('NB',GaussianNB()
models.append(('SVM',SVC(gamma='
results=[]
names=[]
for name, model in models:
kfold = StratifiedKFold(n_split
cv_results = cross_val_score(mo
results.append(cv_results)
names.append(name)
print('%s: %f (%f)' %(name,cv_r
In [23]: pyplot.boxplot(results,labels=na
pyplot.title('Algorith Compariso
pyplot.show()
In [25]: model=SVC(gamma='auto')
model.fit(X_train,Y_train)
prdictions=model.predict(X_valid
In [31]: print(accuracy_score(Y_validatio
print(confusion_matrix(Y_validat
print(classification_report(Y_va
0.9666666666666667
[[11 0 0]
[ 0 12 1]
[ 0 0 6]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        11
Iris-versicolor       1.00      0.92      0.96        13
 Iris-virginica       0.86      1.00      0.92         6

       accuracy                           0.97        30
      macro avg       0.95      0.97      0.96        30
   weighted avg       0.97      0.97      0.97        30
In [ ]: