0% found this document useful (0 votes)
12 views

K Means On IRIS Dataset

Uploaded by

bucky17d
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views

K Means On IRIS Dataset

Uploaded by

bucky17d
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

K Means on IRIS dataset

# Silence library warnings so they do not clutter the cell outputs.
import warnings
warnings.filterwarnings(action='ignore')

# Third-party libraries for numerical work and tabular data.
import numpy as np
import pandas as pd

# Load the Iris measurements from a CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="IRIS.csv")

# Preview the first five rows.
data.head(n=5)

sepal_length sepal_width petal_length petal_width species

0 5.1 3.5 1.4 0.2 Iris-setosa

1 4.9 3.0 1.4 0.2 Iris-setosa

2 4.7 3.2 1.3 0.2 Iris-setosa

3 4.6 3.1 1.5 0.2 Iris-setosa

4 5.0 3.6 1.4 0.2 Iris-setosa

# Summarise column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sepal_length 150 non-null float64
1 sepal_width 150 non-null float64
2 petal_length 150 non-null float64
3 petal_width 150 non-null float64
4 species 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

# Call the method to get summary statistics (count, mean, std, quartiles);
# the bare attribute `data.describe` only echoes the bound-method repr.
data.describe()

<bound method NDFrame.describe of sepal_length sepal_width petal_length petal_width species


0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
.. ... ... ... ... ...
145 6.7 3.0 5.2 2.3 Iris-virginica
146 6.3 2.5 5.0 1.9 Iris-virginica
147 6.5 3.0 5.2 2.0 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
149 5.9 3.0 5.1 1.8 Iris-virginica

[150 rows x 5 columns]>

Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Histogram of sepal length
plt.hist(data["sepal_length"], bins=10, color="green")
plt.xlabel("sepal_length")
# The y-axis of a histogram is the number of samples per bin, not petal length.
plt.ylabel("count")
plt.show()
# Pairwise scatter plots of the numeric columns, coloured by species
sns.pairplot(data=data, hue="species")
plt.show()

# Histogram with a KDE line overlaid, split by species
sns.histplot(
    data=data,
    x="petal_length",
    hue="species",
    bins=25,
    kde=True,
)
plt.title("Petal Length distribution")
plt.xlabel("petal_length")
plt.ylabel("count")
plt.show()
# Correlation heatmap of the measurement columns (species label excluded).
df1 = data.drop(columns=['species'])

correlations = df1.corr()
sns.heatmap(correlations, annot=True)
plt.show()

Mapping the species names to the numbers 0 to 2

# Integer code assigned to each species label.
flower_mapping = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
}

# Overwrite the text labels with their integer codes.
species_codes = data["species"].map(flower_mapping)
data["species"] = species_codes

KMeans
from sklearn.cluster import KMeans

# Fix the seed and the number of restarts so the centroids and the cluster
# numbering are the same on every run and across scikit-learn versions
# (the default n_init changed from 10 to 'auto' in scikit-learn 1.4).
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Cluster on the two petal measurements only.
kmeans.fit(data[['petal_length','petal_width']])

▾ KMeans
KMeans(n_clusters=3)

# Centroid coordinates, one row per cluster: (petal_length, petal_width).
kmeans.cluster_centers_
array([[5.59583333, 2.0375 ],
[1.464 , 0.244 ],
[4.26923077, 1.34230769]])

# Samples in petal space, coloured by their true species code.
plt.scatter(data['petal_length'], data['petal_width'], c=data['species'], cmap='rainbow')

# Read the centroids from the fitted model instead of hard-coding numbers
# copied from an earlier run; those go stale whenever the model is refit.
centers = kmeans.cluster_centers_
# Order centroids by petal_length so each keeps the same marker colour
# regardless of the arbitrary cluster numbering KMeans assigns.
centers = centers[centers[:, 0].argsort()]
plt.scatter(centers[:, 0], centers[:, 1], s=200, c=['b', 'g', 'r'], marker='s')
plt.show()

# Assign each sample to the nearest centroid of the already-fitted model.
petal_features = data[['petal_length', 'petal_width']]
pred = kmeans.predict(petal_features)

pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# One new flower: petal_length=6, petal_width=2.
sample_test = np.array([6, 2])
# The model was fitted on a DataFrame with named columns, so wrap the single
# row in a DataFrame with the same column names; a bare ndarray makes
# scikit-learn warn about missing feature names.
second_test = pd.DataFrame(
    sample_test.reshape(1, -1),
    columns=['petal_length', 'petal_width'],
)
kmeans.predict(second_test)

array([0])

Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js

You might also like