06K_means_clustering
06K_means_clustering
In [ ]: import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
In [ ]: htWtData = pd.read_csv("/BOOK.csv")
df = pd.DataFrame(htWtData)
df.head()
0 185 72
1 170 56
2 168 60
3 179 68
4 182 72
In [ ]: ht = df["Height"].tolist()
print(ht)
wt = df["Weight"].tolist()
print(wt)
localhost:8888/nbconvert/html/Desktop/ML/K_means_clustering.ipynb?download=false 1/4
4/22/24, 10:06 PM K_means_clustering
if len(ed) >= 5 : break
print("\n")
for d in ed :
print(d)
[185, 170, 168, 179, 182, 188, 180, 180, 183, 180, 180, 177]
[72, 56, 60, 68, 72, 77, 71, 70, 84, 88, 67, 76]
21.93171219946131
20.808652046684813
7.211102550927978
3.0
5.830951894845301
5.0990195135927845
5.385164807134504
12.165525060596439
16.76305461424021
7.0710678118654755
8.94427190999916
In [ ]: plt.figure(figsize=(8, 5))
# plt.show()
plt.scatter(htWtData['Weight'], htWtData['Height'])
plt.xlabel("weight")
plt.ylabel("height")
plt.show()
localhost:8888/nbconvert/html/Desktop/ML/K_means_clustering.ipynb?download=false 2/4
4/22/24, 10:06 PM K_means_clustering
In [ ]: kmeans = KMeans(n_clusters = 3)
kmeans.fit(htWtData)
pdVals = kmeans.predict(htWtData)
print(pdVals)
f = pd.DataFrame(htWtData)
f["cluster"] = pdVals
print(f)
plt.show()
[0 1 1 0 0 2 0 0 2 2 0 0]
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarn
ing: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the v
alue of `n_init` explicitly to suppress the warning
warnings.warn(
In [ ]: f = pd.DataFrame(htWtData)
f["cluster"] = pdVals
print(f)
plt.show()
localhost:8888/nbconvert/html/Desktop/ML/K_means_clustering.ipynb?download=false 3/4
4/22/24, 10:06 PM K_means_clustering
localhost:8888/nbconvert/html/Desktop/ML/K_means_clustering.ipynb?download=false 4/4