"""Program 7

Program: Apply the EM algorithm to cluster a set of data stored in a .CSV
file. Use the same data set for clustering using the k-Means algorithm.
Compare the results of these two algorithms and comment on the quality of
clustering. You can add Python ML library classes/API in the program.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
# ---------------------------------------------------------------------------
# Step 1: build a small synthetic data set and store it in a .CSV file.
# ---------------------------------------------------------------------------
num_samples = 100
num_features = 2  # the plots below are 2-D, so keep this at 2 (columns X, Y)

# A seeded generator makes every run write the same file, so the K-means / EM
# comparison further down is repeatable. Random points stand in for real data.
rng = np.random.default_rng(42)
samples = rng.standard_normal((num_samples, num_features))

# Convert to DataFrame
df = pd.DataFrame(samples, columns=['X', 'Y'])
df.to_csv('test_data.csv', index=False)

# ---------------------------------------------------------------------------
# Step 2: read the data back from the .CSV file, as the exercise requires.
# ---------------------------------------------------------------------------
data = pd.read_csv('test_data.csv')
X = data.to_numpy()

# Number of clusters
k = 3

# ---------------------------------------------------------------------------
# Step 3: cluster the same data with both algorithms.
# ---------------------------------------------------------------------------
# K-means clustering
# n_init is spelled out so the result does not depend on the library's
# version-specific default; random_state fixes the centroid initialisation.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
kmeans.fit(X)
kmeans_labels = kmeans.labels_
kmeans_centers = kmeans.cluster_centers_

# EM clustering (Gaussian mixture model fitted by expectation-maximisation)
em = GaussianMixture(n_components=k, random_state=0)
em.fit(X)
em_labels = em.predict(X)
em_centers = em.means_

print("K-means labels:")
print(kmeans_labels)
print("EM labels:")
print(em_labels)

# ---------------------------------------------------------------------------
# Step 4: show both clusterings side by side for a visual comparison.
# NOTE: label numbers are arbitrary per algorithm, so the same group of points
# may get different colours in the two panels.
# ---------------------------------------------------------------------------
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis', s=30)
plt.scatter(kmeans_centers[:, 0], kmeans_centers[:, 1],
            c='red', marker='x', s=100, label='Centers')
plt.title('K-means Clustering')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()

plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1], c=em_labels, cmap='viridis', s=30)
plt.scatter(em_centers[:, 0], em_centers[:, 1],
            c='red', marker='x', s=100, label='Means')
plt.title('EM Clustering')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()

plt.tight_layout()
plt.show()