Aim: Implement and demonstrate a working model of the K-means clustering algorithm together with
the Expectation-Maximization (EM) concept.
Program: Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for
clustering with the k-Means algorithm. Compare the results of the two algorithms and comment on
the quality of clustering. You may use Python ML library classes/APIs in the program.
import numpy as np
import pandas as pd
# Generate a reproducible random 2-D data set and store it in 'test_data.csv'
# so that the clustering step can load it from disk.
np.random.seed(0)  # fixed seed: every run produces the same points
num_samples = 100
num_features = 2
# Generate random data points, uniformly distributed in [0, 1)
data = np.random.rand(num_samples, num_features)
# Convert to DataFrame; the two feature columns are named 'X' and 'Y'
df = pd.DataFrame(data, columns=['X', 'Y'])
# Save DataFrame to CSV file (index=False keeps the row index out of the file)
df.to_csv('test_data.csv', index=False)
print("CSV file 'test_data.csv' has been generated successfully.")
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt

# Cluster the points stored in 'test_data.csv' with both K-means and a
# Gaussian mixture model fitted by EM, then print and plot the two labelings
# side by side so they can be compared.

# Load the dataset from CSV file
data = pd.read_csv('test_data.csv')
# Visualize the dataset if needed
# plt.scatter(data['X'], data['Y'])
# plt.xlabel('X')
# plt.ylabel('Y')
# plt.show()
# Convert the dataset to numpy array
X = data.values
# Number of clusters
k = 3
# K-means clustering
# random_state makes the result repeatable between runs; n_init is given
# explicitly because its default differs across scikit-learn versions.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
kmeans.fit(X)
kmeans_labels = kmeans.labels_
kmeans_centers = kmeans.cluster_centers_
# EM clustering (Gaussian mixture fitted with Expectation-Maximization)
em = GaussianMixture(n_components=k, random_state=0)
em.fit(X)
em_labels = em.predict(X)
em_centers = em.means_
# Compare clustering results
# NOTE: cluster indices are arbitrary, so the same group of points may carry
# a different label number in each algorithm's output.
print("K-means labels:")
print(kmeans_labels)
print("EM labels:")
print(em_labels)
# Visualize clustering results: points coloured by label, centres as red stars
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis')
plt.scatter(kmeans_centers[:, 0], kmeans_centers[:, 1], marker='*', s=300, c='r')
plt.title('K-means Clustering')
plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1], c=em_labels, cmap='viridis')
plt.scatter(em_centers[:, 0], em_centers[:, 1], marker='*', s=300, c='r')
plt.title('EM Clustering')
# Display the figure; without this call nothing is shown when run as a script.
plt.show()