ML Python Exercises UOM BDS Cluster Analysis
ML Python Exercises UOM BDS Cluster Analysis
# K-means listing: cluster 30 two-dimensional points into three groups,
# print the fitted cluster centres and plot the result.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans

data = {
    'x': [25, 34, 22, 27, 33, 33, 31, 22, 35, 34,
          67, 54, 57, 43, 50, 57, 59, 52, 65, 47,
          49, 48, 35, 33, 44, 45, 38, 43, 51, 46],
    'y': [79, 51, 53, 78, 59, 74, 73, 57, 69, 75,
          51, 32, 40, 47, 53, 36, 35, 58, 59, 50,
          25, 20, 14, 12, 20, 5, 29, 27, 8, 7],
}

# The listing fitted `df` without ever creating it; the frame is built from
# the `data` dict above so the fit below has an input.
df = pd.DataFrame(data, columns=['x', 'y'])

# K-means Clustering
kmeans = KMeans(n_clusters=3).fit(df)
centroids = kmeans.cluster_centers_
print(centroids)

# One cluster index per row of `df`; used to colour the points.
labels = kmeans.labels_

# Without any drawing call plt.show() would open an empty window, so plot
# the samples coloured by cluster and mark the centroids in red.
plt.scatter(df['x'], df['y'], c=labels, s=50, alpha=0.5)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=50)
plt.show()
OUTPUT
2. AGGLOMERATIVE CLUSTERING :[pg.no:96]
Agglomerative clustering, commonly referred to as AGNES (AGglomerative NESting), works in a
bottom-up manner. That is, each observation is initially considered a single-element cluster
(leaf). At each step of the algorithm, the two most similar clusters are combined into
a new, bigger cluster (node).
PROGRAM
# Agglomerative listing: group six 2-D points into three clusters with Ward
# linkage, print the cluster index of each point, then draw the dendrogram.
import matplotlib.pyplot as plt
import numpy as np
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering

X = np.array([[2, 8], [8, 15], [3, 6], [6, 9], [8, 7], [10, 10]])

# Agglomerative Clustering
# Ward linkage only works with Euclidean distance, which is already the
# default, so the distance argument is omitted. The former `affinity=`
# keyword was removed in scikit-learn 1.4 and raises a TypeError there.
cluster = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = cluster.fit_predict(X)
print(labels)

# Drawing Dendrograms
# The figure was created but nothing was drawn on it; build the Ward linkage
# matrix with SciPy and render it so plt.show() displays the merge tree.
plt.figure(figsize=(10, 7))
shc.dendrogram(shc.linkage(X, method='ward'))
plt.show()
OUTPUT
[1 2 1 0 0 0]
# BIRCH listing: fit a Birch model on six 2-D points and print the cluster
# index predicted for each of them.
from sklearn.cluster import Birch

X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]

# The opening of this constructor call was missing from the listing, which
# left an orphaned argument line and an unbound `brc`.
# NOTE(review): `n_clusters=None` follows the scikit-learn Birch documentation
# example and agrees with the two-cluster output printed below the listing;
# confirm against the original manual.
brc = Birch(n_clusters=None, threshold=0.5, compute_labels=True)
brc.fit(X)
print(brc.predict(X))
OUTPUT
[0 0 0 1 1 1]
PROGRAM
# DBSCAN listing: passes X through StandardScaler, fits DBSCAN(eps=0.3,
# min_samples=10), prints clustering scores computed against labels_true and
# then plots the result.
# NOTE(review): this listing is incomplete. StandardScaler, DBSCAN, metrics
# and plt are used below but never imported in the visible lines, and several
# statements have lost their first line (each gap is marked below).
import numpy as np
# NOTE(review): the next line is the tail of a call whose opening is not in
# the listing. X and labels_true are used later but never assigned in the
# visible lines - presumably this call produced them; confirm.
cluster_std=0.4, random_state=0)
# Replace X with the output of StandardScaler before clustering.
X = StandardScaler().fit_transform(X)
# Compute DBSCAN
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
# Set the mask to True at the indices DBSCAN reports as core samples.
# NOTE(review): core_samples_mask is never created in the visible lines.
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
# Number of samples whose label is -1.
n_noise = list(labels).count(-1)
# Scores comparing the predicted labels with labels_true.
print("Homogeneity: %0.3f" %
metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" %
metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" %
metrics.v_measure_score(labels_true, labels))
# NOTE(review): the three lines below each end with an unmatched ')' - they
# look like the tails of print(...) statements whose first lines are missing.
metrics.adjusted_rand_score(labels_true, labels))
metrics.adjusted_mutual_info_score(labels_true, labels))
metrics.silhouette_score(X, labels))
# Plot result
# Black removed and is used for noise instead.
unique_labels = set(labels)
# One Spectral colormap entry per distinct label.
colors = plt.cm.Spectral(np.linspace(0, 1,
len(unique_labels)))
# NOTE(review): `k` and `col` are not bound anywhere in the visible lines and
# the body of this `if` is not indented - presumably a loop over
# unique_labels and colors was lost here; confirm.
# Label -1 gets the RGBA value [0, 0, 0, 1] instead of its colormap colour.
if k == -1:
col = [0, 0, 0, 1]
# Boolean mask selecting the samples whose label equals k.
class_member_mask = (labels == k)
# NOTE(review): the next four lines are the keyword-argument tails of two
# calls (marker size 14, then 6) whose opening lines are missing.
markerfacecolor=tuple(col), markeredgecolor='k',
markersize=14)
markerfacecolor=tuple(col), markeredgecolor='k',
markersize=6)
plt.show()
OUTPUT
Homogeneity: 0.953
Completeness: 0.883
V-measure: 0.917
# OPTICS listing: generates random 2-D point clouds, fits an estimator held
# in `clust`, derives two extra labelings with cluster_optics_dbscan at
# eps=0.5 and eps=2, and prepares a reachability plot and a cluster plot.
# NOTE(review): this listing is incomplete. cluster_optics_dbscan, plt and
# gridspec are used but never imported in the visible lines, and several
# statements are missing (each gap is marked below).
import numpy as np
# Fixed seed so the random point clouds are reproducible.
np.random.seed(0)
n_points_per_cluster = 250
# Each cloud is a fixed 2-D offset plus scaled standard-normal noise.
# NOTE(review): only C4 and C6 are visible; the numbering suggests other
# clouds (C1-C3, C5) were lost - confirm against the original.
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
# NOTE(review): the next line is the tail of a call whose opening is missing;
# presumably it constructed `clust`. X is also never assigned in the visible
# lines - confirm how the clouds were combined.
min_cluster_size=0.05)
clust.fit(X)
# Labelings computed from the fitted reachability, core distances and
# ordering, at eps=0.5 and eps=2 respectively.
labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
core_distances=clust.core_distances_,
ordering=clust.ordering_,
eps=0.5)
labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
core_distances=clust.core_distances_,
ordering=clust.ordering_,
eps=2)
# Positions 0..len(X)-1, used to index samples in the reachability plot.
space = np.arange(len(X))
# Reachability values and labels rearranged by clust.ordering_.
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]
plt.figure(figsize=(10, 7))
# 2 x 3 grid layout.
# NOTE(review): ax1 and ax2 are used below but never created in the visible
# lines - presumably subplots built from G were lost here.
G = gridspec.GridSpec(2, 3)
# Reachability plot
# NOTE(review): `klass` is not bound in the visible lines - presumably a loop
# over cluster labels was lost here; confirm.
# Positions and reachability values of the samples labelled `klass`.
Xk = space[labels == klass]
Rk = reachability[labels == klass]
# NOTE(review): the next three lines are the tails of three calls whose
# opening lines are missing.
alpha=0.3)
alpha=0.5)
alpha=0.5)
ax1.set_title('Reachability Plot')
# OPTICS
# Samples of X whose fitted label equals `klass`.
Xk = X[clust.labels_ == klass]
ax2.set_title('Automatic Clustering\nOPTICS')
plt.tight_layout()
plt.show()
OUTPUT