0% found this document useful (0 votes)

6 views

Spectral Clustering

The document contains Python code for implementing various spectral clustering algorithms, including unnormalized, normalized random walk, and normalized symmetric spectral clustering. It generates synthetic datasets and visualizes the clustering results using KMeans and the spectral methods. The code utilizes libraries such as NumPy, Pandas, Matplotlib, and Scikit-learn for data manipulation and visualization.

Uploaded by

ravintej22

Available Formats

Download as TXT or PDF, or read online on Scribd

0% found this document useful (0 votes)

6 views

Spectral Clustering

Uploaded by

ravintej22

Available Formats

Download as TXT or PDF, or read online on Scribd

You are on page 1/ 5

import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import cluster, datasets, mixture

from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler, LabelEncoder
from itertools import cycle, islice

import networkx as nx
from scipy import sparse
from sklearn.cluster import KMeans

np.random.seed(0)

def getClusterCentroids(X,spectral_labels):
    """
    Helper that computes the centroid of every cluster as the per-feature
    mean of the points assigned to it.

    Inputs:
        X - Array of data points (one row per point)
        spectral_labels - Cluster label for each row of X

    Returns:
        Array with one row per distinct label, ordered by sorted label value,
        holding the mean of the rows of X carrying that label.
    """
    frame = pd.DataFrame(X)
    # Remember the feature columns before the label column is attached so the
    # label itself is never averaged.
    feature_cols = list(frame.columns)
    labelled = frame.assign(spectral_labels=spectral_labels)
    return labelled.groupby("spectral_labels")[feature_cols].mean().values

def unnormalizedSpectralClustering(X,k,params):
    """
    Unnormalized Spectral Clustering

    Inputs:
        X - Array of data points
        k - Number of clusters to construct
        params - Additional parameters for constructing the similarity graph;
                 params['k_neighbors'] is the neighbor count of the k-NN graph

    Returns:
        dict with
            'labels'    - k-means cluster label of every point
            'centroids' - per-cluster mean of X (see getClusterCentroids)
    """
    # Imported explicitly: `from scipy import sparse` alone does not guarantee
    # that the `sparse.linalg` submodule has been loaded.
    from scipy.sparse.linalg import eigs

    ## Construct a similarity graph by one of the ways described in Section 2.
    ## Let W be its weighted adjacency matrix
    # Computes the (weighted) graph of k-Neighbors for points in X. The
    # default distance is 'euclidean'
    # NOTE(review): mode='distance' stores distances as edge weights, so
    # farther neighbors get larger weights; spectral clustering normally
    # expects similarities - confirm this is intended.
    A = kneighbors_graph(X, params['k_neighbors'], mode='distance',
                         metric='euclidean', include_self=True)

    ## Compute the unnormalized Laplacian L
    # from_scipy_sparse_matrix was removed in NetworkX 3.0; use its
    # replacement when available and fall back on older releases.
    build_graph = (getattr(nx, 'from_scipy_sparse_array', None)
                   or nx.from_scipy_sparse_matrix)
    G = build_graph(A)
    L = nx.laplacian_matrix(G)

    ## Compute the first k eigenvectors u_1,...,u_k of L
    eigenvalues, eigenvectors = eigs(L, k=k, which='SM')  # SM = Smallest Magnitude
    # eigs always returns complex arrays; drop negligible imaginary parts
    eigenvectors = np.real_if_close(eigenvectors)
    eigenvalues = np.real_if_close(eigenvalues)
    kfirst_indices = np.argsort(eigenvalues)[:k]

    ## Let U be the matrix containing the vectors u_1,...,u_k as columns
    ## For i=1,...,n, let y_i be the vector corresponding to the i-th row of U
    Y = eigenvectors[:,kfirst_indices]

    ## Cluster the points (y_i)i=1,...,n with the k-means algorithm into
    ## clusters C_1,...,C_k
    kmeans = KMeans(n_clusters=k, random_state=0).fit(Y)

    ## Output: Clusters A_1,...,A_k with A_i = {j|y_j in C_i}
    return {
        'labels': kmeans.labels_,
        'centroids': getClusterCentroids(X,kmeans.labels_)
    }

def normalizedRWSpectralClustering(X,k,params):
    """
    Normalized Spectral Clustering according to Shi and Malik (2000)
    Uses the normalized Random Walk Laplacian matrix

    Inputs:
        X - Array of data points
        k - Number of clusters to construct
        params - Additional parameters for constructing the similarity graph;
                 params['k_neighbors'] is the neighbor count of the k-NN graph

    Returns:
        dict with
            'labels'    - k-means cluster label of every point
            'centroids' - per-cluster mean of X (see getClusterCentroids)
    """
    # Imported explicitly: `from scipy import sparse` alone does not guarantee
    # that the `sparse.linalg` submodule has been loaded.
    from scipy.sparse.linalg import eigs

    ## Construct a similarity graph by one of the ways described in Section 2.
    ## Let W be its weighted adjacency matrix
    # The adjacency matrix was never built in this function, so the graph
    # construction below failed with NameError; build it the same way the
    # unnormalized variant does.
    # NOTE(review): mode='distance' stores distances as edge weights, so
    # farther neighbors get larger weights; spectral clustering normally
    # expects similarities - confirm this is intended.
    A = kneighbors_graph(X, params['k_neighbors'], mode='distance',
                         metric='euclidean', include_self=True)

    ## Compute the unnormalized Laplacian L
    # from_scipy_sparse_matrix was removed in NetworkX 3.0; use its
    # replacement when available and fall back on older releases.
    build_graph = (getattr(nx, 'from_scipy_sparse_array', None)
                   or nx.from_scipy_sparse_matrix)
    G = build_graph(A)
    L = nx.laplacian_matrix(G)

    # Weighted degree of every node, in the node order used for L, as the
    # diagonal of the degree matrix D.
    degrees = np.array(
        [degree for _, degree in G.degree(G.nodes(), weight='weight')],
        dtype=float)
    D = sparse.diags(degrees)

    ## Compute the first k generalized eigenvectors u_1,...,u_k of the
    ## generalized eigenproblem Lu=lambda*Du
    eigenvalues, eigenvectors = eigs(L, k=k, M=D, which='SM')  # SM = Smallest Magnitude
    # eigs always returns complex arrays; drop negligible imaginary parts
    eigenvectors = np.real_if_close(eigenvectors)
    eigenvalues = np.real_if_close(eigenvalues)
    kfirst_indices = np.argsort(eigenvalues)[:k]

    ## Let U be the matrix containing the vectors u_1,...,u_k as columns
    U = eigenvectors[:,kfirst_indices]

    ## For i=1,...,n, let y_i be the vector corresponding to the i-th row of U
    Y = U

    ## Cluster the points (y_i)i=1,...,n with the k-means algorithm into
    ## clusters C_1,...,C_k
    kmeans = KMeans(n_clusters=k, random_state=0).fit(Y)

    ## Output: Clusters A_1,...,A_k with A_i = {j|y_j in C_i}
    return {
        'labels': kmeans.labels_,
        'centroids': getClusterCentroids(X,kmeans.labels_)
    }

def normalizedSymSpectralClustering(X,k,params):
    """
    Normalized Spectral Clustering according to Ng, Jordan and Weiss (2002)
    Uses the normalized symmetric Laplacian matrix

    Inputs:
        X - Array of data points
        k - Number of clusters to construct
        params - Additional parameters for constructing the similarity graph;
                 params['k_neighbors'] is the neighbor count of the k-NN graph

    Returns:
        dict with
            'labels'    - k-means cluster label of every point
            'centroids' - per-cluster mean of X (see getClusterCentroids)
    """
    # Imported explicitly: `from scipy import sparse` alone does not guarantee
    # that the `sparse.linalg` submodule has been loaded.
    from scipy.sparse.linalg import eigs

    ## Construct a similarity graph by one of the ways described in Section 2.
    ## Let W be its weighted adjacency matrix
    # The adjacency matrix was never built in this function, so the graph
    # construction below failed with NameError; build it the same way the
    # unnormalized variant does.
    # NOTE(review): mode='distance' stores distances as edge weights, so
    # farther neighbors get larger weights; spectral clustering normally
    # expects similarities - confirm this is intended.
    A = kneighbors_graph(X, params['k_neighbors'], mode='distance',
                         metric='euclidean', include_self=True)

    ## Compute the normalized Laplacian L_sym
    # from_scipy_sparse_matrix was removed in NetworkX 3.0; use its
    # replacement when available and fall back on older releases.
    build_graph = (getattr(nx, 'from_scipy_sparse_array', None)
                   or nx.from_scipy_sparse_matrix)
    G = build_graph(A)
    L_sym = nx.normalized_laplacian_matrix(G)

    ## Compute the first k eigenvectors u_1,...,u_k of L_sym
    eigenvalues, eigenvectors = eigs(L_sym, k=k, which='SM')  # SM = Smallest Magnitude
    # eigs always returns complex arrays; drop negligible imaginary parts
    eigenvectors = np.real_if_close(eigenvectors)
    eigenvalues = np.real_if_close(eigenvalues)
    kfirst_indices = np.argsort(eigenvalues)[:k]

    ## Let U be the matrix containing the vectors u_1,...,u_k as columns
    U = eigenvectors[:,kfirst_indices]

    ## Form the matrix T from U by normalizing the rows to norm 1
    row_norms = np.sqrt(np.sum(U**2, axis=1))[:,np.newaxis]
    # An all-zero row has norm 0; dividing by it would inject NaNs that make
    # k-means fail, so leave such rows at zero instead.
    row_norms[row_norms == 0] = 1.0
    T = U / row_norms

    ## For i=1,...,n, let y_i be the vector corresponding to the i-th row of T
    ## Cluster the points (y_i)i=1,...,n with the k-means algorithm into
    ## clusters C_1,...,C_k
    kmeans = KMeans(n_clusters=k, random_state=0).fit(T)

    ## Output: Clusters A_1,...,A_k with A_i = {j|y_j in C_i}
    return {
        'labels': kmeans.labels_,
        'centroids': getClusterCentroids(X,kmeans.labels_)
    }

# ============
# Generate datasets. We choose the size big enough to see the scalability
# of the algorithms, but not too big to avoid too long running times
# ============
# Number of points generated for every dataset below.
n_samples = 1500
# NOTE(review): no random_state is passed for the circles and moons, and
# no_structure draws from np.random directly, so their reproducibility
# presumably relies on the np.random.seed(0) call near the top of the file
# and on the order of these statements - do not reorder them.
noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
noise=.05)
noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
# Uniform random 2-D points with no labels (None stands in for y).
no_structure = np.random.rand(n_samples, 2), None
# Anisotropically distributed data: blobs passed through a fixed linear map
random_state = 170
X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
X_aniso = np.dot(X, transformation)
aniso = (X_aniso, y)

# blobs with varied variances

varied = datasets.make_blobs(n_samples=n_samples,
cluster_std=[1.0, 2.5, 0.5],
random_state=random_state)

# Each entry pairs an (X, y) dataset with its display name and the number of
# clusters the plotting loops below request for it.
simple_datasets = [
(noisy_circles, {'name': 'Noisy Circles','n_clusters': 2}),
(noisy_moons, {'name': 'Noisy Moons', 'n_clusters': 2}),
(varied, {'name': 'Blobs with varied variances','n_clusters': 3}),
(aniso, {'name': 'Anisotropic data', 'n_clusters': 3}),
(blobs, {'name': 'Blobs', 'n_clusters': 3}),
(no_structure, {'name': 'No structure', 'n_clusters': 3})]

# Overview figure: a single row with one panel per dataset, showing the
# standardized points before any clustering is applied.
plt.figure(figsize=(9 * 2 + 3, 3))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)
plot_num = 1
for i_dataset, (dataset, dataset_params) in enumerate(simple_datasets):

    X, y = dataset
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    name = dataset_params['name']
    plt.subplot(1, len(simple_datasets), plot_num)
    plt.title(name, size=18)
    plt.scatter(X[:, 0], X[:, 1], s=10)

    # Same fixed window for every panel so the datasets are comparable.
    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)
    plt.xticks(())
    plt.yticks(())

    plot_num += 1

# ============
# Set up cluster parameters
# ============
# Comparison figure: one row per dataset, one column per algorithm. Each
# panel shows the points colored by predicted cluster plus the run time.
plt.figure(figsize=(4 * 2 + 3, 12.5))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)

plot_num = 1

# Parameters of the k-NN similarity graph, shared by all spectral variants.
spectral_params = {
    'k_neighbors': 12
}

# Color cycle for the predicted clusters.
palette = ['#377eb8', '#ff7f00', '#4daf4a',
           '#f781bf', '#a65628', '#984ea3',
           '#999999', '#e41a1c', '#dede00']

for i_dataset, (dataset, params) in enumerate(simple_datasets):

    X, y = dataset

    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    kmeans = cluster.KMeans(n_clusters=params['n_clusters'])
    clustering_algorithms = (
        ('KMeans', kmeans),
        ('Unnormalized Spectral Clustering', unnormalizedSpectralClustering),
        ('Normalized Spectral Clustering\nRandom Walk',
         normalizedRWSpectralClustering),
        ('Normalized Spectral Clustering\nSymmetric Laplacian',
         normalizedSymSpectralClustering)
    )

    for name, algorithm in clustering_algorithms:

        # Only the fit / clustering call itself is timed.
        t0 = time.time()

        if name == 'KMeans':
            algorithm.fit(X)
        else:
            k = params['n_clusters']
            result = algorithm(X,k,spectral_params)

        t1 = time.time()

        if name == 'KMeans':
            # np.int was removed in NumPy 1.24; the builtin int is the
            # drop-in replacement.
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = result['labels']

        plt.subplot(len(simple_datasets), len(clustering_algorithms), plot_num)

        # Column titles go on the first row only.
        if i_dataset == 0:
            plt.title(name)

        # One color per predicted label, cycling if there are more labels
        # than palette entries.
        colors = np.array(list(islice(cycle(palette),
                                      int(max(y_pred) + 1))))

        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])

        plt.xlim(-2.5, 2.5)
        plt.ylim(-2.5, 2.5)
        plt.xticks(())
        plt.yticks(())
        # Elapsed time in the lower-right corner, without the leading zero.
        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                 transform=plt.gca().transAxes, size=15,
                 horizontalalignment='right')
        plot_num += 1

plt.show()

Clustering
No ratings yet
Clustering
1 page
From Import Import As Import As From Import From Import From Import From Import
No ratings yet
From Import Import As Import As From Import From Import From Import From Import
9 pages
DWDM Lab All
No ratings yet
DWDM Lab All
20 pages
4.cluster Analysis
No ratings yet
4.cluster Analysis
7 pages
Practical 5
No ratings yet
Practical 5
6 pages
ML Minors Exp7
No ratings yet
ML Minors Exp7
6 pages
PRAC9_23BME053
No ratings yet
PRAC9_23BME053
4 pages
Prac7 8 9 10
No ratings yet
Prac7 8 9 10
12 pages
SE_KMeansClustering
No ratings yet
SE_KMeansClustering
21 pages
K++
No ratings yet
K++
5 pages
DS - ML - 7 - 60019210046 1
No ratings yet
DS - ML - 7 - 60019210046 1
6 pages
ML Exp5 C36
No ratings yet
ML Exp5 C36
18 pages
Casos de ML Unsupervised Daniel Ames Camayo
No ratings yet
Casos de ML Unsupervised Daniel Ames Camayo
20 pages
Mla 7th
No ratings yet
Mla 7th
2 pages
AbidAdhikari26840-DWDM
No ratings yet
AbidAdhikari26840-DWDM
43 pages
K Means
No ratings yet
K Means
3 pages
2092 On Spectral Clustering Analysis and An Algorithm
No ratings yet
2092 On Spectral Clustering Analysis and An Algorithm
8 pages
Week 8 DS Practical (1)
No ratings yet
Week 8 DS Practical (1)
13 pages
2403res62 - CS564 - Assignment - 4 - K-Means-Iris - Intrinsic - CVIs
No ratings yet
2403res62 - CS564 - Assignment - 4 - K-Means-Iris - Intrinsic - CVIs
30 pages
ML2 Practical List
No ratings yet
ML2 Practical List
80 pages
ML Python Exercises UOM BDS Cluster Analysis
No ratings yet
ML Python Exercises UOM BDS Cluster Analysis
8 pages
Kmeans Gradtut 22B0394
No ratings yet
Kmeans Gradtut 22B0394
3 pages
01 K Means - Merged
No ratings yet
01 K Means - Merged
26 pages
Intro Cluster Problem Python
No ratings yet
Intro Cluster Problem Python
13 pages
DataScience All 1to8
No ratings yet
DataScience All 1to8
6 pages
Mlda - Lab
No ratings yet
Mlda - Lab
35 pages
23CC554
No ratings yet
23CC554
10 pages
Numpy NP Sklearn - Cluster Sklearn Sklearn - Datasets Sklearn - Preprocessing
No ratings yet
Numpy NP Sklearn - Cluster Sklearn Sklearn - Datasets Sklearn - Preprocessing
1 page
Machine Learning Lab Assessment 5: 18BCE2301 Devangshu Mazumder
No ratings yet
Machine Learning Lab Assessment 5: 18BCE2301 Devangshu Mazumder
10 pages
HW5 Clustering (50 PTS) : Test Algorithms
No ratings yet
HW5 Clustering (50 PTS) : Test Algorithms
5 pages
Kmeans Clustering
No ratings yet
Kmeans Clustering
3 pages
ML0101EN Clus DBSCN Weather Py v1
No ratings yet
ML0101EN Clus DBSCN Weather Py v1
16 pages
Suneel Varma
No ratings yet
Suneel Varma
11 pages
KMEANS
No ratings yet
KMEANS
5 pages
DM ML Practical
No ratings yet
DM ML Practical
13 pages
Spectral Clustering 2
No ratings yet
Spectral Clustering 2
39 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
Pca 2382487
No ratings yet
Pca 2382487
8 pages
D3 docs
No ratings yet
D3 docs
6 pages
DAVL PR1.2 Mit
No ratings yet
DAVL PR1.2 Mit
10 pages
Código K-Means en Spyder
No ratings yet
Código K-Means en Spyder
3 pages
kmeans
No ratings yet
kmeans
5 pages
21BEC505 Exp2
No ratings yet
21BEC505 Exp2
7 pages
ML DSBA Lab7
No ratings yet
ML DSBA Lab7
6 pages
Week 6 (PCA, SVD, LDA)
No ratings yet
Week 6 (PCA, SVD, LDA)
14 pages
Slip Clustering
No ratings yet
Slip Clustering
2 pages
DATA MINING EX1
No ratings yet
DATA MINING EX1
10 pages
ML 2.3 Prashant
No ratings yet
ML 2.3 Prashant
4 pages
AIML_LAB
No ratings yet
AIML_LAB
37 pages
Py 2
No ratings yet
Py 2
7 pages
Experiment 4 1
No ratings yet
Experiment 4 1
4 pages
DSM 1
No ratings yet
DSM 1
6 pages
Detecting Patterns with Unsupervised Learning
No ratings yet
Detecting Patterns with Unsupervised Learning
21 pages
MIT6 00SCS11 Lec20 PDF
No ratings yet
MIT6 00SCS11 Lec20 PDF
3 pages
Unsupervisd Learning Algorithm
No ratings yet
Unsupervisd Learning Algorithm
6 pages
lab-8ml
No ratings yet
lab-8ml
6 pages
Assignment # 1: Performance Timeline of Flynn Taxonomy
No ratings yet
Assignment # 1: Performance Timeline of Flynn Taxonomy
21 pages
ML Shristi File
No ratings yet
ML Shristi File
49 pages
Experiment 3.1 K-Mean
No ratings yet
Experiment 3.1 K-Mean
8 pages
IDM Assignment
No ratings yet
IDM Assignment
15 pages
MathCAD Lecture 2
No ratings yet
MathCAD Lecture 2
20 pages
Chapter 3: Solving Systems of Linear Equations Using Gaussian Elimination
No ratings yet
Chapter 3: Solving Systems of Linear Equations Using Gaussian Elimination
13 pages
The Rank-Nullity Theorem
No ratings yet
The Rank-Nullity Theorem
6 pages
Press - 09 Tanzania
No ratings yet
Press - 09 Tanzania
9 pages
Determinents
No ratings yet
Determinents
2 pages
CHAP. 7 Linear Algebra: Matrices, Vectors, Determinants. Linear Systems
0% (1)
CHAP. 7 Linear Algebra: Matrices, Vectors, Determinants. Linear Systems
1 page
Gaussian Elimination
No ratings yet
Gaussian Elimination
5 pages
Matrix Questions For SSC Stenographer PDF
No ratings yet
Matrix Questions For SSC Stenographer PDF
9 pages
2.4.3 Gaussian Elimination: An Example
No ratings yet
2.4.3 Gaussian Elimination: An Example
3 pages
Week 1
No ratings yet
Week 1
10 pages
MTH 501
No ratings yet
MTH 501
3 pages
Mathematics Statistics Commerce
No ratings yet
Mathematics Statistics Commerce
179 pages
L U Decomposition 3
No ratings yet
L U Decomposition 3
5 pages
Chapter Test-12 (Matrices and Determinants)SOLUTIONS
No ratings yet
Chapter Test-12 (Matrices and Determinants)SOLUTIONS
3 pages
I. Multiple Choice Questions
No ratings yet
I. Multiple Choice Questions
7 pages
Multiple Choice Question 1 Q.No. Answer
No ratings yet
Multiple Choice Question 1 Q.No. Answer
2 pages
Laode Module Bank
No ratings yet
Laode Module Bank
3 pages
Monthly Test Formate
No ratings yet
Monthly Test Formate
6 pages
Maths Assignment 1
No ratings yet
Maths Assignment 1
4 pages
Complex Matrices
No ratings yet
Complex Matrices
13 pages
06 Matrices and Vector Analysis (Lesson 06)
No ratings yet
06 Matrices and Vector Analysis (Lesson 06)
8 pages
Lectia I - Algebra
No ratings yet
Lectia I - Algebra
12 pages
Solution To Crimes
No ratings yet
Solution To Crimes
3 pages
Metode Greville
No ratings yet
Metode Greville
12 pages
Decomposition of Hadamard Matrices: Journal of Combinatorial Theory
No ratings yet
Decomposition of Hadamard Matrices: Journal of Combinatorial Theory
4 pages
Chapter 8 - Further Matrix Algebra: 8.1 - Eigenvalues and Eigenvectors
No ratings yet
Chapter 8 - Further Matrix Algebra: 8.1 - Eigenvalues and Eigenvectors
3 pages
Singular Value Decomposition
No ratings yet
Singular Value Decomposition
34 pages
07 Matrix Frame
No ratings yet
07 Matrix Frame
72 pages
Matrices, Determinant and Inverse
100% (1)
Matrices, Determinant and Inverse
12 pages
Matrices and Determinants
No ratings yet
Matrices and Determinants
65 pages

Spectral Clustering

Uploaded by

Spectral Clustering

Uploaded by

import time

from sklearn import cluster, datasets, mixture

## Construct a similarity graph by one of the ways described in Section 2. Let

## Compute the unnormalized Laplacian L

## Compute the first k eigenvectors u_1,...,u_k of L

## Let U be the matrix containing the vectors u_1,...,u_k as columns

## Output: Clusters A_1,...,A_k with A_i = {j|y_j in C_i}

## Construct a similarity graph by one of the ways described in Section 2. Let

## Compute the unnormalized Laplacian L

degreeView = G.degree(G.nodes(), weight='weight')

## Compute the first k generalized eigenvectors u_1,...,u_k of the generalized

## Let U be the matrix containing the vectors u_1,...,u_k as columns

## Output: Clusters A_1,...,A_k with A_i = {j|y_j in C_i}

## Construct a similarity graph by one of the ways described in Section 2. Let

## Compute the normalized Laplacian L_sym

## Compute the first k eigenvectors u_1,...,u_k of L_sym

## Let U be the matrix containing the vectors u_1,...,u_k as columns

## Form the matrix T from U by normalizing the rows to norm 1

## Output: Clusters A_1,...,A_k with A_i = {j|y_j in C_i}

# blobs with varied variances

for i_dataset, (dataset, params) in enumerate(simple_datasets):

# normalize dataset for easier parameter selection

for name, algorithm in clustering_algorithms:

plt.subplot(len(simple_datasets), len(clustering_algorithms), plot_num)

colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',

plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])

You might also like