Aai 02 (GMMnew.ipynb)

This notebook implements a Gaussian mixture model (GMM) fitted with the expectation-maximization (EM) algorithm. It generates synthetic data from two clusters, initializes a GMM, and runs EM for 20 iterations to estimate the GMM parameters (mixing weights, means, and covariances).
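For reference, these are the standard EM update equations that the code below implements (a LaTeX sketch; the responsibilities \gamma_{nk} correspond to the matrix self.z in the code):

\text{E-step:}\quad \gamma_{nk} = \frac{\pi_k\,\mathcal{N}(x_n \mid \mu_k, \Sigma_k)}{\sum_{j=1}^{K} \pi_j\,\mathcal{N}(x_n \mid \mu_j, \Sigma_j)}

\text{M-step:}\quad N_k = \sum_{n=1}^{N} \gamma_{nk},\qquad \pi_k = \frac{N_k}{N},\qquad \mu_k = \frac{1}{N_k}\sum_{n=1}^{N} \gamma_{nk}\, x_n,\qquad \Sigma_k = \frac{1}{N_k}\sum_{n=1}^{N} \gamma_{nk}\,(x_n - \mu_k)(x_n - \mu_k)^{\top}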



import numpy as np
from numpy import random
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
from scipy.stats import multivariate_normal
import matplotlib.pyplot as plt

class GMM:
    def __init__(self, k=3, dim=2, init_mu=None, init_sigma=None, init_pi=None, colors=None):
        self.k = k      # number of mixture components
        self.dim = dim  # dimensionality of the data
        # Means: random values in [-10, 5) per coordinate unless provided
        if init_mu is None:
            init_mu = random.rand(k, dim) * 15 - 10
        self.mu = init_mu
        # Covariances: identity matrices unless provided
        if init_sigma is None:
            init_sigma = np.zeros((k, dim, dim))
            for i in range(k):
                init_sigma[i] = np.eye(dim)
        self.sigma = init_sigma
        # Mixing weights: uniform unless provided
        if init_pi is None:
            init_pi = np.ones(self.k) / self.k
        self.pi = init_pi
        # One plotting color per component
        if colors is None:
            colors = random.rand(k, 3)
        self.colors = colors

    def init_em(self, X):
        # Cache the data and allocate the responsibility matrix z (N x k)
        self.data = X
        self.num_points = X.shape[0]
        self.z = np.zeros((self.num_points, self.k))

    def e_step(self):
        # Responsibilities: z[n, i] is proportional to pi_i * N(x_n | mu_i, sigma_i),
        # normalized over components so each row sums to 1
        for i in range(self.k):
            self.z[:, i] = self.pi[i] * multivariate_normal.pdf(self.data, mean=self.mu[i], cov=self.sigma[i])
        self.z /= self.z.sum(axis=1, keepdims=True)

    def m_step(self):
        sum_z = self.z.sum(axis=0)          # effective number of points per component
        self.pi = sum_z / self.num_points   # update mixing weights
        self.mu = np.matmul(self.z.T, self.data)
        self.mu /= sum_z[:, None]           # responsibility-weighted means
        for i in range(self.k):
            # Responsibility-weighted covariance:
            # sigma_i = sum_n z[n, i] * (x_n - mu_i)(x_n - mu_i)^T / sum_z[i]
            j = np.expand_dims(self.data, axis=1) - self.mu[i]
            s = np.matmul(j.transpose([0, 2, 1]), j)
            self.sigma[i] = np.matmul(s.transpose(1, 2, 0), self.z[:, i])
            self.sigma[i] /= sum_z[i]

    def log_likelihood(self, X):
        # Total log-likelihood: sum_n log( sum_i pi_i * N(x_n | mu_i, sigma_i) )
        ll = []
        for d in X:
            tot = 0
            for i in range(self.k):
                tot += self.pi[i] * multivariate_normal.pdf(d, mean=self.mu[i], cov=self.sigma[i])
            ll.append(np.log(tot))
        return np.sum(ll)

    def plot_gaussian(self, mean, cov, ax, n_std=3.0, facecolor='none', **kwargs):
        # Draw an n_std confidence ellipse for a 2D Gaussian, following the
        # matplotlib confidence-ellipse recipe: build a unit ellipse from the
        # Pearson correlation, then rotate and scale it into data coordinates.
        pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0),
                          width=ell_radius_x * 2,
                          height=ell_radius_y * 2,
                          facecolor=facecolor,
                          **kwargs)
        scale_x = np.sqrt(cov[0, 0]) * n_std
        mean_x = mean[0]
        scale_y = np.sqrt(cov[1, 1]) * n_std
        mean_y = mean[1]
        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)
        ellipse.set_transform(transf + ax.transData)
        return ax.add_patch(ellipse)

    def draw(self, ax, n_std=2.0, facecolor='none', **kwargs):
        # Plot one confidence ellipse per component (2D only)
        if self.dim != 2:
            print("Drawing available only for 2D case.")
            return
        for i in range(self.k):
            self.plot_gaussian(self.mu[i], self.sigma[i], ax, n_std=n_std, edgecolor=self.colors[i], facecolor=facecolor, **kwargs)
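The per-point e_step and log_likelihood above can underflow when a point is far from every component. A minimal log-space variant using scipy's logsumexp (a standalone sketch, not part of the original notebook; the helper name log_responsibilities is hypothetical):

from scipy.special import logsumexp

def log_responsibilities(gmm, X):
    # log_prob[n, i] = log pi_i + log N(x_n | mu_i, sigma_i)
    log_prob = np.stack([
        np.log(gmm.pi[i]) + multivariate_normal.logpdf(X, mean=gmm.mu[i], cov=gmm.sigma[i])
        for i in range(gmm.k)
    ], axis=1)
    log_norm = logsumexp(log_prob, axis=1, keepdims=True)  # log of the mixture density per point
    return log_prob - log_norm, log_norm.sum()             # log responsibilities, total log-likelihood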

def gen_data(k=3, dim=2, points_per_cluster=200, lim=[-10, 10]):
    # Sample points_per_cluster points from each of k random Gaussians
    x = []
    mean = random.rand(k, dim) * (lim[1] - lim[0]) + lim[0]
    for i in range(k):
        # A random rectangular matrix A yields a valid covariance via A @ A.T
        cov = random.rand(dim, dim + 10)
        cov = np.matmul(cov, cov.T)
        _x = np.random.multivariate_normal(mean[i], cov, points_per_cluster)
        x += list(_x)
    x = np.array(x)
    if dim == 2:
        fig = plt.figure(figsize=(5, 4))
        ax = fig.gca()
        ax.scatter(x[:, 0], x[:, 1], s=3, alpha=0.4)
        ax.autoscale(enable=True)
    return x
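A note on the covariance construction in gen_data: for any real matrix A, the product A @ A.T is symmetric positive semidefinite, so it is always a valid covariance matrix, and drawing A with shape dim x (dim + 10) makes it full rank with high probability. A quick check (a sketch, not in the original notebook):

A = random.rand(2, 12)            # same shape as in gen_data for dim=2
cov = np.matmul(A, A.T)
eigvals = np.linalg.eigvalsh(cov)
print(eigvals)                    # both eigenvalues should be non-negative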

def plot(title, ax):
    # Scatter the data, mark the current component means, and draw the
    # confidence ellipses (uses the globals X and gmm defined below)
    ax.scatter(X[:, 0], X[:, 1], s=3, alpha=0.4)
    ax.scatter(gmm.mu[:, 0], gmm.mu[:, 1], c=gmm.colors)
    gmm.draw(ax, lw=3)
    ax.set_xlim((-12, 12))
    ax.set_ylim((-12, 12))
    ax.set_title(title)

X = gen_data(k=2, dim=2, points_per_cluster=1000)


gmm = GMM(k=2, dim=2, colors=['red', 'orange'])
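EM only finds a local optimum, so the result depends on the initial parameters. A common, cheap alternative to the random means used by default is to seed the means with randomly chosen data points (a sketch, not part of the original notebook; gmm_seeded is a hypothetical name):

idx = random.choice(len(X), size=2, replace=False)  # pick two distinct data points
gmm_seeded = GMM(k=2, dim=2, init_mu=X[idx].copy(), colors=['red', 'orange'])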

# Training the GMM using EM


gmm.init_em(X)
num_iters = 20
log_likelihood = [gmm.log_likelihood(X)]

fig, axs = plt.subplots(5, 4, figsize=(12, 9))

# Iterating through EM steps


for e in range(num_iters):
    # E-step
    gmm.e_step()
    # M-step
    gmm.m_step()
    # Computing log-likelihood
    log_likelihood.append(gmm.log_likelihood(X))
    print("Iteration: {}, log-likelihood: {:.4f}".format(e + 1, log_likelihood[-1]))
    # Plotting (the 5x4 grid holds exactly num_iters = 20 panels)
    if e < axs.size:
        plot("Iteration: " + str(e + 1), axs[e // 4, e % 4])

plt.tight_layout()
plt.show()
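Rather than running a fixed 20 iterations, EM is often stopped once the log-likelihood stops improving. A minimal convergence-based loop (a sketch; the tolerance value and loop structure are assumptions, not from the original notebook):

tol, max_iters = 1e-4, 200
ll_prev = gmm.log_likelihood(X)
for e in range(max_iters):
    gmm.e_step()
    gmm.m_step()
    ll = gmm.log_likelihood(X)
    if ll - ll_prev < tol:  # EM never decreases the likelihood, so this gap is >= 0
        break
    ll_prev = ll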


Iteration: 1, log-likelihood: -8942.9833
Iteration: 2, log-likelihood: -8898.5335
Iteration: 3, log-likelihood: -8878.0091
Iteration: 4, log-likelihood: -8859.3504
Iteration: 5, log-likelihood: -8840.8788
Iteration: 6, log-likelihood: -8824.2268
Iteration: 7, log-likelihood: -8810.8088
Iteration: 8, log-likelihood: -8801.0437
Iteration: 9, log-likelihood: -8794.5307
Iteration: 10, log-likelihood: -8790.4836
Iteration: 11, log-likelihood: -8788.1017
Iteration: 12, log-likelihood: -8786.7544
Iteration: 13, log-likelihood: -8786.0132
Iteration: 14, log-likelihood: -8785.6133
Iteration: 15, log-likelihood: -8785.4002
Iteration: 16, log-likelihood: -8785.2877
Iteration: 17, log-likelihood: -8785.2287
Iteration: 18, log-likelihood: -8785.1978
Iteration: 19, log-likelihood: -8785.1817
Iteration: 20, log-likelihood: -8785.1734


# Plot log-likelihood
fig = plt.figure(figsize=(5, 4))
plt.plot(log_likelihood[1:], marker='.')
plt.show()
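Once training finishes, hard cluster assignments can be read off the fitted responsibilities (a sketch; labels is a hypothetical name):

labels = gmm.z.argmax(axis=1)  # most responsible component per point
print(np.bincount(labels))     # number of points assigned to each of the 2 components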
