import numpy as np
import pandas as pd
Week 4: EM Algorithm
Colab Link: Click here!
= '{:.5f}'.format
pd.options.display.float_format
%precision 5
'%.5f'
Consider the following one-dimensional dataset:
X\ =\ \begin{bmatrix}
-1.5 & -1 & -0.5 & 0.5 & 1 & 1.5
\end{bmatrix}
= np.array([(-1.5),(-1),(-0.5),(0.5),(1),(1.5)]) X
E.M Algorithm
Initialize \displaystyle \theta ^{0} = \displaystyle \left\{\begin{matrix} \mu _{1}^{0} , & \dotsc & ,\mu _{k}^{0}\\ \sigma ^{2}{}_{1}^{0} , & \dotsc & ,\sigma ^{2}{}_{k}^{0}\\ \pi _{1}^{0} , & \dotsc & ,\pi _{k}^{0} \end{matrix}\right\}
Until Convergence (||\theta ^{t+1} -\theta ^{t} ||\leq \epsilon ), where \epsilon is the tolerance parameter, do the following:
\begin{align*} \lambda ^{t+1} &=\underset{\lambda }{\arg\max}\text{ modified\_log} (\theta ^{t} ,\lambda ) \quad \rightarrow \text{Expectation Step } \\ \theta ^{t+1} &=\underset{\theta }{\arg\max}\text{ modified\_log} (\theta ,\lambda ^{t+1}) \rightarrow \text{Maximization Step } \end{align*}
We can initialize \displaystyle \theta ^{0} as,
\displaystyle \begin{aligned}
\mu _{1}^{0} \ & =-0.667\\
\mu _{2}^{0} \ & =\ 0.667\\
\sigma ^{2}{}_{1}^{0} & =\ 0.722\\
\sigma ^{2}{}_{2}^{0} & =\ 0.722\ \\
\pi _{1}^{0} & =\ 0.5\\
\pi _{2}^{0} & =\ 0.5
\end{aligned}
def init():
return np.array([-0.667,0.667, 0.722, 0.722, 0.5, 0.5])
= init() theta_0
In the E-step we calculate the values of λ_{k}^{i}
\begin{align*}
\hat{\lambda }{_{k}^{i}}^{MML} & =\frac{\left(\frac{1}{\sqrt{2\pi } \sigma _{k}} e^{\frac{-(x_{i} -\mu _{k} )^{2}}{2\sigma _{k}^{2}}}\right) *\pi _{k}}{{\displaystyle \sum _{k=1}^{K}\left(\frac{1}{\sqrt{2\pi } \sigma _{k}} e^{\frac{-(x_{i} -\mu _{k} )^{2}}{2\sigma _{k}^{2}}} *\pi _{k}\right)}}
\end{align*}
def gaussian(x, mu, sigma):
= np.sqrt(2 * np.pi) * sigma
den = np.exp(-(x - mu) ** 2 / (2 * sigma ** 2))
num return num / den
def estep(theta, X):
= X.shape[0]
n = theta.shape[0] // 3
K = theta[: K],\
mu, sigma, pi 2 * K]),\
np.sqrt(theta[K: 2 * K: ]
theta[= np.zeros((n, K))
lamb for i in range(n):
= X[i]
x = sum([pi[k] * gaussian(x, mu[k], sigma[k])
evidence for k in range(K)])
for k in range(K):
= pi[k]
prior = gaussian(x, mu[k], sigma[k])
likelihood = prior * likelihood / evidence
lamb[i][k] return lamb
def mstep(lamb, X):
= lamb.shape
n, K = np.zeros(K)
mu = np.zeros(K)
var = np.zeros(K)
pi for k in range(K):
= (X * lamb[:, k]).sum() / lamb[:, k].sum()
mu[k] = (((X - mu[k]) ** 2) * lamb[:, k]).sum() / lamb[:, k].sum()
var[k] = lamb[:, k].sum() / n
pi[k] return np.concatenate([mu, var, pi])
We will repeat the above two steps until the given convergence criteria is satisfied,
(||\theta ^{t+1} -\theta ^{t} ||\leq \epsilon )
For the given example we shall perform these steps for 8 iterations. After 8 iterations the change in values in negligible.
= theta_0
theta_k = np.zeros(6)
theta_k_1
for k in range(8):
= estep(theta_k, X)
lamb_k = theta_k
theta_k_1 = mstep(lamb_k, X)
theta_k print("\nlambda-"+str(k+1)+":")
print(pd.DataFrame(data=lamb_k.T,columns=[1,2,3,4,5,6],index=[1,2]))
print("\ntheta-"+str(k+1)+": "+str(theta_k))
print("\nnorm(theta_k - theta_k-1): "+str(np.round(np.linalg.norm(theta_k - theta_k_1),5)))
print('-' * 70)
lambda-1:
1 2 3 4 5 6
1 0.94111 0.86385 0.71582 0.28418 0.13615 0.05889
2 0.05889 0.13615 0.28418 0.71582 0.86385 0.94111
theta-1: [-0.75562 0.75562 0.5957 0.5957 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.2182
----------------------------------------------------------------------
lambda-2:
1 2 3 4 5 6
1 0.97823 0.92669 0.78048 0.21952 0.07331 0.02177
2 0.02177 0.07331 0.21952 0.78048 0.92669 0.97823
theta-2: [-0.85619 0.85619 0.43361 0.43361 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.26976
----------------------------------------------------------------------
lambda-3:
1 2 3 4 5 6
1 0.99733 0.98109 0.87810 0.12190 0.01891 0.00267
2 0.00267 0.01891 0.12190 0.87810 0.98109 0.99733
theta-3: [-0.94409 0.94409 0.27536 0.27536 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.25602
----------------------------------------------------------------------
lambda-4:
1 2 3 4 5 6
1 0.99997 0.99895 0.96859 0.03141 0.00105 0.00003
2 0.00003 0.00105 0.03141 0.96859 0.99895 0.99997
theta-4: [-0.98879 0.98879 0.18895 0.18895 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.13758
----------------------------------------------------------------------
lambda-5:
1 2 3 4 5 6
1 1.00000 0.99997 0.99469 0.00531 0.00003 0.00000
2 0.00000 0.00003 0.00531 0.99469 0.99997 1.00000
theta-5: [-0.99821 0.99821 0.17024 0.17024 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.02962
----------------------------------------------------------------------
lambda-6:
1 2 3 4 5 6
1 1.00000 0.99999 0.99717 0.00283 0.00001 0.00000
2 0.00000 0.00001 0.00283 0.99717 0.99999 1.00000
theta-6: [-0.99905 0.99905 0.16857 0.16857 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.00265
----------------------------------------------------------------------
lambda-7:
1 2 3 4 5 6
1 1.00000 0.99999 0.99734 0.00266 0.00001 0.00000
2 0.00000 0.00001 0.00266 0.99734 0.99999 1.00000
theta-7: [-0.99911 0.99911 0.16845 0.16845 0.5 0.5 ]
norm(theta_k - theta_k-1): 0.00018
----------------------------------------------------------------------
lambda-8:
1 2 3 4 5 6
1 1.00000 0.99999 0.99735 0.00265 0.00001 0.00000
2 0.00000 0.00001 0.00265 0.99735 0.99999 1.00000
theta-8: [-0.99911 0.99911 0.16844 0.16844 0.5 0.5 ]
norm(theta_k - theta_k-1): 1e-05
----------------------------------------------------------------------