import numpy as np
import matplotlib.pyplot as plt
Week 2: Kernel PCA
Kernel PCA
Let’s take a dataset \mathbf{X} \in \mathbb{R}^{d \times n} where
- d: no. of features
- n: no. of datapoints
X=\left [ \begin{array}{ccccc} | & | & | & & | \\ x_1 & x_2 & x_3 & \ldots & x_n \\ | & | & | & & | \end{array} \right ]
X = np.array([[1, 1],[2, 4],[-1, 1],[-2, 4]]).T
X
array([[ 1, 2, -1, -2],
[ 1, 4, 1, 4]])
plt.scatter(X[0, :], X[1, :])
plt.axhline(c='k')
plt.axvline(c='k')
plt.grid()
Step 1: Calculate \mathbf{K} \in \mathbb{R}^{n \times n} using a kernel function where \mathbf{K}_{ij}=k(x_i,x_j).
def pol_ker(A, B, k):
    return (A.T@B + 1) ** k
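Here pol_ker(X, X, 2) gives a degree-2 polynomial kernel, so each entry of the kernel matrix is

\mathbf{K}_{ij} = k(x_i, x_j) = \left( x_i^{\top} x_j + 1 \right)^{2}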
K_pol = pol_ker(X, X, 2)
K_pol
array([[ 9, 49, 1, 9],
[ 49, 441, 9, 169],
[ 1, 9, 9, 49],
[ 9, 169, 49, 441]])
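As an optional sanity check (not part of the original steps), the same matrix can be reproduced with scikit-learn’s polynomial_kernel. Note that scikit-learn expects samples in rows, so we pass X.T and set degree=2, gamma=1, coef0=1 to match pol_ker:

from sklearn.metrics.pairwise import polynomial_kernel

# Should print True: (x_i^T x_j + 1)^2 entrywise, same as pol_ker(X, X, 2)
print(np.allclose(K_pol, polynomial_kernel(X.T, degree=2, gamma=1, coef0=1)))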
Step 2: Center the kernel using the following formula.
\mathbf{K}^C=\mathbf{K}-\mathbf{I}\mathbf{K}-\mathbf{K}\mathbf{I}+\mathbf{I}\mathbf{K}\mathbf{I} where \mathbf{K}^C is the centered kernel, and \mathbf{I} \in \mathbb{R}^{n \times n} where all the elements are \frac{1}{n}.
def ker_cen(K):
    n = K.shape[0]
    I = np.ones((n,n)) * (1/n)
    return K - I@K - K@I + I@K@I
KC = ker_cen(K_pol)
KC
array([[ 67., -43., 59., -83.],
[-43., 199., -83., -73.],
[ 59., -83., 67., -43.],
[-83., -73., -43., 199.]])
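A quick property check (an addition, not one of the steps above): centering makes every row and column of \mathbf{K}^C sum to zero, which is also why \mathbf{K}^C always has a zero eigenvalue:

# Both should print values that are (numerically) zero
print(KC.sum(axis=0))
print(KC.sum(axis=1))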
Step 3: Compute the eigenvectors \{\beta _1, \beta _2, \ldots, \beta _n\} and eigenvalues \{n\lambda _1, n\lambda _2, \ldots, n\lambda _n\} of K^C and normalize to get
\forall u \hspace{2em} \alpha _u = \frac{\beta _u}{\sqrt{n \lambda _u}}
# Enter your solution here
lam, bet = np.linalg.eigh(KC)
# eigh returns eigenvalues in ascending order; reverse to get them in decreasing
# order and drop the last pair (its eigenvalue is ~0 because centering makes the
# rows of KC sum to zero)
lam, bet = lam[::-1][:-1], bet[:,::-1][:,:-1]
lam, bet
(array([277.9275172, 252. , 2.0724828]),
array([[ 0.10365278, -0.5 , -0.69946844],
[ 0.69946844, 0.5 , 0.10365278],
[-0.10365278, -0.5 , 0.69946844],
[-0.69946844, 0.5 , -0.10365278]]))
Step 4: Compute \sum _{j=1}^{n}\mathbf{\alpha }_{kj}\mathbf{K}_{ij}^{C} \ \ \forall k
\begin{equation*} \mathbf{x}_{i} \in \mathbb{R}^{d}\rightarrow \left[\begin{array}{ c c c c } \sum\limits _{j=1}^{n}\mathbf{\alpha }_{1j}\mathbf{K}_{ij}^{C} & \sum\limits _{j=1}^{n}\mathbf{\alpha }_{2j}\mathbf{K}_{ij}^{C} & \dotsc & \sum\limits _{j=1}^{n}\mathbf{\alpha }_{nj}\mathbf{K}_{ij}^{C} \end{array}\right] \end{equation*}
alp = bet / np.sqrt(lam.reshape((1,-1)))
alp
array([[ 0.00621749, -0.03149704, -0.48587288],
[ 0.0419568 , 0.03149704, 0.0720005 ],
[-0.00621749, -0.03149704, 0.48587288],
[-0.0419568 , 0.03149704, -0.0720005 ]])
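As an added check on the normalization: with \alpha _u = \frac{\beta _u}{\sqrt{n \lambda _u}}, the scaled eigenvectors satisfy \alpha _u^{\top} \mathbf{K}^C \alpha _v = \delta _{uv}, so alp.T @ KC @ alp should be (approximately) the identity matrix:

# Should print True
print(np.allclose(alp.T @ KC @ alp, np.eye(alp.shape[1])))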
X_prime = KC@alp
X_prime
array([[ 1.72801191, -7.93725393, -1.00696319],
[ 11.66094908, 7.93725393, 0.14921979],
[ -1.72801191, -7.93725393, 1.00696319],
[-11.66094908, 7.93725393, -0.14921979]])
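For reference (this comparison is an addition to the notebook), scikit-learn’s KernelPCA should give the same projections up to a sign flip per component, once its polynomial-kernel parameters are matched to pol_ker:

from sklearn.decomposition import KernelPCA

kpca = KernelPCA(n_components=3, kernel='poly', degree=2, gamma=1, coef0=1)
X_sklearn = kpca.fit_transform(X.T)  # scikit-learn expects samples in rows

# Should print True: columns agree with X_prime up to sign
print(np.allclose(np.abs(X_sklearn), np.abs(X_prime)))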
Kernel PCA on Swiss Roll Dataset
Step 1: Import Libraries
First, let’s import the necessary libraries:
from sklearn.datasets import make_swiss_roll
from mpl_toolkits.mplot3d import Axes3D
Step 2: Create the Swiss Roll Dataset
In the next cell, we’ll generate the Swiss Roll dataset. This dataset is commonly used to demonstrate nonlinear dimensionality reduction techniques like Kernel PCA. We’ll create a function to generate the dataset with specified parameters:
def generate_swiss_roll(n_samples=1000, noise=0.2):
    t = 1.5 * np.pi * (1 + 2 * np.random.rand(1, n_samples))
    x = t * np.cos(t)
    y = 21 * np.random.rand(1, n_samples)
    z = t * np.sin(t)
    data = np.vstack((x, y, z)).T + noise * np.random.randn(n_samples, 3)
    return data
# Generate the Swiss Roll dataset
swiss_roll = generate_swiss_roll()
Step 3: Visualize the Original Dataset
Now, let’s create a 3D scatter plot to see what the original Swiss Roll dataset looks like:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(swiss_roll[:, 0], swiss_roll[:, 1], swiss_roll[:, 2], c='b')
ax.set_title("Original Swiss Roll Dataset")
plt.show()
Step 4: Define the Kernel Function
Kernel PCA relies on a kernel function. We’ll use the Radial Basis Function (RBF) kernel, also known as the Gaussian kernel.
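Concretely, for two points x_i and x_j the kernel value computed below is

k(x_i, x_j) = \exp\left( -\frac{\left\Vert x_i - x_j \right\Vert ^2}{2\sigma ^2} \right)

where \sigma controls the width of the Gaussian.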
def rbf_kernel(X, sigma=1.0):
    pairwise_dists = np.sum(X**2, axis=1).reshape(-1, 1) + np.sum(X**2, axis=1) - 2 * np.dot(X, X.T)
    return np.exp(-pairwise_dists / (2 * sigma**2))
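To double-check this implementation (optional), scikit-learn’s rbf_kernel computes \exp(-\gamma \left\Vert x_i - x_j \right\Vert ^2), so it should match ours when \gamma = \frac{1}{2\sigma ^2}. We import it under an alias so it doesn’t clash with the rbf_kernel defined above:

from sklearn.metrics.pairwise import rbf_kernel as sk_rbf_kernel

sigma = 1.0
# Should print True (up to floating-point error)
print(np.allclose(rbf_kernel(swiss_roll, sigma=sigma),
                  sk_rbf_kernel(swiss_roll, gamma=1 / (2 * sigma**2))))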
Step 5: Implement Kernel PCA
Now, let’s implement Kernel PCA. We’ll perform the following steps:
- Compute the kernel matrix.
- Center the kernel matrix.
- Calculate the eigenvalues and eigenvectors of the centered kernel matrix.
- Sort the eigenvectors by decreasing eigenvalues.
- Select the top k scaled eigenvectors as the new feature vectors.
def kernel_pca(X, n_components=2, sigma=1.0):
    # Compute the kernel matrix
    K = rbf_kernel(X, sigma)

    # Center the kernel matrix
    n = K.shape[0]
    one_n = np.ones((n, n)) / n
    K_centered = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)

    # Calculate eigenvalues and eigenvectors
    eigvals, eigvecs = np.linalg.eigh(K_centered)

    # Sort eigenvectors by decreasing eigenvalues
    eigvals, eigvecs = eigvals[::-1], eigvecs[:, ::-1]

    # Select the top n_components scaled eigenvectors
    alphas = eigvecs[:, :n_components] / np.sqrt(eigvals[:n_components])

    return alphas
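Note that kernel_pca returns the scaled eigenvectors \alpha themselves as the low-dimensional representation, while Step 4 of the worked example above projected via \mathbf{K}^C \alpha. Since \mathbf{K}^C \alpha _u = n\lambda _u \alpha _u, the two only differ by a per-component rescaling, so the scatter plot below looks the same either way. If you want projections on the same scale as the earlier example, here is a minimal sketch of that variant (it reuses rbf_kernel from above; kernel_pca_project is a name introduced here for illustration):

def kernel_pca_project(X, n_components=2, sigma=1.0):
    # Same steps as kernel_pca, but return the projections K_centered @ alphas,
    # matching the projection formula from the worked example
    K = rbf_kernel(X, sigma)
    n = K.shape[0]
    one_n = np.ones((n, n)) / n
    K_centered = K - one_n @ K - K @ one_n + one_n @ K @ one_n
    eigvals, eigvecs = np.linalg.eigh(K_centered)
    eigvals, eigvecs = eigvals[::-1], eigvecs[:, ::-1]
    alphas = eigvecs[:, :n_components] / np.sqrt(eigvals[:n_components])
    return K_centered @ alphas  # equals alphas * eigvals[:n_components]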
# Apply Kernel PCA to the Swiss Roll dataset
n_components = 2
alphas = kernel_pca(swiss_roll, n_components, sigma=1.0)
Step 6: Visualize the Transformed Data
Finally, let’s visualize the data in the new feature space:
# Separate the transformed data into two components
pc1 = alphas[:, 0]
pc2 = alphas[:, 1]

# Create a scatter plot colored by data point index
plt.scatter(pc1, pc2, c=range(len(pc1)))
plt.title("Kernel PCA Result")
plt.xlabel("First Principal Component")
plt.ylabel("Second Principal Component")
plt.colorbar(label='Data Point Index')
plt.show()