• Home
  • About
    • Qikai Gu photo

      Qikai Gu

      Software Engineer in Machine Learning

    • Learn More
    • LinkedIn
    • Github
    • Twitter
    • StackOverflow
  • Posts
    • All Posts
    • All Tags
  • Projects

[ML from Scratch] Principal Component Analysis (PCA)

01 Mar 2025

Reading time ~1 minute

Principal Component Analysis (PCA) is an unsupervised learning algorithm.

Implementation from scratch

import numpy as np
from numpy import ndarray


class PCA:
    """Principal Component Analysis via eigendecomposition of the covariance.

    After ``fit``, ``mean`` holds the per-feature mean of the training data
    and ``components`` holds the top ``n_components`` eigenvectors of the
    sample covariance matrix as columns, i.e. an array of shape
    ``(n_features, n_components)`` ordered by decreasing eigenvalue.
    """

    def __init__(self, n_components: int = 3):
        """Create an unfitted model.

        Args:
            n_components: Number of principal components to keep.

        Raises:
            ValueError: If ``n_components`` is not at least 1.
        """
        # Written as ``not >=`` so that NaN is rejected as well.
        if not n_components >= 1:
            raise ValueError("n_components must be a positive integer")

        self.n_components = n_components
        self.mean = None
        self.components = None

    def _check_X(self, X: ndarray):
        """Validate that ``X`` is a non-empty 2-D array.

        Raises:
            ValueError: If ``X`` is not 2-D or has no rows.
        """
        if len(X.shape) != 2:
            raise ValueError("X must be of shape (n_samples, n_features)")
        if X.shape[0] < 1:
            raise ValueError("X must contain at least 1 sample")

    def fit(self, X: ndarray):
        """Estimate the mean and principal axes from ``X``.

        Args:
            X: Training data of shape ``(n_samples, n_features)``.

        Returns:
            The fitted instance, to allow call chaining.

        Raises:
            ValueError: If ``X`` is not a valid 2-D array, has fewer features
                than ``n_components``, or has fewer than 2 samples.
        """
        self._check_X(X)

        if X.shape[1] < self.n_components:
            raise ValueError("The number of features in X cannot be smaller than n_components")
        # The unbiased sample covariance divides by (n_samples - 1), so it is
        # undefined for a single sample.
        if X.shape[0] < 2:
            raise ValueError("X must contain at least 2 samples to estimate the covariance")

        self.mean = X.mean(axis=0)
        X_centered = X - self.mean
        # np.cov collapses to a 0-d array when there is a single feature;
        # restore the (n_features, n_features) shape expected by eigh.
        cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))
        # The covariance matrix is symmetric, so eigh applies: it returns real
        # eigenvalues and orthonormal eigenvectors stored as columns.
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
        sorted_indices = np.argsort(eigenvalues)[::-1]
        # Select the columns belonging to the largest eigenvalues.
        self.components = eigenvectors[:, sorted_indices[:self.n_components]]
        return self

    def transform(self, X: ndarray) -> ndarray:
        """Project ``X`` onto the fitted principal components.

        Args:
            X: Data of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples, n_components)``.

        Raises:
            ValueError: If ``X`` is not a valid 2-D array.
            RuntimeError: If ``fit`` has not been called.
        """
        self._check_X(X)

        if self.mean is None or self.components is None:
            raise RuntimeError("Must call fit() first")

        return np.dot(X - self.mean, self.components)

Unit test:

def test_pca():
    # Placeholder: the body of the unit test is elided here.
    ...

# Run the test immediately at module level.
test_pca()


machine-learning ml-from-scratch Share Tweet +1