[ML from Scratch] Logistic Regression

01 Mar 2025

Reading time ~3 minutes

Logistic Regression is a supervised learning algorithm used for binary classification, which models the probability of an instance belonging to a class using the sigmoid function and optimizes parameters via cross-entropy loss minimization.

Since Logistic Regression does not have a closed-form solution, it is solved using an iterative optimization algorithm such as Gradient Descent. The key steps are:

Set weights \(\mathbf{w}\) and bias \(b\) to small random values or zeros.
Iterate the following steps until convergence:
1. Use the sigmoid function to estimate probabilities.
2. Calculate loss.
3. Compute the gradients of loss w.r.t. weights and bias.
4. Adjust weights and bias using learning rate.
Iteration stop conditions (any):
1. The loss function stops decreasing.
2. A fixed number of epochs is reached.

For a step-by-step derivation of the gradients, check this post: Step-by-Step Derivation of Gradients for Logistic Regression using the Chain Rule

Implementation from scratch

import numpy as np
from numpy import ndarray


class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    The model estimates ``P(y = 1 | x) = sigmoid(x . w + b)`` and updates the
    weights ``w`` and bias ``b`` for a fixed number of epochs using the
    gradient of the mean cross-entropy loss.

    Args:
        learning_rate: Step size of each gradient-descent update; must be > 0.
        epochs: Number of full-batch updates performed by ``fit``; must be > 0.

    Raises:
        ValueError: If ``learning_rate`` or ``epochs`` is not greater than 0.
    """

    def __init__(
        self, learning_rate: float = 0.01, epochs: int = 1000
    ):
        # `not x > 0` (rather than `x <= 0`) also rejects NaN.
        if not learning_rate > 0:
            raise ValueError(
                "learning_rate must be greater than 0"
            )
        if not epochs > 0:
            raise ValueError("epochs must be greater than 0")

        self.learning_rate = learning_rate
        self.epochs = epochs
        # Both stay None until fit() has been called.
        self.weights: ndarray | None = None
        self.bias: float | None = None

    def _check_X(self, X: np.ndarray):
        """Validate that ``X`` is a non-empty 2-D array.

        Raises:
            ValueError: If ``X`` is not 2-dimensional or has no samples.
        """
        if len(X.shape) != 2:
            raise ValueError(
                "X must be of shape (n_samples, n_features)"
            )
        if X.shape[0] < 1:
            raise ValueError("X must contain at least 1 sample")

    def sigmoid(self, z: np.ndarray) -> np.ndarray:
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``.

        The naive formula overflows in ``exp(-z)`` for large negative ``z``.
        Here the exponential is only ever taken of a non-positive number, so
        it stays within (0, 1] and cannot overflow:

        * ``z >= 0``: ``1 / (1 + exp(-z))``
        * ``z <  0``: ``exp(z) / (1 + exp(z))``
        """
        exp_neg_abs = np.exp(-np.abs(z))
        return np.where(
            z >= 0,
            1 / (1 + exp_neg_abs),
            exp_neg_abs / (1 + exp_neg_abs),
        )

    def fit(
        self, X: np.ndarray, y: np.ndarray
    ) -> "LogisticRegression":
        """Learn weights and bias from training data.

        Args:
            X: Feature matrix of shape ``(n_samples, n_features)``.
            y: Target vector of shape ``(n_samples,)``.

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array, ``y`` is not
                1-D, or ``X`` and ``y`` disagree on the number of samples.
        """
        self._check_X(X)
        if len(y.shape) != 1:
            raise ValueError("y must be of shape (n_samples,)")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must have the same number of samples"
            )

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.epochs):
            z = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(z)

            # dJ/dw = dJ/da * da/dz * dz/dw
            #       = 1/m * (-y/y_hat + (1-y)/(1-y_hat)) * y_hat * (1-y_hat) * X
            #       = 1/m * (-y * (1-y_hat) + (1-y) * y_hat) * X
            #       = 1/m * (-y + y * y_hat + y_hat - y * y_hat) * X
            #       = 1/m * (y_hat - y) * X

            # The residual is shared by both gradients; compute it once.
            error = y_pred - y
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return the estimated probability of class 1 for each row of ``X``.

        Args:
            X: Feature matrix of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples,)`` with values in ``[0, 1]``.

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array.
            RuntimeError: If the model has not been fitted yet.
        """
        self._check_X(X)

        if self.weights is None or self.bias is None:
            raise RuntimeError("Must call fit() first")

        z = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(z)
        return y_pred

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return hard 0/1 labels by thresholding ``predict_proba`` at 0.5."""
        y_pred = self.predict_proba(X)
        return (y_pred >= 0.5).astype(int)


def test_logistic_regression():
    """Smoke-test LogisticRegression on a tiny 1-D dataset.

    Checks that points on either side of the class boundary are labelled
    as expected, and that ``fit`` rejects malformed inputs with ValueError.
    """
    features = np.array([[0], [1], [2], [3]])
    labels = np.array([0, 0, 1, 1])

    # fit() returns the estimator, so construction and training can be chained.
    classifier = LogisticRegression(
        learning_rate=0.1, epochs=100
    ).fit(features, labels)
    queries = np.array([[0.9], [2.1]])
    np.testing.assert_equal(classifier.predict(queries), [0, 1])

    # Each (X, y) pair below must be rejected by fit():
    #   1. X is 3-dimensional instead of 2-dimensional.
    #   2. X has two samples but y has none.
    invalid_inputs = [
        (np.random.rand(1, 2, 3), np.array([1])),
        (np.empty(shape=(2, 3)), np.array([])),
    ]
    for bad_X, bad_y in invalid_inputs:
        with np.testing.assert_raises(ValueError):
            LogisticRegression(
                learning_rate=0.1, epochs=100
            ).fit(X=bad_X, y=bad_y)


# Script entry point: run the self-test when this file is executed directly
# (not when it is imported), and report success if no assertion failed.
if __name__ == "__main__":
    test_logistic_regression()
    print("All tests passed.")