Logistic Regression is a supervised learning algorithm for binary classification. It models the probability that an instance belongs to the positive class by passing a linear combination of the features through the sigmoid function, and it learns its parameters by minimizing the cross-entropy loss.
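Concretely, for an input \(\mathbf{x}\), weights \(\mathbf{w}\), bias \(b\), and \(m\) training samples, the model and the loss it minimizes are:

\[
\hat{y} = \sigma(\mathbf{w}^\top \mathbf{x} + b), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}
\]

\[
J(\mathbf{w}, b) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log \hat{y}^{(i)} + \left(1 - y^{(i)}\right) \log\left(1 - \hat{y}^{(i)}\right) \right]
\]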
Since Logistic Regression has no closed-form solution, its parameters are estimated with an iterative optimization algorithm such as Gradient Descent. The key steps are:
- Initialize the weights \(\mathbf{w}\) and bias \(b\) to zeros or small random values.
- Repeat the following steps until convergence (the corresponding equations are given after this list):
  - Use the sigmoid function to estimate the predicted probabilities.
  - Compute the cross-entropy loss.
  - Compute the gradients of the loss with respect to the weights and bias.
  - Update the weights and bias by stepping against the gradients, scaled by the learning rate.
- Stop iterating when any of the following holds:
  - The loss stops decreasing.
  - A fixed number of epochs is reached.
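For the cross-entropy loss above, the gradients and the Gradient Descent updates (with learning rate \(\eta\)) are:

\[
\frac{\partial J}{\partial \mathbf{w}} = \frac{1}{m} X^\top (\hat{\mathbf{y}} - \mathbf{y}), \qquad
\frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left(\hat{y}^{(i)} - y^{(i)}\right)
\]

\[
\mathbf{w} \leftarrow \mathbf{w} - \eta \frac{\partial J}{\partial \mathbf{w}}, \qquad
b \leftarrow b - \eta \frac{\partial J}{\partial b}
\]

These correspond exactly to the `dw`, `db`, and parameter-update lines in the implementation below.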
Implementation from scratch:
import numpy as np

class LogisticRegression:
    def __init__(self, learning_rate: float = 0.01, epochs: int = 1000):
        if learning_rate <= 0:
            raise ValueError("learning_rate must be greater than 0")
        if epochs <= 0:
            raise ValueError("epochs must be greater than 0")
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def _check_X(self, X: np.ndarray):
        if X.ndim != 2:
            raise ValueError("X must be of shape (n_samples, n_features)")
        if X.shape[0] < 1:
            raise ValueError("X must contain at least 1 sample")

    def sigmoid(self, z: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-z))

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LogisticRegression":
        self._check_X(X)
        if y.ndim != 1:
            raise ValueError("y must be of shape (n_samples,)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0
        for _ in range(self.epochs):
            # Forward pass: predicted probabilities under the current parameters.
            z = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(z)
            # Gradients of the cross-entropy loss via the chain rule:
            # dJ/dw = dJ/da * da/dz * dz/dw
            #       = 1/m * (-y/y_hat + (1-y)/(1-y_hat)) * y_hat * (1-y_hat) * X
            #       = 1/m * (-y * (1-y_hat) + (1-y) * y_hat) * X
            #       = 1/m * (-y + y * y_hat + y_hat - y * y_hat) * X
            #       = 1/m * (y_hat - y) * X
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)
            # Gradient Descent step.
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
        return self

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        self._check_X(X)
        if self.weights is None or self.bias is None:
            raise RuntimeError("Must call fit() first")
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def predict(self, X: np.ndarray) -> np.ndarray:
        # Threshold the predicted probability of the positive class at 0.5.
        y_pred = self.predict_proba(X)
        return (y_pred >= 0.5).astype(int)
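One caveat with `sigmoid` as written: for large negative \(z\), `np.exp(-z)` overflows and NumPy emits a RuntimeWarning (the result still saturates correctly to 0). If clean numerics matter, a numerically stable variant is a common substitution; the `stable_sigmoid` helper below is a sketch of one, not part of the original implementation:

def stable_sigmoid(z: np.ndarray) -> np.ndarray:
    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    pos = z >= 0
    # For z >= 0, exp(-z) <= 1, so 1 / (1 + exp(-z)) cannot overflow.
    out[pos] = 1 / (1 + np.exp(-z[pos]))
    # For z < 0, rewrite as exp(z) / (1 + exp(z)); exp(z) < 1, so no overflow.
    exp_z = np.exp(z[~pos])
    out[~pos] = exp_z / (1 + exp_z)
    return out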
Unit test:
def test_logistic_regression():
    # Happy path: a 1-D dataset that is linearly separable around x = 1.5.
    X = np.array([[0], [1], [2], [3]])
    y = np.array([0, 0, 1, 1])
    model = LogisticRegression(learning_rate=0.1, epochs=100)
    model.fit(X, y)
    predictions = model.predict(np.array([[0.9], [2.1]]))
    np.testing.assert_equal(predictions, [0, 1])
    # X with the wrong number of dimensions is rejected.
    with np.testing.assert_raises(ValueError):
        model = LogisticRegression(learning_rate=0.1, epochs=100)
        model.fit(X=np.random.rand(1, 2, 3), y=np.array([1]))
    # Mismatched sample counts between X and y are rejected.
    with np.testing.assert_raises(ValueError):
        model = LogisticRegression(learning_rate=0.1, epochs=100)
        model.fit(X=np.empty(shape=(2, 3)), y=np.array([]))

test_logistic_regression()
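As an optional sanity check, the from-scratch model can be compared with scikit-learn on the same data. This sketch assumes scikit-learn >= 1.2 (where `penalty=None` disables regularization, mirroring the plain cross-entropy loss used here) and is not part of the original test:

from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

X = np.array([[0], [1], [2], [3]])
y = np.array([0, 0, 1, 1])
test_points = np.array([[0.9], [2.1]])

sk_model = SklearnLogisticRegression(penalty=None).fit(X, y)
our_model = LogisticRegression(learning_rate=0.1, epochs=100).fit(X, y)

# Both models should assign the same classes to these easy points.
np.testing.assert_equal(sk_model.predict(test_points), our_model.predict(test_points))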