@@ -32,16 +32,17 @@ def __init__(self, fit_intercept=True):
        self._is_fit = False

-    def update(self, x, y):
+    def update(self, X, y):
        r"""
-        Incrementally update the least-squares coefficients on a new example
-        via recursive least-squares (RLS) [1]_.
+        Incrementally update the least-squares coefficients for a set of new
+        examples.

        Notes
        -----
-        The RLS algorithm [2]_ is used to efficiently update the regression
-        parameters as new examples become available. For a new example
-        :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the parameter updates are
+        The recursive least-squares algorithm [1]_ [2]_ is used to efficiently
+        update the regression parameters as new examples become available. For
+        a single new example :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the
+        parameter updates are

        .. math::
@@ -55,33 +56,41 @@ def update(self, x, y):
        :math:`\mathbf{X}_{1:t}` and :math:`\mathbf{Y}_{1:t}` are the set of
        examples observed from timestep 1 to *t*.

-        To perform the above update efficiently, the RLS algorithm makes use of
-        the Sherman-Morrison formula [3]_ to avoid re-inverting the covariance
-        matrix on each new update.
+        In the single-example case, the RLS algorithm uses the Sherman-Morrison
+        formula [3]_ to avoid re-inverting the covariance matrix on each new
+        update. In the multi-example case (i.e., where :math:`\mathbf{X}_{t+1}`
+        and :math:`\mathbf{y}_{t+1}` are matrices of `N` examples each), we use
+        the generalized Woodbury matrix identity [4]_ to update the inverse
+        covariance. This costs more per update, but is still faster than
+        running *N* separate single-example updates when *N* is large.

        References
        ----------
        .. [1] Gauss, C. F. (1821). *Theoria combinationis observationum
           erroribus minimis obnoxiae*. Werke, 4. Göttingen.
        .. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
        .. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+        .. [4] https://en.wikipedia.org/wiki/Woodbury_matrix_identity

        Parameters
        ----------
-        x : :py:class:`ndarray <numpy.ndarray>` of shape `(1, M)`
-            A single example of rank `M`
-        y : :py:class:`ndarray <numpy.ndarray>` of shape `(1, K)`
-            A `K`-dimensional target vector for the current example
+        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
+            A dataset consisting of `N` examples, each of dimension `M`
+        y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, K)`
+            The targets for each of the `N` examples in `X`, where each target
+            has dimension `K`
        """
        if not self._is_fit:
            raise RuntimeError("You must call the `fit` method before calling `update`")

-        x, y = np.atleast_2d(x), np.atleast_2d(y)
-        beta, S_inv = self.beta, self.sigma_inv
+        X, y = np.atleast_2d(X), np.atleast_2d(y)

-        X1, Y1 = x.shape[0], y.shape[0]
-        err_str = f"First dimension of x and y must be 1, but got {X1} and {Y1}"
-        assert X1 == Y1 == 1, err_str
+        X1, Y1 = X.shape[0], y.shape[0]
+        self._update1D(X, y) if X1 == Y1 == 1 else self._update2D(X, y)
+
+    def _update1D(self, x, y):
+        """Sherman-Morrison update for a single example"""
+        beta, S_inv = self.beta, self.sigma_inv

        # convert x to a design vector if we're fitting an intercept
        if self.fit_intercept:
@@ -93,6 +102,22 @@ def update(self, x, y):
        # update the model coefficients
        beta += S_inv @ x.T @ (y - x @ beta)

+    def _update2D(self, X, y):
+        """Woodbury update for multiple examples"""
+        beta, S_inv = self.beta, self.sigma_inv
+
+        # convert X to a design matrix if we're fitting an intercept
+        if self.fit_intercept:
+            X = np.c_[np.ones(X.shape[0]), X]
+
+        I = np.eye(X.shape[0])
+
+        # update the inverse of the covariance matrix via Woodbury identity
+        S_inv -= S_inv @ X.T @ np.linalg.pinv(I + X @ S_inv @ X.T) @ X @ S_inv
+
+        # update the model coefficients
+        beta += S_inv @ X.T @ (y - X @ beta)
+
    def fit(self, X, y):
        """
        Fit the regression coefficients via maximum likelihood.
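
As a quick sanity check on the new `_update2D` path (not part of the commit; the variable names below are illustrative), the following standalone sketch verifies that the Woodbury step reproduces a full re-inversion of the covariance matrix:

import numpy as np

rng = np.random.default_rng(0)
M, N0, N1 = 3, 10, 4                 # feature dim, initial batch size, update batch size

X0 = rng.standard_normal((N0, M))    # examples available at fit time
X1 = rng.standard_normal((N1, M))    # a new batch of N1 examples

S_inv = np.linalg.inv(X0.T @ X0)     # inverse covariance after the initial fit

# Woodbury update, mirroring the `S_inv -= ...` line in `_update2D`
I = np.eye(N1)
S_inv_upd = S_inv - S_inv @ X1.T @ np.linalg.pinv(I + X1 @ S_inv @ X1.T) @ X1 @ S_inv

# ground truth: re-invert the covariance of the concatenated dataset
X_all = np.vstack([X0, X1])
assert np.allclose(S_inv_upd, np.linalg.inv(X_all.T @ X_all))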
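On the calling side, a minimal usage sketch (assuming the enclosing class is numpy-ml's `LinearRegression` and that the import path below matches your checkout): a single example dispatches to the Sherman-Morrison path, a batch to the Woodbury path.

import numpy as np
from numpy_ml.linear_models import LinearRegression  # assumed import path

rng = np.random.default_rng(1)
X, y = rng.standard_normal((50, 3)), rng.standard_normal((50, 1))

model = LinearRegression(fit_intercept=True)
model.fit(X, y)                            # `update` raises a RuntimeError before `fit`

model.update(X[:1], y[:1])                 # N == 1 -> _update1D (Sherman-Morrison)
model.update(rng.standard_normal((8, 3)),
             rng.standard_normal((8, 1)))  # N > 1  -> _update2D (Woodbury)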