feat: lasso documentation

2020-12-13 13:35:14 -08:00
parent a27c29b736
commit cceb2f046d
4 changed files with 86 additions and 34 deletions
@@ -1,5 +1,51 @@
+#![allow(clippy::needless_range_loop)]
 //! # Elastic Net
 //!
+//! Elastic net is an extension of [linear regression](../linear_regression/index.html) that adds regularization penalties to the loss function during training.
+//! Just like in ordinary linear regression you assume a linear relationship between input variables and the target variable.
+//! Unlike linear regression elastic net adds regularization penalties to the loss function during training.
+//! In particular, the elastic net coefficient estimates \\(\beta\\) are the values that minimize
+//!
+//! \\[L(\alpha, \beta) = \vert \boldsymbol{y} - \boldsymbol{X}\beta\vert^2 + \lambda_1 \vert \beta \vert^2 + \lambda_2 \vert \beta \vert_1\\]
+//!
+//! where \\(\lambda_1 = \\alpha l_{1r}\\), \\(\lambda_2 = \\alpha (1 -  l_{1r})\\) and \\(l_{1r}\\) is the l1 ratio, elastic net mixing parameter.
+//!
+//! In essense, elastic net combines both the [L1](../lasso/index.html) and [L2](../ridge_regression/index.html) penalties during training,
+//! which can result in better performance than a model with either one or the other penalty on some problems.
+//! The elastic net is particularly useful when the number of predictors (p) is much bigger than the number of observations (n).
+//!
+//! Example:
+//!
+//! ```
+//! use smartcore::linalg::naive::dense_matrix::*;
+//! use smartcore::linear::elastic_net::*;
+//!
+//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html)
+//! let x = DenseMatrix::from_2d_array(&[
+//!               &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+//!               &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
+//!               &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
+//!               &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
+//!               &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
+//!               &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
+//!               &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
+//!               &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
+//!               &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
+//!               &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
+//!               &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
+//!               &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
+//!               &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
+//!               &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
+//!               &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
+//!               &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
+//!          ]);
+//!
+//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
+//!           100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
+//!
+//! let y_hat = ElasticNet::fit(&x, &y, Default::default()).
+//!                 and_then(|lr| lr.predict(&x)).unwrap();
+//! ```
 //!
 //! ## References:
 //!
@@ -19,17 +65,24 @@ use crate::math::num::RealNumber;

 use crate::linear::lasso_optimizer::InteriorPointOptimizer;

-/// Ridge Regression parameters
+/// Elastic net parameters
 #[derive(Serialize, Deserialize, Debug)]
 pub struct ElasticNetParameters<T: RealNumber> {
+    /// Regularization parameter.
    pub alpha: T,
+    /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
+    /// For l1_ratio = 0 the penalty is an L2 penalty.
+    /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
    pub l1_ratio: T,
+    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub normalize: bool,
+    /// The tolerance for the optimization
    pub tol: T,
+    /// The maximum number of iterations
    pub max_iter: usize,
 }

-/// Ridge regression
+/// Elastic net
 #[derive(Serialize, Deserialize, Debug)]
 pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
    coefficients: M,
@@ -56,7 +109,7 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for ElasticNet<T, M> {
 }

 impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
-    /// Fits ridge regression to your data.
+    /// Fits elastic net regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - target values
    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
@@ -81,7 +134,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
        let (w, b) = if parameters.normalize {
            let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;

-            let (x, y, gamma) = Self::augment_X_and_y(&scaled_x, y, l2_reg);
+            let (x, y, gamma) = Self::augment_x_and_y(&scaled_x, y, l2_reg);

            let mut optimizer = InteriorPointOptimizer::new(&x, p);

@@ -102,7 +155,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {

            (w, b)
        } else {
-            let (x, y, gamma) = Self::augment_X_and_y(x, y, l2_reg);
+            let (x, y, gamma) = Self::augment_x_and_y(x, y, l2_reg);

            let mut optimizer = InteriorPointOptimizer::new(&x, p);

@@ -159,7 +212,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
        Ok((scaled_x, col_mean, col_std))
    }

-    fn augment_X_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) {
+    fn augment_x_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) {
        let (n, p) = x.shape();

        let gamma = T::one() / (T::one() + l2_reg).sqrt();
@@ -105,18 +105,15 @@ impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
            return Err(Failed::fit("Number of rows in X should = len(y)"));
        }

+        let l1_reg = parameters.alpha * T::from_usize(n).unwrap();
+
        let (w, b) = if parameters.normalize {
            let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;

            let mut optimizer = InteriorPointOptimizer::new(&scaled_x, p);

-            let mut w = optimizer.optimize(
-                &scaled_x,
-                y,
-                parameters.alpha,
-                parameters.max_iter,
-                parameters.tol,
-            )?;
+            let mut w =
+                optimizer.optimize(&scaled_x, y, l1_reg, parameters.max_iter, parameters.tol)?;

            for (j, col_std_j) in col_std.iter().enumerate().take(p) {
                w.set(j, 0, w.get(j, 0) / *col_std_j);
@@ -133,8 +130,7 @@ impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
        } else {
            let mut optimizer = InteriorPointOptimizer::new(x, p);

-            let w =
-                optimizer.optimize(x, y, parameters.alpha, parameters.max_iter, parameters.tol)?;
+            let w = optimizer.optimize(x, y, l1_reg, parameters.max_iter, parameters.tol)?;

            (w, y.mean())
        };
@@ -215,18 +211,9 @@ mod tests {
            114.2, 115.7, 116.9,
        ];

-        let y_hat = Lasso::fit(
-            &x,
-            &y,
-            LassoParameters {
-                alpha: 0.1,
-                normalize: true,
-                tol: 1e-4,
-                max_iter: 1000,
-            },
-        )
-        .and_then(|lr| lr.predict(&x))
-        .unwrap();
+        let y_hat = Lasso::fit(&x, &y, Default::default())
+            .and_then(|lr| lr.predict(&x))
+            .unwrap();

        assert!(mean_absolute_error(&y_hat, &y) < 2.0);

@@ -21,7 +21,7 @@
 //! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>

 pub(crate) mod bg_solver;
-pub mod elasticnet;
+pub mod elastic_net;
 pub mod lasso;
 pub(crate) mod lasso_optimizer;
 pub mod linear_regression;