From cceb2f046d112094dd149985e1d482da40b1b194 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Sun, 13 Dec 2020 13:35:14 -0800 Subject: [PATCH] feat: lasso documentation --- src/linalg/naive/dense_matrix.rs | 24 ++++++-- src/linear/{elasticnet.rs => elastic_net.rs} | 65 ++++++++++++++++++-- src/linear/lasso.rs | 29 +++------ src/linear/mod.rs | 2 +- 4 files changed, 86 insertions(+), 34 deletions(-) rename src/linear/{elasticnet.rs => elastic_net.rs} (74%) diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index 400366d..a0b7bdb 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -187,9 +187,7 @@ impl BaseVector for Vec { ); } - for i in 0..self.len() { - self[i] = other[i]; - } + self[..].clone_from_slice(&other[..]); } } @@ -929,9 +927,7 @@ impl BaseMatrix for DenseMatrix { ); } - for i in 0..self.values.len() { - self.values[i] = other.values[i]; - } + self.values[..].clone_from_slice(&other.values[..]); } fn abs_mut(&mut self) -> &Self { @@ -1066,6 +1062,14 @@ mod tests { assert_eq!(32.0, BaseVector::dot(&v1, &v2)); } + #[test] + fn vec_copy_from() { + let mut v1 = vec![1., 2., 3.]; + let v2 = vec![4., 5., 6.]; + v1.copy_from(&v2); + assert_eq!(v1, v2); + } + #[test] fn vec_approximate_eq() { let a = vec![1., 2., 3.]; @@ -1199,6 +1203,14 @@ mod tests { assert_eq!(a.dot(&b), 32.); } + #[test] + fn copy_from() { + let mut a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]); + let b = DenseMatrix::from_2d_array(&[&[7., 8.], &[9., 10.], &[11., 12.]]); + a.copy_from(&b); + assert_eq!(a, b); + } + #[test] fn slice() { let m = DenseMatrix::from_2d_array(&[ diff --git a/src/linear/elasticnet.rs b/src/linear/elastic_net.rs similarity index 74% rename from src/linear/elasticnet.rs rename to src/linear/elastic_net.rs index 7b6acb1..c01f3c7 100644 --- a/src/linear/elasticnet.rs +++ b/src/linear/elastic_net.rs @@ -1,5 +1,51 @@ +#![allow(clippy::needless_range_loop)] //! # Elastic Net //! 
+//! Elastic net is an extension of [linear regression](../linear_regression/index.html) that adds regularization penalties to the loss function during training. +//! Just like in ordinary linear regression you assume a linear relationship between input variables and the target variable. +//! Unlike linear regression, elastic net adds regularization penalties to the loss function during training. +//! In particular, the elastic net coefficient estimates \\(\beta\\) are the values that minimize +//! +//! \\[L(\alpha, \beta) = \vert \boldsymbol{y} - \boldsymbol{X}\beta\vert^2 + \lambda_1 \vert \beta \vert^2 + \lambda_2 \vert \beta \vert_1\\] +//! +//! where \\(\lambda_1 = \alpha l_{1r}\\), \\(\lambda_2 = \alpha (1 - l_{1r})\\) and \\(l_{1r}\\) is the l1 ratio, the elastic net mixing parameter. +//! +//! In essence, elastic net combines both the [L1](../lasso/index.html) and [L2](../ridge_regression/index.html) penalties during training, +//! which can result in better performance than a model with either one or the other penalty on some problems. +//! The elastic net is particularly useful when the number of predictors (p) is much bigger than the number of observations (n). +//! +//! Example: +//! +//! ``` +//! use smartcore::linalg::naive::dense_matrix::*; +//! use smartcore::linear::elastic_net::*; +//! +//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html) +//! let x = DenseMatrix::from_2d_array(&[ +//! &[234.289, 235.6, 159.0, 107.608, 1947., 60.323], +//! &[259.426, 232.5, 145.6, 108.632, 1948., 61.122], +//! &[258.054, 368.2, 161.6, 109.773, 1949., 60.171], +//! &[284.599, 335.1, 165.0, 110.929, 1950., 61.187], +//! &[328.975, 209.9, 309.9, 112.075, 1951., 63.221], +//! &[346.999, 193.2, 359.4, 113.270, 1952., 63.639], +//! &[365.385, 187.0, 354.7, 115.094, 1953., 64.989], +//! &[363.112, 357.8, 335.0, 116.219, 1954., 63.761], +//! &[397.469, 290.4, 304.8, 117.388, 1955., 66.019], +//! 
&[419.180, 282.2, 285.7, 118.734, 1956., 67.857], +//! &[442.769, 293.6, 279.8, 120.445, 1957., 68.169], +//! &[444.546, 468.1, 263.7, 121.950, 1958., 66.513], +//! &[482.704, 381.3, 255.2, 123.366, 1959., 68.655], +//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], +//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], +//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], +//! ]); +//! +//! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, +//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; +//! +//! let y_hat = ElasticNet::fit(&x, &y, Default::default()). +//! and_then(|lr| lr.predict(&x)).unwrap(); +//! ``` //! //! ## References: //! @@ -19,17 +65,24 @@ use crate::math::num::RealNumber; use crate::linear::lasso_optimizer::InteriorPointOptimizer; -/// Ridge Regression parameters +/// Elastic net parameters #[derive(Serialize, Deserialize, Debug)] pub struct ElasticNetParameters { + /// Regularization parameter. pub alpha: T, + /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1. + /// For l1_ratio = 0 the penalty is an L2 penalty. + /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. pub l1_ratio: T, + /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation. pub normalize: bool, + /// The tolerance for the optimization pub tol: T, + /// The maximum number of iterations pub max_iter: usize, } -/// Ridge regression +/// Elastic net #[derive(Serialize, Deserialize, Debug)] pub struct ElasticNet> { coefficients: M, @@ -56,7 +109,7 @@ impl> PartialEq for ElasticNet { } impl> ElasticNet { - /// Fits ridge regression to your data. + /// Fits elastic net regression to your data. /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation. /// * `y` - target values /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values. 
@@ -81,7 +134,7 @@ impl> ElasticNet { let (w, b) = if parameters.normalize { let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?; - let (x, y, gamma) = Self::augment_X_and_y(&scaled_x, y, l2_reg); + let (x, y, gamma) = Self::augment_x_and_y(&scaled_x, y, l2_reg); let mut optimizer = InteriorPointOptimizer::new(&x, p); @@ -102,7 +155,7 @@ impl> ElasticNet { (w, b) } else { - let (x, y, gamma) = Self::augment_X_and_y(x, y, l2_reg); + let (x, y, gamma) = Self::augment_x_and_y(x, y, l2_reg); let mut optimizer = InteriorPointOptimizer::new(&x, p); @@ -159,7 +212,7 @@ impl> ElasticNet { Ok((scaled_x, col_mean, col_std)) } - fn augment_X_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) { + fn augment_x_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) { let (n, p) = x.shape(); let gamma = T::one() / (T::one() + l2_reg).sqrt(); diff --git a/src/linear/lasso.rs b/src/linear/lasso.rs index bb9e69c..7395bdc 100644 --- a/src/linear/lasso.rs +++ b/src/linear/lasso.rs @@ -105,18 +105,15 @@ impl> Lasso { return Err(Failed::fit("Number of rows in X should = len(y)")); } + let l1_reg = parameters.alpha * T::from_usize(n).unwrap(); + let (w, b) = if parameters.normalize { let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?; let mut optimizer = InteriorPointOptimizer::new(&scaled_x, p); - let mut w = optimizer.optimize( - &scaled_x, - y, - parameters.alpha, - parameters.max_iter, - parameters.tol, - )?; + let mut w = + optimizer.optimize(&scaled_x, y, l1_reg, parameters.max_iter, parameters.tol)?; for (j, col_std_j) in col_std.iter().enumerate().take(p) { w.set(j, 0, w.get(j, 0) / *col_std_j); @@ -133,8 +130,7 @@ impl> Lasso { } else { let mut optimizer = InteriorPointOptimizer::new(x, p); - let w = - optimizer.optimize(x, y, parameters.alpha, parameters.max_iter, parameters.tol)?; + let w = optimizer.optimize(x, y, l1_reg, parameters.max_iter, parameters.tol)?; (w, y.mean()) }; @@ -215,18 +211,9 @@ mod tests { 114.2, 115.7, 116.9, ]; - let 
y_hat = Lasso::fit( - &x, - &y, - LassoParameters { - alpha: 0.1, - normalize: true, - tol: 1e-4, - max_iter: 1000, - }, - ) - .and_then(|lr| lr.predict(&x)) - .unwrap(); + let y_hat = Lasso::fit(&x, &y, Default::default()) + .and_then(|lr| lr.predict(&x)) + .unwrap(); assert!(mean_absolute_error(&y_hat, &y) < 2.0); diff --git a/src/linear/mod.rs b/src/linear/mod.rs index 8c056e8..3824d36 100644 --- a/src/linear/mod.rs +++ b/src/linear/mod.rs @@ -21,7 +21,7 @@ //! pub(crate) mod bg_solver; -pub mod elasticnet; +pub mod elastic_net; pub mod lasso; pub(crate) mod lasso_optimizer; pub mod linear_regression;