From ab7f46603c576f7c96feea8a9db09db79d996a54 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Fri, 6 Nov 2020 10:48:00 -0800 Subject: [PATCH 1/8] feat: + ridge regression --- src/linalg/mod.rs | 48 +++++ src/linalg/naive/dense_matrix.rs | 3 + src/linalg/nalgebra_bindings.rs | 6 + src/linalg/ndarray_bindings.rs | 6 + src/linalg/stats.rs | 139 +++++++++++++ src/linear/mod.rs | 1 + src/linear/ridge_regression.rs | 323 +++++++++++++++++++++++++++++++ 7 files changed, 526 insertions(+) create mode 100644 src/linalg/stats.rs create mode 100644 src/linear/ridge_regression.rs diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index fb12909..42ed558 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -48,6 +48,7 @@ pub mod nalgebra_bindings; pub mod ndarray_bindings; /// QR factorization that factors a matrix into a product of an orthogonal matrix and an upper triangular matrix. pub mod qr; +pub mod stats; /// Singular value decomposition. pub mod svd; @@ -60,6 +61,7 @@ use cholesky::CholeskyDecomposableMatrix; use evd::EVDDecomposableMatrix; use lu::LUDecomposableMatrix; use qr::QRDecomposableMatrix; +use stats::MatrixStats; use svd::SVDDecomposableMatrix; /// Column or row vector @@ -163,6 +165,32 @@ pub trait BaseVector: Clone + Debug { ///assert_eq!(a.unique(), vec![-7., -6., -2., 1., 2., 3., 4.]); /// ``` fn unique(&self) -> Vec; + + /// Compute the arithmetic mean. + fn mean(&self) -> T { + let n = self.len(); + let mut mean = T::zero(); + + for i in 0..n { + mean += self.get(i); + } + mean / T::from_usize(n).unwrap() + } + /// Compute the standard deviation. + fn std(&self) -> T { + let n = self.len(); + + let mut mu = T::zero(); + let mut sum = T::zero(); + let div = T::from_usize(n).unwrap(); + for i in 0..n { + let xi = self.get(i); + mu += xi; + sum += xi * xi; + } + mu /= div; + (sum / div - mu * mu).sqrt() + } } /// Generic matrix type. @@ -510,6 +538,7 @@ pub trait Matrix: + QRDecomposableMatrix + LUDecomposableMatrix + CholeskyDecomposableMatrix + + MatrixStats + PartialEq + Display { @@ -545,3 +574,22 @@ impl<'a, T: RealNumber, M: BaseMatrix> Iterator for RowIter<'a, T, M> { res } } + +#[cfg(test)] +mod tests { + use crate::linalg::BaseVector; + + #[test] + fn mean() { + let m = vec![1., 2., 3.]; + + assert_eq!(m.mean(), 2.0); + } + + #[test] + fn std() { + let m = vec![1., 2., 3.]; + + assert!((m.std() - 0.81f64).abs() < 1e-2); + } +} diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index d3d6353..e34dd91 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -12,6 +12,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix; use crate::linalg::evd::EVDDecomposableMatrix; use crate::linalg::lu::LUDecomposableMatrix; use crate::linalg::qr::QRDecomposableMatrix; +use crate::linalg::stats::MatrixStats; use crate::linalg::svd::SVDDecomposableMatrix; use crate::linalg::Matrix; pub use crate::linalg::{BaseMatrix, BaseVector}; @@ -445,6 +446,8 @@ impl LUDecomposableMatrix for DenseMatrix {} impl CholeskyDecomposableMatrix for DenseMatrix {} +impl MatrixStats for DenseMatrix {} + impl Matrix for DenseMatrix {} impl PartialEq for DenseMatrix { diff --git a/src/linalg/nalgebra_bindings.rs b/src/linalg/nalgebra_bindings.rs index e0b885b..ad39057 100644 --- a/src/linalg/nalgebra_bindings.rs +++ b/src/linalg/nalgebra_bindings.rs @@ -46,6 +46,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix; use crate::linalg::evd::EVDDecomposableMatrix; use crate::linalg::lu::LUDecomposableMatrix; use crate::linalg::qr::QRDecomposableMatrix; +use crate::linalg::stats::MatrixStats; use crate::linalg::svd::SVDDecomposableMatrix; use crate::linalg::Matrix as SmartCoreMatrix; use crate::linalg::{BaseMatrix, BaseVector}; @@ -550,6 +551,11 @@ impl + MatrixStats for Matrix> +{ +} + impl SmartCoreMatrix for Matrix> { diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs index 00c9745..e8de983 100644 --- a/src/linalg/ndarray_bindings.rs +++ b/src/linalg/ndarray_bindings.rs @@ -53,6 +53,7 @@ use crate::linalg::cholesky::CholeskyDecomposableMatrix; use crate::linalg::evd::EVDDecomposableMatrix; use crate::linalg::lu::LUDecomposableMatrix; use crate::linalg::qr::QRDecomposableMatrix; +use crate::linalg::stats::MatrixStats; use crate::linalg::svd::SVDDecomposableMatrix; use crate::linalg::Matrix; use crate::linalg::{BaseMatrix, BaseVector}; @@ -500,6 +501,11 @@ impl + MatrixStats for ArrayBase, Ix2> +{ +} + impl Matrix for ArrayBase, Ix2> { diff --git a/src/linalg/stats.rs b/src/linalg/stats.rs new file mode 100644 index 0000000..f5db1e9 --- /dev/null +++ b/src/linalg/stats.rs @@ -0,0 +1,139 @@ +//! # Various Statistical Methods +//! +//! + +use crate::linalg::BaseMatrix; +use crate::math::num::RealNumber; + +/// Defines baseline implementations for various statistical functions +pub trait MatrixStats: BaseMatrix { + /// Compute the arithmetic mean along the specified axis. + fn mean(&self, axis: u8) -> Vec { + let (n, m) = match axis { + 0 => { + let (n, m) = self.shape(); + (m, n) + } + _ => self.shape(), + }; + + let mut x: Vec = vec![T::zero(); n]; + + let div = T::from_usize(m).unwrap(); + + for i in 0..n { + for j in 0..m { + x[i] += match axis { + 0 => self.get(j, i), + _ => self.get(i, j), + }; + } + x[i] /= div; + } + + x + } + + /// Compute the standard deviation along the specified axis. + fn std(&self, axis: u8) -> Vec { + let (n, m) = match axis { + 0 => { + let (n, m) = self.shape(); + (m, n) + } + _ => self.shape(), + }; + + let mut x: Vec = vec![T::zero(); n]; + + let div = T::from_usize(m).unwrap(); + + for i in 0..n { + let mut mu = T::zero(); + let mut sum = T::zero(); + for j in 0..m { + let a = match axis { + 0 => self.get(j, i), + _ => self.get(i, j), + }; + mu += a; + sum += a * a; + } + mu /= div; + x[i] = (sum / div - mu * mu).sqrt(); + } + + x + } + + /// standardize values by removing the mean and scaling to unit variance + fn scale_mut(&mut self, mean: &Vec, std: &Vec, axis: u8) { + let (n, m) = match axis { + 0 => { + let (n, m) = self.shape(); + (m, n) + } + _ => self.shape(), + }; + + for i in 0..n { + for j in 0..m { + match axis { + 0 => self.set(j, i, (self.get(j, i) - mean[i]) / std[i]), + _ => self.set(i, j, (self.get(i, j) - mean[i]) / std[i]), + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::DenseMatrix; + use crate::linalg::BaseVector; + + #[test] + fn mean() { + let m = DenseMatrix::from_2d_array(&[ + &[1., 2., 3., 1., 2.], + &[4., 5., 6., 3., 4.], + &[7., 8., 9., 5., 6.], + ]); + let expected_0 = vec![4., 5., 6., 3., 4.]; + let expected_1 = vec![1.8, 4.4, 7.]; + + assert_eq!(m.mean(0), expected_0); + assert_eq!(m.mean(1), expected_1); + } + + #[test] + fn std() { + let m = DenseMatrix::from_2d_array(&[ + &[1., 2., 3., 1., 2.], + &[4., 5., 6., 3., 4.], + &[7., 8., 9., 5., 6.], + ]); + let expected_0 = vec![2.44, 2.44, 2.44, 1.63, 1.63]; + let expected_1 = vec![0.74, 1.01, 1.41]; + + assert!(m.std(0).approximate_eq(&expected_0, 1e-2)); + assert!(m.std(1).approximate_eq(&expected_1, 1e-2)); + } + + #[test] + fn scale() { + let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); + let expected_0 = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]); + let expected_1 = DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]); + + { + let mut m = m.clone(); + m.scale_mut(&m.mean(0), &m.std(0), 0); + assert!(m.approximate_eq(&expected_0, std::f32::EPSILON)); + } + + m.scale_mut(&m.mean(1), &m.std(1), 1); + assert!(m.approximate_eq(&expected_1, 1e-2)); + } +} diff --git a/src/linear/mod.rs b/src/linear/mod.rs index 54bbca0..fef7070 100644 --- a/src/linear/mod.rs +++ b/src/linear/mod.rs @@ -22,3 +22,4 @@ pub mod linear_regression; pub mod logistic_regression; +pub mod ridge_regression; diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs new file mode 100644 index 0000000..18df6cb --- /dev/null +++ b/src/linear/ridge_regression.rs @@ -0,0 +1,323 @@ +//! # Ridge Regression +//! +//! [Linear regression](../linear_regression/index.html) is the standard algorithm for predicting a quantitative response \\(y\\) on the basis of a linear combination of explanatory variables \\(X\\) +//! that assumes that there is approximately a linear relationship between \\(X\\) and \\(y\\). +//! Ridge regression is an extension to linear regression that adds L2 regularization term to the loss function during training. +//! This term encourages simpler models that have smaller coefficient values. +//! +//! In ridge regression coefficients \\(\beta_0, \beta_0, ... \beta_n\\) are are estimated by solving +//! +//! \\[\hat{\beta} = (X^TX + \alpha I)^{-1}X^Ty \\] +//! +//! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates. +//! However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero. +//! +//! SmartCore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, +//! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition. +//! +//! Example: +//! +//! ``` +//! use smartcore::linalg::naive::dense_matrix::*; +//! use smartcore::linear::ridge_regression::*; +//! +//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html) +//! let x = DenseMatrix::from_2d_array(&[ +//! &[234.289, 235.6, 159.0, 107.608, 1947., 60.323], +//! &[259.426, 232.5, 145.6, 108.632, 1948., 61.122], +//! &[258.054, 368.2, 161.6, 109.773, 1949., 60.171], +//! &[284.599, 335.1, 165.0, 110.929, 1950., 61.187], +//! &[328.975, 209.9, 309.9, 112.075, 1951., 63.221], +//! &[346.999, 193.2, 359.4, 113.270, 1952., 63.639], +//! &[365.385, 187.0, 354.7, 115.094, 1953., 64.989], +//! &[363.112, 357.8, 335.0, 116.219, 1954., 63.761], +//! &[397.469, 290.4, 304.8, 117.388, 1955., 66.019], +//! &[419.180, 282.2, 285.7, 118.734, 1956., 67.857], +//! &[442.769, 293.6, 279.8, 120.445, 1957., 68.169], +//! &[444.546, 468.1, 263.7, 121.950, 1958., 66.513], +//! &[482.704, 381.3, 255.2, 123.366, 1959., 68.655], +//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], +//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], +//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], +//! ]); +//! +//! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, +//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; +//! +//! let y_hat = RidgeRegression::fit(&x, &y, RidgeRegressionParameters { +//! solver: RidgeRegressionSolverName::Cholesky, +//! alpha: 0.1, +//! normalize: true +//! }).and_then(|lr| lr.predict(&x)).unwrap(); +//! ``` +//! +//! ## References: +//! +//! * ["An Introduction to Statistical Learning", James G., Witten D., Hastie T., Tibshirani R., 6.2. Shrinkage Methods](http://faculty.marshall.usc.edu/gareth-james/ISL/) +//! * ["Numerical Recipes: The Art of Scientific Computing", Press W.H., Teukolsky S.A., Vetterling W.T, Flannery B.P, 3rd ed., Section 15.4 General Linear Least Squares](http://numerical.recipes/) +//! +//! +//! +use std::fmt::Debug; + +use serde::{Deserialize, Serialize}; + +use crate::error::Failed; +use crate::linalg::BaseVector; +use crate::linalg::Matrix; +use crate::math::num::RealNumber; + +#[derive(Serialize, Deserialize, Debug)] +/// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable. +pub enum RidgeRegressionSolverName { + /// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html) + Cholesky, + /// SVD decomposition, see [SVD](../../linalg/svd/index.html) + SVD, +} + +/// Ridge Regression parameters +#[derive(Serialize, Deserialize, Debug)] +pub struct RidgeRegressionParameters { + /// Solver to use for estimation of regression coefficients. + pub solver: RidgeRegressionSolverName, + /// Controls the strength of the penalty to the loss function. + pub alpha: T, + /// If true the regressors X will be normalized before regression + /// by subtracting the mean and dividing by the standard deviation. + pub normalize: bool, +} + +/// Ridge regression +#[derive(Serialize, Deserialize, Debug)] +pub struct RidgeRegression> { + coefficients: M, + intercept: T, + solver: RidgeRegressionSolverName, +} + +impl Default for RidgeRegressionParameters { + fn default() -> Self { + RidgeRegressionParameters { + solver: RidgeRegressionSolverName::Cholesky, + alpha: T::one(), + normalize: true, + } + } +} + +impl> PartialEq for RidgeRegression { + fn eq(&self, other: &Self) -> bool { + self.coefficients == other.coefficients + && (self.intercept - other.intercept).abs() <= T::epsilon() + } +} + +impl> RidgeRegression { + /// Fits ridge regression to your data. + /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation. + /// * `y` - target values + /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values. + pub fn fit( + x: &M, + y: &M::RowVector, + parameters: RidgeRegressionParameters, + ) -> Result, Failed> { + //w = inv(X^t X + alpha*Id) * X.T y + + let (n, p) = x.shape(); + + if n <= p { + return Err(Failed::fit(&format!( + "Number of rows in X should be >= number of columns in X" + ))); + } + + let y_column = M::from_row_vector(y.clone()).transpose(); + + let (w, b) = if parameters.normalize { + let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?; + let x_t = scaled_x.transpose(); + let x_t_y = x_t.matmul(&y_column); + let mut x_t_x = x_t.matmul(&scaled_x); + + for i in 0..p { + x_t_x.add_element_mut(i, i, parameters.alpha); + } + + let mut w = match parameters.solver { + RidgeRegressionSolverName::Cholesky => x_t_x.cholesky_solve_mut(x_t_y)?, + RidgeRegressionSolverName::SVD => x_t_x.svd_solve_mut(x_t_y)?, + }; + + for i in 0..p { + w.set(i, 0, w.get(i, 0) / col_std[i]); + } + + let mut b = T::zero(); + + for i in 0..p { + b += w.get(i, 0) * col_mean[i]; + } + + let b = y.mean() - b; + + (w, b) + } else { + let x_t = x.transpose(); + let x_t_y = x_t.matmul(&y_column); + let mut x_t_x = x_t.matmul(x); + + for i in 0..p { + x_t_x.add_element_mut(i, i, parameters.alpha); + } + + let w = match parameters.solver { + RidgeRegressionSolverName::Cholesky => x_t_x.cholesky_solve_mut(x_t_y)?, + RidgeRegressionSolverName::SVD => x_t_x.svd_solve_mut(x_t_y)?, + }; + + (w, T::zero()) + }; + + Ok(RidgeRegression { + intercept: b, + coefficients: w, + solver: parameters.solver, + }) + } + + fn rescale_x(x: &M) -> Result<(M, Vec, Vec), Failed> { + let col_mean = x.mean(0); + let col_std = x.std(0); + + for i in 0..col_std.len() { + if (col_std[i] - T::zero()).abs() < T::epsilon() { + return Err(Failed::fit(&format!( + "Cannot rescale constant column {}", + i + ))); + } + } + + let mut scaled_x = x.clone(); + scaled_x.scale_mut(&col_mean, &col_std, 0); + Ok((scaled_x, col_mean, col_std)) + } + + /// Predict target values from `x` + /// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features. + pub fn predict(&self, x: &M) -> Result { + let (nrows, _) = x.shape(); + let mut y_hat = x.matmul(&self.coefficients); + y_hat.add_mut(&M::fill(nrows, 1, self.intercept)); + Ok(y_hat.transpose().to_row_vector()) + } + + /// Get estimates regression coefficients + pub fn coefficients(&self) -> M { + self.coefficients.clone() + } + + /// Get estimate of intercept + pub fn intercept(&self) -> T { + self.intercept + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::*; + use crate::metrics::mean_absolute_error; + + #[test] + fn ridge_fit_predict() { + let x = DenseMatrix::from_2d_array(&[ + &[234.289, 235.6, 159.0, 107.608, 1947., 60.323], + &[259.426, 232.5, 145.6, 108.632, 1948., 61.122], + &[258.054, 368.2, 161.6, 109.773, 1949., 60.171], + &[284.599, 335.1, 165.0, 110.929, 1950., 61.187], + &[328.975, 209.9, 309.9, 112.075, 1951., 63.221], + &[346.999, 193.2, 359.4, 113.270, 1952., 63.639], + &[365.385, 187.0, 354.7, 115.094, 1953., 64.989], + &[363.112, 357.8, 335.0, 116.219, 1954., 63.761], + &[397.469, 290.4, 304.8, 117.388, 1955., 66.019], + &[419.180, 282.2, 285.7, 118.734, 1956., 67.857], + &[442.769, 293.6, 279.8, 120.445, 1957., 68.169], + &[444.546, 468.1, 263.7, 121.950, 1958., 66.513], + &[482.704, 381.3, 255.2, 123.366, 1959., 68.655], + &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], + &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], + &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], + ]); + + let y: Vec = vec![ + 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, + 114.2, 115.7, 116.9, + ]; + + let y_hat_cholesky = RidgeRegression::fit( + &x, + &y, + RidgeRegressionParameters { + solver: RidgeRegressionSolverName::Cholesky, + alpha: 0.1, + normalize: true, + }, + ) + .and_then(|lr| lr.predict(&x)) + .unwrap(); + + assert!(mean_absolute_error(&y_hat_cholesky, &y) < 2.0); + + let y_hat_svd = RidgeRegression::fit( + &x, + &y, + RidgeRegressionParameters { + solver: RidgeRegressionSolverName::SVD, + alpha: 0.1, + normalize: false, + }, + ) + .and_then(|lr| lr.predict(&x)) + .unwrap(); + + assert!(mean_absolute_error(&y_hat_svd, &y) < 2.0); + } + + #[test] + fn serde() { + let x = DenseMatrix::from_2d_array(&[ + &[234.289, 235.6, 159.0, 107.608, 1947., 60.323], + &[259.426, 232.5, 145.6, 108.632, 1948., 61.122], + &[258.054, 368.2, 161.6, 109.773, 1949., 60.171], + &[284.599, 335.1, 165.0, 110.929, 1950., 61.187], + &[328.975, 209.9, 309.9, 112.075, 1951., 63.221], + &[346.999, 193.2, 359.4, 113.270, 1952., 63.639], + &[365.385, 187.0, 354.7, 115.094, 1953., 64.989], + &[363.112, 357.8, 335.0, 116.219, 1954., 63.761], + &[397.469, 290.4, 304.8, 117.388, 1955., 66.019], + &[419.180, 282.2, 285.7, 118.734, 1956., 67.857], + &[442.769, 293.6, 279.8, 120.445, 1957., 68.169], + &[444.546, 468.1, 263.7, 121.950, 1958., 66.513], + &[482.704, 381.3, 255.2, 123.366, 1959., 68.655], + &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], + &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], + &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], + ]); + + let y = vec![ + 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, + 114.2, 115.7, 116.9, + ]; + + let lr = RidgeRegression::fit(&x, &y, Default::default()).unwrap(); + + let deserialized_lr: RidgeRegression> = + serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); + + assert_eq!(lr, deserialized_lr); + } +} From 83048dbe9457ced9e71309682d1b7db6437dd990 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Fri, 6 Nov 2020 11:20:43 -0800 Subject: [PATCH 2/8] fix: small doc changes --- src/linalg/mod.rs | 4 ++-- src/linalg/stats.rs | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 42ed558..fe3e197 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -166,7 +166,7 @@ pub trait BaseVector: Clone + Debug { /// ``` fn unique(&self) -> Vec; - /// Compute the arithmetic mean. + /// Computes the arithmetic mean. fn mean(&self) -> T { let n = self.len(); let mut mean = T::zero(); @@ -176,7 +176,7 @@ pub trait BaseVector: Clone + Debug { } mean / T::from_usize(n).unwrap() } - /// Compute the standard deviation. + /// Computes the standard deviation. fn std(&self) -> T { let n = self.len(); diff --git a/src/linalg/stats.rs b/src/linalg/stats.rs index f5db1e9..ecb7ceb 100644 --- a/src/linalg/stats.rs +++ b/src/linalg/stats.rs @@ -1,13 +1,14 @@ //! # Various Statistical Methods //! -//! +//! This module provides reference implementations for various statistical functions. +//! Concrete implementations of the `BaseMatrix` trait are free to override these methods for better performance. use crate::linalg::BaseMatrix; use crate::math::num::RealNumber; /// Defines baseline implementations for various statistical functions pub trait MatrixStats: BaseMatrix { - /// Compute the arithmetic mean along the specified axis. + /// Computes the arithmetic mean along the specified axis. fn mean(&self, axis: u8) -> Vec { let (n, m) = match axis { 0 => { @@ -34,7 +35,7 @@ pub trait MatrixStats: BaseMatrix { x } - /// Compute the standard deviation along the specified axis. + /// Computes the standard deviation along the specified axis. fn std(&self, axis: u8) -> Vec { let (n, m) = match axis { 0 => { From ca3a3a101c9209552db558cc20226b511d9636d6 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 11 Nov 2020 12:00:58 -0800 Subject: [PATCH 3/8] fix: ridge regression, post-review changes --- src/linalg/mod.rs | 25 +++++++++++++++---------- src/linalg/stats.rs | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index fe3e197..41ec415 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -168,16 +168,10 @@ pub trait BaseVector: Clone + Debug { /// Computes the arithmetic mean. fn mean(&self) -> T { - let n = self.len(); - let mut mean = T::zero(); - - for i in 0..n { - mean += self.get(i); - } - mean / T::from_usize(n).unwrap() + self.sum() / T::from_usize(self.len()).unwrap() } - /// Computes the standard deviation. - fn std(&self) -> T { + /// Computes variance. + fn var(&self) -> T { let n = self.len(); let mut mu = T::zero(); @@ -189,7 +183,11 @@ pub trait BaseVector: Clone + Debug { sum += xi * xi; } mu /= div; - (sum / div - mu * mu).sqrt() + sum / div - mu * mu + } + /// Computes the standard deviation. + fn std(&self) -> T { + self.var().sqrt() } } @@ -592,4 +590,11 @@ mod tests { assert!((m.std() - 0.81f64).abs() < 1e-2); } + + #[test] + fn var() { + let m = vec![1., 2., 3., 4.]; + + assert!((m.var() - 1.25f64).abs() < std::f64::EPSILON); + } } diff --git a/src/linalg/stats.rs b/src/linalg/stats.rs index ecb7ceb..fc339e0 100644 --- a/src/linalg/stats.rs +++ b/src/linalg/stats.rs @@ -35,8 +35,8 @@ pub trait MatrixStats: BaseMatrix { x } - /// Computes the standard deviation along the specified axis. - fn std(&self, axis: u8) -> Vec { + /// Computes variance along the specified axis. + fn var(&self, axis: u8) -> Vec { let (n, m) = match axis { 0 => { let (n, m) = self.shape(); @@ -61,7 +61,24 @@ pub trait MatrixStats: BaseMatrix { sum += a * a; } mu /= div; - x[i] = (sum / div - mu * mu).sqrt(); + x[i] = sum / div - mu * mu; + } + + x + } + + /// Computes the standard deviation along the specified axis. + fn std(&self, axis: u8) -> Vec { + + let mut x = self.var(axis); + + let n = match axis { + 0 => self.shape().1, + _ => self.shape().0, + }; + + for i in 0..n { + x[i] = x[i].sqrt(); } x @@ -122,6 +139,19 @@ mod tests { assert!(m.std(1).approximate_eq(&expected_1, 1e-2)); } + #[test] + fn var() { + let m = DenseMatrix::from_2d_array(&[ + &[1., 2., 3., 4.], + &[5., 6., 7., 8.] + ]); + let expected_0 = vec![4., 4., 4., 4.]; + let expected_1 = vec![1.25, 1.25]; + + assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON)); + assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON)); + } + #[test] fn scale() { let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); From 7a4fe114d8eb8d3d39e9985e5d56510e9bb4cad7 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 11 Nov 2020 12:01:57 -0800 Subject: [PATCH 4/8] fix: ridge regression, formatting --- src/linalg/stats.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/linalg/stats.rs b/src/linalg/stats.rs index fc339e0..ac7a1bc 100644 --- a/src/linalg/stats.rs +++ b/src/linalg/stats.rs @@ -69,7 +69,6 @@ pub trait MatrixStats: BaseMatrix { /// Computes the standard deviation along the specified axis. fn std(&self, axis: u8) -> Vec { - let mut x = self.var(axis); let n = match axis { @@ -77,7 +76,7 @@ pub trait MatrixStats: BaseMatrix { _ => self.shape().0, }; - for i in 0..n { + for i in 0..n { x[i] = x[i].sqrt(); } @@ -141,16 +140,13 @@ mod tests { #[test] fn var() { - let m = DenseMatrix::from_2d_array(&[ - &[1., 2., 3., 4.], - &[5., 6., 7., 8.] - ]); + let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); let expected_0 = vec![4., 4., 4., 4.]; let expected_1 = vec![1.25, 1.25]; assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON)); assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON)); - } + } #[test] fn scale() { From c42fccdc228a17cfff98d4dfd91be7f16a67ab39 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 11 Nov 2020 15:59:04 -0800 Subject: [PATCH 5/8] fix: ridge regression, code refactoring --- src/linear/linear_regression.rs | 4 +- src/linear/logistic_regression.rs | 75 ++++++++++++++++++++++--------- src/linear/ridge_regression.rs | 8 +++- 3 files changed, 63 insertions(+), 24 deletions(-) diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index 61bb678..5de5007 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -154,8 +154,8 @@ impl> LinearRegression { } /// Get estimates regression coefficients - pub fn coefficients(&self) -> M { - self.coefficients.clone() + pub fn coefficients(&self) -> &M { + &self.coefficients } /// Get estimate of intercept diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index ec09184..116d700 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -68,7 +68,8 @@ use crate::optimization::FunctionOrder; /// Logistic Regression #[derive(Serialize, Deserialize, Debug)] pub struct LogisticRegression> { - weights: M, + coefficients: M, + intercept: M, classes: Vec, num_attributes: usize, num_classes: usize, @@ -109,7 +110,7 @@ impl> PartialEq for LogisticRegression { } } - return self.weights == other.weights; + return self.coefficients == other.coefficients && self.intercept == other.intercept; } } } @@ -246,9 +247,11 @@ impl> LogisticRegression { }; let result = LogisticRegression::minimize(x0, objective); + let weights = result.x; Ok(LogisticRegression { - weights: result.x, + coefficients: weights.slice(0..1, 0..num_attributes), + intercept: weights.slice(0..1, num_attributes..num_attributes + 1), classes: classes, num_attributes: num_attributes, num_classes: k, @@ -268,7 +271,8 @@ impl> LogisticRegression { let weights = result.x.reshape(k, num_attributes + 1); Ok(LogisticRegression { - weights: weights, + coefficients: weights.slice(0..k, 0..num_attributes), + intercept: weights.slice(0..k, num_attributes..num_attributes + 1), classes: classes, num_attributes: num_attributes, num_classes: k, @@ -283,21 +287,26 @@ impl> LogisticRegression { let mut result = M::zeros(1, n); if self.num_classes == 2 { let (nrows, _) = x.shape(); - let x_and_bias = x.h_stack(&M::ones(nrows, 1)); - let y_hat: Vec = x_and_bias - .matmul(&self.weights.transpose()) - .get_col_as_vec(0); + let y_hat: Vec = x.matmul(&self.coefficients.transpose()).get_col_as_vec(0); + let intercept = self.intercept.get(0, 0); for i in 0..n { result.set( 0, i, - self.classes[if y_hat[i].sigmoid() > T::half() { 1 } else { 0 }], + self.classes[if (y_hat[i] + intercept).sigmoid() > T::half() { + 1 + } else { + 0 + }], ); } } else { - let (nrows, _) = x.shape(); - let x_and_bias = x.h_stack(&M::ones(nrows, 1)); - let y_hat = x_and_bias.matmul(&self.weights.transpose()); + let mut y_hat = x.matmul(&self.coefficients.transpose()); + for r in 0..n { + for c in 0..self.num_classes { + y_hat.set(r, c, y_hat.get(r, c) + self.intercept.get(c, 0)); + } + } let class_idxs = y_hat.argmax(); for i in 0..n { result.set(0, i, self.classes[class_idxs[i]]); @@ -307,17 +316,13 @@ impl> LogisticRegression { } /// Get estimates regression coefficients - pub fn coefficients(&self) -> M { - self.weights - .slice(0..self.num_classes, 0..self.num_attributes) + pub fn coefficients(&self) -> &M { + &self.coefficients } /// Get estimate of intercept - pub fn intercept(&self) -> M { - self.weights.slice( - 0..self.num_classes, - self.num_attributes..self.num_attributes + 1, - ) + pub fn intercept(&self) -> &M { + &self.intercept } fn minimize(x0: M, objective: impl ObjectiveFunction) -> OptimizerResult { @@ -336,7 +341,9 @@ impl> LogisticRegression { #[cfg(test)] mod tests { use super::*; + use crate::dataset::generator::make_blobs; use crate::linalg::naive::dense_matrix::*; + use crate::metrics::accuracy; #[test] fn multiclass_objective_f() { @@ -466,6 +473,34 @@ mod tests { ); } + #[test] + fn lr_fit_predict_multiclass() { + let blobs = make_blobs(15, 4, 3); + + let x = DenseMatrix::from_vec(15, 4, &blobs.data); + let y = blobs.target; + + let lr = LogisticRegression::fit(&x, &y).unwrap(); + + let y_hat = lr.predict(&x).unwrap(); + + assert!(accuracy(&y_hat, &y) > 0.9); + } + + #[test] + fn lr_fit_predict_binary() { + let blobs = make_blobs(20, 4, 2); + + let x = DenseMatrix::from_vec(20, 4, &blobs.data); + let y = blobs.target; + + let lr = LogisticRegression::fit(&x, &y).unwrap(); + + let y_hat = lr.predict(&x).unwrap(); + + assert!(accuracy(&y_hat, &y) > 0.9); + } + #[test] fn serde() { let x = DenseMatrix::from_2d_array(&[ diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index 18df6cb..a718e2a 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -134,6 +134,10 @@ impl> RidgeRegression { ))); } + if y.len() != n { + return Err(Failed::fit(&format!("Number of rows in X should = len(y)"))); + } + let y_column = M::from_row_vector(y.clone()).transpose(); let (w, b) = if parameters.normalize { @@ -216,8 +220,8 @@ impl> RidgeRegression { } /// Get estimates regression coefficients - pub fn coefficients(&self) -> M { - self.coefficients.clone() + pub fn coefficients(&self) -> &M { + &self.coefficients } /// Get estimate of intercept From cc26555bfd4df2c9b96e194c0739af5e015d8458 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 11 Nov 2020 16:10:37 -0800 Subject: [PATCH 6/8] fix: fixes suggested by Clippy --- src/linear/logistic_regression.rs | 76 ++++++++++++++++--------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 116d700..2df9b87 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -52,6 +52,7 @@ //! //! //! +use std::cmp::Ordering; use std::fmt::Debug; use std::marker::PhantomData; @@ -232,51 +233,53 @@ impl> LogisticRegression { yi[i] = classes.iter().position(|c| yc == *c).unwrap(); } - if k < 2 { - Err(Failed::fit(&format!( + match k.cmp(&2) { + Ordering::Less => Err(Failed::fit(&format!( "incorrect number of classes: {}. Should be >= 2.", k - ))) - } else if k == 2 { - let x0 = M::zeros(1, num_attributes + 1); + ))), + Ordering::Equal => { + let x0 = M::zeros(1, num_attributes + 1); - let objective = BinaryObjectiveFunction { - x: x, - y: yi, - phantom: PhantomData, - }; + let objective = BinaryObjectiveFunction { + x: x, + y: yi, + phantom: PhantomData, + }; - let result = LogisticRegression::minimize(x0, objective); - let weights = result.x; + let result = LogisticRegression::minimize(x0, objective); + let weights = result.x; - Ok(LogisticRegression { - coefficients: weights.slice(0..1, 0..num_attributes), - intercept: weights.slice(0..1, num_attributes..num_attributes + 1), - classes: classes, - num_attributes: num_attributes, - num_classes: k, - }) - } else { - let x0 = M::zeros(1, (num_attributes + 1) * k); + Ok(LogisticRegression { + coefficients: weights.slice(0..1, 0..num_attributes), + intercept: weights.slice(0..1, num_attributes..num_attributes + 1), + classes: classes, + num_attributes: num_attributes, + num_classes: k, + }) + } + Ordering::Greater => { + let x0 = M::zeros(1, (num_attributes + 1) * k); - let objective = MultiClassObjectiveFunction { - x: x, - y: yi, - k: k, - phantom: PhantomData, - }; + let objective = MultiClassObjectiveFunction { + x: x, + y: yi, + k: k, + phantom: PhantomData, + }; - let result = LogisticRegression::minimize(x0, objective); + let result = LogisticRegression::minimize(x0, objective); - let weights = result.x.reshape(k, num_attributes + 1); + let weights = result.x.reshape(k, num_attributes + 1); - Ok(LogisticRegression { - coefficients: weights.slice(0..k, 0..num_attributes), - intercept: weights.slice(0..k, num_attributes..num_attributes + 1), - classes: classes, - num_attributes: num_attributes, - num_classes: k, - }) + Ok(LogisticRegression { + coefficients: weights.slice(0..k, 0..num_attributes), + intercept: weights.slice(0..k, num_attributes..num_attributes + 1), + classes: classes, + num_attributes: num_attributes, + num_classes: k, + }) + } } } @@ -286,7 +289,6 @@ impl> LogisticRegression { let n = x.shape().0; let mut result = M::zeros(1, n); if self.num_classes == 2 { - let (nrows, _) = x.shape(); let y_hat: Vec = x.matmul(&self.coefficients.transpose()).get_col_as_vec(0); let intercept = self.intercept.get(0, 0); for i in 0..n { From f0371673a43642314ae55ebb0006b8dd2163625e Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 11 Nov 2020 17:23:49 -0800 Subject: [PATCH 7/8] fix: changes recommended by Clippy --- src/linear/logistic_regression.rs | 18 +++++++++--------- src/linear/ridge_regression.rs | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index addede7..4b52529 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -110,8 +110,8 @@ impl> PartialEq for LogisticRegression { return false; } } - - return self.coefficients == other.coefficients && self.intercept == other.intercept; + + self.coefficients == other.coefficients && self.intercept == other.intercept } } } @@ -242,7 +242,7 @@ impl> LogisticRegression { let x0 = M::zeros(1, num_attributes + 1); let objective = BinaryObjectiveFunction { - x: x, + x, y: yi, phantom: PhantomData, }; @@ -254,8 +254,8 @@ impl> LogisticRegression { Ok(LogisticRegression { coefficients: weights.slice(0..1, 0..num_attributes), intercept: weights.slice(0..1, num_attributes..num_attributes + 1), - classes: classes, - num_attributes: num_attributes, + classes, + num_attributes, num_classes: k, }) } @@ -263,9 +263,9 @@ impl> LogisticRegression { let x0 = M::zeros(1, (num_attributes + 1) * k); let objective = MultiClassObjectiveFunction { - x: x, + x, y: yi, - k: k, + k, phantom: PhantomData, }; @@ -275,8 +275,8 @@ impl> LogisticRegression { Ok(LogisticRegression { coefficients: weights.slice(0..k, 0..num_attributes), intercept: weights.slice(0..k, num_attributes..num_attributes + 1), - classes: classes, - num_attributes: num_attributes, + classes, + num_attributes, num_classes: k, }) } diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index a718e2a..beac40b 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -129,13 +129,13 @@ impl> RidgeRegression { let (n, p) = x.shape(); if n <= p { - return Err(Failed::fit(&format!( + return Err(Failed::fit( "Number of rows in X should be >= number of columns in X" - ))); + )); } if y.len() != n { - return Err(Failed::fit(&format!("Number of rows in X should = len(y)"))); + return Err(Failed::fit("Number of rows in X should = len(y)")); } let y_column = M::from_row_vector(y.clone()).transpose(); From 830a0d919421a58ecb76a218df14d3f91bfa9b35 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 11 Nov 2020 17:26:49 -0800 Subject: [PATCH 8/8] fix: formatting --- src/linear/ridge_regression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index beac40b..bb03c54 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -130,7 +130,7 @@ impl> RidgeRegression { if n <= p { return Err(Failed::fit( - "Number of rows in X should be >= number of columns in X" + "Number of rows in X should be >= number of columns in X", )); }