Merge potential next release v0.4 (#187) Breaking Changes
* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters in dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
+78
-48
@@ -1,13 +1,42 @@
|
||||
//! This is a generic solver for Ax = b type of equation
|
||||
//!
|
||||
//! Example:
|
||||
//! ```
|
||||
//! use smartcore::linalg::basic::arrays::Array1;
|
||||
//! use smartcore::linalg::basic::arrays::Array2;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linear::bg_solver::*;
|
||||
//! use smartcore::numbers::floatnum::FloatNumber;
|
||||
//! use smartcore::linear::bg_solver::BiconjugateGradientSolver;
|
||||
//!
|
||||
//! pub struct BGSolver {}
|
||||
//! impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X> for BGSolver {}
|
||||
//!
|
||||
//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
|
||||
//! let b = vec![40., 51., 28.];
|
||||
//! let expected = vec![1.0, 2.0, 3.0];
|
||||
//! let mut x = Vec::zeros(3);
|
||||
//! let solver = BGSolver {};
|
||||
//! let err: f64 = solver.solve_mut(&a, &b, &mut x, 1e-6, 6).unwrap();
|
||||
//! ```
|
||||
//!
|
||||
//! For more information, take a look at [this Wikipedia article](https://en.wikipedia.org/wiki/Biconjugate_gradient_method)
|
||||
//! and [this paper](https://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf)
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1};
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
|
||||
pub trait BiconjugateGradientSolver<T: RealNumber, M: Matrix<T>> {
|
||||
fn solve_mut(&self, a: &M, b: &M, x: &mut M, tol: T, max_iter: usize) -> Result<T, Failed> {
|
||||
///
|
||||
pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
|
||||
///
|
||||
fn solve_mut(
|
||||
&self,
|
||||
a: &'a X,
|
||||
b: &Vec<T>,
|
||||
x: &mut Vec<T>,
|
||||
tol: T,
|
||||
max_iter: usize,
|
||||
) -> Result<T, Failed> {
|
||||
if tol <= T::zero() {
|
||||
return Err(Failed::fit("tolerance shoud be > 0"));
|
||||
}
|
||||
@@ -16,25 +45,25 @@ pub trait BiconjugateGradientSolver<T: RealNumber, M: Matrix<T>> {
|
||||
return Err(Failed::fit("maximum number of iterations should be > 0"));
|
||||
}
|
||||
|
||||
let (n, _) = b.shape();
|
||||
let n = b.shape();
|
||||
|
||||
let mut r = M::zeros(n, 1);
|
||||
let mut rr = M::zeros(n, 1);
|
||||
let mut z = M::zeros(n, 1);
|
||||
let mut zz = M::zeros(n, 1);
|
||||
let mut r = Vec::zeros(n);
|
||||
let mut rr = Vec::zeros(n);
|
||||
let mut z = Vec::zeros(n);
|
||||
let mut zz = Vec::zeros(n);
|
||||
|
||||
self.mat_vec_mul(a, x, &mut r);
|
||||
|
||||
for j in 0..n {
|
||||
r.set(j, 0, b.get(j, 0) - r.get(j, 0));
|
||||
rr.set(j, 0, r.get(j, 0));
|
||||
r[j] = b[j] - r[j];
|
||||
rr[j] = r[j];
|
||||
}
|
||||
|
||||
let bnrm = b.norm(T::two());
|
||||
self.solve_preconditioner(a, &r, &mut z);
|
||||
let bnrm = b.norm(2f64);
|
||||
self.solve_preconditioner(a, &r[..], &mut z[..]);
|
||||
|
||||
let mut p = M::zeros(n, 1);
|
||||
let mut pp = M::zeros(n, 1);
|
||||
let mut p = Vec::zeros(n);
|
||||
let mut pp = Vec::zeros(n);
|
||||
let mut bkden = T::zero();
|
||||
let mut err = T::zero();
|
||||
|
||||
@@ -43,35 +72,33 @@ pub trait BiconjugateGradientSolver<T: RealNumber, M: Matrix<T>> {
|
||||
|
||||
self.solve_preconditioner(a, &rr, &mut zz);
|
||||
for j in 0..n {
|
||||
bknum += z.get(j, 0) * rr.get(j, 0);
|
||||
bknum += z[j] * rr[j];
|
||||
}
|
||||
if iter == 1 {
|
||||
for j in 0..n {
|
||||
p.set(j, 0, z.get(j, 0));
|
||||
pp.set(j, 0, zz.get(j, 0));
|
||||
}
|
||||
p[..n].copy_from_slice(&z[..n]);
|
||||
pp[..n].copy_from_slice(&zz[..n]);
|
||||
} else {
|
||||
let bk = bknum / bkden;
|
||||
for j in 0..n {
|
||||
p.set(j, 0, bk * p.get(j, 0) + z.get(j, 0));
|
||||
pp.set(j, 0, bk * pp.get(j, 0) + zz.get(j, 0));
|
||||
// The search direction `p` is updated from its own previous value; the shadow direction `pp` is updated separately below.
p[j] = bk * p[j] + z[j];
|
||||
pp[j] = bk * pp[j] + zz[j];
|
||||
}
|
||||
}
|
||||
bkden = bknum;
|
||||
self.mat_vec_mul(a, &p, &mut z);
|
||||
let mut akden = T::zero();
|
||||
for j in 0..n {
|
||||
akden += z.get(j, 0) * pp.get(j, 0);
|
||||
akden += z[j] * pp[j];
|
||||
}
|
||||
let ak = bknum / akden;
|
||||
self.mat_t_vec_mul(a, &pp, &mut zz);
|
||||
for j in 0..n {
|
||||
x.set(j, 0, x.get(j, 0) + ak * p.get(j, 0));
|
||||
r.set(j, 0, r.get(j, 0) - ak * z.get(j, 0));
|
||||
rr.set(j, 0, rr.get(j, 0) - ak * zz.get(j, 0));
|
||||
x[j] += ak * p[j];
|
||||
r[j] -= ak * z[j];
|
||||
rr[j] -= ak * zz[j];
|
||||
}
|
||||
self.solve_preconditioner(a, &r, &mut z);
|
||||
err = r.norm(T::two()) / bnrm;
|
||||
err = T::from_f64(r.norm(2f64) / bnrm).unwrap();
|
||||
|
||||
if err <= tol {
|
||||
break;
|
||||
@@ -81,36 +108,38 @@ pub trait BiconjugateGradientSolver<T: RealNumber, M: Matrix<T>> {
|
||||
Ok(err)
|
||||
}
|
||||
|
||||
fn solve_preconditioner(&self, a: &M, b: &M, x: &mut M) {
|
||||
///
|
||||
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
|
||||
let diag = Self::diag(a);
|
||||
let n = diag.len();
|
||||
|
||||
for (i, diag_i) in diag.iter().enumerate().take(n) {
|
||||
if *diag_i != T::zero() {
|
||||
x.set(i, 0, b.get(i, 0) / *diag_i);
|
||||
x[i] = b[i] / *diag_i;
|
||||
} else {
|
||||
x.set(i, 0, b.get(i, 0));
|
||||
x[i] = b[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// y = Ax
|
||||
fn mat_vec_mul(&self, a: &M, x: &M, y: &mut M) {
|
||||
y.copy_from(&a.matmul(x));
|
||||
/// y = Ax
|
||||
fn mat_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
||||
y.copy_from(&x.xa(false, a));
|
||||
}
|
||||
|
||||
// y = Atx
|
||||
fn mat_t_vec_mul(&self, a: &M, x: &M, y: &mut M) {
|
||||
y.copy_from(&a.ab(true, x, false));
|
||||
/// y = Atx
|
||||
fn mat_t_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
||||
y.copy_from(&x.xa(true, a));
|
||||
}
|
||||
|
||||
fn diag(a: &M) -> Vec<T> {
|
||||
///
|
||||
fn diag(a: &X) -> Vec<T> {
|
||||
let (nrows, ncols) = a.shape();
|
||||
let n = nrows.min(ncols);
|
||||
|
||||
let mut d = Vec::with_capacity(n);
|
||||
for i in 0..n {
|
||||
d.push(a.get(i, i));
|
||||
d.push(*a.get((i, i)));
|
||||
}
|
||||
|
||||
d
|
||||
@@ -120,28 +149,29 @@ pub trait BiconjugateGradientSolver<T: RealNumber, M: Matrix<T>> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::linalg::basic::arrays::Array2;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
pub struct BGSolver {}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> BiconjugateGradientSolver<T, M> for BGSolver {}
|
||||
impl<T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'_, T, X> for BGSolver {}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn bg_solver() {
|
||||
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
|
||||
let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]);
|
||||
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]);
|
||||
let b = vec![40., 51., 28.];
|
||||
let expected = vec![1.0, 2.0, 3.0];
|
||||
|
||||
let mut x = DenseMatrix::zeros(3, 1);
|
||||
let mut x = Vec::zeros(3);
|
||||
|
||||
let solver = BGSolver {};
|
||||
|
||||
let err: f64 = solver
|
||||
.solve_mut(&a, &b.transpose(), &mut x, 1e-6, 6)
|
||||
.unwrap();
|
||||
let err: f64 = solver.solve_mut(&a, &b, &mut x, 1e-6, 6).unwrap();
|
||||
|
||||
assert!(x.transpose().approximate_eq(&expected, 1e-4));
|
||||
assert!(x
|
||||
.iter()
|
||||
.zip(expected.iter())
|
||||
.all(|(&a, &b)| (a - b).abs() < 1e-4));
|
||||
assert!((err - 0.0).abs() < 1e-4);
|
||||
}
|
||||
}
|
||||
|
||||
+169
-116
@@ -17,7 +17,7 @@
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::naive::dense_matrix::*;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linear::elastic_net::*;
|
||||
//!
|
||||
//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html)
|
||||
@@ -55,36 +55,38 @@
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array, Array1, Array2, MutArray};
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
use crate::linear::lasso_optimizer::InteriorPointOptimizer;
|
||||
|
||||
/// Elastic net parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ElasticNetParameters<T: RealNumber> {
|
||||
pub struct ElasticNetParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: T,
|
||||
pub alpha: f64,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
|
||||
/// For l1_ratio = 0 the penalty is an L2 penalty.
|
||||
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
|
||||
pub l1_ratio: T,
|
||||
pub l1_ratio: f64,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: bool,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: T,
|
||||
pub tol: f64,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: usize,
|
||||
@@ -93,21 +95,23 @@ pub struct ElasticNetParameters<T: RealNumber> {
|
||||
/// Elastic net
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
|
||||
coefficients: M,
|
||||
intercept: T,
|
||||
pub struct ElasticNet<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<TX>,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> ElasticNetParameters<T> {
|
||||
impl ElasticNetParameters {
|
||||
/// Regularization parameter.
|
||||
pub fn with_alpha(mut self, alpha: T) -> Self {
|
||||
pub fn with_alpha(mut self, alpha: f64) -> Self {
|
||||
self.alpha = alpha;
|
||||
self
|
||||
}
|
||||
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
|
||||
/// For l1_ratio = 0 the penalty is an L2 penalty.
|
||||
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
|
||||
pub fn with_l1_ratio(mut self, l1_ratio: T) -> Self {
|
||||
pub fn with_l1_ratio(mut self, l1_ratio: f64) -> Self {
|
||||
self.l1_ratio = l1_ratio;
|
||||
self
|
||||
}
|
||||
@@ -117,7 +121,7 @@ impl<T: RealNumber> ElasticNetParameters<T> {
|
||||
self
|
||||
}
|
||||
/// The tolerance for the optimization
|
||||
pub fn with_tol(mut self, tol: T) -> Self {
|
||||
pub fn with_tol(mut self, tol: f64) -> Self {
|
||||
self.tol = tol;
|
||||
self
|
||||
}
|
||||
@@ -128,13 +132,13 @@ impl<T: RealNumber> ElasticNetParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for ElasticNetParameters<T> {
|
||||
impl Default for ElasticNetParameters {
|
||||
fn default() -> Self {
|
||||
ElasticNetParameters {
|
||||
alpha: T::one(),
|
||||
l1_ratio: T::half(),
|
||||
alpha: 1.0,
|
||||
l1_ratio: 0.5,
|
||||
normalize: true,
|
||||
tol: T::from_f64(1e-4).unwrap(),
|
||||
tol: 1e-4,
|
||||
max_iter: 1000,
|
||||
}
|
||||
}
|
||||
@@ -143,29 +147,29 @@ impl<T: RealNumber> Default for ElasticNetParameters<T> {
|
||||
/// ElasticNet grid search parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ElasticNetSearchParameters<T: RealNumber> {
|
||||
pub struct ElasticNetSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: Vec<T>,
|
||||
pub alpha: Vec<f64>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
|
||||
/// For l1_ratio = 0 the penalty is an L2 penalty.
|
||||
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
|
||||
pub l1_ratio: Vec<T>,
|
||||
pub l1_ratio: Vec<f64>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: Vec<bool>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: Vec<T>,
|
||||
pub tol: Vec<f64>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: Vec<usize>,
|
||||
}
|
||||
|
||||
/// ElasticNet grid search iterator
|
||||
pub struct ElasticNetSearchParametersIterator<T: RealNumber> {
|
||||
lasso_regression_search_parameters: ElasticNetSearchParameters<T>,
|
||||
pub struct ElasticNetSearchParametersIterator {
|
||||
lasso_regression_search_parameters: ElasticNetSearchParameters,
|
||||
current_alpha: usize,
|
||||
current_l1_ratio: usize,
|
||||
current_normalize: usize,
|
||||
@@ -173,9 +177,9 @@ pub struct ElasticNetSearchParametersIterator<T: RealNumber> {
|
||||
current_max_iter: usize,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> IntoIterator for ElasticNetSearchParameters<T> {
|
||||
type Item = ElasticNetParameters<T>;
|
||||
type IntoIter = ElasticNetSearchParametersIterator<T>;
|
||||
impl IntoIterator for ElasticNetSearchParameters {
|
||||
type Item = ElasticNetParameters;
|
||||
type IntoIter = ElasticNetSearchParametersIterator;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
ElasticNetSearchParametersIterator {
|
||||
@@ -189,8 +193,8 @@ impl<T: RealNumber> IntoIterator for ElasticNetSearchParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Iterator for ElasticNetSearchParametersIterator<T> {
|
||||
type Item = ElasticNetParameters<T>;
|
||||
impl Iterator for ElasticNetSearchParametersIterator {
|
||||
type Item = ElasticNetParameters;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.current_alpha == self.lasso_regression_search_parameters.alpha.len()
|
||||
@@ -246,7 +250,7 @@ impl<T: RealNumber> Iterator for ElasticNetSearchParametersIterator<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for ElasticNetSearchParameters<T> {
|
||||
impl Default for ElasticNetSearchParameters {
|
||||
fn default() -> Self {
|
||||
let default_params = ElasticNetParameters::default();
|
||||
|
||||
@@ -260,49 +264,73 @@ impl<T: RealNumber> Default for ElasticNetSearchParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> PartialEq for ElasticNet<T, M> {
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
||||
for ElasticNet<TX, TY, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.coefficients == other.coefficients
|
||||
&& (self.intercept - other.intercept).abs() <= T::epsilon()
|
||||
if self.intercept() != other.intercept() {
|
||||
return false;
|
||||
}
|
||||
if self.coefficients().shape() != other.coefficients().shape() {
|
||||
return false;
|
||||
}
|
||||
self.coefficients()
|
||||
.iterator(0)
|
||||
.zip(other.coefficients().iterator(0))
|
||||
.all(|(&a, &b)| (a - b).abs() <= TX::epsilon())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, ElasticNetParameters<T>>
|
||||
for ElasticNet<T, M>
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
||||
SupervisedEstimator<X, Y, ElasticNetParameters> for ElasticNet<TX, TY, X, Y>
|
||||
{
|
||||
fn fit(x: &M, y: &M::RowVector, parameters: ElasticNetParameters<T>) -> Result<Self, Failed> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn fit(x: &X, y: &Y, parameters: ElasticNetParameters) -> Result<Self, Failed> {
|
||||
ElasticNet::fit(x, y, parameters)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for ElasticNet<T, M> {
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> Predictor<X, Y>
|
||||
for ElasticNet<TX, TY, X, Y>
|
||||
{
|
||||
fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
self.predict(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
||||
ElasticNet<TX, TY, X, Y>
|
||||
{
|
||||
/// Fits elastic net regression to your data.
|
||||
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
|
||||
/// * `y` - target values
|
||||
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
|
||||
pub fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: ElasticNetParameters<T>,
|
||||
) -> Result<ElasticNet<T, M>, Failed> {
|
||||
x: &X,
|
||||
y: &Y,
|
||||
parameters: ElasticNetParameters,
|
||||
) -> Result<ElasticNet<TX, TY, X, Y>, Failed> {
|
||||
let (n, p) = x.shape();
|
||||
|
||||
if y.len() != n {
|
||||
if y.shape() != n {
|
||||
return Err(Failed::fit("Number of rows in X should = len(y)"));
|
||||
}
|
||||
|
||||
let n_float = T::from_usize(n).unwrap();
|
||||
let n_float = n as f64;
|
||||
|
||||
let l1_reg = parameters.alpha * parameters.l1_ratio * n_float;
|
||||
let l2_reg = parameters.alpha * (T::one() - parameters.l1_ratio) * n_float;
|
||||
let l1_reg = TX::from_f64(parameters.alpha * parameters.l1_ratio * n_float).unwrap();
|
||||
let l2_reg =
|
||||
TX::from_f64(parameters.alpha * (1.0 - parameters.l1_ratio) * n_float).unwrap();
|
||||
|
||||
let y_mean = y.mean();
|
||||
let y_mean = TX::from_f64(y.mean_by()).unwrap();
|
||||
|
||||
let (w, b) = if parameters.normalize {
|
||||
let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;
|
||||
@@ -311,68 +339,92 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
|
||||
|
||||
let mut optimizer = InteriorPointOptimizer::new(&x, p);
|
||||
|
||||
let mut w =
|
||||
optimizer.optimize(&x, &y, l1_reg * gamma, parameters.max_iter, parameters.tol)?;
|
||||
let mut w = optimizer.optimize(
|
||||
&x,
|
||||
&y,
|
||||
l1_reg * gamma,
|
||||
parameters.max_iter,
|
||||
TX::from_f64(parameters.tol).unwrap(),
|
||||
)?;
|
||||
|
||||
for i in 0..p {
|
||||
w.set(i, 0, gamma * w.get(i, 0) / col_std[i]);
|
||||
w.set(i, gamma * *w.get(i) / col_std[i]);
|
||||
}
|
||||
|
||||
let mut b = T::zero();
|
||||
let mut b = TX::zero();
|
||||
|
||||
for i in 0..p {
|
||||
b += w.get(i, 0) * col_mean[i];
|
||||
b += *w.get(i) * col_mean[i];
|
||||
}
|
||||
|
||||
b = y_mean - b;
|
||||
|
||||
(w, b)
|
||||
(X::from_column(&w), b)
|
||||
} else {
|
||||
let (x, y, gamma) = Self::augment_x_and_y(x, y, l2_reg);
|
||||
|
||||
let mut optimizer = InteriorPointOptimizer::new(&x, p);
|
||||
|
||||
let mut w =
|
||||
optimizer.optimize(&x, &y, l1_reg * gamma, parameters.max_iter, parameters.tol)?;
|
||||
let mut w = optimizer.optimize(
|
||||
&x,
|
||||
&y,
|
||||
l1_reg * gamma,
|
||||
parameters.max_iter,
|
||||
TX::from_f64(parameters.tol).unwrap(),
|
||||
)?;
|
||||
|
||||
for i in 0..p {
|
||||
w.set(i, 0, gamma * w.get(i, 0));
|
||||
w.set(i, gamma * *w.get(i));
|
||||
}
|
||||
|
||||
(w, y_mean)
|
||||
(X::from_column(&w), y_mean)
|
||||
};
|
||||
|
||||
Ok(ElasticNet {
|
||||
intercept: b,
|
||||
coefficients: w,
|
||||
intercept: Some(b),
|
||||
coefficients: Some(w),
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Predict target values from `x`
|
||||
/// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
|
||||
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let (nrows, _) = x.shape();
|
||||
let mut y_hat = x.matmul(&self.coefficients);
|
||||
y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
|
||||
Ok(y_hat.transpose().to_row_vector())
|
||||
let mut y_hat = x.matmul(self.coefficients.as_ref().unwrap());
|
||||
let bias = X::fill(nrows, 1, self.intercept.unwrap());
|
||||
y_hat.add_mut(&bias);
|
||||
Ok(Y::from_iterator(
|
||||
y_hat.iterator(0).map(|&v| TY::from(v).unwrap()),
|
||||
nrows,
|
||||
))
|
||||
}
|
||||
|
||||
/// Get estimated regression coefficients
|
||||
pub fn coefficients(&self) -> &M {
|
||||
&self.coefficients
|
||||
pub fn coefficients(&self) -> &X {
|
||||
self.coefficients.as_ref().unwrap()
|
||||
}
|
||||
|
||||
/// Get estimate of intercept
|
||||
pub fn intercept(&self) -> T {
|
||||
self.intercept
|
||||
pub fn intercept(&self) -> &TX {
|
||||
self.intercept.as_ref().unwrap()
|
||||
}
|
||||
|
||||
fn rescale_x(x: &M) -> Result<(M, Vec<T>, Vec<T>), Failed> {
|
||||
let col_mean = x.mean(0);
|
||||
let col_std = x.std(0);
|
||||
fn rescale_x(x: &X) -> Result<(X, Vec<TX>, Vec<TX>), Failed> {
|
||||
let col_mean: Vec<TX> = x
|
||||
.mean_by(0)
|
||||
.iter()
|
||||
.map(|&v| TX::from_f64(v).unwrap())
|
||||
.collect();
|
||||
let col_std: Vec<TX> = x
|
||||
.std_dev(0)
|
||||
.iter()
|
||||
.map(|&v| TX::from_f64(v).unwrap())
|
||||
.collect();
|
||||
|
||||
for i in 0..col_std.len() {
|
||||
if (col_std[i] - T::zero()).abs() < T::epsilon() {
|
||||
for (i, col_std_i) in col_std.iter().enumerate() {
|
||||
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
||||
return Err(Failed::fit(&format!(
|
||||
"Cannot rescale constant column {}",
|
||||
i
|
||||
@@ -385,25 +437,25 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
|
||||
Ok((scaled_x, col_mean, col_std))
|
||||
}
|
||||
|
||||
fn augment_x_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) {
|
||||
fn augment_x_and_y(x: &X, y: &Y, l2_reg: TX) -> (X, Vec<TX>, TX) {
|
||||
let (n, p) = x.shape();
|
||||
|
||||
let gamma = T::one() / (T::one() + l2_reg).sqrt();
|
||||
let gamma = TX::one() / (TX::one() + l2_reg).sqrt();
|
||||
let padding = gamma * l2_reg.sqrt();
|
||||
|
||||
let mut y2 = M::RowVector::zeros(n + p);
|
||||
for i in 0..y.len() {
|
||||
y2.set(i, y.get(i));
|
||||
let mut y2 = Vec::<TX>::zeros(n + p);
|
||||
for i in 0..y.shape() {
|
||||
y2.set(i, TX::from(*y.get(i)).unwrap());
|
||||
}
|
||||
|
||||
let mut x2 = M::zeros(n + p, p);
|
||||
let mut x2 = X::zeros(n + p, p);
|
||||
|
||||
for j in 0..p {
|
||||
for i in 0..n {
|
||||
x2.set(i, j, gamma * x.get(i, j));
|
||||
x2.set((i, j), gamma * *x.get((i, j)));
|
||||
}
|
||||
|
||||
x2.set(j + n, j, padding);
|
||||
x2.set((j + n, j), padding);
|
||||
}
|
||||
|
||||
(x2, y2, gamma)
|
||||
@@ -413,7 +465,7 @@ impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::metrics::mean_absolute_error;
|
||||
|
||||
#[test]
|
||||
@@ -546,43 +598,44 @@ mod tests {
|
||||
assert!(mae_l1 < 2.0);
|
||||
assert!(mae_l2 < 2.0);
|
||||
|
||||
assert!(l1_model.coefficients().get(0, 0) > l1_model.coefficients().get(1, 0));
|
||||
assert!(l1_model.coefficients().get(0, 0) > l1_model.coefficients().get(2, 0));
|
||||
assert!(l1_model.coefficients().get((0, 0)) > l1_model.coefficients().get((1, 0)));
|
||||
assert!(l1_model.coefficients().get((0, 0)) > l1_model.coefficients().get((2, 0)));
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
#[cfg(feature = "serde")]
|
||||
fn serde() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
&[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
&[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
&[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
&[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
&[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
&[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
]);
|
||||
// TODO: serialization for the new DenseMatrix needs to be implemented
|
||||
// #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
// #[test]
|
||||
// #[cfg(feature = "serde")]
|
||||
// fn serde() {
|
||||
// let x = DenseMatrix::from_2d_array(&[
|
||||
// &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
// &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
// &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
// &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
// &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
// &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
// &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
// &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
// &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
// &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
// &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
// &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
// &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
// ]);
|
||||
|
||||
let y = vec![
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
114.2, 115.7, 116.9,
|
||||
];
|
||||
// let y = vec![
|
||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
// 114.2, 115.7, 116.9,
|
||||
// ];
|
||||
|
||||
let lr = ElasticNet::fit(&x, &y, Default::default()).unwrap();
|
||||
// let lr = ElasticNet::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let deserialized_lr: ElasticNet<f64, DenseMatrix<f64>> =
|
||||
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
// let deserialized_lr: ElasticNet<f64, f64, DenseMatrix<f64>, Vec<f64>> =
|
||||
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(lr, deserialized_lr);
|
||||
}
|
||||
// assert_eq!(lr, deserialized_lr);
|
||||
// }
|
||||
}
|
||||
|
||||
+145
-99
@@ -23,31 +23,33 @@
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1};
|
||||
use crate::linear::lasso_optimizer::InteriorPointOptimizer;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
/// Lasso regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LassoParameters<T: RealNumber> {
|
||||
pub struct LassoParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the strength of the penalty to the loss function.
|
||||
pub alpha: T,
|
||||
pub alpha: f64,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If true the regressors X will be normalized before regression
|
||||
/// by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: bool,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: T,
|
||||
pub tol: f64,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: usize,
|
||||
@@ -56,14 +58,16 @@ pub struct LassoParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
/// Lasso regressor
|
||||
pub struct Lasso<T: RealNumber, M: Matrix<T>> {
|
||||
coefficients: M,
|
||||
intercept: T,
|
||||
pub struct Lasso<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<TX>,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> LassoParameters<T> {
|
||||
impl LassoParameters {
|
||||
/// Regularization parameter.
|
||||
pub fn with_alpha(mut self, alpha: T) -> Self {
|
||||
pub fn with_alpha(mut self, alpha: f64) -> Self {
|
||||
self.alpha = alpha;
|
||||
self
|
||||
}
|
||||
@@ -73,7 +77,7 @@ impl<T: RealNumber> LassoParameters<T> {
|
||||
self
|
||||
}
|
||||
/// The tolerance for the optimization
|
||||
pub fn with_tol(mut self, tol: T) -> Self {
|
||||
pub fn with_tol(mut self, tol: f64) -> Self {
|
||||
self.tol = tol;
|
||||
self
|
||||
}
|
||||
@@ -84,34 +88,52 @@ impl<T: RealNumber> LassoParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for LassoParameters<T> {
|
||||
impl Default for LassoParameters {
|
||||
fn default() -> Self {
|
||||
LassoParameters {
|
||||
alpha: T::one(),
|
||||
alpha: 1f64,
|
||||
normalize: true,
|
||||
tol: T::from_f64(1e-4).unwrap(),
|
||||
tol: 1e-4,
|
||||
max_iter: 1000,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> PartialEq for Lasso<T, M> {
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
||||
for Lasso<TX, TY, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.coefficients == other.coefficients
|
||||
&& (self.intercept - other.intercept).abs() <= T::epsilon()
|
||||
self.intercept == other.intercept
|
||||
&& self.coefficients().shape() == other.coefficients().shape()
|
||||
&& self
|
||||
.coefficients()
|
||||
.iterator(0)
|
||||
.zip(other.coefficients().iterator(0))
|
||||
.all(|(&a, &b)| (a - b).abs() <= TX::epsilon())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, LassoParameters<T>>
|
||||
for Lasso<T, M>
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
||||
SupervisedEstimator<X, Y, LassoParameters> for Lasso<TX, TY, X, Y>
|
||||
{
|
||||
fn fit(x: &M, y: &M::RowVector, parameters: LassoParameters<T>) -> Result<Self, Failed> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn fit(x: &X, y: &Y, parameters: LassoParameters) -> Result<Self, Failed> {
|
||||
Lasso::fit(x, y, parameters)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> Predictor<X, Y>
|
||||
for Lasso<TX, TY, X, Y>
|
||||
{
|
||||
fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
self.predict(x)
|
||||
}
|
||||
}
|
||||
@@ -119,34 +141,34 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
|
||||
/// Lasso grid search parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LassoSearchParameters<T: RealNumber> {
|
||||
pub struct LassoSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the strength of the penalty to the loss function.
|
||||
pub alpha: Vec<T>,
|
||||
pub alpha: Vec<f64>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If true the regressors X will be normalized before regression
|
||||
/// by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: Vec<bool>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: Vec<T>,
|
||||
pub tol: Vec<f64>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: Vec<usize>,
|
||||
}
|
||||
|
||||
/// Lasso grid search iterator
|
||||
pub struct LassoSearchParametersIterator<T: RealNumber> {
|
||||
lasso_search_parameters: LassoSearchParameters<T>,
|
||||
pub struct LassoSearchParametersIterator {
|
||||
lasso_search_parameters: LassoSearchParameters,
|
||||
current_alpha: usize,
|
||||
current_normalize: usize,
|
||||
current_tol: usize,
|
||||
current_max_iter: usize,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> IntoIterator for LassoSearchParameters<T> {
|
||||
type Item = LassoParameters<T>;
|
||||
type IntoIter = LassoSearchParametersIterator<T>;
|
||||
impl IntoIterator for LassoSearchParameters {
|
||||
type Item = LassoParameters;
|
||||
type IntoIter = LassoSearchParametersIterator;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
LassoSearchParametersIterator {
|
||||
@@ -159,8 +181,8 @@ impl<T: RealNumber> IntoIterator for LassoSearchParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Iterator for LassoSearchParametersIterator<T> {
|
||||
type Item = LassoParameters<T>;
|
||||
impl Iterator for LassoSearchParametersIterator {
|
||||
type Item = LassoParameters;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.current_alpha == self.lasso_search_parameters.alpha.len()
|
||||
@@ -203,7 +225,7 @@ impl<T: RealNumber> Iterator for LassoSearchParametersIterator<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for LassoSearchParameters<T> {
|
||||
impl Default for LassoSearchParameters {
|
||||
fn default() -> Self {
|
||||
let default_params = LassoParameters::default();
|
||||
|
||||
@@ -216,16 +238,12 @@ impl<T: RealNumber> Default for LassoSearchParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
|
||||
impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> Lasso<TX, TY, X, Y> {
|
||||
/// Fits Lasso regression to your data.
|
||||
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
|
||||
/// * `y` - target values
|
||||
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
|
||||
pub fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: LassoParameters<T>,
|
||||
) -> Result<Lasso<T, M>, Failed> {
|
||||
pub fn fit(x: &X, y: &Y, parameters: LassoParameters) -> Result<Lasso<TX, TY, X, Y>, Failed> {
|
||||
let (n, p) = x.shape();
|
||||
|
||||
if n <= p {
|
||||
@@ -234,11 +252,11 @@ impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
|
||||
));
|
||||
}
|
||||
|
||||
if parameters.alpha < T::zero() {
|
||||
if parameters.alpha < 0f64 {
|
||||
return Err(Failed::fit("alpha should be >= 0"));
|
||||
}
|
||||
|
||||
if parameters.tol <= T::zero() {
|
||||
if parameters.tol <= 0f64 {
|
||||
return Err(Failed::fit("tol should be > 0"));
|
||||
}
|
||||
|
||||
@@ -246,71 +264,98 @@ impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
|
||||
return Err(Failed::fit("max_iter should be > 0"));
|
||||
}
|
||||
|
||||
if y.len() != n {
|
||||
if y.shape() != n {
|
||||
return Err(Failed::fit("Number of rows in X should = len(y)"));
|
||||
}
|
||||
|
||||
let l1_reg = parameters.alpha * T::from_usize(n).unwrap();
|
||||
let y: Vec<TX> = y.iterator(0).map(|&v| TX::from(v).unwrap()).collect();
|
||||
|
||||
let l1_reg = TX::from_f64(parameters.alpha * n as f64).unwrap();
|
||||
|
||||
let (w, b) = if parameters.normalize {
|
||||
let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;
|
||||
|
||||
let mut optimizer = InteriorPointOptimizer::new(&scaled_x, p);
|
||||
|
||||
let mut w =
|
||||
optimizer.optimize(&scaled_x, y, l1_reg, parameters.max_iter, parameters.tol)?;
|
||||
let mut w = optimizer.optimize(
|
||||
&scaled_x,
|
||||
&y,
|
||||
l1_reg,
|
||||
parameters.max_iter,
|
||||
TX::from_f64(parameters.tol).unwrap(),
|
||||
)?;
|
||||
|
||||
for (j, col_std_j) in col_std.iter().enumerate().take(p) {
|
||||
w.set(j, 0, w.get(j, 0) / *col_std_j);
|
||||
w[j] /= *col_std_j;
|
||||
}
|
||||
|
||||
let mut b = T::zero();
|
||||
let mut b = TX::zero();
|
||||
|
||||
for (i, col_mean_i) in col_mean.iter().enumerate().take(p) {
|
||||
b += w.get(i, 0) * *col_mean_i;
|
||||
b += w[i] * *col_mean_i;
|
||||
}
|
||||
|
||||
b = y.mean() - b;
|
||||
(w, b)
|
||||
b = TX::from_f64(y.mean_by()).unwrap() - b;
|
||||
(X::from_column(&w), b)
|
||||
} else {
|
||||
let mut optimizer = InteriorPointOptimizer::new(x, p);
|
||||
|
||||
let w = optimizer.optimize(x, y, l1_reg, parameters.max_iter, parameters.tol)?;
|
||||
let w = optimizer.optimize(
|
||||
x,
|
||||
&y,
|
||||
l1_reg,
|
||||
parameters.max_iter,
|
||||
TX::from_f64(parameters.tol).unwrap(),
|
||||
)?;
|
||||
|
||||
(w, y.mean())
|
||||
(X::from_column(&w), TX::from_f64(y.mean_by()).unwrap())
|
||||
};
|
||||
|
||||
Ok(Lasso {
|
||||
intercept: b,
|
||||
coefficients: w,
|
||||
intercept: Some(b),
|
||||
coefficients: Some(w),
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Predict target values from `x`
|
||||
/// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
|
||||
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let (nrows, _) = x.shape();
|
||||
let mut y_hat = x.matmul(&self.coefficients);
|
||||
y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
|
||||
Ok(y_hat.transpose().to_row_vector())
|
||||
let mut y_hat = x.matmul(self.coefficients());
|
||||
let bias = X::fill(nrows, 1, self.intercept.unwrap());
|
||||
y_hat.add_mut(&bias);
|
||||
Ok(Y::from_iterator(
|
||||
y_hat.iterator(0).map(|&v| TY::from(v).unwrap()),
|
||||
nrows,
|
||||
))
|
||||
}
|
||||
|
||||
/// Get estimates regression coefficients
|
||||
pub fn coefficients(&self) -> &M {
|
||||
&self.coefficients
|
||||
pub fn coefficients(&self) -> &X {
|
||||
self.coefficients.as_ref().unwrap()
|
||||
}
|
||||
|
||||
/// Get estimate of intercept
|
||||
pub fn intercept(&self) -> T {
|
||||
self.intercept
|
||||
pub fn intercept(&self) -> &TX {
|
||||
self.intercept.as_ref().unwrap()
|
||||
}
|
||||
|
||||
fn rescale_x(x: &M) -> Result<(M, Vec<T>, Vec<T>), Failed> {
|
||||
let col_mean = x.mean(0);
|
||||
let col_std = x.std(0);
|
||||
fn rescale_x(x: &X) -> Result<(X, Vec<TX>, Vec<TX>), Failed> {
|
||||
let col_mean: Vec<TX> = x
|
||||
.mean_by(0)
|
||||
.iter()
|
||||
.map(|&v| TX::from_f64(v).unwrap())
|
||||
.collect();
|
||||
let col_std: Vec<TX> = x
|
||||
.std_dev(0)
|
||||
.iter()
|
||||
.map(|&v| TX::from_f64(v).unwrap())
|
||||
.collect();
|
||||
|
||||
for (i, col_std_i) in col_std.iter().enumerate() {
|
||||
if (*col_std_i - T::zero()).abs() < T::epsilon() {
|
||||
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
||||
return Err(Failed::fit(&format!(
|
||||
"Cannot rescale constant column {}",
|
||||
i
|
||||
@@ -327,7 +372,7 @@ impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::metrics::mean_absolute_error;
|
||||
|
||||
#[test]
|
||||
@@ -402,39 +447,40 @@ mod tests {
|
||||
assert!(mean_absolute_error(&y_hat, &y) < 2.0);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
#[cfg(feature = "serde")]
|
||||
fn serde() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
&[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
&[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
&[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
&[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
&[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
&[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
]);
|
||||
// TODO: serialization for the new DenseMatrix needs to be implemented
|
||||
// #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
// #[test]
|
||||
// #[cfg(feature = "serde")]
|
||||
// fn serde() {
|
||||
// let x = DenseMatrix::from_2d_array(&[
|
||||
// &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
// &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
// &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
// &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
// &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
// &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
// &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
// &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
// &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
// &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
// &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
// &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
// &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
// ]);
|
||||
|
||||
let y = vec![
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
114.2, 115.7, 116.9,
|
||||
];
|
||||
// let y = vec![
|
||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
// 114.2, 115.7, 116.9,
|
||||
// ];
|
||||
|
||||
let lr = Lasso::fit(&x, &y, Default::default()).unwrap();
|
||||
// let lr = Lasso::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let deserialized_lr: Lasso<f64, DenseMatrix<f64>> =
|
||||
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
// let deserialized_lr: Lasso<f64, f64, DenseMatrix<f64>, Vec<f64>> =
|
||||
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(lr, deserialized_lr);
|
||||
}
|
||||
// assert_eq!(lr, deserialized_lr);
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -12,21 +12,23 @@
|
||||
//!
|
||||
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1, MutArray, MutArrayView1};
|
||||
use crate::linear::bg_solver::BiconjugateGradientSolver;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
|
||||
pub struct InteriorPointOptimizer<T: RealNumber, M: Matrix<T>> {
|
||||
ata: M,
|
||||
///
|
||||
pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> {
|
||||
ata: X,
|
||||
d1: Vec<T>,
|
||||
d2: Vec<T>,
|
||||
prb: Vec<T>,
|
||||
prs: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
pub fn new(a: &M, n: usize) -> InteriorPointOptimizer<T, M> {
|
||||
///
|
||||
impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
||||
///
|
||||
pub fn new(a: &X, n: usize) -> InteriorPointOptimizer<T, X> {
|
||||
InteriorPointOptimizer {
|
||||
ata: a.ab(true, a, false),
|
||||
d1: vec![T::zero(); n],
|
||||
@@ -36,14 +38,15 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
pub fn optimize(
|
||||
&mut self,
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
x: &X,
|
||||
y: &Vec<T>,
|
||||
lambda: T,
|
||||
max_iter: usize,
|
||||
tol: T,
|
||||
) -> Result<M, Failed> {
|
||||
) -> Result<Vec<T>, Failed> {
|
||||
let (n, p) = x.shape();
|
||||
let p_f64 = T::from_usize(p).unwrap();
|
||||
|
||||
@@ -58,50 +61,53 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
let gamma = T::from_f64(-0.25).unwrap();
|
||||
let mu = T::two();
|
||||
|
||||
let y = M::from_row_vector(y.sub_scalar(y.mean())).transpose();
|
||||
// let y = M::from_row_vector(y.sub_scalar(y.mean_by())).transpose();
|
||||
let y = y.sub_scalar(T::from_f64(y.mean_by()).unwrap());
|
||||
|
||||
let mut max_ls_iter = 100;
|
||||
let mut pitr = 0;
|
||||
let mut w = M::zeros(p, 1);
|
||||
let mut w = Vec::zeros(p);
|
||||
let mut neww = w.clone();
|
||||
let mut u = M::ones(p, 1);
|
||||
let mut u = Vec::ones(p);
|
||||
let mut newu = u.clone();
|
||||
|
||||
let mut f = M::fill(p, 2, -T::one());
|
||||
let mut f = X::fill(p, 2, -T::one());
|
||||
let mut newf = f.clone();
|
||||
|
||||
let mut q1 = vec![T::zero(); p];
|
||||
let mut q2 = vec![T::zero(); p];
|
||||
|
||||
let mut dx = M::zeros(p, 1);
|
||||
let mut du = M::zeros(p, 1);
|
||||
let mut dxu = M::zeros(2 * p, 1);
|
||||
let mut grad = M::zeros(2 * p, 1);
|
||||
let mut dx = Vec::zeros(p);
|
||||
let mut du = Vec::zeros(p);
|
||||
let mut dxu = Vec::zeros(2 * p);
|
||||
let mut grad = Vec::zeros(2 * p);
|
||||
|
||||
let mut nu = M::zeros(n, 1);
|
||||
let mut nu = Vec::zeros(n);
|
||||
let mut dobj = T::zero();
|
||||
let mut s = T::infinity();
|
||||
let mut t = T::one()
|
||||
.max(T::one() / lambda)
|
||||
.min(T::two() * p_f64 / T::from(1e-3).unwrap());
|
||||
|
||||
let lambda_f64 = lambda.to_f64().unwrap();
|
||||
|
||||
for ntiter in 0..max_iter {
|
||||
let mut z = x.matmul(&w);
|
||||
let mut z = w.xa(true, x);
|
||||
|
||||
for i in 0..n {
|
||||
z.set(i, 0, z.get(i, 0) - y.get(i, 0));
|
||||
nu.set(i, 0, T::two() * z.get(i, 0));
|
||||
z[i] -= y[i];
|
||||
nu[i] = T::two() * z[i];
|
||||
}
|
||||
|
||||
// CALCULATE DUALITY GAP
|
||||
let xnu = x.ab(true, &nu, false);
|
||||
let max_xnu = xnu.norm(T::infinity());
|
||||
if max_xnu > lambda {
|
||||
let lnu = lambda / max_xnu;
|
||||
let xnu = nu.xa(false, x);
|
||||
let max_xnu = xnu.norm(std::f64::INFINITY);
|
||||
if max_xnu > lambda_f64 {
|
||||
let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap();
|
||||
nu.mul_scalar_mut(lnu);
|
||||
}
|
||||
|
||||
let pobj = z.dot(&z) + lambda * w.norm(T::one());
|
||||
let pobj = z.dot(&z) + lambda * T::from_f64(w.norm(1f64)).unwrap();
|
||||
dobj = dobj.max(gamma * nu.dot(&nu) - nu.dot(&y));
|
||||
|
||||
let gap = pobj - dobj;
|
||||
@@ -118,22 +124,22 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
|
||||
// CALCULATE NEWTON STEP
|
||||
for i in 0..p {
|
||||
let q1i = T::one() / (u.get(i, 0) + w.get(i, 0));
|
||||
let q2i = T::one() / (u.get(i, 0) - w.get(i, 0));
|
||||
let q1i = T::one() / (u[i] + w[i]);
|
||||
let q2i = T::one() / (u[i] - w[i]);
|
||||
q1[i] = q1i;
|
||||
q2[i] = q2i;
|
||||
self.d1[i] = (q1i * q1i + q2i * q2i) / t;
|
||||
self.d2[i] = (q1i * q1i - q2i * q2i) / t;
|
||||
}
|
||||
|
||||
let mut gradphi = x.ab(true, &z, false);
|
||||
let mut gradphi = z.xa(false, x);
|
||||
|
||||
for i in 0..p {
|
||||
let g1 = T::two() * gradphi.get(i, 0) - (q1[i] - q2[i]) / t;
|
||||
let g1 = T::two() * gradphi[i] - (q1[i] - q2[i]) / t;
|
||||
let g2 = lambda - (q1[i] + q2[i]) / t;
|
||||
gradphi.set(i, 0, g1);
|
||||
grad.set(i, 0, -g1);
|
||||
grad.set(i + p, 0, -g2);
|
||||
gradphi[i] = g1;
|
||||
grad[i] = -g1;
|
||||
grad[i + p] = -g2;
|
||||
}
|
||||
|
||||
for i in 0..p {
|
||||
@@ -141,7 +147,7 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
self.prs[i] = self.prb[i] * self.d1[i] - self.d2[i].powi(2);
|
||||
}
|
||||
|
||||
let normg = grad.norm2();
|
||||
let normg = T::from_f64(grad.norm2()).unwrap();
|
||||
let mut pcgtol = min_pcgtol.min(eta * gap / T::one().min(normg));
|
||||
if ntiter != 0 && pitr == 0 {
|
||||
pcgtol *= min_pcgtol;
|
||||
@@ -152,10 +158,8 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
pitr = pcgmaxi;
|
||||
}
|
||||
|
||||
for i in 0..p {
|
||||
dx.set(i, 0, dxu.get(i, 0));
|
||||
du.set(i, 0, dxu.get(i + p, 0));
|
||||
}
|
||||
dx[..p].copy_from_slice(&dxu[..p]);
|
||||
du[..p].copy_from_slice(&dxu[p..(p + p)]);
|
||||
|
||||
// BACKTRACKING LINE SEARCH
|
||||
let phi = z.dot(&z) + lambda * u.sum() - Self::sumlogneg(&f) / t;
|
||||
@@ -165,16 +169,20 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
let lsiter = 0;
|
||||
while lsiter < max_ls_iter {
|
||||
for i in 0..p {
|
||||
neww.set(i, 0, w.get(i, 0) + s * dx.get(i, 0));
|
||||
newu.set(i, 0, u.get(i, 0) + s * du.get(i, 0));
|
||||
newf.set(i, 0, neww.get(i, 0) - newu.get(i, 0));
|
||||
newf.set(i, 1, -neww.get(i, 0) - newu.get(i, 0));
|
||||
neww[i] = w[i] + s * dx[i];
|
||||
newu[i] = u[i] + s * du[i];
|
||||
newf.set((i, 0), neww[i] - newu[i]);
|
||||
newf.set((i, 1), -neww[i] - newu[i]);
|
||||
}
|
||||
|
||||
if newf.max() < T::zero() {
|
||||
let mut newz = x.matmul(&neww);
|
||||
if newf
|
||||
.iterator(0)
|
||||
.fold(T::neg_infinity(), |max, v| v.max(max))
|
||||
< T::zero()
|
||||
{
|
||||
let mut newz = neww.xa(true, x);
|
||||
for i in 0..n {
|
||||
newz.set(i, 0, newz.get(i, 0) - y.get(i, 0));
|
||||
newz[i] -= y[i];
|
||||
}
|
||||
|
||||
let newphi = newz.dot(&newz) + lambda * newu.sum() - Self::sumlogneg(&newf) / t;
|
||||
@@ -200,54 +208,46 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
|
||||
Ok(w)
|
||||
}
|
||||
|
||||
fn sumlogneg(f: &M) -> T {
|
||||
///
|
||||
fn sumlogneg(f: &X) -> T {
|
||||
let (n, _) = f.shape();
|
||||
let mut sum = T::zero();
|
||||
for i in 0..n {
|
||||
sum += (-f.get(i, 0)).ln();
|
||||
sum += (-f.get(i, 1)).ln();
|
||||
sum += (-*f.get((i, 0))).ln();
|
||||
sum += (-*f.get((i, 1))).ln();
|
||||
}
|
||||
sum
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> BiconjugateGradientSolver<T, M> for InteriorPointOptimizer<T, M> {
|
||||
fn solve_preconditioner(&self, a: &M, b: &M, x: &mut M) {
|
||||
///
|
||||
impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
|
||||
for InteriorPointOptimizer<T, X>
|
||||
{
|
||||
///
|
||||
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
|
||||
let (_, p) = a.shape();
|
||||
|
||||
for i in 0..p {
|
||||
x.set(
|
||||
i,
|
||||
0,
|
||||
(self.d1[i] * b.get(i, 0) - self.d2[i] * b.get(i + p, 0)) / self.prs[i],
|
||||
);
|
||||
x.set(
|
||||
i + p,
|
||||
0,
|
||||
(-self.d2[i] * b.get(i, 0) + self.prb[i] * b.get(i + p, 0)) / self.prs[i],
|
||||
);
|
||||
x[i] = (self.d1[i] * b[i] - self.d2[i] * b[i + p]) / self.prs[i];
|
||||
x[i + p] = (-self.d2[i] * b[i] + self.prb[i] * b[i + p]) / self.prs[i];
|
||||
}
|
||||
}
|
||||
|
||||
fn mat_vec_mul(&self, _: &M, x: &M, y: &mut M) {
|
||||
///
|
||||
fn mat_vec_mul(&self, _: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
||||
let (_, p) = self.ata.shape();
|
||||
let atax = self.ata.matmul(&x.slice(0..p, 0..1));
|
||||
let x_slice = Vec::from_slice(x.slice(0..p).as_ref());
|
||||
let atax = x_slice.xa(true, &self.ata);
|
||||
|
||||
for i in 0..p {
|
||||
y.set(
|
||||
i,
|
||||
0,
|
||||
T::two() * atax.get(i, 0) + self.d1[i] * x.get(i, 0) + self.d2[i] * x.get(i + p, 0),
|
||||
);
|
||||
y.set(
|
||||
i + p,
|
||||
0,
|
||||
self.d2[i] * x.get(i, 0) + self.d1[i] * x.get(i + p, 0),
|
||||
);
|
||||
y[i] = T::two() * atax[i] + self.d1[i] * x[i] + self.d2[i] * x[i + p];
|
||||
y[i + p] = self.d2[i] * x[i] + self.d1[i] * x[i + p];
|
||||
}
|
||||
}
|
||||
|
||||
fn mat_t_vec_mul(&self, a: &M, x: &M, y: &mut M) {
|
||||
///
|
||||
fn mat_t_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
||||
self.mat_vec_mul(a, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
+138
-80
@@ -19,7 +19,7 @@
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::naive::dense_matrix::*;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linear::linear_regression::*;
|
||||
//!
|
||||
//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html)
|
||||
@@ -61,14 +61,18 @@
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2};
|
||||
use crate::linalg::traits::qr::QRDecomposable;
|
||||
use crate::linalg::traits::svd::SVDDecomposable;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Default, Clone, Eq, PartialEq)]
|
||||
@@ -83,20 +87,35 @@ pub enum LinearRegressionSolverName {
|
||||
|
||||
/// Linear Regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LinearRegressionParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: LinearRegressionSolverName,
|
||||
}
|
||||
|
||||
impl Default for LinearRegressionParameters {
|
||||
fn default() -> Self {
|
||||
LinearRegressionParameters {
|
||||
solver: LinearRegressionSolverName::SVD,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Linear Regression
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct LinearRegression<T: RealNumber, M: Matrix<T>> {
|
||||
coefficients: M,
|
||||
intercept: T,
|
||||
_solver: LinearRegressionSolverName,
|
||||
pub struct LinearRegression<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + QRDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<TX>,
|
||||
solver: LinearRegressionSolverName,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
|
||||
impl LinearRegressionParameters {
|
||||
@@ -162,43 +181,80 @@ impl Default for LinearRegressionSearchParameters {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + QRDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> PartialEq for LinearRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.coefficients == other.coefficients
|
||||
&& (self.intercept - other.intercept).abs() <= T::epsilon()
|
||||
self.intercept == other.intercept
|
||||
&& self.coefficients().shape() == other.coefficients().shape()
|
||||
&& self
|
||||
.coefficients()
|
||||
.iterator(0)
|
||||
.zip(other.coefficients().iterator(0))
|
||||
.all(|(&a, &b)| (a - b).abs() <= TX::epsilon())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, LinearRegressionParameters>
|
||||
for LinearRegression<T, M>
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + QRDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> SupervisedEstimator<X, Y, LinearRegressionParameters> for LinearRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: LinearRegressionParameters,
|
||||
) -> Result<Self, Failed> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
solver: LinearRegressionParameters::default().solver,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn fit(x: &X, y: &Y, parameters: LinearRegressionParameters) -> Result<Self, Failed> {
|
||||
LinearRegression::fit(x, y, parameters)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for LinearRegression<T, M> {
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + QRDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> Predictor<X, Y> for LinearRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
self.predict(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + QRDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> LinearRegression<TX, TY, X, Y>
|
||||
{
|
||||
/// Fits Linear Regression to your data.
|
||||
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
|
||||
/// * `y` - target values
|
||||
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
|
||||
pub fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
x: &X,
|
||||
y: &Y,
|
||||
parameters: LinearRegressionParameters,
|
||||
) -> Result<LinearRegression<T, M>, Failed> {
|
||||
let y_m = M::from_row_vector(y.clone());
|
||||
let b = y_m.transpose();
|
||||
) -> Result<LinearRegression<TX, TY, X, Y>, Failed> {
|
||||
let b = X::from_iterator(
|
||||
y.iterator(0).map(|&v| TX::from(v).unwrap()),
|
||||
y.shape(),
|
||||
1,
|
||||
0,
|
||||
);
|
||||
let (x_nrows, num_attributes) = x.shape();
|
||||
let (y_nrows, _) = b.shape();
|
||||
|
||||
@@ -208,46 +264,52 @@ impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
|
||||
));
|
||||
}
|
||||
|
||||
let a = x.h_stack(&M::ones(x_nrows, 1));
|
||||
let a = x.h_stack(&X::ones(x_nrows, 1));
|
||||
|
||||
let w = match parameters.solver {
|
||||
LinearRegressionSolverName::QR => a.qr_solve_mut(b)?,
|
||||
LinearRegressionSolverName::SVD => a.svd_solve_mut(b)?,
|
||||
};
|
||||
|
||||
let wights = w.slice(0..num_attributes, 0..1);
|
||||
let weights = X::from_slice(w.slice(0..num_attributes, 0..1).as_ref());
|
||||
|
||||
Ok(LinearRegression {
|
||||
intercept: w.get(num_attributes, 0),
|
||||
coefficients: wights,
|
||||
_solver: parameters.solver,
|
||||
intercept: Some(*w.get((num_attributes, 0))),
|
||||
coefficients: Some(weights),
|
||||
solver: parameters.solver,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Predict target values from `x`
|
||||
/// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
|
||||
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let (nrows, _) = x.shape();
|
||||
let mut y_hat = x.matmul(&self.coefficients);
|
||||
y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
|
||||
Ok(y_hat.transpose().to_row_vector())
|
||||
let bias = X::fill(nrows, 1, *self.intercept());
|
||||
let mut y_hat = x.matmul(self.coefficients());
|
||||
y_hat.add_mut(&bias);
|
||||
Ok(Y::from_iterator(
|
||||
y_hat.iterator(0).map(|&v| TY::from(v).unwrap()),
|
||||
nrows,
|
||||
))
|
||||
}
|
||||
|
||||
/// Get estimates regression coefficients
|
||||
pub fn coefficients(&self) -> &M {
|
||||
&self.coefficients
|
||||
pub fn coefficients(&self) -> &X {
|
||||
self.coefficients.as_ref().unwrap()
|
||||
}
|
||||
|
||||
/// Get estimate of intercept
|
||||
pub fn intercept(&self) -> T {
|
||||
self.intercept
|
||||
pub fn intercept(&self) -> &TX {
|
||||
self.intercept.as_ref().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
#[test]
|
||||
fn search_parameters() {
|
||||
@@ -268,13 +330,9 @@ mod tests {
|
||||
fn ols_fit_predict() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
&[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
&[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
&[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
&[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
&[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
@@ -286,8 +344,7 @@ mod tests {
|
||||
]);
|
||||
|
||||
let y: Vec<f64> = vec![
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
114.2, 115.7, 116.9,
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8,
|
||||
];
|
||||
|
||||
let y_hat_qr = LinearRegression::fit(
|
||||
@@ -314,43 +371,44 @@ mod tests {
|
||||
.all(|(&a, &b)| (a - b).abs() <= 5.0));
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
#[cfg(feature = "serde")]
|
||||
fn serde() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
&[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
&[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
&[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
&[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
&[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
&[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
]);
|
||||
// TODO: serialization for the new DenseMatrix needs to be implemented
|
||||
// #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
// #[test]
|
||||
// #[cfg(feature = "serde")]
|
||||
// fn serde() {
|
||||
// let x = DenseMatrix::from_2d_array(&[
|
||||
// &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
// &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
// &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
// &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
// &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
// &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
// &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
// &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
// &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
// &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
// &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
// &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
// &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
// ]);
|
||||
|
||||
let y = vec![
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
114.2, 115.7, 116.9,
|
||||
];
|
||||
// let y = vec![
|
||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
// 114.2, 115.7, 116.9,
|
||||
// ];
|
||||
|
||||
let lr = LinearRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
// let lr = LinearRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let deserialized_lr: LinearRegression<f64, DenseMatrix<f64>> =
|
||||
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
// let deserialized_lr: LinearRegression<f64, f64, DenseMatrix<f64>, Vec<f64>> =
|
||||
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(lr, deserialized_lr);
|
||||
// assert_eq!(lr, deserialized_lr);
|
||||
|
||||
let default = LinearRegressionParameters::default();
|
||||
let parameters: LinearRegressionParameters = serde_json::from_str("{}").unwrap();
|
||||
assert_eq!(parameters.solver, default.solver);
|
||||
}
|
||||
// let default = LinearRegressionParameters::default();
|
||||
// let parameters: LinearRegressionParameters = serde_json::from_str("{}").unwrap();
|
||||
// assert_eq!(parameters.solver, default.solver);
|
||||
// }
|
||||
}
|
||||
|
||||
+264
-212
@@ -10,7 +10,7 @@
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::naive::dense_matrix::*;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linear::logistic_regression::*;
|
||||
//!
|
||||
//! //Iris data
|
||||
@@ -36,8 +36,8 @@
|
||||
//! &[6.6, 2.9, 4.6, 1.3],
|
||||
//! &[5.2, 2.7, 3.9, 1.4],
|
||||
//! ]);
|
||||
//! let y: Vec<f64> = vec![
|
||||
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
||||
//! let y: Vec<i32> = vec![
|
||||
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
//! ];
|
||||
//!
|
||||
//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
@@ -54,14 +54,17 @@
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1};
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
use crate::optimization::first_order::lbfgs::LBFGS;
|
||||
use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
|
||||
use crate::optimization::line_search::Backtracking;
|
||||
@@ -84,7 +87,7 @@ impl Default for LogisticRegressionSolverName {
|
||||
/// Logistic Regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LogisticRegressionParameters<T: RealNumber> {
|
||||
pub struct LogisticRegressionParameters<T: Number + FloatNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: LogisticRegressionSolverName,
|
||||
@@ -96,7 +99,7 @@ pub struct LogisticRegressionParameters<T: RealNumber> {
|
||||
/// Logistic Regression grid search parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LogisticRegressionSearchParameters<T: RealNumber> {
|
||||
pub struct LogisticRegressionSearchParameters<T: Number> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: Vec<LogisticRegressionSolverName>,
|
||||
@@ -106,13 +109,13 @@ pub struct LogisticRegressionSearchParameters<T: RealNumber> {
|
||||
}
|
||||
|
||||
/// Logistic Regression grid search iterator
|
||||
pub struct LogisticRegressionSearchParametersIterator<T: RealNumber> {
|
||||
pub struct LogisticRegressionSearchParametersIterator<T: Number> {
|
||||
logistic_regression_search_parameters: LogisticRegressionSearchParameters<T>,
|
||||
current_solver: usize,
|
||||
current_alpha: usize,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> IntoIterator for LogisticRegressionSearchParameters<T> {
|
||||
impl<T: Number + FloatNumber> IntoIterator for LogisticRegressionSearchParameters<T> {
|
||||
type Item = LogisticRegressionParameters<T>;
|
||||
type IntoIter = LogisticRegressionSearchParametersIterator<T>;
|
||||
|
||||
@@ -125,7 +128,7 @@ impl<T: RealNumber> IntoIterator for LogisticRegressionSearchParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Iterator for LogisticRegressionSearchParametersIterator<T> {
|
||||
impl<T: Number + FloatNumber> Iterator for LogisticRegressionSearchParametersIterator<T> {
|
||||
type Item = LogisticRegressionParameters<T>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
@@ -155,7 +158,7 @@ impl<T: RealNumber> Iterator for LogisticRegressionSearchParametersIterator<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for LogisticRegressionSearchParameters<T> {
|
||||
impl<T: Number + FloatNumber> Default for LogisticRegressionSearchParameters<T> {
|
||||
fn default() -> Self {
|
||||
let default_params = LogisticRegressionParameters::default();
|
||||
|
||||
@@ -169,36 +172,50 @@ impl<T: RealNumber> Default for LogisticRegressionSearchParameters<T> {
|
||||
/// Logistic Regression
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct LogisticRegression<T: RealNumber, M: Matrix<T>> {
|
||||
coefficients: M,
|
||||
intercept: M,
|
||||
classes: Vec<T>,
|
||||
pub struct LogisticRegression<
|
||||
TX: Number + FloatNumber + RealNumber,
|
||||
TY: Number + Ord,
|
||||
X: Array2<TX>,
|
||||
Y: Array1<TY>,
|
||||
> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<X>,
|
||||
classes: Option<Vec<TY>>,
|
||||
num_attributes: usize,
|
||||
num_classes: usize,
|
||||
_phantom_tx: PhantomData<TX>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
|
||||
trait ObjectiveFunction<T: RealNumber, M: Matrix<T>> {
|
||||
fn f(&self, w_bias: &M) -> T;
|
||||
fn df(&self, g: &mut M, w_bias: &M);
|
||||
trait ObjectiveFunction<T: Number + FloatNumber, X: Array2<T>> {
|
||||
/// Evaluates the objective function at `w_bias` and returns its value.
|
||||
fn f(&self, w_bias: &[T]) -> T;
|
||||
|
||||
fn partial_dot(w: &M, x: &M, v_col: usize, m_row: usize) -> T {
|
||||
/// Computes the gradient of the objective at `w_bias` and stores it in `g`.
|
||||
#[allow(clippy::ptr_arg)]
|
||||
fn df(&self, g: &mut Vec<T>, w_bias: &Vec<T>);
|
||||
|
||||
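/// Dot product of row `m_row` of `x` with the `x.shape().1` entries of `w` starting at `v_col`, plus the entry of `w` that follows them (the bias term).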
|
||||
#[allow(clippy::ptr_arg)]
|
||||
fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T {
|
||||
let mut sum = T::zero();
|
||||
let p = x.shape().1;
|
||||
for i in 0..p {
|
||||
sum += x.get(m_row, i) * w.get(0, i + v_col);
|
||||
sum += *x.get((m_row, i)) * w[i + v_col];
|
||||
}
|
||||
|
||||
sum + w.get(0, p + v_col)
|
||||
sum + w[p + v_col]
|
||||
}
|
||||
}
|
||||
|
||||
struct BinaryObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
|
||||
x: &'a M,
|
||||
struct BinaryObjectiveFunction<'a, T: Number + FloatNumber, X: Array2<T>> {
|
||||
x: &'a X,
|
||||
y: Vec<usize>,
|
||||
alpha: T,
|
||||
_phantom_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> LogisticRegressionParameters<T> {
|
||||
impl<T: Number + FloatNumber> LogisticRegressionParameters<T> {
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub fn with_solver(mut self, solver: LogisticRegressionSolverName) -> Self {
|
||||
self.solver = solver;
|
||||
@@ -211,7 +228,7 @@ impl<T: RealNumber> LogisticRegressionParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for LogisticRegressionParameters<T> {
|
||||
impl<T: Number + FloatNumber> Default for LogisticRegressionParameters<T> {
|
||||
fn default() -> Self {
|
||||
LogisticRegressionParameters {
|
||||
solver: LogisticRegressionSolverName::default(),
|
||||
@@ -220,29 +237,39 @@ impl<T: RealNumber> Default for LogisticRegressionParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> PartialEq for LogisticRegression<T, M> {
|
||||
impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
PartialEq for LogisticRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
if self.num_classes != other.num_classes
|
||||
|| self.num_attributes != other.num_attributes
|
||||
|| self.classes.len() != other.classes.len()
|
||||
|| self.classes().len() != other.classes().len()
|
||||
{
|
||||
false
|
||||
} else {
|
||||
for i in 0..self.classes.len() {
|
||||
if (self.classes[i] - other.classes[i]).abs() > T::epsilon() {
|
||||
for i in 0..self.classes().len() {
|
||||
if self.classes()[i] != other.classes()[i] {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
self.coefficients == other.coefficients && self.intercept == other.intercept
|
||||
self.coefficients()
|
||||
.iterator(0)
|
||||
.zip(other.coefficients().iterator(0))
|
||||
.all(|(&a, &b)| (a - b).abs() <= TX::epsilon())
|
||||
&& self
|
||||
.intercept()
|
||||
.iterator(0)
|
||||
.zip(other.intercept().iterator(0))
|
||||
.all(|(&a, &b)| (a - b).abs() <= TX::epsilon())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
|
||||
for BinaryObjectiveFunction<'a, T, M>
|
||||
impl<'a, T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||
for BinaryObjectiveFunction<'a, T, X>
|
||||
{
|
||||
fn f(&self, w_bias: &M) -> T {
|
||||
fn f(&self, w_bias: &[T]) -> T {
|
||||
let mut f = T::zero();
|
||||
let (n, p) = self.x.shape();
|
||||
|
||||
@@ -253,18 +280,17 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
|
||||
|
||||
if self.alpha > T::zero() {
|
||||
let mut w_squared = T::zero();
|
||||
for i in 0..p {
|
||||
let w = w_bias.get(0, i);
|
||||
w_squared += w * w;
|
||||
for w_bias_i in w_bias.iter().take(p) {
|
||||
w_squared += *w_bias_i * *w_bias_i;
|
||||
}
|
||||
f += T::half() * self.alpha * w_squared;
|
||||
f += T::from_f64(0.5).unwrap() * self.alpha * w_squared;
|
||||
}
|
||||
|
||||
f
|
||||
}
|
||||
|
||||
fn df(&self, g: &mut M, w_bias: &M) {
|
||||
g.copy_from(&M::zeros(1, g.shape().1));
|
||||
fn df(&self, g: &mut Vec<T>, w_bias: &Vec<T>) {
|
||||
g.copy_from(&Vec::zeros(g.len()));
|
||||
|
||||
let (n, p) = self.x.shape();
|
||||
|
||||
@@ -272,86 +298,79 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
|
||||
let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i);
|
||||
|
||||
let dyi = (T::from(self.y[i]).unwrap()) - wx.sigmoid();
|
||||
for j in 0..p {
|
||||
g.set(0, j, g.get(0, j) - dyi * self.x.get(i, j));
|
||||
for (j, g_j) in g.iter_mut().enumerate().take(p) {
|
||||
*g_j -= dyi * *self.x.get((i, j));
|
||||
}
|
||||
g.set(0, p, g.get(0, p) - dyi);
|
||||
g[p] -= dyi;
|
||||
}
|
||||
|
||||
if self.alpha > T::zero() {
|
||||
for i in 0..p {
|
||||
let w = w_bias.get(0, i);
|
||||
g.set(0, i, g.get(0, i) + self.alpha * w);
|
||||
let w = w_bias[i];
|
||||
g[i] += self.alpha * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct MultiClassObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
|
||||
x: &'a M,
|
||||
struct MultiClassObjectiveFunction<'a, T: Number + FloatNumber, X: Array2<T>> {
|
||||
x: &'a X,
|
||||
y: Vec<usize>,
|
||||
k: usize,
|
||||
alpha: T,
|
||||
_phantom_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
|
||||
for MultiClassObjectiveFunction<'a, T, M>
|
||||
impl<'a, T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||
for MultiClassObjectiveFunction<'a, T, X>
|
||||
{
|
||||
fn f(&self, w_bias: &M) -> T {
|
||||
fn f(&self, w_bias: &[T]) -> T {
|
||||
let mut f = T::zero();
|
||||
let mut prob = M::zeros(1, self.k);
|
||||
let mut prob = vec![T::zero(); self.k];
|
||||
let (n, p) = self.x.shape();
|
||||
for i in 0..n {
|
||||
for j in 0..self.k {
|
||||
prob.set(
|
||||
0,
|
||||
j,
|
||||
MultiClassObjectiveFunction::partial_dot(w_bias, self.x, j * (p + 1), i),
|
||||
);
|
||||
for (j, prob_j) in prob.iter_mut().enumerate().take(self.k) {
|
||||
*prob_j = MultiClassObjectiveFunction::partial_dot(w_bias, self.x, j * (p + 1), i);
|
||||
}
|
||||
prob.softmax_mut();
|
||||
f -= prob.get(0, self.y[i]).ln();
|
||||
f -= prob[self.y[i]].ln();
|
||||
}
|
||||
|
||||
if self.alpha > T::zero() {
|
||||
let mut w_squared = T::zero();
|
||||
for i in 0..self.k {
|
||||
for j in 0..p {
|
||||
let wi = w_bias.get(0, i * (p + 1) + j);
|
||||
let wi = w_bias[i * (p + 1) + j];
|
||||
w_squared += wi * wi;
|
||||
}
|
||||
}
|
||||
f += T::half() * self.alpha * w_squared;
|
||||
f += T::from_f64(0.5).unwrap() * self.alpha * w_squared;
|
||||
}
|
||||
|
||||
f
|
||||
}
|
||||
|
||||
fn df(&self, g: &mut M, w: &M) {
|
||||
g.copy_from(&M::zeros(1, g.shape().1));
|
||||
fn df(&self, g: &mut Vec<T>, w: &Vec<T>) {
|
||||
g.copy_from(&Vec::zeros(g.len()));
|
||||
|
||||
let mut prob = M::zeros(1, self.k);
|
||||
let mut prob = vec![T::zero(); self.k];
|
||||
let (n, p) = self.x.shape();
|
||||
|
||||
for i in 0..n {
|
||||
for j in 0..self.k {
|
||||
prob.set(
|
||||
0,
|
||||
j,
|
||||
MultiClassObjectiveFunction::partial_dot(w, self.x, j * (p + 1), i),
|
||||
);
|
||||
for (j, prob_j) in prob.iter_mut().enumerate().take(self.k) {
|
||||
*prob_j = MultiClassObjectiveFunction::partial_dot(w, self.x, j * (p + 1), i);
|
||||
}
|
||||
|
||||
prob.softmax_mut();
|
||||
|
||||
for j in 0..self.k {
|
||||
let yi = (if self.y[i] == j { T::one() } else { T::zero() }) - prob.get(0, j);
|
||||
let yi = (if self.y[i] == j { T::one() } else { T::zero() }) - prob[j];
|
||||
|
||||
for l in 0..p {
|
||||
let pos = j * (p + 1);
|
||||
g.set(0, pos + l, g.get(0, pos + l) - yi * self.x.get(i, l));
|
||||
g[pos + l] -= yi * *self.x.get((i, l));
|
||||
}
|
||||
g.set(0, j * (p + 1) + p, g.get(0, j * (p + 1) + p) - yi);
|
||||
g[j * (p + 1) + p] -= yi;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -359,46 +378,57 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
|
||||
for i in 0..self.k {
|
||||
for j in 0..p {
|
||||
let pos = i * (p + 1);
|
||||
let wi = w.get(0, pos + j);
|
||||
g.set(0, pos + j, g.get(0, pos + j) + self.alpha * wi);
|
||||
let wi = w[pos + j];
|
||||
g[pos + j] += self.alpha * wi;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>>
|
||||
SupervisedEstimator<M, M::RowVector, LogisticRegressionParameters<T>>
|
||||
for LogisticRegression<T, M>
|
||||
impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
SupervisedEstimator<X, Y, LogisticRegressionParameters<TX>>
|
||||
for LogisticRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: LogisticRegressionParameters<T>,
|
||||
) -> Result<Self, Failed> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
classes: Option::None,
|
||||
num_attributes: 0,
|
||||
num_classes: 0,
|
||||
_phantom_tx: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn fit(x: &X, y: &Y, parameters: LogisticRegressionParameters<TX>) -> Result<Self, Failed> {
|
||||
LogisticRegression::fit(x, y, parameters)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for LogisticRegression<T, M> {
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
Predictor<X, Y> for LogisticRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
self.predict(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
|
||||
impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
LogisticRegression<TX, TY, X, Y>
|
||||
{
|
||||
/// Fits Logistic Regression to your data.
|
||||
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
|
||||
/// * `y` - target class values
|
||||
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
|
||||
pub fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: LogisticRegressionParameters<T>,
|
||||
) -> Result<LogisticRegression<T, M>, Failed> {
|
||||
let y_m = M::from_row_vector(y.clone());
|
||||
x: &X,
|
||||
y: &Y,
|
||||
parameters: LogisticRegressionParameters<TX>,
|
||||
) -> Result<LogisticRegression<TX, TY, X, Y>, Failed> {
|
||||
let (x_nrows, num_attributes) = x.shape();
|
||||
let (_, y_nrows) = y_m.shape();
|
||||
let y_nrows = y.shape();
|
||||
|
||||
if x_nrows != y_nrows {
|
||||
return Err(Failed::fit(
|
||||
@@ -406,15 +436,15 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
|
||||
));
|
||||
}
|
||||
|
||||
let classes = y_m.unique();
|
||||
let classes = y.unique();
|
||||
|
||||
let k = classes.len();
|
||||
|
||||
let mut yi: Vec<usize> = vec![0; y_nrows];
|
||||
|
||||
for (i, yi_i) in yi.iter_mut().enumerate().take(y_nrows) {
|
||||
let yc = y_m.get(0, i);
|
||||
*yi_i = classes.iter().position(|c| yc == *c).unwrap();
|
||||
let yc = y.get(i);
|
||||
*yi_i = classes.iter().position(|c| yc == c).unwrap();
|
||||
}
|
||||
|
||||
match k.cmp(&2) {
|
||||
@@ -423,45 +453,55 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
|
||||
k
|
||||
))),
|
||||
Ordering::Equal => {
|
||||
let x0 = M::zeros(1, num_attributes + 1);
|
||||
let x0 = Vec::zeros(num_attributes + 1);
|
||||
|
||||
let objective = BinaryObjectiveFunction {
|
||||
x,
|
||||
y: yi,
|
||||
alpha: parameters.alpha,
|
||||
_phantom_t: PhantomData,
|
||||
};
|
||||
|
||||
let result = LogisticRegression::minimize(x0, objective);
|
||||
let result = Self::minimize(x0, objective);
|
||||
|
||||
let weights = result.x;
|
||||
let weights = X::from_iterator(result.x.into_iter(), 1, num_attributes + 1, 0);
|
||||
let coefficients = weights.slice(0..1, 0..num_attributes);
|
||||
let intercept = weights.slice(0..1, num_attributes..num_attributes + 1);
|
||||
|
||||
Ok(LogisticRegression {
|
||||
coefficients: weights.slice(0..1, 0..num_attributes),
|
||||
intercept: weights.slice(0..1, num_attributes..num_attributes + 1),
|
||||
classes,
|
||||
coefficients: Some(X::from_slice(coefficients.as_ref())),
|
||||
intercept: Some(X::from_slice(intercept.as_ref())),
|
||||
classes: Some(classes),
|
||||
num_attributes,
|
||||
num_classes: k,
|
||||
_phantom_tx: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
Ordering::Greater => {
|
||||
let x0 = M::zeros(1, (num_attributes + 1) * k);
|
||||
let x0 = Vec::zeros((num_attributes + 1) * k);
|
||||
|
||||
let objective = MultiClassObjectiveFunction {
|
||||
x,
|
||||
y: yi,
|
||||
k,
|
||||
alpha: parameters.alpha,
|
||||
_phantom_t: PhantomData,
|
||||
};
|
||||
|
||||
let result = LogisticRegression::minimize(x0, objective);
|
||||
let weights = result.x.reshape(k, num_attributes + 1);
|
||||
let result = Self::minimize(x0, objective);
|
||||
let weights = X::from_iterator(result.x.into_iter(), k, num_attributes + 1, 0);
|
||||
let coefficients = weights.slice(0..k, 0..num_attributes);
|
||||
let intercept = weights.slice(0..k, num_attributes..num_attributes + 1);
|
||||
|
||||
Ok(LogisticRegression {
|
||||
coefficients: weights.slice(0..k, 0..num_attributes),
|
||||
intercept: weights.slice(0..k, num_attributes..num_attributes + 1),
|
||||
classes,
|
||||
coefficients: Some(X::from_slice(coefficients.as_ref())),
|
||||
intercept: Some(X::from_slice(intercept.as_ref())),
|
||||
classes: Some(classes),
|
||||
num_attributes,
|
||||
num_classes: k,
|
||||
_phantom_tx: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -469,17 +509,17 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
|
||||
|
||||
/// Predict class labels for samples in `x`.
|
||||
/// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
|
||||
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let n = x.shape().0;
|
||||
let mut result = M::zeros(1, n);
|
||||
let mut result = Y::zeros(n);
|
||||
if self.num_classes == 2 {
|
||||
let y_hat: Vec<T> = x.ab(false, &self.coefficients, true).get_col_as_vec(0);
|
||||
let intercept = self.intercept.get(0, 0);
|
||||
for (i, y_hat_i) in y_hat.iter().enumerate().take(n) {
|
||||
let y_hat = x.ab(false, self.coefficients(), true);
|
||||
let intercept = *self.intercept().get((0, 0));
|
||||
for (i, y_hat_i) in y_hat.iterator(0).enumerate().take(n) {
|
||||
result.set(
|
||||
0,
|
||||
i,
|
||||
self.classes[if (*y_hat_i + intercept).sigmoid() > T::half() {
|
||||
self.classes()[if RealNumber::sigmoid(*y_hat_i + intercept) > RealNumber::half()
|
||||
{
|
||||
1
|
||||
} else {
|
||||
0
|
||||
@@ -487,40 +527,48 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
|
||||
);
|
||||
}
|
||||
} else {
|
||||
let mut y_hat = x.matmul(&self.coefficients.transpose());
|
||||
let mut y_hat = x.matmul(&self.coefficients().transpose());
|
||||
for r in 0..n {
|
||||
for c in 0..self.num_classes {
|
||||
y_hat.set(r, c, y_hat.get(r, c) + self.intercept.get(c, 0));
|
||||
y_hat.set((r, c), *y_hat.get((r, c)) + *self.intercept().get((c, 0)));
|
||||
}
|
||||
}
|
||||
let class_idxs = y_hat.argmax();
|
||||
let class_idxs = y_hat.argmax(1);
|
||||
for (i, class_i) in class_idxs.iter().enumerate().take(n) {
|
||||
result.set(0, i, self.classes[*class_i]);
|
||||
result.set(i, self.classes()[*class_i]);
|
||||
}
|
||||
}
|
||||
Ok(result.to_row_vector())
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Get estimates regression coefficients
|
||||
pub fn coefficients(&self) -> &M {
|
||||
&self.coefficients
|
||||
/// Get estimates of the regression coefficients; this returns a shared reference
|
||||
pub fn coefficients(&self) -> &X {
|
||||
self.coefficients.as_ref().unwrap()
|
||||
}
|
||||
|
||||
/// Get estimate of intercept
|
||||
pub fn intercept(&self) -> &M {
|
||||
&self.intercept
|
||||
/// Get the estimate of the intercept; this returns a shared reference
|
||||
pub fn intercept(&self) -> &X {
|
||||
self.intercept.as_ref().unwrap()
|
||||
}
|
||||
|
||||
fn minimize(x0: M, objective: impl ObjectiveFunction<T, M>) -> OptimizerResult<T, M> {
|
||||
let f = |w: &M| -> T { objective.f(w) };
|
||||
/// Get the classes; this returns a shared reference
|
||||
pub fn classes(&self) -> &Vec<TY> {
|
||||
self.classes.as_ref().unwrap()
|
||||
}
|
||||
|
||||
let df = |g: &mut M, w: &M| objective.df(g, w);
|
||||
fn minimize(
|
||||
x0: Vec<TX>,
|
||||
objective: impl ObjectiveFunction<TX, X>,
|
||||
) -> OptimizerResult<TX, Vec<TX>> {
|
||||
let f = |w: &Vec<TX>| -> TX { objective.f(w) };
|
||||
|
||||
let ls: Backtracking<T> = Backtracking {
|
||||
let df = |g: &mut Vec<TX>, w: &Vec<TX>| objective.df(g, w);
|
||||
|
||||
let ls: Backtracking<TX> = Backtracking {
|
||||
order: FunctionOrder::THIRD,
|
||||
..Default::default()
|
||||
};
|
||||
let optimizer: LBFGS<T> = Default::default();
|
||||
let optimizer: LBFGS = Default::default();
|
||||
|
||||
optimizer.optimize(&f, &df, &x0, &ls)
|
||||
}
|
||||
@@ -530,8 +578,8 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::dataset::generator::make_blobs;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::metrics::accuracy;
|
||||
use crate::linalg::basic::arrays::Array;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
#[test]
|
||||
fn search_parameters() {
|
||||
@@ -576,24 +624,17 @@ mod tests {
|
||||
y: y.clone(),
|
||||
k: 3,
|
||||
alpha: 0.0,
|
||||
_phantom_t: PhantomData,
|
||||
};
|
||||
|
||||
let mut g: DenseMatrix<f64> = DenseMatrix::zeros(1, 9);
|
||||
let mut g = vec![0f64; 9];
|
||||
|
||||
objective.df(
|
||||
&mut g,
|
||||
&DenseMatrix::row_vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]),
|
||||
);
|
||||
objective.df(
|
||||
&mut g,
|
||||
&DenseMatrix::row_vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]),
|
||||
);
|
||||
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||
|
||||
assert!((g.get(0, 0) + 33.000068218163484).abs() < std::f64::EPSILON);
|
||||
assert!((g[0] + 33.000068218163484).abs() < std::f64::EPSILON);
|
||||
|
||||
let f = objective.f(&DenseMatrix::row_vector_from_array(&[
|
||||
1., 2., 3., 4., 5., 6., 7., 8., 9.,
|
||||
]));
|
||||
let f = objective.f(&vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||
|
||||
assert!((f - 408.0052230582765).abs() < std::f64::EPSILON);
|
||||
|
||||
@@ -602,18 +643,14 @@ mod tests {
|
||||
y: y.clone(),
|
||||
k: 3,
|
||||
alpha: 1.0,
|
||||
_phantom_t: PhantomData,
|
||||
};
|
||||
|
||||
let f = objective_reg.f(&DenseMatrix::row_vector_from_array(&[
|
||||
1., 2., 3., 4., 5., 6., 7., 8., 9.,
|
||||
]));
|
||||
let f = objective_reg.f(&vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||
assert!((f - 487.5052).abs() < 1e-4);
|
||||
|
||||
objective_reg.df(
|
||||
&mut g,
|
||||
&DenseMatrix::row_vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]),
|
||||
);
|
||||
assert!((g.get(0, 0).abs() - 32.0).abs() < 1e-4);
|
||||
objective_reg.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||
assert!((g[0].abs() - 32.0).abs() < 1e-4);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
@@ -643,18 +680,19 @@ mod tests {
|
||||
x: &x,
|
||||
y: y.clone(),
|
||||
alpha: 0.0,
|
||||
_phantom_t: PhantomData,
|
||||
};
|
||||
|
||||
let mut g: DenseMatrix<f64> = DenseMatrix::zeros(1, 3);
|
||||
let mut g = vec![0f64; 3];
|
||||
|
||||
objective.df(&mut g, &DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
|
||||
objective.df(&mut g, &DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
|
||||
objective.df(&mut g, &vec![1., 2., 3.]);
|
||||
objective.df(&mut g, &vec![1., 2., 3.]);
|
||||
|
||||
assert!((g.get(0, 0) - 26.051064349381285).abs() < std::f64::EPSILON);
|
||||
assert!((g.get(0, 1) - 10.239000702928523).abs() < std::f64::EPSILON);
|
||||
assert!((g.get(0, 2) - 3.869294270156324).abs() < std::f64::EPSILON);
|
||||
assert!((g[0] - 26.051064349381285).abs() < std::f64::EPSILON);
|
||||
assert!((g[1] - 10.239000702928523).abs() < std::f64::EPSILON);
|
||||
assert!((g[2] - 3.869294270156324).abs() < std::f64::EPSILON);
|
||||
|
||||
let f = objective.f(&DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
|
||||
let f = objective.f(&vec![1., 2., 3.]);
|
||||
|
||||
assert!((f - 59.76994756647412).abs() < std::f64::EPSILON);
|
||||
|
||||
@@ -662,21 +700,22 @@ mod tests {
|
||||
x: &x,
|
||||
y: y.clone(),
|
||||
alpha: 1.0,
|
||||
_phantom_t: PhantomData,
|
||||
};
|
||||
|
||||
let f = objective_reg.f(&DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
|
||||
let f = objective_reg.f(&vec![1., 2., 3.]);
|
||||
assert!((f - 62.2699).abs() < 1e-4);
|
||||
|
||||
objective_reg.df(&mut g, &DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
|
||||
assert!((g.get(0, 0) - 27.0511).abs() < 1e-4);
|
||||
assert!((g.get(0, 1) - 12.239).abs() < 1e-4);
|
||||
assert!((g.get(0, 2) - 3.8693).abs() < 1e-4);
|
||||
objective_reg.df(&mut g, &vec![1., 2., 3.]);
|
||||
assert!((g[0] - 27.0511).abs() < 1e-4);
|
||||
assert!((g[1] - 12.239).abs() < 1e-4);
|
||||
assert!((g[2] - 3.8693).abs() < 1e-4);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn lr_fit_predict() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
||||
&[1., -5.],
|
||||
&[2., 5.],
|
||||
&[3., -2.],
|
||||
@@ -693,22 +732,23 @@ mod tests {
|
||||
&[8., 2.],
|
||||
&[9., 0.],
|
||||
]);
|
||||
let y: Vec<f64> = vec![0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.];
|
||||
let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
||||
|
||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
assert_eq!(lr.coefficients().shape(), (3, 2));
|
||||
assert_eq!(lr.intercept().shape(), (3, 1));
|
||||
|
||||
assert!((lr.coefficients().get(0, 0) - 0.0435).abs() < 1e-4);
|
||||
assert!((lr.intercept().get(0, 0) - 0.1250).abs() < 1e-4);
|
||||
assert!((*lr.coefficients().get((0, 0)) - 0.0435).abs() < 1e-4);
|
||||
assert!(
|
||||
(*lr.intercept().get((0, 0)) - 0.1250).abs() < 1e-4,
|
||||
"expected to be least than 1e-4, got {}",
|
||||
(*lr.intercept().get((0, 0)) - 0.1250).abs()
|
||||
);
|
||||
|
||||
let y_hat = lr.predict(&x).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
y_hat,
|
||||
vec![0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
|
||||
);
|
||||
assert_eq!(y_hat, vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
@@ -716,14 +756,14 @@ mod tests {
|
||||
fn lr_fit_predict_multiclass() {
|
||||
let blobs = make_blobs(15, 4, 3);
|
||||
|
||||
let x = DenseMatrix::from_vec(15, 4, &blobs.data);
|
||||
let y = blobs.target;
|
||||
let x: DenseMatrix<f32> = DenseMatrix::from_iterator(blobs.data.into_iter(), 15, 4, 0);
|
||||
let y: Vec<i32> = blobs.target.into_iter().map(|v| v as i32).collect();
|
||||
|
||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let y_hat = lr.predict(&x).unwrap();
|
||||
|
||||
assert!(accuracy(&y_hat, &y) > 0.9);
|
||||
assert_eq!(y_hat, vec![0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]);
|
||||
|
||||
let lr_reg = LogisticRegression::fit(
|
||||
&x,
|
||||
@@ -732,7 +772,10 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
|
||||
let reg_coeff_sum: f32 = lr_reg.coefficients().abs().iter().sum();
|
||||
let coeff: f32 = lr.coefficients().abs().iter().sum();
|
||||
|
||||
assert!(reg_coeff_sum < coeff);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
@@ -740,14 +783,17 @@ mod tests {
|
||||
fn lr_fit_predict_binary() {
|
||||
let blobs = make_blobs(20, 4, 2);
|
||||
|
||||
let x = DenseMatrix::from_vec(20, 4, &blobs.data);
|
||||
let y = blobs.target;
|
||||
let x = DenseMatrix::from_iterator(blobs.data.into_iter(), 20, 4, 0);
|
||||
let y: Vec<i32> = blobs.target.into_iter().map(|v| v as i32).collect();
|
||||
|
||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let y_hat = lr.predict(&x).unwrap();
|
||||
|
||||
assert!(accuracy(&y_hat, &y) > 0.9);
|
||||
assert_eq!(
|
||||
y_hat,
|
||||
vec![0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
|
||||
);
|
||||
|
||||
let lr_reg = LogisticRegression::fit(
|
||||
&x,
|
||||
@@ -756,39 +802,43 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
|
||||
let reg_coeff_sum: f32 = lr_reg.coefficients().abs().iter().sum();
|
||||
let coeff: f32 = lr.coefficients().abs().iter().sum();
|
||||
|
||||
assert!(reg_coeff_sum < coeff);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
#[cfg(feature = "serde")]
|
||||
fn serde() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[1., -5.],
|
||||
&[2., 5.],
|
||||
&[3., -2.],
|
||||
&[1., 2.],
|
||||
&[2., 0.],
|
||||
&[6., -5.],
|
||||
&[7., 5.],
|
||||
&[6., -2.],
|
||||
&[7., 2.],
|
||||
&[6., 0.],
|
||||
&[8., -5.],
|
||||
&[9., 5.],
|
||||
&[10., -2.],
|
||||
&[8., 2.],
|
||||
&[9., 0.],
|
||||
]);
|
||||
let y: Vec<f64> = vec![0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.];
|
||||
// TODO: serialization for the new DenseMatrix needs to be implemented
|
||||
// #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
// #[test]
|
||||
// #[cfg(feature = "serde")]
|
||||
// fn serde() {
|
||||
// let x = DenseMatrix::from_2d_array(&[
|
||||
// &[1., -5.],
|
||||
// &[2., 5.],
|
||||
// &[3., -2.],
|
||||
// &[1., 2.],
|
||||
// &[2., 0.],
|
||||
// &[6., -5.],
|
||||
// &[7., 5.],
|
||||
// &[6., -2.],
|
||||
// &[7., 2.],
|
||||
// &[6., 0.],
|
||||
// &[8., -5.],
|
||||
// &[9., 5.],
|
||||
// &[10., -2.],
|
||||
// &[8., 2.],
|
||||
// &[9., 0.],
|
||||
// ]);
|
||||
// let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
||||
|
||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
// let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let deserialized_lr: LogisticRegression<f64, DenseMatrix<f64>> =
|
||||
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
// let deserialized_lr: LogisticRegression<f64, i32, DenseMatrix<f64>, Vec<i32>> =
|
||||
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(lr, deserialized_lr);
|
||||
}
|
||||
// assert_eq!(lr, deserialized_lr);
|
||||
// }
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
@@ -815,9 +865,7 @@ mod tests {
|
||||
&[6.6, 2.9, 4.6, 1.3],
|
||||
&[5.2, 2.7, 3.9, 1.4],
|
||||
]);
|
||||
let y: Vec<f64> = vec![
|
||||
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
||||
];
|
||||
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||
|
||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
let lr_reg = LogisticRegression::fit(
|
||||
@@ -829,13 +877,17 @@ mod tests {
|
||||
|
||||
let y_hat = lr.predict(&x).unwrap();
|
||||
|
||||
let error: f64 = y
|
||||
let error: i32 = y
|
||||
.into_iter()
|
||||
.zip(y_hat.into_iter())
|
||||
.map(|(a, b)| (a - b).abs())
|
||||
.sum();
|
||||
|
||||
assert!(error <= 1.0);
|
||||
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
|
||||
assert!(error <= 1);
|
||||
|
||||
let reg_coeff_sum: f32 = lr_reg.coefficients().abs().iter().sum();
|
||||
let coeff: f32 = lr.coefficients().abs().iter().sum();
|
||||
|
||||
assert!(reg_coeff_sum < coeff);
|
||||
}
|
||||
}
|
||||
|
||||
+2
-2
@@ -20,10 +20,10 @@
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
pub(crate) mod bg_solver;
|
||||
pub mod bg_solver;
|
||||
pub mod elastic_net;
|
||||
pub mod lasso;
|
||||
pub(crate) mod lasso_optimizer;
|
||||
pub mod lasso_optimizer;
|
||||
pub mod linear_regression;
|
||||
pub mod logistic_regression;
|
||||
pub mod ridge_regression;
|
||||
|
||||
+152
-90
@@ -19,7 +19,7 @@
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::naive::dense_matrix::*;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linear::ridge_regression::*;
|
||||
//!
|
||||
//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html)
|
||||
@@ -57,15 +57,18 @@
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2};
|
||||
use crate::linalg::traits::cholesky::CholeskyDecomposable;
|
||||
use crate::linalg::traits::svd::SVDDecomposable;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
@@ -86,7 +89,7 @@ impl Default for RidgeRegressionSolverName {
|
||||
/// Ridge Regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RidgeRegressionParameters<T: RealNumber> {
|
||||
pub struct RidgeRegressionParameters<T: Number + RealNumber> {
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: RidgeRegressionSolverName,
|
||||
/// Controls the strength of the penalty to the loss function.
|
||||
@@ -99,7 +102,7 @@ pub struct RidgeRegressionParameters<T: RealNumber> {
|
||||
/// Ridge Regression grid search parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RidgeRegressionSearchParameters<T: RealNumber> {
|
||||
pub struct RidgeRegressionSearchParameters<T: Number + RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: Vec<RidgeRegressionSolverName>,
|
||||
@@ -113,14 +116,14 @@ pub struct RidgeRegressionSearchParameters<T: RealNumber> {
|
||||
}
|
||||
|
||||
/// Ridge Regression grid search iterator
|
||||
pub struct RidgeRegressionSearchParametersIterator<T: RealNumber> {
|
||||
pub struct RidgeRegressionSearchParametersIterator<T: Number + RealNumber> {
|
||||
ridge_regression_search_parameters: RidgeRegressionSearchParameters<T>,
|
||||
current_solver: usize,
|
||||
current_alpha: usize,
|
||||
current_normalize: usize,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> IntoIterator for RidgeRegressionSearchParameters<T> {
|
||||
impl<T: Number + RealNumber> IntoIterator for RidgeRegressionSearchParameters<T> {
|
||||
type Item = RidgeRegressionParameters<T>;
|
||||
type IntoIter = RidgeRegressionSearchParametersIterator<T>;
|
||||
|
||||
@@ -134,7 +137,7 @@ impl<T: RealNumber> IntoIterator for RidgeRegressionSearchParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Iterator for RidgeRegressionSearchParametersIterator<T> {
|
||||
impl<T: Number + RealNumber> Iterator for RidgeRegressionSearchParametersIterator<T> {
|
||||
type Item = RidgeRegressionParameters<T>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
@@ -171,7 +174,7 @@ impl<T: RealNumber> Iterator for RidgeRegressionSearchParametersIterator<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for RidgeRegressionSearchParameters<T> {
|
||||
impl<T: Number + RealNumber> Default for RidgeRegressionSearchParameters<T> {
|
||||
fn default() -> Self {
|
||||
let default_params = RidgeRegressionParameters::default();
|
||||
|
||||
@@ -186,13 +189,20 @@ impl<T: RealNumber> Default for RidgeRegressionSearchParameters<T> {
|
||||
/// Ridge regression
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct RidgeRegression<T: RealNumber, M: Matrix<T>> {
|
||||
coefficients: M,
|
||||
intercept: T,
|
||||
_solver: RidgeRegressionSolverName,
|
||||
pub struct RidgeRegression<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + CholeskyDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<TX>,
|
||||
solver: Option<RidgeRegressionSolverName>,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> RidgeRegressionParameters<T> {
|
||||
impl<T: Number + RealNumber> RidgeRegressionParameters<T> {
|
||||
/// Regularization parameter.
|
||||
pub fn with_alpha(mut self, alpha: T) -> Self {
|
||||
self.alpha = alpha;
|
||||
@@ -210,51 +220,84 @@ impl<T: RealNumber> RidgeRegressionParameters<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
|
||||
impl<T: Number + RealNumber> Default for RidgeRegressionParameters<T> {
|
||||
fn default() -> Self {
|
||||
RidgeRegressionParameters {
|
||||
solver: RidgeRegressionSolverName::default(),
|
||||
alpha: T::one(),
|
||||
alpha: T::from_f64(1.0).unwrap(),
|
||||
normalize: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> PartialEq for RidgeRegression<T, M> {
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + CholeskyDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> PartialEq for RidgeRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.coefficients == other.coefficients
|
||||
&& (self.intercept - other.intercept).abs() <= T::epsilon()
|
||||
self.intercept() == other.intercept()
|
||||
&& self.coefficients().shape() == other.coefficients().shape()
|
||||
&& self
|
||||
.coefficients()
|
||||
.iterator(0)
|
||||
.zip(other.coefficients().iterator(0))
|
||||
.all(|(&a, &b)| (a - b).abs() <= TX::epsilon())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, RidgeRegressionParameters<T>>
|
||||
for RidgeRegression<T, M>
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + CholeskyDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> SupervisedEstimator<X, Y, RidgeRegressionParameters<TX>> for RidgeRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: RidgeRegressionParameters<T>,
|
||||
) -> Result<Self, Failed> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
solver: Option::None,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn fit(x: &X, y: &Y, parameters: RidgeRegressionParameters<TX>) -> Result<Self, Failed> {
|
||||
RidgeRegression::fit(x, y, parameters)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RidgeRegression<T, M> {
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + CholeskyDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> Predictor<X, Y> for RidgeRegression<TX, TY, X, Y>
|
||||
{
|
||||
fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
self.predict(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
impl<
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX> + CholeskyDecomposable<TX> + SVDDecomposable<TX>,
|
||||
Y: Array1<TY>,
|
||||
> RidgeRegression<TX, TY, X, Y>
|
||||
{
|
||||
/// Fits ridge regression to your data.
|
||||
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
|
||||
/// * `y` - target values
|
||||
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
|
||||
pub fn fit(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: RidgeRegressionParameters<T>,
|
||||
) -> Result<RidgeRegression<T, M>, Failed> {
|
||||
x: &X,
|
||||
y: &Y,
|
||||
parameters: RidgeRegressionParameters<TX>,
|
||||
) -> Result<RidgeRegression<TX, TY, X, Y>, Failed> {
|
||||
// w = inv(X^T X + alpha * I) * X^T y
|
||||
|
||||
let (n, p) = x.shape();
|
||||
@@ -265,11 +308,16 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
));
|
||||
}
|
||||
|
||||
if y.len() != n {
|
||||
if y.shape() != n {
|
||||
return Err(Failed::fit("Number of rows in X should = len(y)"));
|
||||
}
|
||||
|
||||
let y_column = M::from_row_vector(y.clone()).transpose();
|
||||
let y_column = X::from_iterator(
|
||||
y.iterator(0).map(|&v| TX::from(v).unwrap()),
|
||||
y.shape(),
|
||||
1,
|
||||
0,
|
||||
);
|
||||
|
||||
let (w, b) = if parameters.normalize {
|
||||
let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;
|
||||
@@ -278,7 +326,7 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
let mut x_t_x = x_t.matmul(&scaled_x);
|
||||
|
||||
for i in 0..p {
|
||||
x_t_x.add_element_mut(i, i, parameters.alpha);
|
||||
x_t_x.add_element_mut((i, i), parameters.alpha);
|
||||
}
|
||||
|
||||
let mut w = match parameters.solver {
|
||||
@@ -287,16 +335,16 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
};
|
||||
|
||||
for (i, col_std_i) in col_std.iter().enumerate().take(p) {
|
||||
w.set(i, 0, w.get(i, 0) / *col_std_i);
|
||||
w.set((i, 0), *w.get((i, 0)) / *col_std_i);
|
||||
}
|
||||
|
||||
let mut b = T::zero();
|
||||
let mut b = TX::zero();
|
||||
|
||||
for (i, col_mean_i) in col_mean.iter().enumerate().take(p) {
|
||||
b += w.get(i, 0) * *col_mean_i;
|
||||
b += *w.get((i, 0)) * *col_mean_i;
|
||||
}
|
||||
|
||||
let b = y.mean() - b;
|
||||
let b = TX::from_f64(y.mean_by()).unwrap() - b;
|
||||
|
||||
(w, b)
|
||||
} else {
|
||||
@@ -305,7 +353,7 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
let mut x_t_x = x_t.matmul(x);
|
||||
|
||||
for i in 0..p {
|
||||
x_t_x.add_element_mut(i, i, parameters.alpha);
|
||||
x_t_x.add_element_mut((i, i), parameters.alpha);
|
||||
}
|
||||
|
||||
let w = match parameters.solver {
|
||||
@@ -313,22 +361,32 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
RidgeRegressionSolverName::SVD => x_t_x.svd_solve_mut(x_t_y)?,
|
||||
};
|
||||
|
||||
(w, T::zero())
|
||||
(w, TX::zero())
|
||||
};
|
||||
|
||||
Ok(RidgeRegression {
|
||||
intercept: b,
|
||||
coefficients: w,
|
||||
_solver: parameters.solver,
|
||||
intercept: Some(b),
|
||||
coefficients: Some(w),
|
||||
solver: Some(parameters.solver),
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
fn rescale_x(x: &M) -> Result<(M, Vec<T>, Vec<T>), Failed> {
|
||||
let col_mean = x.mean(0);
|
||||
let col_std = x.std(0);
|
||||
fn rescale_x(x: &X) -> Result<(X, Vec<TX>, Vec<TX>), Failed> {
|
||||
let col_mean: Vec<TX> = x
|
||||
.mean_by(0)
|
||||
.iter()
|
||||
.map(|&v| TX::from_f64(v).unwrap())
|
||||
.collect();
|
||||
let col_std: Vec<TX> = x
|
||||
.std_dev(0)
|
||||
.iter()
|
||||
.map(|&v| TX::from_f64(v).unwrap())
|
||||
.collect();
|
||||
|
||||
for (i, col_std_i) in col_std.iter().enumerate() {
|
||||
if (*col_std_i - T::zero()).abs() < T::epsilon() {
|
||||
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
||||
return Err(Failed::fit(&format!(
|
||||
"Cannot rescale constant column {}",
|
||||
i
|
||||
@@ -343,28 +401,31 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
|
||||
|
||||
/// Predict target values from `x`
|
||||
/// * `x` - _KxM_ data where _K_ is the number of observations and _M_ is the number of features.
|
||||
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let (nrows, _) = x.shape();
|
||||
let mut y_hat = x.matmul(&self.coefficients);
|
||||
y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
|
||||
Ok(y_hat.transpose().to_row_vector())
|
||||
let mut y_hat = x.matmul(self.coefficients());
|
||||
y_hat.add_mut(&X::fill(nrows, 1, self.intercept.unwrap()));
|
||||
Ok(Y::from_iterator(
|
||||
y_hat.iterator(0).map(|&v| TY::from(v).unwrap()),
|
||||
nrows,
|
||||
))
|
||||
}
|
||||
|
||||
/// Get estimated regression coefficients
|
||||
pub fn coefficients(&self) -> &M {
|
||||
&self.coefficients
|
||||
pub fn coefficients(&self) -> &X {
|
||||
self.coefficients.as_ref().unwrap()
|
||||
}
|
||||
|
||||
/// Get the estimated intercept
|
||||
pub fn intercept(&self) -> T {
|
||||
self.intercept
|
||||
pub fn intercept(&self) -> &TX {
|
||||
self.intercept.as_ref().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::metrics::mean_absolute_error;
|
||||
|
||||
#[test]
|
||||
@@ -438,39 +499,40 @@ mod tests {
|
||||
assert!(mean_absolute_error(&y_hat_svd, &y) < 2.0);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
#[cfg(feature = "serde")]
|
||||
fn serde() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
&[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
&[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
&[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
&[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
&[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
&[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
]);
|
||||
// TODO: implement serialization for new DenseMatrix
|
||||
// #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
// #[test]
|
||||
// #[cfg(feature = "serde")]
|
||||
// fn serde() {
|
||||
// let x = DenseMatrix::from_2d_array(&[
|
||||
// &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
|
||||
// &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
// &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
// &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
|
||||
// &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
|
||||
// &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
|
||||
// &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
|
||||
// &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
|
||||
// &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
|
||||
// &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
|
||||
// &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
|
||||
// &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
|
||||
// &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
|
||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
// ]);
|
||||
|
||||
let y = vec![
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
114.2, 115.7, 116.9,
|
||||
];
|
||||
// let y = vec![
|
||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
// 114.2, 115.7, 116.9,
|
||||
// ];
|
||||
|
||||
let lr = RidgeRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
// let lr = RidgeRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
|
||||
let deserialized_lr: RidgeRegression<f64, DenseMatrix<f64>> =
|
||||
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
// let deserialized_lr: RidgeRegression<f64, f64, DenseMatrix<f64>, Vec<f64>> =
|
||||
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(lr, deserialized_lr);
|
||||
}
|
||||
// assert_eq!(lr, deserialized_lr);
|
||||
// }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user