diff --git a/src/classification/logistic_regression.rs b/src/classification/logistic_regression.rs
index a443d38..b784e92 100644
--- a/src/classification/logistic_regression.rs
+++ b/src/classification/logistic_regression.rs
@@ -1,17 +1,71 @@
-use std::marker::PhantomData;
-use crate::linalg::{Matrix, Vector};
+use crate::math::NumericExt;
+use crate::linalg::Matrix;
 use crate::optimization::FunctionOrder;
-use crate::optimization::first_order::FirstOrderOptimizer;
+use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
 use crate::optimization::line_search::Backtracking;
 use crate::optimization::first_order::lbfgs::LBFGS;
 
 #[derive(Debug)]
-pub struct LogisticRegression<M: Matrix, V: Vector> {
+pub struct LogisticRegression<M: Matrix> {
     weights: M,
     classes: Vec<f64>,
     num_attributes: usize,
-    num_classes: usize,
-    v_phantom: PhantomData<V>
+    num_classes: usize
+}
+
+trait ObjectiveFunction<M: Matrix> {
+    fn f(&self, w_bias: &M) -> f64;
+    fn df(&self, g: &mut M, w_bias: &M);
+
+    fn partial_dot(w: &M, x: &M, v_col: usize, m_row: usize) -> f64 {
+        let mut sum = 0f64;
+        let p = x.shape().1;
+        for i in 0..p {
+            sum += x.get(m_row, i) * w.get(0, i + v_col);
+        }
+
+        sum + w.get(0, p + v_col)
+    }
+}
+
+struct BinaryObjectiveFunction<'a, M: Matrix> {
+    x: &'a M,
+    y: Vec<usize>
+}
+
+impl<'a, M: Matrix> ObjectiveFunction<M> for BinaryObjectiveFunction<'a, M> {
+
+    fn f(&self, w_bias: &M) -> f64 {
+        let mut f = 0.;
+        let (n, _) = self.x.shape();
+
+        for i in 0..n {
+            let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i);
+            f += wx.ln_1pe() - (self.y[i] as f64) * wx;
+        }
+
+        f
+    }
+
+    fn df(&self, g: &mut M, w_bias: &M) {
+
+        g.copy_from(&M::zeros(1, g.shape().1));
+
+        let (n, p) = self.x.shape();
+
+        for i in 0..n {
+            let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i);
+
+            let dyi = (self.y[i] as f64) - wx.sigmoid();
+            for j in 0..p {
+                g.set(0, j, g.get(0, j) - dyi * self.x.get(i, j));
+            }
+            g.set(0, p, g.get(0, p) - dyi);
+        }
+    }
+
+}
 
 struct MultiClassObjectiveFunction<'a, M: Matrix> {
@@ -20,67 +74,55 @@ struct MultiClassObjectiveFunction<'a, M: Matrix> {
     k: usize
 }
 
-impl<'a, M: Matrix> MultiClassObjectiveFunction<'a, M> {
+impl<'a, M: Matrix> ObjectiveFunction<M> for MultiClassObjectiveFunction<'a, M> {
 
-    fn f(&self, w: &X) -> f64 {
+    fn f(&self, w_bias: &M) -> f64 {
         let mut f = 0.;
-        let mut prob = X::zeros(self.k);
+        let mut prob = M::zeros(1, self.k);
         let (n, p) = self.x.shape();
-        for i in 0..n {
-            for j in 0..self.k {
-                prob.set(j, MultiClassObjectiveFunction::dot(w, self.x, j * (p + 1), i));
+        for i in 0..n {
+            for j in 0..self.k {
+                prob.set(0, j, MultiClassObjectiveFunction::partial_dot(w_bias, self.x, j * (p + 1), i));
             }
             prob.softmax_mut();
-            f -= prob.get(self.y[i]).ln();
+            f -= prob.get(0, self.y[i]).ln();
         }
 
         f
     }
 
-    fn df(&self, g: &mut X, w: &X) {
+    fn df(&self, g: &mut M, w: &M) {
 
-        g.copy_from(&X::zeros(g.shape().1));
-
-        let mut f = 0.;
-        let mut prob = X::zeros(self.k);
+        g.copy_from(&M::zeros(1, g.shape().1));
+
+        let mut prob = M::zeros(1, self.k);
         let (n, p) = self.x.shape();
 
-        for i in 0..n {
-            for j in 0..self.k {
-                prob.set(j, MultiClassObjectiveFunction::dot(w, self.x, j * (p + 1), i));
-            }
+        for i in 0..n {
+            for j in 0..self.k {
+                prob.set(0, j, MultiClassObjectiveFunction::partial_dot(w, self.x, j * (p + 1), i));
+            }
 
-            prob.softmax_mut();
-            f -= prob.get(self.y[i]).ln();
+            prob.softmax_mut();
 
             for j in 0..self.k {
-                let yi = (if self.y[i] == j { 1.0 } else { 0.0 }) - prob.get(j);
+                let yi = (if self.y[i] == j { 1.0 } else { 0.0 }) - prob.get(0, j);
 
                 for l in 0..p {
                     let pos = j * (p + 1);
-                    g.set(pos + l, g.get(pos + l) - yi * self.x.get(i, l));
+                    g.set(0, pos + l, g.get(0, pos + l) - yi * self.x.get(i, l));
                 }
-                g.set(j * (p + 1) + p, g.get(j * (p + 1) + p) - yi);
+                g.set(0, j * (p + 1) + p, g.get(0, j * (p + 1) + p) - yi);
             }
         }
     }
-
-    fn dot(v: &X, m: &M, v_pos: usize, w_row: usize) -> f64 {
-        let mut sum = 0f64;
-        let p = m.shape().1;
-        for i in 0..p {
-            sum += m.get(w_row, i) * v.get(i + v_pos);
-        }
-
-        sum + v.get(p + v_pos)
-    }
 }
 
-impl<M: Matrix, V: Vector> LogisticRegression<M, V> {
+impl<M: Matrix> LogisticRegression<M> {
 
-    pub fn fit(x: &M, y: &V) -> LogisticRegression<M, V> {
+    pub fn fit(x: &M, y: &M) -> LogisticRegression<M> {
 
         let (x_nrows, num_attributes) = x.shape();
         let (_, y_nrows) = y.shape();
 
@@ -89,16 +131,14 @@ impl<M: Matrix, V: Vector> LogisticRegression<M, V> {
             panic!("Number of rows of X doesn't match number of rows of Y");
         }
 
-        let mut classes = y.unique();
+        let classes = y.unique();
 
-        let k = classes.len();
-
-        let x0 = V::zeros((num_attributes + 1) * k);
+        let k = classes.len();
 
         let mut yi: Vec<usize> = vec![0; y_nrows];
 
         for i in 0..y_nrows {
-            let yc = y.get(i);
-            let j = classes.iter().position(|c| yc == *c).unwrap();
-            yi[i] = classes.iter().position(|c| yc == *c).unwrap();
+            let yc = y.get(0, i);
+            yi[i] = classes.iter().position(|c| yc == *c).unwrap();
         }
 
@@ -109,57 +149,61 @@ impl<M: Matrix, V: Vector> LogisticRegression<M, V> {
 
         } else if k == 2 {
 
-            LogisticRegression {
-                weights: x.clone(),
+            let x0 = M::zeros(1, num_attributes + 1);
+
+            let objective = BinaryObjectiveFunction{
+                x: x,
+                y: yi
+            };
+
+            let result = LogisticRegression::minimize(x0, objective);
+
+            LogisticRegression {
+                weights: result.x,
                 classes: classes,
                 num_attributes: num_attributes,
-                num_classes: k,
-                v_phantom: PhantomData
-            }
+                num_classes: k,
+            }
 
         } else {
 
+            let x0 = M::zeros(1, (num_attributes + 1) * k);
+
             let objective = MultiClassObjectiveFunction{
                 x: x,
                 y: yi,
                 k: k
-            };
-
-            let f = |w: &V| -> f64 {
-                objective.f(w)
-            };
-
-            let df = |g: &mut V, w: &V| {
-                objective.df(g, w)
-            };
-
-            let mut ls: Backtracking = Default::default();
-            ls.order = FunctionOrder::THIRD;
-            let optimizer: LBFGS = Default::default();
+            };
 
-            let result = optimizer.optimize(&f, &df, &x0, &ls);
+            let result = LogisticRegression::minimize(x0, objective);
 
-            let weights = M::from_vector(&result.x, k, num_attributes + 1);
+            let weights = result.x.reshape(k, num_attributes + 1);
 
             LogisticRegression {
                 weights: weights,
                 classes: classes,
                 num_attributes: num_attributes,
-                num_classes: k,
-                v_phantom: PhantomData
+                num_classes: k
             }
         }
     }
 
-    pub fn predict(&self, x: &M) -> V {
-        let (nrows, _) = x.shape();
-        let x_and_bias = x.h_stack(&M::ones(nrows, 1));
-        let mut y_hat = x_and_bias.dot(&self.weights.transpose());
-        y_hat.softmax_mut();
-        let class_idxs = y_hat.argmax();
-        V::from_vec(&class_idxs.iter().map(|class_idx| self.classes[*class_idx]).collect())
+    pub fn predict(&self, x: &M) -> M {
+        if self.num_classes == 2 {
+            let (nrows, _) = x.shape();
+            let x_and_bias = x.h_stack(&M::ones(nrows, 1));
+            let y_hat: Vec<f64> = x_and_bias.dot(&self.weights.transpose()).to_raw_vector();
+            M::from_vec(1, nrows, y_hat.iter().map(|wx| self.classes[if wx.sigmoid() > 0.5 { 1 } else { 0 }]).collect())
+        } else {
+            let (nrows, _) = x.shape();
+            let x_and_bias = x.h_stack(&M::ones(nrows, 1));
+            let y_hat = x_and_bias.dot(&self.weights.transpose());
+            let class_idxs = y_hat.argmax();
+            M::from_vec(1, nrows, class_idxs.iter().map(|class_idx| self.classes[*class_idx]).collect())
+        }
     }
 
     pub fn coefficients(&self) -> M {
@@ -168,6 +212,22 @@ impl<M: Matrix, V: Vector> LogisticRegression<M, V> {
 
     pub fn intercept(&self) -> M {
         self.weights.slice(0..self.num_classes, self.num_attributes..self.num_attributes+1)
+    }
+
+    fn minimize(x0: M, objective: impl ObjectiveFunction<M>) -> OptimizerResult<M> {
+        let f = |w: &M| -> f64 {
+            objective.f(w)
+        };
+
+        let df = |g: &mut M, w: &M| {
+            objective.df(g, w)
+        };
+
+        let mut ls: Backtracking = Default::default();
+        ls.order = FunctionOrder::THIRD;
+        let optimizer: LBFGS = Default::default();
+
+        optimizer.optimize(&f, &df, &x0, &ls)
     }
 }
 
@@ -175,8 +235,7 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::linalg::naive::dense_matrix::DenseMatrix;
-    use crate::linalg::naive::dense_vector::DenseVector;
+    use crate::linalg::naive::dense_matrix::DenseMatrix;
 
     #[test]
     fn multiclass_objective_f() {
 
@@ -206,18 +265,59 @@ mod tests {
             k: 3
         };
 
-        let mut g = DenseVector::zeros(9);
+        let mut g = DenseMatrix::zeros(1, 9);
 
-        objective.df(&mut g, &DenseVector::from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
-        objective.df(&mut g, &DenseVector::from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
+        objective.df(&mut g, &DenseMatrix::vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
+        objective.df(&mut g, &DenseMatrix::vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
 
-        assert!((g.get(0) + 33.000068218163484).abs() < std::f64::EPSILON);
+        assert!((g.get(0, 0) + 33.000068218163484).abs() < std::f64::EPSILON);
 
-        let f = objective.f(&DenseVector::from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
+        let f = objective.f(&DenseMatrix::vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
 
         assert!((f - 408.0052230582765).abs() < std::f64::EPSILON);
     }
 
+    #[test]
+    fn binary_objective_f() {
+
+        let x = DenseMatrix::from_2d_array(&[
+            &[ 1., -5.],
+            &[ 2.,  5.],
+            &[ 3., -2.],
+            &[ 1.,  2.],
+            &[ 2.,  0.],
+            &[ 6., -5.],
+            &[ 7.,  5.],
+            &[ 6., -2.],
+            &[ 7.,  2.],
+            &[ 6.,  0.],
+            &[ 8., -5.],
+            &[ 9.,  5.],
+            &[10., -2.],
+            &[ 8.,  2.],
+            &[ 9.,  0.]]);
+
+        let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1];
+
+        let objective = BinaryObjectiveFunction{
+            x: &x,
+            y: y
+        };
+
+        let mut g = DenseMatrix::zeros(1, 3);
+
+        objective.df(&mut g, &DenseMatrix::vector_from_array(&[1., 2., 3.]));
+        objective.df(&mut g, &DenseMatrix::vector_from_array(&[1., 2., 3.]));
+
+        assert!((g.get(0, 0) - 26.051064349381285).abs() < std::f64::EPSILON);
+        assert!((g.get(0, 1) - 10.239000702928523).abs() < std::f64::EPSILON);
+        assert!((g.get(0, 2) - 3.869294270156324).abs() < std::f64::EPSILON);
+
+        let f = objective.f(&DenseMatrix::vector_from_array(&[1., 2., 3.]));
+
+        assert!((f - 59.76994756647412).abs() < std::f64::EPSILON);
+    }
+
     #[test]
     fn lr_fit_predict() {
 
@@ -237,7 +337,7 @@ mod tests {
             &[10., -2.],
             &[ 8.,  2.],
             &[ 9.,  0.]]);
-        let y = DenseVector::from_array(&[0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.]);
+        let y = DenseMatrix::vector_from_array(&[0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.]);
 
         let lr = LogisticRegression::fit(&x, &y);
 
@@ -249,7 +349,42 @@ mod tests {
 
         let y_hat = lr.predict(&x);
 
-        assert_eq!(y_hat, DenseVector::from_array(&[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]));
+        assert_eq!(y_hat, DenseMatrix::vector_from_array(&[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]));
+
     }
 
+    #[test]
+    fn lr_fit_predict_iris() {
+
+        let x = DenseMatrix::from_2d_array(&[
+            &[5.1, 3.5, 1.4, 0.2],
+            &[4.9, 3.0, 1.4, 0.2],
+            &[4.7, 3.2, 1.3, 0.2],
+            &[4.6, 3.1, 1.5, 0.2],
+            &[5.0, 3.6, 1.4, 0.2],
+            &[5.4, 3.9, 1.7, 0.4],
+            &[4.6, 3.4, 1.4, 0.3],
+            &[5.0, 3.4, 1.5, 0.2],
+            &[4.4, 2.9, 1.4, 0.2],
+            &[4.9, 3.1, 1.5, 0.1],
+            &[7.0, 3.2, 4.7, 1.4],
+            &[6.4, 3.2, 4.5, 1.5],
+            &[6.9, 3.1, 4.9, 1.5],
+            &[5.5, 2.3, 4.0, 1.3],
+            &[6.5, 2.8, 4.6, 1.5],
+            &[5.7, 2.8, 4.5, 1.3],
+            &[6.3, 3.3, 4.7, 1.6],
+            &[4.9, 2.4, 3.3, 1.0],
+            &[6.6, 2.9, 4.6, 1.3],
+            &[5.2, 2.7, 3.9, 1.4]]);
+        let y = DenseMatrix::vector_from_array(&[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]);
+
+        let lr = LogisticRegression::fit(&x, &y);
+
+        let y_hat = lr.predict(&x);
+
+        assert_eq!(y_hat, DenseMatrix::vector_from_array(&[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]));
+    }
 
 }
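
Reviewer note on the section above: with `DenseVector` gone, labels go into `fit` and come out of `predict` as 1×n row-vector `DenseMatrix` values. A minimal sketch of the new call pattern, assuming the module paths in this diff (data shortened from the tests above; names are illustrative):

```rust
use crate::classification::logistic_regression::LogisticRegression;
use crate::linalg::Matrix;
use crate::linalg::naive::dense_matrix::DenseMatrix;

fn logistic_regression_example() {
    // One row per sample, one column per attribute.
    let x = DenseMatrix::from_2d_array(&[
        &[1., -5.],
        &[2., 5.],
        &[3., -2.],
        &[7., 2.],
    ]);
    // Labels are a 1x4 row-vector matrix now, not a DenseVector.
    let y = DenseMatrix::vector_from_array(&[0., 0., 1., 1.]);

    let lr = LogisticRegression::fit(&x, &y);

    // Predictions come back as a 1x4 row-vector matrix of class labels.
    let y_hat = lr.predict(&x);
    assert_eq!(y_hat.shape(), (1, 4));
}
```
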
diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs
index 9f3e11e..869313e 100644
--- a/src/linalg/mod.rs
+++ b/src/linalg/mod.rs
@@ -3,10 +3,16 @@
 use std::fmt::Debug;
 
 pub mod naive;
 
-pub trait Matrix: Into<Vec<f64>> + Clone + Debug{
+pub trait Matrix: Clone + Debug {
+
+    fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> Self;
+
+    fn from_vec(nrows: usize, ncols: usize, values: Vec<f64>) -> Self;
 
     fn get(&self, row: usize, col: usize) -> f64;
 
+    fn set(&mut self, row: usize, col: usize, x: f64);
+
     fn qr_solve_mut(&mut self, b: Self) -> Self;
 
     fn svd_solve_mut(&mut self, b: Self) -> Self;
 
@@ -15,7 +21,7 @@ pub trait Matrix: Into<Vec<f64>> + Clone + Debug{
 
     fn ones(nrows: usize, ncols: usize) -> Self;
 
-    fn from_vector<V: Vector>(v: &V, nrows: usize, ncols: usize) -> Self;
+    fn to_raw_vector(&self) -> Vec<f64>;
 
     fn fill(nrows: usize, ncols: usize, value: f64) -> Self;
 
@@ -27,7 +33,9 @@ pub trait Matrix: Into<Vec<f64>> + Clone + Debug{
 
     fn dot(&self, other: &Self) -> Self;
 
-    fn slice(&self, rows: Range<usize>, cols: Range<usize>) -> Self;
+    fn vector_dot(&self, other: &Self) -> f64;
+
+    fn slice(&self, rows: Range<usize>, cols: Range<usize>) -> Self;
 
     fn approximate_eq(&self, other: &Self, error: f64) -> bool;
 
@@ -139,120 +147,8 @@ pub trait Matrix: Into<Vec<f64>> + Clone + Debug{
         result
     }
 
-    fn argmax(&self) -> Vec<usize>;
-
-}
-
-pub trait Vector: Into<Vec<f64>> + Clone + Debug {
-
-    fn from_array(values: &[f64]) -> Self;
-
-    fn from_vec(values: &Vec<f64>) -> Self;
-
-    fn get(&self, i: usize) -> f64;
-
-    fn set(&mut self, i: usize, value: f64);
-
-    fn zeros(size: usize) -> Self;
-
-    fn ones(size: usize) -> Self;
-
-    fn fill(size: usize, value: f64) -> Self;
-
-    fn shape(&self) -> (usize, usize);
-
-    fn norm2(&self) -> f64;
-
-    fn norm(&self, p:f64) -> f64;
-
-    fn negative_mut(&mut self) -> &Self;
-
-    fn negative(&self) -> Self;
-
-    fn add_mut(&mut self, other: &Self) -> &Self;
-
-    fn sub_mut(&mut self, other: &Self) -> &Self;
-
-    fn mul_mut(&mut self, other: &Self) -> &Self;
-
-    fn div_mut(&mut self, other: &Self) -> &Self;
-
-    fn add(&self, other: &Self) -> Self {
-        let mut r = self.clone();
-        r.add_mut(other);
-        r
-    }
-
-    fn sub(&self, other: &Self) -> Self {
-        let mut r = self.clone();
-        r.sub_mut(other);
-        r
-    }
-
-    fn mul(&self, other: &Self) -> Self {
-        let mut r = self.clone();
-        r.mul_mut(other);
-        r
-    }
-
-    fn div(&self, other: &Self) -> Self {
-        let mut r = self.clone();
-        r.div_mut(other);
-        r
-    }
-
-    fn add_scalar_mut(&mut self, scalar: f64) -> &Self;
-
-    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self;
-
-    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self;
-
-    fn div_scalar_mut(&mut self, scalar: f64) -> &Self;
-
-    fn add_scalar(&self, scalar: f64) -> Self{
-        let mut r = self.clone();
-        r.add_scalar_mut(scalar);
-        r
-    }
-
-    fn sub_scalar(&self, scalar: f64) -> Self{
-        let mut r = self.clone();
-        r.sub_scalar_mut(scalar);
-        r
-    }
-
-    fn mul_scalar(&self, scalar: f64) -> Self{
-        let mut r = self.clone();
-        r.mul_scalar_mut(scalar);
-        r
-    }
-
-    fn div_scalar(&self, scalar: f64) -> Self{
-        let mut r = self.clone();
-        r.div_scalar_mut(scalar);
-        r
-    }
-
-    fn dot(&self, other: &Self) -> f64;
-
-    fn copy_from(&mut self, other: &Self);
-
-    fn abs_mut(&mut self) -> &Self;
-
-    fn pow_mut(&mut self, p: f64) -> &Self;
-
-    fn sum(&self) -> f64;
-
-    fn abs(&self) -> Self{
-        let mut r = self.clone();
-        r.abs_mut();
-        r
-    }
-
-    fn max_diff(&self, other: &Self) -> f64;
+    fn argmax(&self) -> Vec<usize>;
 
-    fn softmax_mut(&mut self);
-
-    fn unique(&self) -> Vec<f64>;
+    fn unique(&self) -> Vec<f64>;
 
-}
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs
index 405aca7..ec86a00 100644
--- a/src/linalg/naive/dense_matrix.rs
+++ b/src/linalg/naive/dense_matrix.rs
@@ -1,5 +1,5 @@
 use std::ops::Range;
-use crate::linalg::{Matrix, Vector};
+use crate::linalg::{Matrix};
 use crate::math;
 use rand::prelude::*;
 
@@ -12,7 +12,7 @@ pub struct DenseMatrix {
 
 }
 
-impl DenseMatrix {
+impl DenseMatrix {
 
     pub fn from_2d_array(values: &[&[f64]]) -> DenseMatrix {
         DenseMatrix::from_2d_vec(&values.into_iter().map(|row| Vec::from(*row)).collect())
@@ -32,19 +32,7 @@ impl DenseMatrix {
             }
         }
         m
-    }
-
-    pub fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> DenseMatrix {
-        DenseMatrix::from_vec(nrows, ncols, Vec::from(values))
-    }
-
-    pub fn from_vec(nrows: usize, ncols: usize, values: Vec<f64>) -> DenseMatrix {
-        DenseMatrix {
-            ncols: ncols,
-            nrows: nrows,
-            values: values
-        }
-    }
+    }
 
     pub fn vector_from_array(values: &[f64]) -> DenseMatrix {
         DenseMatrix::vector_from_vec(Vec::from(values))
@@ -66,10 +54,10 @@ impl DenseMatrix {
         for i in 0..self.values.len() {
             self.values[i] /= b.values[i];
         }
-    }
+    }
 
-    pub fn set(&mut self, row: usize, col: usize, x: f64) {
-        self.values[col*self.nrows + row] = x;
+    pub fn get_raw_values(&self) -> &Vec<f64> {
+        &self.values
     }
 
     fn div_element_mut(&mut self, row: usize, col: usize, x: f64) {
@@ -86,7 +74,7 @@ impl DenseMatrix {
 
     fn sub_element_mut(&mut self, row: usize, col: usize, x: f64) {
         self.values[col*self.nrows + row] -= x;
-    }
+    }
 
 }
 
@@ -119,38 +107,46 @@ impl Into<Vec<f64>> for DenseMatrix {
     }
 }
 
-impl Matrix for DenseMatrix {
+impl Matrix for DenseMatrix {
+
+    fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> DenseMatrix {
+        DenseMatrix::from_vec(nrows, ncols, Vec::from(values))
+    }
+
+    fn from_vec(nrows: usize, ncols: usize, values: Vec<f64>) -> DenseMatrix {
+        DenseMatrix {
+            ncols: ncols,
+            nrows: nrows,
+            values: values
+        }
+    }
 
     fn get(&self, row: usize, col: usize) -> f64 {
         self.values[col*self.nrows + row]
     }
 
+    fn set(&mut self, row: usize, col: usize, x: f64) {
+        self.values[col*self.nrows + row] = x;
+    }
+
     fn zeros(nrows: usize, ncols: usize) -> DenseMatrix {
         DenseMatrix::fill(nrows, ncols, 0f64)
     }
 
     fn ones(nrows: usize, ncols: usize) -> DenseMatrix {
         DenseMatrix::fill(nrows, ncols, 1f64)
-    }
+    }
 
-    fn from_vector<V: Vector>(v: &V, nrows: usize, ncols: usize) -> Self {
-        let (_, v_size) = v.shape();
-        if nrows * ncols != v_size {
-            panic!("Can't reshape {}-long vector into {}x{} matrix.", v_size, nrows, ncols);
+    fn to_raw_vector(&self) -> Vec<f64> {
+        let mut v = vec![0.; self.nrows * self.ncols];
+
+        for r in 0..self.nrows {
+            for c in 0..self.ncols {
+                v[r * self.ncols + c] = self.get(r, c);
+            }
         }
-        let mut dst = DenseMatrix::zeros(nrows, ncols);
-        let mut dst_r = 0;
-        let mut dst_c = 0;
-        for i in 0..v_size {
-            dst.set(dst_r, dst_c, v.get(i));
-            if dst_c + 1 >= ncols {
-                dst_c = 0;
-                dst_r += 1;
-            } else {
-                dst_c += 1;
-            }
-        }
-        dst
+
+        v
     }
 
     fn shape(&self) -> (usize, usize) {
 
@@ -212,6 +208,22 @@ impl Matrix for DenseMatrix {
         result
     }
 
+    fn vector_dot(&self, other: &Self) -> f64 {
+        if (self.nrows != 1 && self.ncols != 1) || (other.nrows != 1 && other.ncols != 1) {
+            panic!("A and B should both be 1-dimensional vectors.");
+        }
+        if self.nrows * self.ncols != other.nrows * other.ncols {
+            panic!("A and B should have the same size");
+        }
+
+        let mut result = 0f64;
+        for i in 0..(self.nrows * self.ncols) {
+            result += self.values[i] * other.values[i];
+        }
+
+        result
+    }
+
     fn slice(&self, rows: Range<usize>, cols: Range<usize>) -> DenseMatrix {
 
         let ncols = cols.len();
 
@@ -226,7 +238,7 @@ impl Matrix for DenseMatrix {
         }
 
         m
-    }
+    }
 
     fn qr_solve_mut(&mut self, mut b: DenseMatrix) -> DenseMatrix {
         let m = self.nrows;
 
@@ -943,6 +955,13 @@ impl Matrix for DenseMatrix {
 
     }
 
+    fn unique(&self) -> Vec<f64> {
+        let mut result = self.values.clone();
+        result.sort_by(|a, b| a.partial_cmp(b).unwrap());
+        result.dedup();
+        result
+    }
+
 }
 
 #[cfg(test)]
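
One subtlety in `DenseMatrix` worth flagging: the backing `values` buffer is column-major (see the `col*self.nrows + row` indexing in `get`/`set`), while the new `to_raw_vector` walks the matrix row by row and therefore returns row-major order. A small illustration, assuming `from_vec` stores the buffer as-is (which is what the implementation above does):

```rust
use crate::linalg::Matrix;
use crate::linalg::naive::dense_matrix::DenseMatrix;

fn raw_order_example() {
    // Column-major buffer: column 0 is [1, 3], column 1 is [2, 4].
    let m = DenseMatrix::from_vec(2, 2, vec![1., 3., 2., 4.]);
    assert_eq!(m.get(0, 1), 2.);

    // to_raw_vector re-reads element by element, row by row.
    assert_eq!(m.to_raw_vector(), vec![1., 2., 3., 4.]);
}
```

Consequently `get_raw_values()` and `to_raw_vector()` agree on element order only for single-row or single-column matrices, which is how the optimizers use them.
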
diff --git a/src/linalg/naive/dense_vector.rs b/src/linalg/naive/dense_vector.rs
deleted file mode 100644
index 0f0e3fe..0000000
--- a/src/linalg/naive/dense_vector.rs
+++ /dev/null
@@ -1,304 +0,0 @@
-use crate::linalg::{Vector, Matrix};
-use crate::math;
-use crate::linalg::naive::dense_matrix::DenseMatrix;
-
-#[derive(Debug, Clone)]
-pub struct DenseVector {
-
-    size: usize,
-    values: Vec<f64>
-
-}
-
-impl Into<Vec<f64>> for DenseVector {
-    fn into(self) -> Vec<f64> {
-        self.values
-    }
-}
-
-impl PartialEq for DenseVector {
-    fn eq(&self, other: &Self) -> bool {
-        if self.size != other.size {
-            return false
-        }
-
-        let len = self.values.len();
-        let other_len = other.values.len();
-
-        if len != other_len {
-            return false;
-        }
-
-        for i in 0..len {
-            if (self.values[i] - other.values[i]).abs() > math::EPSILON {
-                return false;
-            }
-        }
-
-        true
-    }
-}
-
-impl Vector for DenseVector {
-
-    fn from_array(values: &[f64]) -> Self {
-        DenseVector::from_vec(&Vec::from(values))
-    }
-
-    fn from_vec(values: &Vec<f64>) -> Self {
-        DenseVector {
-            size: values.len(),
-            values: values.clone()
-        }
-    }
-
-    fn get(&self, i: usize) -> f64 {
-        self.values[i]
-    }
-
-    fn set(&mut self, i: usize, value: f64) {
-        self.values[i] = value;
-    }
-
-    fn zeros(size: usize) -> Self {
-        DenseVector::fill(size, 0f64)
-    }
-
-    fn ones(size: usize) -> Self {
-        DenseVector::fill(size, 1f64)
-    }
-
-    fn fill(size: usize, value: f64) -> Self {
-        DenseVector::from_vec(&vec![value; size])
-    }
-
-    fn shape(&self) -> (usize, usize) {
-        (1, self.size)
-    }
-
-    fn add_mut(&mut self, other: &Self) -> &Self {
-        if self.size != other.size {
-            panic!("A and B should have the same shape");
-        }
-        for i in 0..self.size {
-            self.values[i] += other.values[i];
-        }
-
-        self
-    }
-
-    fn mul_mut(&mut self, other: &Self) -> &Self {
-        if self.size != other.size {
-            panic!("A and B should have the same shape");
-        }
-        for i in 0..self.size {
-            self.values[i] *= other.values[i];
-        }
-
-        self
-    }
-
-    fn sub_mut(&mut self, other: &Self) -> &Self {
-        if self.size != other.size {
-            panic!("A and B should have the same shape");
-        }
-        for i in 0..self.size {
-            self.values[i] -= other.values[i];
-        }
-
-        self
-    }
-
-    fn div_mut(&mut self, other: &Self) -> &Self {
-        if self.size != other.size {
-            panic!("A and B should have the same shape");
-        }
-        for i in 0..self.size {
-            self.values[i] /= other.values[i];
-        }
-
-        self
-    }
-
-    fn dot(&self, other: &Self) -> f64 {
-        if self.size != other.size {
-            panic!("A and B should be of the same size");
-        }
-
-        let mut result = 0f64;
-        for i in 0..self.size {
-            result += self.get(i) * other.get(i);
-        }
-
-        result
-    }
-
-    fn norm2(&self) -> f64 {
-        let mut norm = 0f64;
-
-        for xi in self.values.iter() {
-            norm += xi * xi;
-        }
-
-        norm.sqrt()
-    }
-
-    fn norm(&self, p:f64) -> f64 {
-
-        if p.is_infinite() && p.is_sign_positive() {
-            self.values.iter().map(|x| x.abs()).fold(std::f64::NEG_INFINITY, |a, b| a.max(b))
-        } else if p.is_infinite() && p.is_sign_negative() {
-            self.values.iter().map(|x| x.abs()).fold(std::f64::INFINITY, |a, b| a.min(b))
-        } else {
-
-            let mut norm = 0f64;
-
-            for xi in self.values.iter() {
-                norm += xi.abs().powf(p);
-            }
-
-            norm.powf(1.0/p)
-        }
-    }
-
-    fn add_scalar_mut(&mut self, scalar: f64) -> &Self {
-        for i in 0..self.values.len() {
-            self.values[i] += scalar;
-        }
-        self
-    }
-
-    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self {
-        for i in 0..self.values.len() {
-            self.values[i] -= scalar;
-        }
-        self
-    }
-
-    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self {
-        for i in 0..self.values.len() {
-            self.values[i] *= scalar;
-        }
-        self
-    }
-
-    fn div_scalar_mut(&mut self, scalar: f64) -> &Self {
-        for i in 0..self.values.len() {
-            self.values[i] /= scalar;
-        }
-        self
-    }
-
-    fn negative_mut(&mut self) -> &Self {
-        for i in 0..self.values.len() {
-            self.values[i] = -self.values[i];
-        }
-        self
-    }
-
-    fn abs_mut(&mut self) -> &Self{
-        for i in 0..self.values.len() {
-            self.values[i] = self.values[i].abs();
-        }
-        self
-    }
-
-    fn pow_mut(&mut self, p: f64) -> &Self{
-        for i in 0..self.values.len() {
-            self.values[i] = self.values[i].powf(p);
-        }
-        self
-    }
-
-    fn sum(&self) -> f64 {
-        let mut sum = 0.;
-        for i in 0..self.values.len() {
-            sum += self.values[i];
-        }
-        sum
-    }
-
-    fn negative(&self) -> Self {
-        let mut result = DenseVector {
-            size: self.size,
-            values: self.values.clone()
-        };
-        for i in 0..self.values.len() {
-            result.values[i] = -self.values[i];
-        }
-        result
-    }
-
-    fn copy_from(&mut self, other: &Self) {
-        for i in 0..self.values.len() {
-            self.values[i] = other.values[i];
-        }
-    }
-
-    fn max_diff(&self, other: &Self) -> f64{
-        let mut max_diff = 0f64;
-        for i in 0..self.values.len() {
-            max_diff = max_diff.max((self.values[i] - other.values[i]).abs());
-        }
-        max_diff
-
-    }
-
-    fn softmax_mut(&mut self) {
-        let max = self.values.iter().map(|x| x.abs()).fold(std::f64::NEG_INFINITY, |a, b| a.max(b));
-        let mut z = 0.;
-        for i in 0..self.size {
-            let p = (self.values[i] - max).exp();
-            self.values[i] = p;
-            z += p;
-        }
-        for i in 0..self.size {
-            self.values[i] /= z;
-        }
-    }
-
-    fn unique(&self) -> Vec<f64> {
-        let mut result = self.values.clone();
-        result.sort_by(|a, b| a.partial_cmp(b).unwrap());
-        result.dedup();
-        result
-    }
-
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn qr_solve_mut() {
-
-        let v = DenseVector::from_array(&[3., -2., 6.]);
-        assert_eq!(v.norm(1.), 11.);
-        assert_eq!(v.norm(2.), 7.);
-        assert_eq!(v.norm(std::f64::INFINITY), 6.);
-        assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.);
-    }
-
-    #[test]
-    fn copy_from() {
-
-        let mut a = DenseVector::from_array(&[0., 0., 0.]);
-        let b = DenseVector::from_array(&[-1., 0., 2.]);
-        a.copy_from(&b);
-        assert_eq!(a.get(0), b.get(0));
-        assert_eq!(a.get(1), b.get(1));
-        assert_eq!(a.get(2), b.get(2));
-    }
-
-    #[test]
-    fn softmax_mut() {
-
-        let mut prob = DenseVector::from_array(&[1., 2., 3.]);
-        prob.softmax_mut();
-        assert!((prob.get(0) - 0.09).abs() < 0.01);
-        assert!((prob.get(1) - 0.24).abs() < 0.01);
-        assert!((prob.get(2) - 0.66).abs() < 0.01);
-    }
-
-}
\ No newline at end of file
diff --git a/src/linalg/naive/mod.rs b/src/linalg/naive/mod.rs
index 0d5d309..54d0c2d 100644
--- a/src/linalg/naive/mod.rs
+++ b/src/linalg/naive/mod.rs
@@ -1,2 +1 @@
-pub mod dense_matrix;
-pub mod dense_vector;
\ No newline at end of file
+pub mod dense_matrix;
\ No newline at end of file
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 5a31356..0946afb 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -1,3 +1,46 @@
 pub mod distance;
 
-pub static EPSILON:f64 = 2.2204460492503131e-16_f64;
\ No newline at end of file
+pub static EPSILON:f64 = 2.2204460492503131e-16_f64;
+
+pub trait NumericExt {
+    fn ln_1pe(&self) -> f64;
+    fn sigmoid(&self) -> f64;
+}
+
+impl NumericExt for f64 {
+
+    fn ln_1pe(&self) -> f64 {
+        if *self > 15. {
+            *self
+        } else {
+            self.exp().ln_1p()
+        }
+    }
+
+    fn sigmoid(&self) -> f64 {
+        if *self < -40. {
+            0.
+        } else if *self > 40. {
+            1.
+        } else {
+            1. / (1. + f64::exp(-self))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sigmoid() {
+        assert_eq!(1.0.sigmoid(), 0.7310585786300049);
+        assert_eq!(41.0.sigmoid(), 1.);
+        assert_eq!((-41.0).sigmoid(), 0.);
+    }
+}
\ No newline at end of file
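
On the two magic numbers in `NumericExt`, as I read them: for x > 15, ln(1 + eˣ) = x + ln(1 + e⁻ˣ), and ln(1 + e⁻¹⁵) ≈ 3.1e-7, so returning x directly is a cheap, near-exact shortcut; for |x| > 40, e⁻⁴⁰ ≈ 4.2e-18 sits far below f64 machine epsilon, so the sigmoid saturates to exactly 0 or 1 anyway. A quick sanity check of both bounds (my numbers, not part of the diff):

```rust
fn cutoff_sanity() {
    // ln(1 + e^16) differs from 16 by ln(1 + e^-16), about 1.1e-7.
    assert!(((16f64).exp().ln_1p() - 16.).abs() < 1e-6);
    // e^-40 is below machine epsilon, so 1/(1 + e^-40) rounds to 1.0 in f64.
    assert!((-40f64).exp() < std::f64::EPSILON);
}
```
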
diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs
index 2cb5c42..4b7be7b 100644
--- a/src/optimization/first_order/gradient_descent.rs
+++ b/src/optimization/first_order/gradient_descent.rs
@@ -1,6 +1,6 @@
 use std::default::Default;
 use crate::math::EPSILON;
-use crate::linalg::Vector;
+use crate::linalg::Matrix;
 use crate::optimization::{F, DF};
 use crate::optimization::line_search::LineSearchMethod;
 use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
@@ -24,7 +24,7 @@ impl Default for GradientDescent {
 
 impl FirstOrderOptimizer for GradientDescent {
 
-    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
+    fn optimize<'a, X: Matrix, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
 
         let mut x = x0.clone();
         let mut fx = f(&x);
 
@@ -54,10 +54,10 @@ impl FirstOrderOptimizer for GradientDescent {
             let mut dg = gvec.clone();
             dx.mul_scalar_mut(alpha);
             df(&mut dg, &dx.add_mut(&x)); //df(x) = df(x .+ gvec .* alpha)
-            gvec.dot(&dg)
+            gvec.vector_dot(&dg)
         };
 
-        let df0 = step.dot(&gvec);
+        let df0 = step.vector_dot(&gvec);
 
         let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0);
         alpha = ls_r.alpha;
 
@@ -80,21 +80,21 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::linalg::naive::dense_vector::DenseVector;
+    use crate::linalg::naive::dense_matrix::DenseMatrix;
     use crate::optimization::line_search::Backtracking;
     use crate::optimization::FunctionOrder;
 
     #[test]
     fn gradient_descent() {
-        let x0 = DenseVector::from_array(&[-1., 1.]);
-        let f = |x: &DenseVector| {
-            (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.)
+        let x0 = DenseMatrix::vector_from_array(&[-1., 1.]);
+        let f = |x: &DenseMatrix| {
+            (1.0 - x.get(0, 0)).powf(2.) + 100.0 * (x.get(0, 1) - x.get(0, 0).powf(2.)).powf(2.)
         };
 
-        let df = |g: &mut DenseVector, x: &DenseVector| {
-            g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0));
-            g.set(1, 200. * (x.get(1) - x.get(0).powf(2.)));
+        let df = |g: &mut DenseMatrix, x: &DenseMatrix| {
+            g.set(0, 0, -2. * (1. - x.get(0, 0)) - 400. * (x.get(0, 1) - x.get(0, 0).powf(2.)) * x.get(0, 0));
+            g.set(0, 1, 200. * (x.get(0, 1) - x.get(0, 0).powf(2.)));
         };
 
         let mut ls: Backtracking = Default::default();
@@ -106,8 +106,8 @@ mod tests {
         println!("{:?}", result);
 
         assert!((result.f_x - 0.0).abs() < 1e-5);
-        assert!((result.x.get(0) - 1.0).abs() < 1e-2);
-        assert!((result.x.get(1) - 1.0).abs() < 1e-2);
+        assert!((result.x.get(0, 0) - 1.0).abs() < 1e-2);
+        assert!((result.x.get(0, 1) - 1.0).abs() < 1e-2);
     }
 
 }
diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs
index 906ad1c..d887ab8 100644
--- a/src/optimization/first_order/lbfgs.rs
+++ b/src/optimization/first_order/lbfgs.rs
@@ -1,5 +1,5 @@
 use std::default::Default;
-use crate::linalg::Vector;
+use crate::linalg::Matrix;
 use crate::optimization::{F, DF};
 use crate::optimization::line_search::LineSearchMethod;
 use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
@@ -35,7 +35,7 @@ impl Default for LBFGS {
 
 impl LBFGS {
 
-    fn two_loops<X: Vector>(&self, state: &mut LBFGSState<X>) {
+    fn two_loops<X: Matrix>(&self, state: &mut LBFGSState<X>) {
 
         let lower = state.iteration.max(self.m) - self.m;
         let upper = state.iteration;
 
@@ -46,7 +46,7 @@ impl LBFGS {
             let i = index.rem_euclid(self.m);
             let dgi = &state.dg_history[i];
             let dxi = &state.dx_history[i];
-            state.twoloop_alpha[i] = state.rho[i] * dxi.dot(&state.twoloop_q);
+            state.twoloop_alpha[i] = state.rho[i] * dxi.vector_dot(&state.twoloop_q);
             state.twoloop_q.sub_mut(&dgi.mul_scalar(state.twoloop_alpha[i]));
         }
 
@@ -54,7 +54,7 @@ impl LBFGS {
             let i = (upper - 1).rem_euclid(self.m);
             let dxi = &state.dx_history[i];
             let dgi = &state.dg_history[i];
-            let scaling = dxi.dot(dgi) / dgi.abs().pow_mut(2.).sum();
+            let scaling = dxi.vector_dot(dgi) / dgi.abs().pow_mut(2.).sum();
             state.s.copy_from(&state.twoloop_q.mul_scalar(scaling));
         } else {
             state.s.copy_from(&state.twoloop_q);
@@ -64,7 +64,7 @@ impl LBFGS {
             let i = index.rem_euclid(self.m);
             let dgi = &state.dg_history[i];
             let dxi = &state.dx_history[i];
-            let beta = state.rho[i] * dgi.dot(&state.s);
+            let beta = state.rho[i] * dgi.vector_dot(&state.s);
             state.s.add_mut(&dxi.mul_scalar(state.twoloop_alpha[i] - beta));
         }
 
@@ -72,7 +72,7 @@ impl LBFGS {
 
     }
 
-    fn init_state<X: Vector>(&self, x: &X) -> LBFGSState<X> {
+    fn init_state<X: Matrix>(&self, x: &X) -> LBFGSState<X> {
         LBFGSState {
             x: x.clone(),
             x_prev: x.clone(),
@@ -95,14 +95,14 @@ impl LBFGS {
         }
     }
 
-    fn update_state<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, ls: &'a LS, state: &mut LBFGSState<X>) {
+    fn update_state<'a, X: Matrix, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, ls: &'a LS, state: &mut LBFGSState<X>) {
 
         self.two_loops(state);
 
         df(&mut state.x_df_prev, &state.x);
         state.x_f_prev = f(&state.x);
         state.x_prev.copy_from(&state.x);
 
-        let df0 = state.x_df.dot(&state.s);
+        let df0 = state.x_df.vector_dot(&state.s);
 
         let f_alpha = |alpha: f64| -> f64 {
             let mut dx = state.s.clone();
@@ -115,7 +115,7 @@ impl LBFGS {
             let mut dg = state.x_df.clone();
             dx.mul_scalar_mut(alpha);
             df(&mut dg, &dx.add_mut(&state.x)); //df(x) = df(x .+ gvec .* alpha)
-            state.x_df.dot(&dg)
+            state.x_df.vector_dot(&dg)
         };
 
         let ls_r = ls.search(&f_alpha, &df_alpha, 1.0, state.x_f_prev, df0);
 
@@ -128,7 +128,7 @@ impl LBFGS {
 
    }
 
-    fn assess_convergence<X: Vector>(&self, state: &mut LBFGSState<X>) -> bool {
+    fn assess_convergence<X: Matrix>(&self, state: &mut LBFGSState<X>) -> bool {
 
         let (mut x_converged, mut g_converged) = (false, false);
 
         if state.x.max_diff(&state.x_prev) <= self.x_atol {
 
@@ -154,9 +154,9 @@ impl LBFGS {
         g_converged || x_converged || state.counter_f_tol > self.successive_f_tol
     }
 
-    fn update_hessian<'a, X: Vector>(&self, df: &'a DF<X>, state: &mut LBFGSState<X>) {
-        state.dg = state.x_df.sub(&state.x_df_prev);
-        let rho_iteration = 1. / state.dx.dot(&state.dg);
+    fn update_hessian<'a, X: Matrix>(&self, df: &'a DF<X>, state: &mut LBFGSState<X>) {
+        state.dg = state.x_df.sub(&state.x_df_prev);
+        let rho_iteration = 1. / state.dx.vector_dot(&state.dg);
         if !rho_iteration.is_infinite() {
             let idx = state.iteration.rem_euclid(self.m);
             state.dx_history[idx].copy_from(&state.dx);
@@ -167,7 +167,7 @@ impl LBFGS {
 }
 
 #[derive(Debug)]
-struct LBFGSState<X: Vector> {
+struct LBFGSState<X: Matrix> {
     x: X,
     x_prev: X,
     x_f: f64,
@@ -189,7 +189,7 @@ struct LBFGSState<X: Matrix> {
 
 impl FirstOrderOptimizer for LBFGS {
 
-    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
+    fn optimize<'a, X: Matrix, LS: LineSearchMethod>(&self, f: &F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
 
         let mut state = self.init_state(x0);
 
@@ -207,7 +207,7 @@ impl FirstOrderOptimizer for LBFGS {
 
             if !converged {
                 self.update_hessian(df, &mut state);
-            }
+            }
 
             state.iteration += 1;
 
@@ -226,33 +226,31 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::linalg::naive::dense_vector::DenseVector;
+    use crate::linalg::naive::dense_matrix::DenseMatrix;
     use crate::optimization::line_search::Backtracking;
     use crate::optimization::FunctionOrder;
     use crate::math::EPSILON;
 
     #[test]
     fn lbfgs() {
-        let x0 = DenseVector::from_array(&[0., 0.]);
-        let f = |x: &DenseVector| {
-            (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.)
+        let x0 = DenseMatrix::vector_from_array(&[0., 0.]);
+        let f = |x: &DenseMatrix| {
+            (1.0 - x.get(0, 0)).powf(2.) + 100.0 * (x.get(0, 1) - x.get(0, 0).powf(2.)).powf(2.)
         };
 
-        let df = |g: &mut DenseVector, x: &DenseVector| {
-            g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0));
-            g.set(1, 200. * (x.get(1) - x.get(0).powf(2.)));
+        let df = |g: &mut DenseMatrix, x: &DenseMatrix| {
+            g.set(0, 0, -2. * (1. - x.get(0, 0)) - 400. * (x.get(0, 1) - x.get(0, 0).powf(2.)) * x.get(0, 0));
+            g.set(0, 1, 200. * (x.get(0, 1) - x.get(0, 0).powf(2.)));
         };
 
         let mut ls: Backtracking = Default::default();
         ls.order = FunctionOrder::THIRD;
         let optimizer: LBFGS = Default::default();
 
-        let result = optimizer.optimize(&f, &df, &x0, &ls);
-
-        println!("result: {:?}", result);
+        let result = optimizer.optimize(&f, &df, &x0, &ls);
 
         assert!((result.f_x - 0.0).abs() < EPSILON);
-        assert!((result.x.get(0) - 1.0).abs() < 1e-8);
-        assert!((result.x.get(1) - 1.0).abs() < 1e-8);
+        assert!((result.x.get(0, 0) - 1.0).abs() < 1e-8);
+        assert!((result.x.get(0, 1) - 1.0).abs() < 1e-8);
         assert!(result.iterations <= 24);
     }
 }
\ No newline at end of file
diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs
index 5079c05..3ca2f6e 100644
--- a/src/optimization/first_order/mod.rs
+++ b/src/optimization/first_order/mod.rs
@@ -1,16 +1,16 @@
 pub mod lbfgs;
 pub mod gradient_descent;
 
-use crate::linalg::Vector;
+use crate::linalg::Matrix;
 use crate::optimization::line_search::LineSearchMethod;
 use crate::optimization::{F, DF};
 
 pub trait FirstOrderOptimizer {
-    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
+    fn optimize<'a, X: Matrix, LS: LineSearchMethod>(&self, f: &F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
 }
 
 #[derive(Debug, Clone)]
 pub struct OptimizerResult<X>
-where X: Vector
+where X: Matrix
 {
     pub x: X,
     pub f_x: f64,
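
Finally, a suggestion rather than part of this diff: now that both objectives sit behind the shared `ObjectiveFunction` trait, the analytic gradients can be validated against a central finite difference of `f`. A hypothetical sketch, which would have to live inside logistic_regression.rs's test module since the structs are private (test point, step, and tolerance are mine):

```rust
fn binary_gradient_check() {
    let x = DenseMatrix::from_2d_array(&[&[1., -5.], &[2., 5.], &[3., -2.]]);
    let objective = BinaryObjectiveFunction { x: &x, y: vec![0, 1, 1] };

    // Analytic gradient at w = (0.5, -0.25, bias 0.1).
    let w = DenseMatrix::vector_from_array(&[0.5, -0.25, 0.1]);
    let mut g = DenseMatrix::zeros(1, 3);
    objective.df(&mut g, &w);

    // Central finite difference along each coordinate of w.
    let eps = 1e-6;
    for j in 0..3 {
        let mut w_plus = w.clone();
        let mut w_minus = w.clone();
        w_plus.set(0, j, w.get(0, j) + eps);
        w_minus.set(0, j, w.get(0, j) - eps);
        let fd = (objective.f(&w_plus) - objective.f(&w_minus)) / (2. * eps);
        assert!((fd - g.get(0, j)).abs() < 1e-4);
    }
}
```
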