diff --git a/src/classification/logistic_regression.rs b/src/classification/logistic_regression.rs
new file mode 100644
index 0000000..a443d38
--- /dev/null
+++ b/src/classification/logistic_regression.rs
@@ -0,0 +1,256 @@
+use std::marker::PhantomData;
+use crate::linalg::{Matrix, Vector};
+use crate::optimization::FunctionOrder;
+use crate::optimization::first_order::FirstOrderOptimizer;
+use crate::optimization::line_search::Backtracking;
+use crate::optimization::first_order::lbfgs::LBFGS;
+
+#[derive(Debug)]
+pub struct LogisticRegression<M: Matrix, V: Vector> {
+    weights: M,
+    classes: Vec<f64>,
+    num_attributes: usize,
+    num_classes: usize,
+    v_phantom: PhantomData<V>
+}
+
+struct MultiClassObjectiveFunction<'a, M: Matrix> {
+    x: &'a M,
+    y: Vec<usize>,
+    k: usize
+}
+
+impl<'a, M: Matrix> MultiClassObjectiveFunction<'a, M> {
+
+    // negative log-likelihood of the softmax model
+    fn f<X: Vector>(&self, w: &X) -> f64 {
+        let mut f = 0.;
+        let mut prob = X::zeros(self.k);
+        let (n, p) = self.x.shape();
+        for i in 0..n {
+            for j in 0..self.k {
+                prob.set(j, MultiClassObjectiveFunction::dot(w, self.x, j * (p + 1), i));
+            }
+            prob.softmax_mut();
+            f -= prob.get(self.y[i]).ln();
+        }
+
+        f
+    }
+
+    fn df<X: Vector>(&self, g: &mut X, w: &X) {
+
+        // zero the gradient accumulator before summing over samples
+        g.copy_from(&X::zeros(g.shape().1));
+
+        let mut prob = X::zeros(self.k);
+        let (n, p) = self.x.shape();
+
+        for i in 0..n {
+            for j in 0..self.k {
+                prob.set(j, MultiClassObjectiveFunction::dot(w, self.x, j * (p + 1), i));
+            }
+
+            prob.softmax_mut();
+
+            for j in 0..self.k {
+                let yi = (if self.y[i] == j { 1.0 } else { 0.0 }) - prob.get(j);
+
+                for l in 0..p {
+                    let pos = j * (p + 1);
+                    g.set(pos + l, g.get(pos + l) - yi * self.x.get(i, l));
+                }
+                g.set(j * (p + 1) + p, g.get(j * (p + 1) + p) - yi);
+            }
+        }
+
+    }
+
+    // dot product of row `w_row` of m with the slice of v starting at `v_pos`, plus a bias term
+    fn dot<X: Vector>(v: &X, m: &M, v_pos: usize, w_row: usize) -> f64 {
+        let mut sum = 0f64;
+        let p = m.shape().1;
+        for i in 0..p {
+            sum += m.get(w_row, i) * v.get(i + v_pos);
+        }
+
+        sum + v.get(p + v_pos)
+    }
+
+}
+
+impl<M: Matrix, V: Vector> LogisticRegression<M, V> {
+
+    pub fn fit(x: &M, y: &V) -> LogisticRegression<M, V> {
+
+        let (x_nrows, num_attributes) = x.shape();
+        let (_, y_nrows) = y.shape();
+
+        if x_nrows != y_nrows {
+            panic!("Number of rows of X doesn't match number of rows of Y");
+        }
+
+        let classes = y.unique();
+
+        let k = classes.len();
+
+        let x0 = V::zeros((num_attributes + 1) * k);
+
+        let mut yi: Vec<usize> = vec![0; y_nrows];
+
+        for i in 0..y_nrows {
+            let yc = y.get(i);
+            yi[i] = classes.iter().position(|c| yc == *c).unwrap();
+        }
+
+        if k < 2 {
+
+            panic!("Incorrect number of classes: {}", k);
+
+        } else {
+
+            // the softmax objective also covers the binary case (k == 2)
+            let objective = MultiClassObjectiveFunction {
+                x: x,
+                y: yi,
+                k: k
+            };
+
+            let f = |w: &V| -> f64 {
+                objective.f(w)
+            };
+
+            let df = |g: &mut V, w: &V| {
+                objective.df(g, w)
+            };
+
+            let mut ls: Backtracking = Default::default();
+            ls.order = FunctionOrder::THIRD;
+            let optimizer: LBFGS = Default::default();
+
+            let result = optimizer.optimize(&f, &df, &x0, &ls);
+
+            let weights = M::from_vector(&result.x, k, num_attributes + 1);
+
+            LogisticRegression {
+                weights: weights,
+                classes: classes,
+                num_attributes: num_attributes,
+                num_classes: k,
+                v_phantom: PhantomData
+            }
+        }
+
+    }
+
+    pub fn predict(&self, x: &M) -> V {
+        let (nrows, _) = x.shape();
+        let x_and_bias = x.h_stack(&M::ones(nrows, 1));
+        let y_hat = x_and_bias.dot(&self.weights.transpose());
+        // argmax is invariant under softmax, so the raw scores are enough here
+        let class_idxs = y_hat.argmax();
+        V::from_vec(&class_idxs.iter().map(|class_idx| self.classes[*class_idx]).collect())
+    }
+
+    pub fn coefficients(&self) -> M {
+        self.weights.slice(0..self.num_classes, 0..self.num_attributes)
+    }
+
+    pub fn intercept(&self) -> M {
+        self.weights.slice(0..self.num_classes, self.num_attributes..self.num_attributes+1)
+    }
+
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::linalg::naive::dense_matrix::DenseMatrix;
+    use crate::linalg::naive::dense_vector::DenseVector;
+
+    #[test]
+    fn multiclass_objective_f() {
+
+        let x = DenseMatrix::from_2d_array(&[
+            &[ 1., -5.],
+            &[ 2.,  5.],
+            &[ 3., -2.],
+            &[ 1.,  2.],
+            &[ 2.,  0.],
+            &[ 6., -5.],
+            &[ 7.,  5.],
+            &[ 6., -2.],
+            &[ 7.,  2.],
+            &[ 6.,  0.],
+            &[ 8., -5.],
+            &[ 9.,  5.],
+            &[10., -2.],
+            &[ 8.,  2.],
+            &[ 9.,  0.]]);
+
+        let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
+
+        let objective = MultiClassObjectiveFunction {
+            x: &x,
+            y: y,
+            k: 3
+        };
+
+        let mut g = DenseVector::zeros(9);
+
+        // df zeroes g internally, so a second call must produce the same gradient
+        objective.df(&mut g, &DenseVector::from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
+        objective.df(&mut g, &DenseVector::from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
+
+        assert!((g.get(0) + 33.000068218163484).abs() < std::f64::EPSILON);
+
+        let f = objective.f(&DenseVector::from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]));
+
+        assert!((f - 408.0052230582765).abs() < std::f64::EPSILON);
+    }
+
+    #[test]
+    fn lr_fit_predict() {
+
+        let x = DenseMatrix::from_2d_array(&[
+            &[ 1., -5.],
+            &[ 2.,  5.],
+            &[ 3., -2.],
+            &[ 1.,  2.],
+            &[ 2.,  0.],
+            &[ 6., -5.],
+            &[ 7.,  5.],
+            &[ 6., -2.],
+            &[ 7.,  2.],
+            &[ 6.,  0.],
+            &[ 8., -5.],
+            &[ 9.,  5.],
+            &[10., -2.],
+            &[ 8.,  2.],
+            &[ 9.,  0.]]);
+        let y = DenseVector::from_array(&[0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.]);
+
+        let lr = LogisticRegression::fit(&x, &y);
+
+        assert_eq!(lr.coefficients().shape(), (3, 2));
+        assert_eq!(lr.intercept().shape(), (3, 1));
+
+        assert!((lr.coefficients().get(0, 0) - 0.0435).abs() < 1e-4);
+        assert!((lr.intercept().get(0, 0) - 0.1250).abs() < 1e-4);
+
+        let y_hat = lr.predict(&x);
+
+        assert_eq!(y_hat, DenseVector::from_array(&[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]));
+
+    }
+}
\ No newline at end of file
diff --git a/src/classification/mod.rs b/src/classification/mod.rs
index 89f03b5..0d14356 100644
--- a/src/classification/mod.rs
+++ b/src/classification/mod.rs
@@ -1,6 +1,7 @@
 use crate::common::Nominal;
 
 pub mod knn;
+pub mod logistic_regression;
 
 pub trait Classifier
 where
diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs
index adf0c96..9f3e11e 100644
--- a/src/linalg/mod.rs
+++ b/src/linalg/mod.rs
@@ -3,7 +3,7 @@ use std::fmt::Debug;
 
 pub mod naive;
 
-pub trait Matrix: Into<Vec<f64>> + Clone {
+pub trait Matrix: Into<Vec<f64>> + Clone + Debug {
 
     fn get(&self, row: usize, col: usize) -> f64;
 
@@ -15,9 +15,11 @@
 
     fn ones(nrows: usize, ncols: usize) -> Self;
 
+    fn from_vector<V: Vector>(v: &V, nrows: usize, ncols: usize) -> Self;
+
     fn fill(nrows: usize, ncols: usize, value: f64) -> Self;
 
-    fn shape(&self) -> (usize, usize);
+    fn shape(&self) -> (usize, usize);
 
     fn v_stack(&self, other: &Self) -> Self;
 
@@ -29,15 +31,69 @@
 
     fn approximate_eq(&self, other: &Self, error: f64) -> bool;
 
-    fn add_mut(&mut self, other: &Self);
+    fn add_mut(&mut self, other: &Self) -> &Self;
 
-    fn add_scalar_mut(&mut self, scalar: f64);
+    fn sub_mut(&mut self, other: &Self) -> &Self;
 
-    fn sub_scalar_mut(&mut self, scalar: f64);
+    fn mul_mut(&mut self, other: &Self) -> &Self;
 
-    fn mul_scalar_mut(&mut self, scalar: f64);
+    fn div_mut(&mut self, other: &Self) -> &Self;
 
-    fn div_scalar_mut(&mut self, scalar: f64);
+    fn add(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.add_mut(other);
+        r
+    }
+
+    fn sub(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.sub_mut(other);
+        r
+    }
+
+    fn mul(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.mul_mut(other);
+        r
+    }
+
+    fn div(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.div_mut(other);
+        r
+    }
+
+    fn add_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn div_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn add_scalar(&self, scalar: f64) -> Self {
+        let mut r = self.clone();
+        r.add_scalar_mut(scalar);
+        r
+    }
+
+    fn sub_scalar(&self, scalar: f64) -> Self {
+        let mut r = self.clone();
+        r.sub_scalar_mut(scalar);
+        r
+    }
+
+    fn mul_scalar(&self, scalar: f64) -> Self {
+        let mut r = self.clone();
+        r.mul_scalar_mut(scalar);
+        r
+    }
+
+    fn div_scalar(&self, scalar: f64) -> Self {
+        let mut r = self.clone();
+        r.div_scalar_mut(scalar);
+        r
+    }
 
     fn transpose(&self) -> Self;
 
@@ -47,12 +103,52 @@
 
     fn norm2(&self) -> f64;
 
+    fn norm(&self, p: f64) -> f64;
+
     fn negative_mut(&mut self);
 
+    fn negative(&self) -> Self {
+        let mut result = self.clone();
+        result.negative_mut();
+        result
+    }
+
+    fn reshape(&self, nrows: usize, ncols: usize) -> Self;
+
+    fn copy_from(&mut self, other: &Self);
+
+    fn abs_mut(&mut self) -> &Self;
+
+    fn abs(&self) -> Self {
+        let mut result = self.clone();
+        result.abs_mut();
+        result
+    }
+
+    fn sum(&self) -> f64;
+
+    fn max_diff(&self, other: &Self) -> f64;
+
+    fn softmax_mut(&mut self);
+
+    fn pow_mut(&mut self, p: f64) -> &Self;
+
+    fn pow(&self, p: f64) -> Self {
+        let mut result = self.clone();
+        result.pow_mut(p);
+        result
+    }
+
+    fn argmax(&self) -> Vec<usize>;
+
 }
 
 pub trait Vector: Into<Vec<f64>> + Clone + Debug {
 
+    fn from_array(values: &[f64]) -> Self;
+
+    fn from_vec(values: &Vec<f64>) -> Self;
+
     fn get(&self, i: usize) -> f64;
 
     fn set(&mut self, i: usize, value: f64);
 
@@ -153,6 +249,10 @@
         r
     }
 
-    fn max_diff(&self, other: &Self) -> f64;
+    fn max_diff(&self, other: &Self) -> f64;
+
+    fn softmax_mut(&mut self);
+
+    fn unique(&self) -> Vec<f64>;
 
 }
\ No newline at end of file
diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs
index 5a8bf94..405aca7 100644
--- a/src/linalg/naive/dense_matrix.rs
+++ b/src/linalg/naive/dense_matrix.rs
@@ -1,5 +1,5 @@
 use std::ops::Range;
-use crate::linalg::Matrix;
+use crate::linalg::{Matrix, Vector};
 use crate::math;
 
 use rand::prelude::*;
@@ -46,6 +46,18 @@ impl DenseMatrix {
         }
     }
 
+    pub fn vector_from_array(values: &[f64]) -> DenseMatrix {
+        DenseMatrix::vector_from_vec(Vec::from(values))
+    }
+
+    pub fn vector_from_vec(values: Vec<f64>) -> DenseMatrix {
+        DenseMatrix {
+            ncols: values.len(),
+            nrows: 1,
+            values: values
+        }
+    }
+
     pub fn div_mut(&mut self, b: DenseMatrix) -> () {
         if self.nrows != b.nrows || self.ncols != b.ncols {
             panic!("Can't divide matrices of different sizes.");
@@ -56,7 +68,7 @@ impl DenseMatrix {
         }
     }
 
-    fn set(&mut self, row: usize, col: usize, x: f64) {
+    pub fn set(&mut self, row: usize, col: usize, x: f64) {
         self.values[col*self.nrows + row] = x;
     }
 
@@ -121,6 +133,26 @@ impl Matrix for DenseMatrix {
         DenseMatrix::fill(nrows, ncols, 1f64)
     }
 
+    fn from_vector<V: Vector>(v: &V, nrows: usize, ncols: usize) -> Self {
+        let (_, v_size) = v.shape();
+        if nrows * ncols != v_size {
+            panic!("Can't reshape {}-long vector into {}x{} matrix.", v_size, nrows, ncols);
+        }
+        let mut dst = DenseMatrix::zeros(nrows, ncols);
+        let mut dst_r = 0;
+        let mut dst_c = 0;
+        for i in 0..v_size {
+            dst.set(dst_r, dst_c, v.get(i));
+            if dst_c + 1 >= ncols {
+                dst_c = 0;
+                dst_r += 1;
+            } else {
+                dst_c += 1;
+            }
+        }
+        dst
+    }
+
     fn shape(&self) -> (usize, usize) {
         (self.nrows, self.ncols)
     }
 
@@ -160,6 +192,7 @@ impl Matrix for DenseMatrix {
     }
 
     fn dot(&self, other: &Self) -> Self {
+
         if self.ncols != other.nrows {
             panic!("Number of rows of A should equal number of columns of B");
         }
 
@@ -663,7 +696,7 @@ impl Matrix for DenseMatrix {
         DenseMatrix::from_vec(nrows, ncols, vec![value; ncols * nrows])
     }
 
-    fn add_mut(&mut self, other: &Self) {
+    fn add_mut(&mut self, other: &Self) -> &Self {
         if self.ncols != other.ncols || self.nrows != other.nrows {
             panic!("A and B should have the same shape");
         }
@@ -672,6 +705,47 @@ impl Matrix for DenseMatrix {
                 self.add_element_mut(r, c, other.get(r, c));
             }
         }
+
+        self
+    }
+
+    fn sub_mut(&mut self, other: &Self) -> &Self {
+        if self.ncols != other.ncols || self.nrows != other.nrows {
+            panic!("A and B should have the same shape");
+        }
+        for c in 0..self.ncols {
+            for r in 0..self.nrows {
+                self.sub_element_mut(r, c, other.get(r, c));
+            }
+        }
+
+        self
+    }
+
+    fn mul_mut(&mut self, other: &Self) -> &Self {
+        if self.ncols != other.ncols || self.nrows != other.nrows {
+            panic!("A and B should have the same shape");
+        }
+        for c in 0..self.ncols {
+            for r in 0..self.nrows {
+                self.mul_element_mut(r, c, other.get(r, c));
+            }
+        }
+
+        self
+    }
+
+    fn div_mut(&mut self, other: &Self) -> &Self {
+        if self.ncols != other.ncols || self.nrows != other.nrows {
+            panic!("A and B should have the same shape");
+        }
+        for c in 0..self.ncols {
+            for r in 0..self.nrows {
+                self.div_element_mut(r, c, other.get(r, c));
+            }
+        }
+
+        self
     }
 
     fn generate_positive_definite(nrows: usize, ncols: usize) -> Self {
@@ -716,34 +790,157 @@ impl Matrix for DenseMatrix {
         norm.sqrt()
     }
 
-    fn add_scalar_mut(&mut self, scalar: f64) {
+    fn norm(&self, p: f64) -> f64 {
+
+        if p.is_infinite() && p.is_sign_positive() {
+            self.values.iter().map(|x| x.abs()).fold(std::f64::NEG_INFINITY, |a, b| a.max(b))
+        } else if p.is_infinite() && p.is_sign_negative() {
+            self.values.iter().map(|x| x.abs()).fold(std::f64::INFINITY, |a, b| a.min(b))
+        } else {
+
+            let mut norm = 0f64;
+
+            for xi in self.values.iter() {
+                norm += xi.abs().powf(p);
+            }
+
+            norm.powf(1.0/p)
+        }
+    }
+
+    fn add_scalar_mut(&mut self, scalar: f64) -> &Self {
         for i in 0..self.values.len() {
             self.values[i] += scalar;
         }
+        self
     }
 
-    fn sub_scalar_mut(&mut self, scalar: f64) {
+    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self {
         for i in 0..self.values.len() {
             self.values[i] -= scalar;
         }
+        self
     }
 
-    fn mul_scalar_mut(&mut self, scalar: f64) {
+    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self {
         for i in 0..self.values.len() {
             self.values[i] *= scalar;
         }
+        self
     }
 
-    fn div_scalar_mut(&mut self, scalar: f64) {
+    fn div_scalar_mut(&mut self, scalar: f64) -> &Self {
         for i in 0..self.values.len() {
             self.values[i] /= scalar;
         }
+        self
     }
 
     fn negative_mut(&mut self) {
         for i in 0..self.values.len() {
             self.values[i] = -self.values[i];
         }
     }
+
+    fn reshape(&self, nrows: usize, ncols: usize) -> Self {
+        if self.nrows * self.ncols != nrows * ncols {
+            panic!("Can't reshape {}x{} matrix into {}x{}.", self.nrows, self.ncols, nrows, ncols);
+        }
+        let mut dst = DenseMatrix::zeros(nrows, ncols);
+        let mut dst_r = 0;
+        let mut dst_c = 0;
+        for r in 0..self.nrows {
+            for c in 0..self.ncols {
+                dst.set(dst_r, dst_c, self.get(r, c));
+                if dst_c + 1 >= ncols {
+                    dst_c = 0;
+                    dst_r += 1;
+                } else {
+                    dst_c += 1;
+                }
+            }
+        }
+        dst
+    }
+
+    fn copy_from(&mut self, other: &Self) {
+
+        if self.nrows != other.nrows || self.ncols != other.ncols {
+            panic!("Can't copy {}x{} matrix into {}x{}.", other.nrows, other.ncols, self.nrows, self.ncols);
+        }
+
+        for i in 0..self.values.len() {
+            self.values[i] = other.values[i];
+        }
+    }
+
+    fn abs_mut(&mut self) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] = self.values[i].abs();
+        }
+        self
+    }
+
+    fn max_diff(&self, other: &Self) -> f64 {
+        let mut max_diff = 0f64;
+        for i in 0..self.values.len() {
+            max_diff = max_diff.max((self.values[i] - other.values[i]).abs());
+        }
+        max_diff
+    }
+
+    fn sum(&self) -> f64 {
+        let mut sum = 0.;
+        for i in 0..self.values.len() {
+            sum += self.values[i];
+        }
+        sum
+    }
+
+    fn softmax_mut(&mut self) {
+        // subtract the largest value (not the largest magnitude) for numerical stability
+        let max = self.values.iter().fold(std::f64::NEG_INFINITY, |a, b| a.max(*b));
+        let mut z = 0.;
+        for r in 0..self.nrows {
+            for c in 0..self.ncols {
+                let p = (self.get(r, c) - max).exp();
+                self.set(r, c, p);
+                z += p;
+            }
+        }
+        for r in 0..self.nrows {
+            for c in 0..self.ncols {
+                self.set(r, c, self.get(r, c) / z);
+            }
+        }
+    }
+
+    fn pow_mut(&mut self, p: f64) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] = self.values[i].powf(p);
+        }
+        self
+    }
+
+    fn argmax(&self) -> Vec<usize> {
+
+        let mut res = vec![0usize; self.nrows];
+
+        // position of the largest value in each row
+        for r in 0..self.nrows {
+            let mut max = std::f64::NEG_INFINITY;
+            let mut max_pos = 0usize;
+            for c in 0..self.ncols {
+                let v = self.get(r, c);
+                if max < v {
+                    max = v;
+                    max_pos = c;
+                }
+            }
+            res[r] = max_pos;
+        }
+
+        res
+    }
 }
 
@@ -899,5 +1096,35 @@ mod tests {
 
         let m = DenseMatrix::generate_positive_definite(3, 3);
     }
 
-}
+    #[test]
+    fn reshape() {
+        let m_orig = DenseMatrix::vector_from_array(&[1., 2., 3., 4., 5., 6.]);
+        let m_2_by_3 = m_orig.reshape(2, 3);
+        let m_result = m_2_by_3.reshape(1, 6);
+        assert_eq!(m_2_by_3.shape(), (2, 3));
+        assert_eq!(m_2_by_3.get(1, 1), 5.);
+        assert_eq!(m_result.get(0, 1), 2.);
+        assert_eq!(m_result.get(0, 3), 4.);
+    }
+
+    #[test]
+    fn norm() {
+
+        let v = DenseMatrix::vector_from_array(&[3., -2., 6.]);
+        assert_eq!(v.norm(1.), 11.);
+        assert_eq!(v.norm(2.), 7.);
+        assert_eq!(v.norm(std::f64::INFINITY), 6.);
+        assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.);
+    }
+
+    #[test]
+    fn softmax_mut() {
+
+        let mut prob = DenseMatrix::vector_from_array(&[1., 2., 3.]);
+        prob.softmax_mut();
+        assert!((prob.get(0, 0) - 0.09).abs() < 0.01);
+        assert!((prob.get(0, 1) - 0.24).abs() < 0.01);
+        assert!((prob.get(0, 2) - 0.66).abs() < 0.01);
+    }
+
+}
diff --git a/src/linalg/naive/dense_vector.rs b/src/linalg/naive/dense_vector.rs
index b056d7d..0f0e3fe 100644
--- a/src/linalg/naive/dense_vector.rs
+++ b/src/linalg/naive/dense_vector.rs
@@ -1,4 +1,6 @@
-use crate::linalg::Vector;
+use crate::linalg::{Vector, Matrix};
+use crate::math;
+use crate::linalg::naive::dense_matrix::DenseMatrix;
 
 #[derive(Debug, Clone)]
 pub struct DenseVector {
@@ -8,29 +10,48 @@ pub struct DenseVector {
 
 }
 
-impl DenseVector {
-
-    pub fn from_array(values: &[f64]) -> DenseVector {
-        DenseVector::from_vec(Vec::from(values))
-    }
-
-    pub fn from_vec(values: Vec<f64>) -> DenseVector {
-        DenseVector {
-            size: values.len(),
-            values: values
-        }
-    }
-
-}
-
 impl Into<Vec<f64>> for DenseVector {
     fn into(self) -> Vec<f64> {
         self.values
     }
 }
 
+impl PartialEq for DenseVector {
+    fn eq(&self, other: &Self) -> bool {
+        if self.size != other.size {
+            return false;
+        }
+
+        // element-wise comparison with a small tolerance
+        for i in 0..self.size {
+            if (self.values[i] - other.values[i]).abs() > math::EPSILON {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
 impl Vector for DenseVector {
 
+    fn from_array(values: &[f64]) -> Self {
+        DenseVector::from_vec(&Vec::from(values))
+    }
+
+    fn from_vec(values: &Vec<f64>) -> Self {
+        DenseVector {
+            size: values.len(),
+            values: values.clone()
+        }
+    }
+
     fn get(&self, i: usize) -> f64 {
         self.values[i]
     }
 
@@ -48,7 +69,7 @@ impl Vector for DenseVector {
     }
 
     fn fill(size: usize, value: f64) -> Self {
-        DenseVector::from_vec(vec![value; size])
+        DenseVector::from_vec(&vec![value; size])
     }
 
     fn shape(&self) -> (usize, usize) {
 
@@ -223,6 +244,26 @@ impl Vector for DenseVector {
 
     }
 
+    fn softmax_mut(&mut self) {
+        // subtract the largest value (not the largest magnitude) for numerical stability
+        let max = self.values.iter().fold(std::f64::NEG_INFINITY, |a, b| a.max(*b));
+        let mut z = 0.;
+        for i in 0..self.size {
+            let p = (self.values[i] - max).exp();
+            self.values[i] = p;
+            z += p;
+        }
+        for i in 0..self.size {
+            self.values[i] /= z;
+        }
+    }
+
+    fn unique(&self) -> Vec<f64> {
+        let mut result = self.values.clone();
+        result.sort_by(|a, b| a.partial_cmp(b).unwrap());
+        result.dedup();
+        result
+    }
+
 }
 
 #[cfg(test)]
@@ -250,4 +291,14 @@ mod tests {
 
         assert_eq!(a.get(2), b.get(2));
     }
 
+    #[test]
+    fn softmax_mut() {
+
+        let mut prob = DenseVector::from_array(&[1., 2., 3.]);
+        prob.softmax_mut();
+        assert!((prob.get(0) - 0.09).abs() < 0.01);
+        assert!((prob.get(1) - 0.24).abs() < 0.01);
+        assert!((prob.get(2) - 0.66).abs() < 0.01);
+    }
+
 }
\ No newline at end of file
diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs
index 049cfa8..2cb5c42 100644
--- a/src/optimization/first_order/gradient_descent.rs
+++ b/src/optimization/first_order/gradient_descent.rs
@@ -38,7 +38,7 @@ impl FirstOrderOptimizer for GradientDescent
         let mut alpha = 1.0;
 
         df(&mut gvec, &x);
 
-        while iter < self.max_iter && gnorm > gtol {
+        while iter < self.max_iter && (iter == 0 || gnorm > gtol) {
             iter += 1;
 
             let mut step = gvec.negative();
@@ -102,10 +102,12 @@ mod tests {
         let optimizer: GradientDescent = Default::default();
 
         let result = optimizer.optimize(&f, &df, &x0, &ls);
+
+        println!("{:?}", result);
 
-        assert!((result.f_x - 0.0).abs() < EPSILON);
-        assert!((result.x.get(0) - 1.0).abs() < EPSILON);
-        assert!((result.x.get(1) - 1.0).abs() < EPSILON);
+        assert!((result.f_x - 0.0).abs() < 1e-5);
+        assert!((result.x.get(0) - 1.0).abs() < 1e-2);
+        assert!((result.x.get(1) - 1.0).abs() < 1e-2);
 
     }
 
diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs
index 23e074a..906ad1c 100644
--- a/src/optimization/first_order/lbfgs.rs
+++ b/src/optimization/first_order/lbfgs.rs
@@ -3,6 +3,7 @@ use crate::linalg::Vector;
 use crate::optimization::{F, DF};
 use crate::optimization::line_search::LineSearchMethod;
 use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
+use std::fmt::Debug;
 
 pub struct LBFGS {
     pub max_iter: usize,
@@ -37,37 +38,37 @@ impl LBFGS {
 
     fn two_loops<X: Vector>(&self, state: &mut LBFGSState<X>) {
 
         let lower = state.iteration.max(self.m) - self.m;
-        let upper = state.iteration;
+        let upper = state.iteration;
 
-        state.twoloop_q.copy_from(&state.dx);
+        state.twoloop_q.copy_from(&state.x_df);
 
         for index in (lower..upper).rev() {
-            let i = index.rem_euclid(self.m);
+            let i = index.rem_euclid(self.m);
             let dgi = &state.dg_history[i];
             let dxi = &state.dx_history[i];
             state.twoloop_alpha[i] = state.rho[i] * dxi.dot(&state.twoloop_q);
-            state.twoloop_q.sub_mut(&dgi.mul_scalar(state.twoloop_alpha[i]));
-        }
+            state.twoloop_q.sub_mut(&dgi.mul_scalar(state.twoloop_alpha[i]));
+        }
 
         if state.iteration > 0 {
-            let i = (upper - 1).rem_euclid(self.m);
+            let i = (upper - 1).rem_euclid(self.m);
             let dxi = &state.dx_history[i];
             let dgi = &state.dg_history[i];
-            let scaling = dxi.dot(dgi) / dgi.abs().pow_mut(2.).sum();
+            let scaling = dxi.dot(dgi) / dgi.abs().pow_mut(2.).sum();
             state.s.copy_from(&state.twoloop_q.mul_scalar(scaling));
         } else {
             state.s.copy_from(&state.twoloop_q);
-        }
+        }
 
         for index in lower..upper {
-            let i = index.rem_euclid(self.m);
+            let i = index.rem_euclid(self.m);
             let dgi = &state.dg_history[i];
             let dxi = &state.dx_history[i];
             let beta = state.rho[i] * dgi.dot(&state.s);
             state.s.add_mut(&dxi.mul_scalar(state.twoloop_alpha[i] - beta));
-        }
+        }
 
-        state.s.mul_scalar_mut(-1.);
+        state.s.mul_scalar_mut(-1.);
 
     }
 
@@ -75,14 +76,16 @@
         LBFGSState {
             x: x.clone(),
             x_prev: x.clone(),
-            fx: std::f64::NAN,
-            g_prev: x.clone(),
+            x_f: std::f64::NAN,
+            x_f_prev: std::f64::NAN,
+            x_df: x.clone(),
+            x_df_prev: x.clone(),
             rho: vec![0.; self.m],
             dx_history: vec![x.clone(); self.m],
             dg_history: vec![x.clone(); self.m],
             dx: x.clone(),
             dg: x.clone(),
-            fx_prev: std::f64::NAN,
+
             twoloop_q: x.clone(),
             twoloop_alpha: vec![0.; self.m],
             iteration: 0,
@@ -92,18 +95,15 @@
         }
    }
 
-    fn update_state<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, ls: &'a LS, state: &mut LBFGSState<X>) {
-        df(&mut state.dx, &state.x);
+    fn update_state<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, ls: &'a LS, state: &mut LBFGSState<X>) {
+        self.two_loops(state);
 
-        self.two_loops(state);
-
-        df(&mut state.g_prev, &state.x);
-
-        let df0 = state.dx.dot(&state.s);
-
-        state.fx_prev = f(&state.x);
+        df(&mut state.x_df_prev, &state.x);
+        state.x_f_prev = f(&state.x);
         state.x_prev.copy_from(&state.x);
 
+        let df0 = state.x_df.dot(&state.s);
+
         let f_alpha = |alpha: f64| -> f64 {
             let mut dx = state.s.clone();
             dx.mul_scalar_mut(alpha);
@@ -112,17 +112,20 @@
 
         let df_alpha = |alpha: f64| -> f64 {
             let mut dx = state.s.clone();
-            let mut dg = state.dx.clone();
+            let mut dg = state.x_df.clone();
             dx.mul_scalar_mut(alpha);
             df(&mut dg, &dx.add_mut(&state.x)); //df(x) = df(x .+ gvec .* alpha)
-            state.dx.dot(&dg)
+            state.x_df.dot(&dg)
         };
 
-        let ls_r = ls.search(&f_alpha, &df_alpha, 1.0, state.fx_prev, df0);
-        state.alpha = ls_r.alpha;
+        let ls_r = ls.search(&f_alpha, &df_alpha, 1.0, state.x_f_prev, df0);
+        state.alpha = ls_r.alpha;
 
         state.dx.copy_from(state.s.mul_scalar_mut(state.alpha));
         state.x.add_mut(&state.dx);
+        state.x_f = f(&state.x);
+        df(&mut state.x_df, &state.x);
+
     }
 
@@ -136,46 +139,46 @@
             x_converged = true;
         }
 
-        if (state.fx - state.fx_prev).abs() <= self.f_abstol {
+        if (state.x_f - state.x_f_prev).abs() <= self.f_abstol {
             state.counter_f_tol += 1;
         }
 
-        if (state.fx - state.fx_prev).abs() <= self.f_reltol * state.fx.abs() {
+        if (state.x_f - state.x_f_prev).abs() <= self.f_reltol * state.x_f.abs() {
             state.counter_f_tol += 1;
         }
 
-        if state.dx.norm(std::f64::INFINITY) <= self.g_atol {
+        if state.x_df.norm(std::f64::INFINITY) <= self.g_atol {
             g_converged = true;
-        }
+        }
 
         g_converged || x_converged || state.counter_f_tol > self.successive_f_tol
     }
 
-    fn update_hessian<'a, X: Vector>(&self, df: &'a DF<X>, state: &mut LBFGSState<X>) {
-        let mut dx = state.dx.clone();
-        df(&mut dx, &state.x);
-        state.dg = dx.sub(&state.g_prev);
+    fn update_hessian<'a, X: Vector>(&self, df: &'a DF<X>, state: &mut LBFGSState<X>) {
+        state.dg = state.x_df.sub(&state.x_df_prev);
 
         let rho_iteration = 1. / state.dx.dot(&state.dg);
 
         if !rho_iteration.is_infinite() {
-            let idx = state.iteration.rem_euclid(self.m);
+            let idx = state.iteration.rem_euclid(self.m);
             state.dx_history[idx].copy_from(&state.dx);
-            state.dg_history[idx].copy_from(&state.dg);
+            state.dg_history[idx].copy_from(&state.dg);
             state.rho[idx] = rho_iteration;
         }
     }
 }
 
+#[derive(Debug)]
 struct LBFGSState<X: Vector> {
     x: X,
     x_prev: X,
-    fx: f64,
-    g_prev: X,
+    x_f: f64,
+    x_f_prev: f64,
+    x_df: X,
+    x_df_prev: X,
     rho: Vec<f64>,
     dx_history: Vec<X>,
     dg_history: Vec<X>,
     dx: X,
-    dg: X,
-    fx_prev: f64,
+    dg: X,
     twoloop_q: X,
     twoloop_alpha: Vec<f64>,
     iteration: usize,
@@ -186,35 +189,33 @@
 
 impl FirstOrderOptimizer for LBFGS {
 
-    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
+    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
 
         let mut state = self.init_state(x0);
 
-        df(&mut state.dx, &x0);
+        df(&mut state.x_df, &x0);
 
-        let g_converged = state.dx.norm(std::f64::INFINITY) < self.g_atol;
+        let g_converged = state.x_df.norm(std::f64::INFINITY) < self.g_atol;
         let mut converged = g_converged;
         let stopped = false;
 
-        while !converged && !stopped && state.iteration < self.max_iter {
+        while !converged && !stopped && state.iteration < self.max_iter {
 
-            self.update_state(f, df, ls, &mut state);
-
-            state.fx = f(&state.x);
+            self.update_state(f, df, ls, &mut state);
 
             converged = self.assess_convergence(&mut state);
 
             if !converged {
                 self.update_hessian(df, &mut state);
-            }
+            }
 
-            state.iteration += 1;
+            state.iteration += 1;
         }
 
         OptimizerResult{
             x: state.x,
-            f_x: state.fx,
+            f_x: state.x_f,
             iterations: state.iteration
         }
 
@@ -245,7 +246,9 @@ mod tests {
         ls.order = FunctionOrder::THIRD;
         let optimizer: LBFGS = Default::default();
 
-        let result = optimizer.optimize(&f, &df, &x0, &ls);
+        let result = optimizer.optimize(&f, &df, &x0, &ls);
+
+        println!("result: {:?}", result);
 
         assert!((result.f_x - 0.0).abs() < EPSILON);
         assert!((result.x.get(0) - 1.0).abs() < 1e-8);
diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs
index 2225835..5079c05 100644
--- a/src/optimization/first_order/mod.rs
+++ b/src/optimization/first_order/mod.rs
@@ -5,7 +5,7 @@ use crate::optimization::line_search::LineSearchMethod;
 use crate::optimization::{F, DF};
 
 pub trait FirstOrderOptimizer {
-    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
+    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
 }
 
 #[derive(Debug, Clone)]
diff --git a/src/optimization/line_search.rs b/src/optimization/line_search.rs
index dba3348..bd53b1e 100644
--- a/src/optimization/line_search.rs
+++ b/src/optimization/line_search.rs
@@ -38,7 +38,7 @@ impl LineSearchMethod for Backtracking {
     fn search<'a>(&self, f: &(dyn Fn(f64) -> f64), _: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult {
 
         let (mut a1, mut a2) = (alpha, alpha);
-        let (mut fx0, mut fx1) = (f0, f(a1));
+        let (mut fx0, mut fx1) = (f0, f(a1));
 
         let mut iterfinite = 0;
 
         while !fx1.is_finite() && iterfinite < self.max_infinity_iterations {
@@ -58,26 +58,21 @@ impl LineSearchMethod for Backtracking {
 
             let a_tmp;
 
-            match self.order {
+            if self.order == FunctionOrder::SECOND || iteration == 0 {
 
-                FunctionOrder::FIRST | FunctionOrder::SECOND => {
-                    a_tmp = - (df0 * a2.powf(2.)) / (2. * (fx1 - f0 - df0*a2))
-                },
+                a_tmp = - (df0 * a2.powf(2.)) / (2. * (fx1 - f0 - df0*a2))
 
-                FunctionOrder::THIRD => {
+            } else {
 
-                    let div = 1. / (a1.powf(2.) * a2.powf(2.) * (a2 - a1));
-                    let a = (a1.powf(2.) * (fx1 - f0 - df0*a2) - a2.powf(2.)*(fx0 - f0 - df0*a1))*div;
-                    let b = (-a1.powf(3.) * (fx1 - f0 - df0*a2) + a2.powf(3.)*(fx0 - f0 - df0*a1))*div;
+                let div = 1. / (a1.powf(2.) * a2.powf(2.) * (a2 - a1));
+                let a = (a1.powf(2.) * (fx1 - f0 - df0*a2) - a2.powf(2.)*(fx0 - f0 - df0*a1))*div;
+                let b = (-a1.powf(3.) * (fx1 - f0 - df0*a2) + a2.powf(3.)*(fx0 - f0 - df0*a1))*div;
 
-                    if (a - 0.).powf(2.).sqrt() <= EPSILON {
-                        a_tmp = df0 / (2. * b);
-                    } else {
-                        let d = f64::max(b.powf(2.) - 3. * a * df0, 0.);
-                        a_tmp = (-b + d.sqrt()) / (3.*a); //root of quadratic equation
-                    }
+                if a.abs() <= EPSILON {
+                    a_tmp = df0 / (2. * b);
+                } else {
+                    let d = f64::max(b.powf(2.) - 3. * a * df0, 0.);
+                    a_tmp = (-b + d.sqrt()) / (3.*a); //root of the quadratic equation
                 }
             }
 
@@ -85,7 +80,7 @@ impl LineSearchMethod for Backtracking {
 
             a2 = f64::max(f64::min(a_tmp, a2*self.phi), a2*self.plo);
             fx0 = fx1;
-            fx1 = f(a2);
+            fx1 = f(a2);
 
             iteration += 1;
         }
diff --git a/src/optimization/mod.rs b/src/optimization/mod.rs
index ce827cc..157a6ca 100644
--- a/src/optimization/mod.rs
+++ b/src/optimization/mod.rs
@@ -1,12 +1,12 @@
 pub mod first_order;
 pub mod line_search;
 
-use crate::linalg::Vector;
+use crate::linalg::Matrix;
 
-type F<X: Vector> = dyn Fn(&X) -> f64;
-type DF<X: Vector> = dyn Fn(&mut X, &X);
+pub type F<'a, X: Matrix> = dyn for<'b> Fn(&'b X) -> f64 + 'a;
+pub type DF<'a, X: Matrix> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum FunctionOrder {
     FIRST,
     SECOND,
diff --git a/src/regression/linear_regression.rs b/src/regression/linear_regression.rs
index fb5aaaf..4b42f98 100644
--- a/src/regression/linear_regression.rs
+++ b/src/regression/linear_regression.rs
@@ -63,7 +63,7 @@ mod tests {
     use crate::linalg::naive::dense_matrix::DenseMatrix;
 
     #[test]
-    fn knn_fit_predict() {
+    fn ols_fit_predict() {
 
         let x = DenseMatrix::from_2d_array(&[
             &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],