diff --git a/Cargo.toml b/Cargo.toml
index 8c87ed2..bdd9631 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ edition = "2018"
 ndarray = "0.12.1"
 ndarray-linalg = "0.10"
 num-traits = "0.2"
+rand = "0.7.2"
 
 [dev-dependencies]
 ndarray = "0.12.1"
diff --git a/src/lib.rs b/src/lib.rs
index a887a67..cc66764 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,4 +4,5 @@ pub mod linalg;
 pub mod math;
 pub mod error;
 pub mod algorithm;
-pub mod common;
\ No newline at end of file
+pub mod common;
+pub mod optimization;
\ No newline at end of file
diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs
index 4f708e1..5f57446 100644
--- a/src/linalg/mod.rs
+++ b/src/linalg/mod.rs
@@ -1,4 +1,5 @@
 use std::ops::Range;
+use std::fmt::Debug;
 
 pub mod naive;
 
@@ -30,4 +31,56 @@ pub trait Matrix: Into<Vec<f64>> + Clone {
 
     fn add_mut(&mut self, other: &Self);
 
-}
\ No newline at end of file
+    fn add_scalar_mut(&mut self, scalar: f64);
+
+    fn sub_scalar_mut(&mut self, scalar: f64);
+
+    fn mul_scalar_mut(&mut self, scalar: f64);
+
+    fn div_scalar_mut(&mut self, scalar: f64);
+
+    fn transpose(&self) -> Self;
+
+    fn generate_positive_definite(nrows: usize, ncols: usize) -> Self;
+
+    fn rand(nrows: usize, ncols: usize) -> Self;
+
+    fn norm2(&self) -> f64;
+
+    fn negative_mut(&mut self);
+
+}
+
+pub trait Vector: Into<Vec<f64>> + Clone + Debug {
+
+    fn get(&self, i: usize) -> f64;
+
+    fn set(&mut self, i: usize, value: f64);
+
+    fn zeros(size: usize) -> Self;
+
+    fn ones(size: usize) -> Self;
+
+    fn fill(size: usize, value: f64) -> Self;
+
+    fn shape(&self) -> (usize, usize);
+
+    fn norm2(&self) -> f64;
+
+    fn negative_mut(&mut self) -> &Self;
+
+    fn negative(&self) -> Self;
+
+    fn add_mut(&mut self, other: &Self) -> &Self;
+
+    fn add_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn div_scalar_mut(&mut self, scalar: f64) -> &Self;
+
+    fn dot(&self, other: &Self) -> f64;
+
+}
\ No newline at end of file
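Reviewer note: the new trait surface is easier to evaluate with a usage sketch. The snippet below is illustrative only (it is not part of the diff) and assumes the DenseMatrix implementation added further down, plus the pre-existing dot method:

    use crate::linalg::Matrix;
    use crate::linalg::naive::dense_matrix::DenseMatrix;

    // Draw a uniform random 3x3 matrix and form A * A^T by hand; this is
    // exactly what generate_positive_definite does internally.
    let a = DenseMatrix::rand(3, 3);
    let spd = a.dot(&a.transpose());
    assert!(spd.norm2() > 0.0);

    // The Matrix scalar ops mutate in place and return ().
    let mut b = DenseMatrix::rand(2, 2);
    b.mul_scalar_mut(2.0);
    b.negative_mut();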
diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs
index 00d928e..5a8bf94 100644
--- a/src/linalg/naive/dense_matrix.rs
+++ b/src/linalg/naive/dense_matrix.rs
@@ -1,6 +1,7 @@
 use std::ops::Range;
 use crate::linalg::Matrix;
 use crate::math;
+use rand::prelude::*;
 
 #[derive(Debug, Clone)]
 pub struct DenseMatrix {
@@ -673,6 +674,78 @@ impl Matrix for DenseMatrix {
         }
     }
 
+    fn generate_positive_definite(nrows: usize, ncols: usize) -> Self {
+        let m = DenseMatrix::rand(nrows, ncols);
+        m.dot(&m.transpose())
+    }
+
+    fn transpose(&self) -> Self {
+        let mut m = DenseMatrix {
+            ncols: self.nrows,
+            nrows: self.ncols,
+            values: vec![0f64; self.ncols * self.nrows]
+        };
+        for c in 0..self.ncols {
+            for r in 0..self.nrows {
+                m.set(c, r, self.get(r, c));
+            }
+        }
+        m
+    }
+
+    fn rand(nrows: usize, ncols: usize) -> Self {
+        let mut rng = rand::thread_rng();
+        let values: Vec<f64> = (0..nrows * ncols).map(|_| rng.gen()).collect();
+        DenseMatrix {
+            ncols: ncols,
+            nrows: nrows,
+            values: values
+        }
+    }
+
+    fn norm2(&self) -> f64 {
+        let mut norm = 0f64;
+
+        for xi in self.values.iter() {
+            norm += xi * xi;
+        }
+
+        norm.sqrt()
+    }
+
+    fn add_scalar_mut(&mut self, scalar: f64) {
+        for i in 0..self.values.len() {
+            self.values[i] += scalar;
+        }
+    }
+
+    fn sub_scalar_mut(&mut self, scalar: f64) {
+        for i in 0..self.values.len() {
+            self.values[i] -= scalar;
+        }
+    }
+
+    fn mul_scalar_mut(&mut self, scalar: f64) {
+        for i in 0..self.values.len() {
+            self.values[i] *= scalar;
+        }
+    }
+
+    fn div_scalar_mut(&mut self, scalar: f64) {
+        for i in 0..self.values.len() {
+            self.values[i] /= scalar;
+        }
+    }
+
+    fn negative_mut(&mut self) {
+        for i in 0..self.values.len() {
+            self.values[i] = -self.values[i];
+        }
+    }
+
 }
 
 #[cfg(test)]
@@ -799,5 +872,32 @@ mod tests {
         assert!(!m.approximate_eq(&m_neq, 0.5));
     }
 
+    #[test]
+    fn rand() {
+        let m = DenseMatrix::rand(3, 3);
+        for c in 0..3 {
+            for r in 0..3 {
+                assert!(m.get(r, c) != 0f64);
+            }
+        }
+    }
+
+    #[test]
+    fn transpose() {
+        let m = DenseMatrix::from_2d_array(&[&[1.0, 3.0], &[2.0, 4.0]]);
+        let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]);
+        let m_transposed = m.transpose();
+        for c in 0..2 {
+            for r in 0..2 {
+                assert!(m_transposed.get(r, c) == expected.get(r, c));
+            }
+        }
+    }
+
+    #[test]
+    fn generate_positive_definite() {
+        // A * A^T is symmetric, so at minimum the result must be symmetric.
+        let m = DenseMatrix::generate_positive_definite(3, 3);
+        for c in 0..3 {
+            for r in 0..3 {
+                assert!((m.get(r, c) - m.get(c, r)).abs() < std::f64::EPSILON);
+            }
+        }
+    }
+
 }
diff --git a/src/linalg/naive/dense_vector.rs b/src/linalg/naive/dense_vector.rs
new file mode 100644
index 0000000..0385b9e
--- /dev/null
+++ b/src/linalg/naive/dense_vector.rs
@@ -0,0 +1,138 @@
+use crate::linalg::Vector;
+
+#[derive(Debug, Clone)]
+pub struct DenseVector {
+
+    size: usize,
+    values: Vec<f64>
+
+}
+
+impl DenseVector {
+
+    pub fn from_array(values: &[f64]) -> DenseVector {
+        DenseVector::from_vec(Vec::from(values))
+    }
+
+    pub fn from_vec(values: Vec<f64>) -> DenseVector {
+        DenseVector {
+            size: values.len(),
+            values: values
+        }
+    }
+
+}
+
+impl Into<Vec<f64>> for DenseVector {
+    fn into(self) -> Vec<f64> {
+        self.values
+    }
+}
+
+impl Vector for DenseVector {
+
+    fn get(&self, i: usize) -> f64 {
+        self.values[i]
+    }
+
+    fn set(&mut self, i: usize, value: f64) {
+        self.values[i] = value;
+    }
+
+    fn zeros(size: usize) -> Self {
+        DenseVector::fill(size, 0f64)
+    }
+
+    fn ones(size: usize) -> Self {
+        DenseVector::fill(size, 1f64)
+    }
+
+    fn fill(size: usize, value: f64) -> Self {
+        DenseVector::from_vec(vec![value; size])
+    }
+
+    fn shape(&self) -> (usize, usize) {
+        (1, self.size)
+    }
+
+    fn add_mut(&mut self, other: &Self) -> &Self {
+        if self.size != other.size {
+            panic!("A and B should have the same shape");
+        }
+        for i in 0..self.size {
+            self.values[i] += other.values[i];
+        }
+
+        self
+    }
+
+    fn dot(&self, other: &Self) -> f64 {
+        if self.size != other.size {
+            panic!("A and B should be of the same size");
+        }
+
+        let mut result = 0f64;
+        for i in 0..self.size {
+            result += self.get(i) * other.get(i);
+        }
+
+        result
+    }
+
+    fn norm2(&self) -> f64 {
+        let mut norm = 0f64;
+
+        for xi in self.values.iter() {
+            norm += xi * xi;
+        }
+
+        norm.sqrt()
+    }
+
+    fn add_scalar_mut(&mut self, scalar: f64) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] += scalar;
+        }
+        self
+    }
+
+    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] -= scalar;
+        }
+        self
+    }
+
+    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] *= scalar;
+        }
+        self
+    }
+
+    fn div_scalar_mut(&mut self, scalar: f64) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] /= scalar;
+        }
+        self
+    }
+
+    fn negative_mut(&mut self) -> &Self {
+        for i in 0..self.values.len() {
+            self.values[i] = -self.values[i];
+        }
+        self
+    }
+
+    fn negative(&self) -> Self {
+        let mut result = self.clone();
+        result.negative_mut();
+        result
+    }
+
+}
\ No newline at end of file
diff --git a/src/linalg/naive/mod.rs b/src/linalg/naive/mod.rs
index 54d0c2d..0d5d309 100644
--- a/src/linalg/naive/mod.rs
+++ b/src/linalg/naive/mod.rs
@@ -1 +1,2 @@
-pub mod dense_matrix;
\ No newline at end of file
+pub mod dense_matrix;
+pub mod dense_vector;
\ No newline at end of file
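Reviewer note: a matching sketch for the vector side (illustrative only, not part of the diff). Unlike the Matrix scalar ops, the Vector ops return &Self so calls can be nested:

    use crate::linalg::Vector;
    use crate::linalg::naive::dense_vector::DenseVector;

    let mut v = DenseVector::from_array(&[3.0, 4.0]);
    assert_eq!(v.norm2(), 5.0);   // sqrt(3^2 + 4^2)

    let w = DenseVector::ones(2);
    v.add_mut(&w);                // v = [4.0, 5.0]
    v.mul_scalar_mut(2.0);        // v = [8.0, 10.0]
    assert_eq!(v.dot(&w), 18.0);  // 8*1 + 10*1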
diff --git a/src/optimization/first_order.rs b/src/optimization/first_order.rs
new file mode 100644
index 0000000..612746c
--- /dev/null
+++ b/src/optimization/first_order.rs
@@ -0,0 +1,121 @@
+use std::default::Default;
+use crate::math::EPSILON;
+use crate::linalg::Vector;
+use crate::optimization::{F, DF};
+use crate::optimization::line_search::LineSearchMethod;
+
+pub trait FirstOrderOptimizer {
+    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
+}
+
+#[derive(Debug, Clone)]
+pub struct OptimizerResult<X>
+where X: Vector
+{
+    pub x: X,
+    pub f_x: f64
+}
+
+pub struct GradientDescent {
+    pub max_iter: usize,
+    pub g_rtol: f64,
+    pub g_atol: f64
+}
+
+impl Default for GradientDescent {
+    fn default() -> Self {
+        GradientDescent {
+            max_iter: 10000,
+            g_rtol: EPSILON.sqrt(),
+            g_atol: EPSILON
+        }
+    }
+}
+
+impl FirstOrderOptimizer for GradientDescent {
+
+    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
+
+        let mut x = x0.clone();
+        let mut fx = f(&x);
+
+        // Evaluate the gradient at x0 before deriving the stopping tolerance
+        // and the initial norm, so both are based on ||g||, not on ||x0||.
+        let mut gvec = x0.clone();
+        df(&mut gvec, &x);
+        let mut gnorm = gvec.norm2();
+
+        let gtol = (gnorm * self.g_rtol).max(self.g_atol);
+
+        let mut iter = 0;
+        let mut alpha = 1.0;
+
+        while iter < self.max_iter && gnorm > gtol {
+            iter += 1;
+
+            let mut step = gvec.negative();
+
+            let f_alpha = |alpha: f64| -> f64 {
+                let mut dx = step.clone();
+                dx.mul_scalar_mut(alpha);
+                f(&dx.add_mut(&x)) // f(x .+ step .* alpha)
+            };
+
+            let df_alpha = |alpha: f64| -> f64 {
+                let mut dx = step.clone();
+                let mut dg = gvec.clone();
+                dx.mul_scalar_mut(alpha);
+                df(&mut dg, &dx.add_mut(&x)); // df(x .+ step .* alpha)
+                step.dot(&dg) // directional derivative along the step
+            };
+
+            let df0 = step.dot(&gvec);
+
+            let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0);
+            alpha = ls_r.alpha;
+            fx = ls_r.f_x;
+            x.add_mut(&step.mul_scalar_mut(alpha));
+            df(&mut gvec, &x);
+            gnorm = gvec.norm2();
+        }
+
+        let f_x = f(&x);
+
+        OptimizerResult {
+            x: x,
+            f_x: f_x
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::linalg::naive::dense_vector::DenseVector;
+    use crate::optimization::line_search::Backtracking;
+    use crate::optimization::FunctionOrder;
+
+    #[test]
+    fn gradient_descent() {
+
+        // Rosenbrock function, with minimum f(x) = 0 at x = (1, 1).
+        let x0 = DenseVector::from_array(&[-1., 1.]);
+        let f = |x: &DenseVector| {
+            (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.)
+        };
+
+        let df = |g: &mut DenseVector, x: &DenseVector| {
+            g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0));
+            g.set(1, 200. * (x.get(1) - x.get(0).powf(2.)));
+        };
+
+        let mut ls: Backtracking = Default::default();
+        ls.order = FunctionOrder::THIRD;
+        let optimizer: GradientDescent = Default::default();
+
+        let result = optimizer.optimize(&f, &df, &x0, &ls);
+
+        assert!((result.f_x - 0.0).abs() < EPSILON);
+        assert!((result.x.get(0) - 1.0).abs() < EPSILON);
+        assert!((result.x.get(1) - 1.0).abs() < EPSILON);
+
+    }
+}
\ No newline at end of file
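Reviewer note: the f_alpha/df_alpha closures above reduce the n-dimensional problem to a one-dimensional search over the step size: phi(alpha) = f(x + alpha * step), with phi'(alpha) = step . grad f(x + alpha * step). A self-contained sketch of the same reduction on a plain Vec<f64> (illustrative only, not part of the diff):

    // f(x) = ||x||^2, grad f(x) = 2x. From x = (1, 0) with
    // step = -grad f(x) = (-2, 0): phi(alpha) = (1 - 2*alpha)^2.
    let x = vec![1.0, 0.0];
    let step = vec![-2.0, 0.0];
    let phi = |alpha: f64| -> f64 {
        x.iter()
            .zip(step.iter())
            .map(|(xi, si)| (xi + alpha * si).powi(2))
            .sum()
    };
    assert!(phi(0.5).abs() < 1e-12); // exact minimizer at alpha = 0.5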
diff --git a/src/optimization/line_search.rs b/src/optimization/line_search.rs
new file mode 100644
index 0000000..bad3a3a
--- /dev/null
+++ b/src/optimization/line_search.rs
@@ -0,0 +1,88 @@
+use crate::math::EPSILON;
+use crate::optimization::FunctionOrder;
+
+pub trait LineSearchMethod {
+    fn search(&self, f: &(dyn Fn(f64) -> f64), df: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult;
+}
+
+#[derive(Debug, Clone)]
+pub struct LineSearchResult {
+    pub alpha: f64,
+    pub f_x: f64
+}
+
+pub struct Backtracking {
+    pub c1: f64,
+    pub max_iterations: usize,
+    pub phi: f64,
+    pub plo: f64,
+    pub order: FunctionOrder
+}
+
+impl Default for Backtracking {
+    fn default() -> Self {
+        Backtracking {
+            c1: 1e-4,
+            max_iterations: 1000,
+            phi: 0.5,
+            plo: 0.1,
+            order: FunctionOrder::SECOND
+        }
+    }
+}
+
+impl LineSearchMethod for Backtracking {
+
+    fn search(&self, f: &(dyn Fn(f64) -> f64), _: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult {
+
+        let (mut a1, mut a2) = (alpha, alpha);
+        let (mut fx0, mut fx1) = (f0, f(a1));
+
+        let mut iteration = 0;
+
+        // Backtrack until the Armijo (sufficient decrease) condition holds:
+        // f(a2) <= f0 + c1 * a2 * df0.
+        while fx1 > f0 + self.c1 * a2 * df0 {
+            if iteration > self.max_iterations {
+                panic!("Line search failed to converge, reached maximum iterations.");
+            }
+
+            let a_tmp;
+
+            match self.order {
+
+                FunctionOrder::FIRST | FunctionOrder::SECOND => {
+                    // Minimizer of the quadratic interpolant through
+                    // phi(0) = f0, phi'(0) = df0 and phi(a2) = fx1.
+                    a_tmp = -(df0 * a2.powf(2.)) / (2. * (fx1 - f0 - df0 * a2))
+                },
+
+                FunctionOrder::THIRD => {
+                    // Minimizer of the cubic interpolant through the two
+                    // most recent trial points.
+                    let div = 1. / (a1.powf(2.) * a2.powf(2.) * (a2 - a1));
+                    let a = (a1.powf(2.) * (fx1 - f0 - df0 * a2) - a2.powf(2.) * (fx0 - f0 - df0 * a1)) * div;
+                    let b = (-a1.powf(3.) * (fx1 - f0 - df0 * a2) + a2.powf(3.) * (fx0 - f0 - df0 * a1)) * div;
+
+                    if a.abs() <= EPSILON {
+                        // The cubic degenerates to a quadratic.
+                        a_tmp = df0 / (2. * b);
+                    } else {
+                        let d = f64::max(b.powf(2.) - 3. * a * df0, 0.);
+                        a_tmp = (-b + d.sqrt()) / (3. * a); // root of the quadratic equation
+                    }
+                }
+            }
+
+            a1 = a2;
+            // Clamp the new step to [plo * a2, phi * a2] to guarantee progress.
+            a2 = f64::max(f64::min(a_tmp, a2 * self.phi), a2 * self.plo);
+
+            fx0 = fx1;
+            fx1 = f(a2);
+
+            iteration += 1;
+        }
+
+        LineSearchResult {
+            alpha: a2,
+            f_x: fx1
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/src/optimization/mod.rs b/src/optimization/mod.rs
new file mode 100644
index 0000000..ce827cc
--- /dev/null
+++ b/src/optimization/mod.rs
@@ -0,0 +1,14 @@
+pub mod first_order;
+pub mod line_search;
+
+use crate::linalg::Vector;
+
+type F<X: Vector> = dyn Fn(&X) -> f64;
+type DF<X: Vector> = dyn Fn(&mut X, &X);
+
+#[derive(Debug)]
+pub enum FunctionOrder {
+    FIRST,
+    SECOND,
+    THIRD
+}
\ No newline at end of file
diff --git a/src/regression/linear_regression.rs b/src/regression/linear_regression.rs
index 56c3b57..fb5aaaf 100644
--- a/src/regression/linear_regression.rs
+++ b/src/regression/linear_regression.rs
@@ -25,8 +25,7 @@ impl<M: Matrix> LinearRegression<M> {
         if x_nrows != y_nrows {
             panic!("Number of rows of X doesn't match number of rows of Y");
         }
-
-        // let b = y.v_stack(&M::ones(1, 1));
+
         let b = y.clone();
         let mut a = x.h_stack(&M::ones(x_nrows, 1));
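Reviewer note: the Backtracking search in line_search.rs can also be exercised on its own. A minimal sketch (illustrative only, not part of the diff; the test function is hypothetical): for phi(alpha) = (alpha - 0.25)^2 the default second-order interpolation is exact, so a single backtracking step from alpha = 1 lands on the true minimizer:

    use crate::optimization::line_search::{Backtracking, LineSearchMethod};

    // phi(0) = 0.0625 and phi'(0) = -0.5 are passed in as f0 and df0.
    let phi = |alpha: f64| (alpha - 0.25).powi(2);
    let dphi = |alpha: f64| 2. * (alpha - 0.25);

    let ls: Backtracking = Default::default(); // FunctionOrder::SECOND
    let r = ls.search(&phi, &dphi, 1.0, phi(0.), dphi(0.));

    assert!((r.alpha - 0.25).abs() < 1e-12);
    assert!(r.f_x.abs() < 1e-12);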