diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 5f57446..adf0c96 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -67,12 +67,44 @@ pub trait Vector: Into> + Clone + Debug { fn norm2(&self) -> f64; + fn norm(&self, p:f64) -> f64; + fn negative_mut(&mut self) -> &Self; fn negative(&self) -> Self; fn add_mut(&mut self, other: &Self) -> &Self; + fn sub_mut(&mut self, other: &Self) -> &Self; + + fn mul_mut(&mut self, other: &Self) -> &Self; + + fn div_mut(&mut self, other: &Self) -> &Self; + + fn add(&self, other: &Self) -> Self { + let mut r = self.clone(); + r.add_mut(other); + r + } + + fn sub(&self, other: &Self) -> Self { + let mut r = self.clone(); + r.sub_mut(other); + r + } + + fn mul(&self, other: &Self) -> Self { + let mut r = self.clone(); + r.mul_mut(other); + r + } + + fn div(&self, other: &Self) -> Self { + let mut r = self.clone(); + r.div_mut(other); + r + } + fn add_scalar_mut(&mut self, scalar: f64) -> &Self; fn sub_scalar_mut(&mut self, scalar: f64) -> &Self; @@ -81,6 +113,46 @@ pub trait Vector: Into> + Clone + Debug { fn div_scalar_mut(&mut self, scalar: f64) -> &Self; + fn add_scalar(&self, scalar: f64) -> Self{ + let mut r = self.clone(); + r.add_scalar_mut(scalar); + r + } + + fn sub_scalar(&self, scalar: f64) -> Self{ + let mut r = self.clone(); + r.sub_scalar_mut(scalar); + r + } + + fn mul_scalar(&self, scalar: f64) -> Self{ + let mut r = self.clone(); + r.mul_scalar_mut(scalar); + r + } + + fn div_scalar(&self, scalar: f64) -> Self{ + let mut r = self.clone(); + r.div_scalar_mut(scalar); + r + } + fn dot(&self, other: &Self) -> f64; + fn copy_from(&mut self, other: &Self); + + fn abs_mut(&mut self) -> &Self; + + fn pow_mut(&mut self, p: f64) -> &Self; + + fn sum(&self) -> f64; + + fn abs(&self) -> Self{ + let mut r = self.clone(); + r.abs_mut(); + r + } + + fn max_diff(&self, other: &Self) -> f64; + } \ No newline at end of file diff --git a/src/linalg/naive/dense_vector.rs b/src/linalg/naive/dense_vector.rs index 
0385b9e..b056d7d 100644 --- a/src/linalg/naive/dense_vector.rs +++ b/src/linalg/naive/dense_vector.rs @@ -66,6 +66,39 @@ impl Vector for DenseVector { self } + fn mul_mut(&mut self, other: &Self) -> &Self { + if self.size != other.size { + panic!("A and B should have the same shape"); + } + for i in 0..self.size { + self.values[i] *= other.values[i]; + } + + self + } + + fn sub_mut(&mut self, other: &Self) -> &Self { + if self.size != other.size { + panic!("A and B should have the same shape"); + } + for i in 0..self.size { + self.values[i] -= other.values[i]; + } + + self + } + + fn div_mut(&mut self, other: &Self) -> &Self { + if self.size != other.size { + panic!("A and B should have the same shape"); + } + for i in 0..self.size { + self.values[i] /= other.values[i]; + } + + self + } + fn dot(&self, other: &Self) -> f64 { if self.size != other.size { panic!("A and B should be of the same size"); @@ -89,6 +122,24 @@ impl Vector for DenseVector { norm.sqrt() } + fn norm(&self, p:f64) -> f64 { + + if p.is_infinite() && p.is_sign_positive() { + self.values.iter().map(|x| x.abs()).fold(std::f64::NEG_INFINITY, |a, b| a.max(b)) + } else if p.is_infinite() && p.is_sign_negative() { + self.values.iter().map(|x| x.abs()).fold(std::f64::INFINITY, |a, b| a.min(b)) + } else { + + let mut norm = 0f64; + + for xi in self.values.iter() { + norm += xi.abs().powf(p); + } + + norm.powf(1.0/p) + } + } + fn add_scalar_mut(&mut self, scalar: f64) -> &Self { for i in 0..self.values.len() { self.values[i] += scalar; @@ -124,6 +175,28 @@ impl Vector for DenseVector { self } + fn abs_mut(&mut self) -> &Self{ + for i in 0..self.values.len() { + self.values[i] = self.values[i].abs(); + } + self + } + + fn pow_mut(&mut self, p: f64) -> &Self{ + for i in 0..self.values.len() { + self.values[i] = self.values[i].powf(p); + } + self + } + + fn sum(&self) -> f64 { + let mut sum = 0.; + for i in 0..self.values.len() { + sum += self.values[i]; + } + sum + } + fn negative(&self) -> Self { let mut 
result = DenseVector { size: self.size, @@ -135,4 +208,46 @@ impl Vector for DenseVector { result } + fn copy_from(&mut self, other: &Self) { + for i in 0..self.values.len() { + self.values[i] = other.values[i]; + } + } + + fn max_diff(&self, other: &Self) -> f64{ + let mut max_diff = 0f64; + for i in 0..self.values.len() { + max_diff = max_diff.max((self.values[i] - other.values[i]).abs()); + } + max_diff + + } + +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn norm() { + + let v = DenseVector::from_array(&[3., -2., 6.]); + assert_eq!(v.norm(1.), 11.); + assert_eq!(v.norm(2.), 7.); + assert_eq!(v.norm(std::f64::INFINITY), 6.); + assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.); + } + + #[test] + fn copy_from() { + + let mut a = DenseVector::from_array(&[0., 0., 0.]); + let b = DenseVector::from_array(&[-1., 0., 2.]); + a.copy_from(&b); + assert_eq!(a.get(0), b.get(0)); + assert_eq!(a.get(1), b.get(1)); + assert_eq!(a.get(2), b.get(2)); + } + } \ No newline at end of file diff --git a/src/optimization/first_order.rs b/src/optimization/first_order/gradient_descent.rs similarity index 63% rename from src/optimization/first_order.rs rename to src/optimization/first_order/gradient_descent.rs index 612746c..049cfa8 100644 --- a/src/optimization/first_order.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -3,18 +3,7 @@ use crate::math::EPSILON; use crate::linalg::Vector; use crate::optimization::{F, DF}; use crate::optimization::line_search::LineSearchMethod; - -pub trait FirstOrderOptimizer { - fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F, df: &'a DF, x0: &X, ls: &'a LS) -> OptimizerResult; -} - -#[derive(Debug, Clone)] -pub struct OptimizerResult -where X: Vector -{ - pub x: X, - pub f_x: f64 -} +use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; pub struct GradientDescent { pub max_iter: usize, @@ -23,7 +12,7 @@ pub struct GradientDescent { } impl Default for GradientDescent { - fn
default() -> Self { + fn default() -> Self { GradientDescent { max_iter: 10000, g_rtol: EPSILON.sqrt(), @@ -68,7 +57,7 @@ impl FirstOrderOptimizer for GradientDescent gvec.dot(&dg) }; - let df0 = step.dot(&gvec); + let df0 = step.dot(&gvec); let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0); alpha = ls_r.alpha; @@ -82,7 +71,8 @@ impl FirstOrderOptimizer for GradientDescent OptimizerResult{ x: x, - f_x: f_x + f_x: f_x, + iterations: iter } } } @@ -97,25 +87,26 @@ mod tests { #[test] fn gradient_descent() { - let x0 = DenseVector::from_array(&[-1., 1.]); - let f = |x: &DenseVector| { - (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.) - }; + let x0 = DenseVector::from_array(&[-1., 1.]); + let f = |x: &DenseVector| { + (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.) + }; - let df = |g: &mut DenseVector, x: &DenseVector| { - g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0)); - g.set(1, 200. * (x.get(1) - x.get(0).powf(2.))); - }; + let df = |g: &mut DenseVector, x: &DenseVector| { + g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0)); + g.set(1, 200. 
* (x.get(1) - x.get(0).powf(2.))); + }; - let mut ls: Backtracking = Default::default(); - ls.order = FunctionOrder::THIRD; - let optimizer: GradientDescent = Default::default(); - - let result = optimizer.optimize(&f, &df, &x0, &ls); - - assert!((result.f_x - 0.0).abs() < EPSILON); - assert!((result.x.get(0) - 1.0).abs() < EPSILON); - assert!((result.x.get(1) - 1.0).abs() < EPSILON); + let mut ls: Backtracking = Default::default(); + ls.order = FunctionOrder::THIRD; + let optimizer: GradientDescent = Default::default(); + + let result = optimizer.optimize(&f, &df, &x0, &ls); + + assert!((result.f_x - 0.0).abs() < EPSILON); + assert!((result.x.get(0) - 1.0).abs() < EPSILON); + assert!((result.x.get(1) - 1.0).abs() < EPSILON); } + } \ No newline at end of file diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs new file mode 100644 index 0000000..23e074a --- /dev/null +++ b/src/optimization/first_order/lbfgs.rs @@ -0,0 +1,255 @@ +use std::default::Default; +use crate::linalg::Vector; +use crate::optimization::{F, DF}; +use crate::optimization::line_search::LineSearchMethod; +use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; + +pub struct LBFGS { + pub max_iter: usize, + pub g_rtol: f64, + pub g_atol: f64, + pub x_atol: f64, + pub x_rtol: f64, + pub f_abstol: f64, + pub f_reltol: f64, + pub successive_f_tol: usize, + pub m: usize +} + +impl Default for LBFGS { + fn default() -> Self { + LBFGS { + max_iter: 1000, + g_rtol: 1e-8, + g_atol: 1e-8, + x_atol: 0., + x_rtol: 0., + f_abstol: 0., + f_reltol: 0., + successive_f_tol: 1, + m: 10 + } + } +} + +impl LBFGS { + + fn two_loops(&self, state: &mut LBFGSState) { + + let lower = state.iteration.max(self.m) - self.m; + let upper = state.iteration; + + state.twoloop_q.copy_from(&state.dx); + + for index in (lower..upper).rev() { + let i = index.rem_euclid(self.m); + let dgi = &state.dg_history[i]; + let dxi = &state.dx_history[i]; + state.twoloop_alpha[i] = 
state.rho[i] * dxi.dot(&state.twoloop_q); + state.twoloop_q.sub_mut(&dgi.mul_scalar(state.twoloop_alpha[i])); + } + + if state.iteration > 0 { + let i = (upper - 1).rem_euclid(self.m); + let dxi = &state.dx_history[i]; + let dgi = &state.dg_history[i]; + let scaling = dxi.dot(dgi) / dgi.abs().pow_mut(2.).sum(); + state.s.copy_from(&state.twoloop_q.mul_scalar(scaling)); + } else { + state.s.copy_from(&state.twoloop_q); + } + + for index in lower..upper { + let i = index.rem_euclid(self.m); + let dgi = &state.dg_history[i]; + let dxi = &state.dx_history[i]; + let beta = state.rho[i] * dgi.dot(&state.s); + state.s.add_mut(&dxi.mul_scalar(state.twoloop_alpha[i] - beta)); + } + + state.s.mul_scalar_mut(-1.); + + } + + fn init_state(&self, x: &X) -> LBFGSState { + LBFGSState { + x: x.clone(), + x_prev: x.clone(), + fx: std::f64::NAN, + g_prev: x.clone(), + rho: vec![0.; self.m], + dx_history: vec![x.clone(); self.m], + dg_history: vec![x.clone(); self.m], + dx: x.clone(), + dg: x.clone(), + fx_prev: std::f64::NAN, + twoloop_q: x.clone(), + twoloop_alpha: vec![0.; self.m], + iteration: 0, + counter_f_tol: 0, + s: x.clone(), + alpha: 1.0 + } + } + + fn update_state<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F, df: &'a DF, ls: &'a LS, state: &mut LBFGSState) { + df(&mut state.dx, &state.x); + + self.two_loops(state); + + df(&mut state.g_prev, &state.x); + + let df0 = state.dx.dot(&state.s); + + state.fx_prev = f(&state.x); + state.x_prev.copy_from(&state.x); + + let f_alpha = |alpha: f64| -> f64 { + let mut dx = state.s.clone(); + dx.mul_scalar_mut(alpha); + f(&dx.add_mut(&state.x)) // f(x) = f(x .+ gvec .* alpha) + }; + + let df_alpha = |alpha: f64| -> f64 { + let mut dx = state.s.clone(); + let mut dg = state.dx.clone(); + dx.mul_scalar_mut(alpha); + df(&mut dg, &dx.add_mut(&state.x)); //df(x) = df(x .+ gvec .* alpha) + state.dx.dot(&dg) + }; + + let ls_r = ls.search(&f_alpha, &df_alpha, 1.0, state.fx_prev, df0); + state.alpha = ls_r.alpha; + + 
state.dx.copy_from(state.s.mul_scalar_mut(state.alpha)); + state.x.add_mut(&state.dx); + } + + fn assess_convergence(&self, state: &mut LBFGSState) -> bool { + let (mut x_converged, mut g_converged) = (false, false); + + if state.x.max_diff(&state.x_prev) <= self.x_atol { + x_converged = true; + } + + if state.x.max_diff(&state.x_prev) <= self.x_rtol * state.x.norm(std::f64::INFINITY) { + x_converged = true; + } + + if (state.fx - state.fx_prev).abs() <= self.f_abstol { + state.counter_f_tol += 1; + } + + if (state.fx - state.fx_prev).abs() <= self.f_reltol * state.fx.abs() { + state.counter_f_tol += 1; + } + + if state.dx.norm(std::f64::INFINITY) <= self.g_atol { + g_converged = true; + } + + g_converged || x_converged || state.counter_f_tol > self.successive_f_tol + } + + fn update_hessian<'a, X: Vector>(&self, df: &'a DF, state: &mut LBFGSState) { + let mut dx = state.dx.clone(); + df(&mut dx, &state.x); + state.dg = dx.sub(&state.g_prev); + let rho_iteration = 1. / state.dx.dot(&state.dg); + if !rho_iteration.is_infinite() { + let idx = state.iteration.rem_euclid(self.m); + state.dx_history[idx].copy_from(&state.dx); + state.dg_history[idx].copy_from(&state.dg); + state.rho[idx] = rho_iteration; + } + } +} + +struct LBFGSState { + x: X, + x_prev: X, + fx: f64, + g_prev: X, + rho: Vec, + dx_history: Vec, + dg_history: Vec, + dx: X, + dg: X, + fx_prev: f64, + twoloop_q: X, + twoloop_alpha: Vec, + iteration: usize, + counter_f_tol: usize, + s: X, + alpha: f64 +} + +impl FirstOrderOptimizer for LBFGS { + + fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F, df: &'a DF, x0: &X, ls: &'a LS) -> OptimizerResult { + + let mut state = self.init_state(x0); + + df(&mut state.dx, &x0); + + let g_converged = state.dx.norm(std::f64::INFINITY) < self.g_atol; + let mut converged = g_converged; + let stopped = false; + + while !converged && !stopped && state.iteration < self.max_iter { + + self.update_state(f, df, ls, &mut state); + + state.fx = f(&state.x); + + 
converged = self.assess_convergence(&mut state); + + if !converged { + self.update_hessian(df, &mut state); + } + + state.iteration += 1; + + } + + OptimizerResult{ + x: state.x, + f_x: state.fx, + iterations: state.iteration + } + + } + +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_vector::DenseVector; + use crate::optimization::line_search::Backtracking; + use crate::optimization::FunctionOrder; + use crate::math::EPSILON; + + #[test] + fn lbfgs() { + let x0 = DenseVector::from_array(&[0., 0.]); + let f = |x: &DenseVector| { + (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.) + }; + + let df = |g: &mut DenseVector, x: &DenseVector| { + g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0)); + g.set(1, 200. * (x.get(1) - x.get(0).powf(2.))); + }; + let mut ls: Backtracking = Default::default(); + ls.order = FunctionOrder::THIRD; + let optimizer: LBFGS = Default::default(); + + let result = optimizer.optimize(&f, &df, &x0, &ls); + + assert!((result.f_x - 0.0).abs() < EPSILON); + assert!((result.x.get(0) - 1.0).abs() < 1e-8); + assert!((result.x.get(1) - 1.0).abs() < 1e-8); + assert!(result.iterations <= 24); + } +} \ No newline at end of file diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs new file mode 100644 index 0000000..2225835 --- /dev/null +++ b/src/optimization/first_order/mod.rs @@ -0,0 +1,18 @@ +pub mod lbfgs; +pub mod gradient_descent; +use crate::linalg::Vector; +use crate::optimization::line_search::LineSearchMethod; +use crate::optimization::{F, DF}; + +pub trait FirstOrderOptimizer { + fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F, df: &'a DF, x0: &X, ls: &'a LS) -> OptimizerResult; +} + +#[derive(Debug, Clone)] +pub struct OptimizerResult +where X: Vector +{ + pub x: X, + pub f_x: f64, + pub iterations: usize +} \ No newline at end of file diff --git a/src/optimization/line_search.rs 
b/src/optimization/line_search.rs index bad3a3a..dba3348 100644 --- a/src/optimization/line_search.rs +++ b/src/optimization/line_search.rs @@ -14,6 +14,7 @@ pub struct LineSearchResult { pub struct Backtracking { pub c1: f64, pub max_iterations: usize, + pub max_infinity_iterations: usize, pub phi: f64, pub plo: f64, pub order: FunctionOrder @@ -24,6 +25,7 @@ impl Default for Backtracking { Backtracking { c1: 1e-4, max_iterations: 1000, + max_infinity_iterations: -EPSILON.log2() as usize, phi: 0.5, plo: 0.1, order: FunctionOrder::SECOND @@ -38,6 +40,15 @@ impl LineSearchMethod for Backtracking { let (mut a1, mut a2) = (alpha, alpha); let (mut fx0, mut fx1) = (f0, f(a1)); + let mut iterfinite = 0; + while !fx1.is_finite() && iterfinite < self.max_infinity_iterations { + iterfinite += 1; + a1 = a2; + a2 = a1 / 2.; + + fx1 = f(a2); + } + let mut iteration = 0; while fx1 > f0 + self.c1 * a2 * df0 { @@ -49,7 +60,7 @@ impl LineSearchMethod for Backtracking { match self.order { - FunctionOrder::FIRST | FunctionOrder::SECOND => { + FunctionOrder::FIRST | FunctionOrder::SECOND => { a_tmp = - (df0 * a2.powf(2.)) / (2. * (fx1 - f0 - df0*a2)) }, @@ -85,4 +96,34 @@ impl LineSearchMethod for Backtracking { } } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn backtracking() { + + let f = |x: f64| -> f64 { + x.powf(2.) + x + }; + + let df = |x: f64| -> f64 { + 2. * x + 1. + }; + + let ls: Backtracking = Default::default(); + + let mut x = -3.; + let mut alpha = 1.; + + for _ in 0..10 { + let result = ls.search(&f, &df, alpha, f(x), df(x)); + alpha = result.alpha; + x += alpha; + } + + assert!(f(x).abs() < 0.01); + } } \ No newline at end of file