fix: cargo fmt
@@ -1,15 +1,15 @@
 use std::default::Default;
 
-use crate::math::num::FloatExt;
 use crate::linalg::Matrix;
-use crate::optimization::{F, DF};
-use crate::optimization::line_search::LineSearchMethod;
+use crate::math::num::FloatExt;
 use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
+use crate::optimization::line_search::LineSearchMethod;
+use crate::optimization::{DF, F};
 
 pub struct GradientDescent<T: FloatExt> {
     pub max_iter: usize,
     pub g_rtol: T,
-    pub g_atol: T
+    pub g_atol: T,
 }
 
 impl<T: FloatExt> Default for GradientDescent<T> {
@@ -17,31 +17,34 @@ impl<T: FloatExt> Default for GradientDescent<T> {
         GradientDescent {
             max_iter: 10000,
             g_rtol: T::epsilon().sqrt(),
-            g_atol: T::epsilon()
+            g_atol: T::epsilon(),
         }
     }
 }
 
-impl<T: FloatExt> FirstOrderOptimizer<T> for GradientDescent<T>
-{
-    fn optimize<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(&self, f: &'a F<T, X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<T, X> {
-        let mut x = x0.clone();
+impl<T: FloatExt> FirstOrderOptimizer<T> for GradientDescent<T> {
+    fn optimize<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(
+        &self,
+        f: &'a F<T, X>,
+        df: &'a DF<X>,
+        x0: &X,
+        ls: &'a LS,
+    ) -> OptimizerResult<T, X> {
+        let mut x = x0.clone();
         let mut fx = f(&x);
 
         let mut gvec = x0.clone();
         let mut gnorm = gvec.norm2();
 
         let gtol = (gvec.norm2() * self.g_rtol).max(self.g_atol);
 
         let mut iter = 0;
         let mut alpha = T::one();
         df(&mut gvec, &x);
 
         while iter < self.max_iter && (iter == 0 || gnorm > gtol) {
             iter += 1;
 
             let mut step = gvec.negative();
 
             let f_alpha = |alpha: T| -> T {
@@ -50,7 +53,7 @@ impl<T: FloatExt> FirstOrderOptimizer<T> for GradientDescent<T>
                 f(&dx.add_mut(&x)) // f(x) = f(x .+ gvec .* alpha)
             };
 
             let df_alpha = |alpha: T| -> T {
                 let mut dx = step.clone();
                 let mut dg = gvec.clone();
                 dx.mul_scalar_mut(alpha);
@@ -58,56 +61,58 @@ impl<T: FloatExt> FirstOrderOptimizer<T> for GradientDescent<T>
                 gvec.vector_dot(&dg)
             };
 
             let df0 = step.vector_dot(&gvec);
 
             let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0);
             alpha = ls_r.alpha;
             fx = ls_r.f_x;
             x.add_mut(&step.mul_scalar_mut(alpha));
             df(&mut gvec, &x);
             gnorm = gvec.norm2();
         }
 
         let f_x = f(&x);
 
-        OptimizerResult{
+        OptimizerResult {
             x: x,
             f_x: f_x,
-            iterations: iter
+            iterations: iter,
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::linalg::naive::dense_matrix::*;
     use crate::optimization::line_search::Backtracking;
     use crate::optimization::FunctionOrder;
 
     #[test]
     fn gradient_descent() {
         let x0 = DenseMatrix::vector_from_array(&[-1., 1.]);
         let f = |x: &DenseMatrix<f64>| {
             (1.0 - x.get(0, 0)).powf(2.) + 100.0 * (x.get(0, 1) - x.get(0, 0).powf(2.)).powf(2.)
         };
 
         let df = |g: &mut DenseMatrix<f64>, x: &DenseMatrix<f64>| {
-            g.set(0, 0, -2. * (1. - x.get(0, 0)) - 400. * (x.get(0, 1) - x.get(0, 0).powf(2.)) * x.get(0, 0));
+            g.set(
+                0,
+                0,
+                -2. * (1. - x.get(0, 0))
+                    - 400. * (x.get(0, 1) - x.get(0, 0).powf(2.)) * x.get(0, 0),
+            );
             g.set(0, 1, 200. * (x.get(0, 1) - x.get(0, 0).powf(2.)));
         };
 
         let mut ls: Backtracking<f64> = Default::default();
         ls.order = FunctionOrder::THIRD;
         let optimizer: GradientDescent<f64> = Default::default();
 
         let result = optimizer.optimize(&f, &df, &x0, &ls);
 
         assert!((result.f_x - 0.0).abs() < 1e-5);
         assert!((result.x.get(0, 0) - 1.0).abs() < 1e-2);
         assert!((result.x.get(0, 1) - 1.0).abs() < 1e-2);
     }
 }
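
The hunks above reformat a batch gradient descent optimizer (the `crate::optimization::first_order` paths suggest a `gradient_descent.rs`; file names are not preserved in this view). The loop repeats x <- x + alpha * step with step = -grad f(x) and alpha chosen by the line search, stopping once the gradient norm drops below `gtol`. A self-contained sketch of that loop on plain arrays, with a fixed step standing in for the `Backtracking` search; illustrative only, not the crate's API:

```rust
// Rosenbrock objective and gradient, as in the test above.
fn rosenbrock(x: &[f64; 2]) -> f64 {
    (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2)
}

fn rosenbrock_grad(x: &[f64; 2]) -> [f64; 2] {
    [
        -2.0 * (1.0 - x[0]) - 400.0 * (x[1] - x[0].powi(2)) * x[0],
        200.0 * (x[1] - x[0].powi(2)),
    ]
}

fn main() {
    let mut x = [-1.0, 1.0]; // same starting point as the test
    let alpha = 1e-3; // fixed step; the real code picks alpha per iteration
    for _ in 0..100_000 {
        let g = rosenbrock_grad(&x);
        let gnorm = (g[0] * g[0] + g[1] * g[1]).sqrt();
        if gnorm < 1e-8 {
            break; // analogue of the gnorm > gtol stopping rule above
        }
        // x <- x + alpha * step, where step = -gradient.
        x[0] -= alpha * g[0];
        x[1] -= alpha * g[1];
    }
    // A fixed small step converges slowly on Rosenbrock; the line search
    // in the diff exists precisely to avoid this.
    println!("x = {:?}, f(x) = {}", x, rosenbrock(&x));
}
```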
@@ -1,26 +1,26 @@
 use std::default::Default;
 use std::fmt::Debug;
 
-use crate::math::num::FloatExt;
 use crate::linalg::Matrix;
-use crate::optimization::{F, DF};
-use crate::optimization::line_search::LineSearchMethod;
+use crate::math::num::FloatExt;
 use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
+use crate::optimization::line_search::LineSearchMethod;
+use crate::optimization::{DF, F};
 
 pub struct LBFGS<T: FloatExt> {
     pub max_iter: usize,
     pub g_rtol: T,
     pub g_atol: T,
     pub x_atol: T,
     pub x_rtol: T,
     pub f_abstol: T,
     pub f_reltol: T,
     pub successive_f_tol: usize,
-    pub m: usize
+    pub m: usize,
 }
 
 impl<T: FloatExt> Default for LBFGS<T> {
     fn default() -> Self {
         LBFGS {
             max_iter: 1000,
             g_rtol: T::from(1e-8).unwrap(),
@@ -30,48 +30,49 @@ impl<T: FloatExt> Default for LBFGS<T> {
             f_abstol: T::zero(),
             f_reltol: T::zero(),
             successive_f_tol: 1,
-            m: 10
+            m: 10,
         }
     }
 }
 
 impl<T: FloatExt> LBFGS<T> {
     fn two_loops<X: Matrix<T>>(&self, state: &mut LBFGSState<T, X>) {
         let lower = state.iteration.max(self.m) - self.m;
         let upper = state.iteration;
 
         state.twoloop_q.copy_from(&state.x_df);
 
         for index in (lower..upper).rev() {
             let i = index.rem_euclid(self.m);
             let dgi = &state.dg_history[i];
             let dxi = &state.dx_history[i];
             state.twoloop_alpha[i] = state.rho[i] * dxi.vector_dot(&state.twoloop_q);
-            state.twoloop_q.sub_mut(&dgi.mul_scalar(state.twoloop_alpha[i]));
+            state
+                .twoloop_q
+                .sub_mut(&dgi.mul_scalar(state.twoloop_alpha[i]));
         }
 
         if state.iteration > 0 {
             let i = (upper - 1).rem_euclid(self.m);
             let dxi = &state.dx_history[i];
             let dgi = &state.dg_history[i];
             let scaling = dxi.vector_dot(dgi) / dgi.abs().pow_mut(T::two()).sum();
             state.s.copy_from(&state.twoloop_q.mul_scalar(scaling));
         } else {
             state.s.copy_from(&state.twoloop_q);
         }
 
         for index in lower..upper {
             let i = index.rem_euclid(self.m);
             let dgi = &state.dg_history[i];
             let dxi = &state.dx_history[i];
             let beta = state.rho[i] * dgi.vector_dot(&state.s);
-            state.s.add_mut(&dxi.mul_scalar(state.twoloop_alpha[i] - beta));
+            state
+                .s
+                .add_mut(&dxi.mul_scalar(state.twoloop_alpha[i] - beta));
         }
 
         state.s.mul_scalar_mut(-T::one());
     }
 
     fn init_state<X: Matrix<T>>(&self, x: &X) -> LBFGSState<T, X> {
@@ -80,31 +81,37 @@ impl<T: FloatExt> LBFGS<T> {
             x_prev: x.clone(),
             x_f: T::nan(),
             x_f_prev: T::nan(),
             x_df: x.clone(),
             x_df_prev: x.clone(),
             rho: vec![T::zero(); self.m],
             dx_history: vec![x.clone(); self.m],
             dg_history: vec![x.clone(); self.m],
             dx: x.clone(),
             dg: x.clone(),
             twoloop_q: x.clone(),
             twoloop_alpha: vec![T::zero(); self.m],
             iteration: 0,
             counter_f_tol: 0,
             s: x.clone(),
-            alpha: T::one()
+            alpha: T::one(),
         }
     }
 
-    fn update_state<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(&self, f: &'a F<T, X>, df: &'a DF<X>, ls: &'a LS, state: &mut LBFGSState<T, X>) {
-        self.two_loops(state);
+    fn update_state<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(
+        &self,
+        f: &'a F<T, X>,
+        df: &'a DF<X>,
+        ls: &'a LS,
+        state: &mut LBFGSState<T, X>,
+    ) {
+        self.two_loops(state);
 
         df(&mut state.x_df_prev, &state.x);
         state.x_f_prev = f(&state.x);
         state.x_prev.copy_from(&state.x);
 
         let df0 = state.x_df.vector_dot(&state.s);
 
         let f_alpha = |alpha: T| -> T {
             let mut dx = state.s.clone();
@@ -112,22 +119,21 @@ impl<T: FloatExt> LBFGS<T> {
             f(&dx.add_mut(&state.x)) // f(x) = f(x .+ gvec .* alpha)
         };
 
         let df_alpha = |alpha: T| -> T {
             let mut dx = state.s.clone();
             let mut dg = state.x_df.clone();
             dx.mul_scalar_mut(alpha);
             df(&mut dg, &dx.add_mut(&state.x)); //df(x) = df(x .+ gvec .* alpha)
             state.x_df.vector_dot(&dg)
         };
 
         let ls_r = ls.search(&f_alpha, &df_alpha, T::one(), state.x_f_prev, df0);
         state.alpha = ls_r.alpha;
 
         state.dx.copy_from(state.s.mul_scalar_mut(state.alpha));
         state.x.add_mut(&state.dx);
         state.x_f = f(&state.x);
         df(&mut state.x_df, &state.x);
     }
 
     fn assess_convergence<X: Matrix<T>>(&self, state: &mut LBFGSState<T, X>) -> bool {
@@ -139,9 +145,9 @@ impl<T: FloatExt> LBFGS<T> {
         if state.x.max_diff(&state.x_prev) <= self.x_rtol * state.x.norm(T::infinity()) {
             x_converged = true;
         }
 
         if (state.x_f - state.x_f_prev).abs() <= self.f_abstol {
             state.counter_f_tol += 1;
         }
 
@@ -151,20 +157,20 @@ impl<T: FloatExt> LBFGS<T> {
         if state.x_df.norm(T::infinity()) <= self.g_atol {
             g_converged = true;
         }
 
         g_converged || x_converged || state.counter_f_tol > self.successive_f_tol
     }
 
     fn update_hessian<'a, X: Matrix<T>>(&self, _: &'a DF<X>, state: &mut LBFGSState<T, X>) {
         state.dg = state.x_df.sub(&state.x_df_prev);
         let rho_iteration = T::one() / state.dx.vector_dot(&state.dg);
         if !rho_iteration.is_infinite() {
             let idx = state.iteration.rem_euclid(self.m);
             state.dx_history[idx].copy_from(&state.dx);
             state.dg_history[idx].copy_from(&state.dg);
             state.rho[idx] = rho_iteration;
         }
     }
 }
 
@@ -174,84 +180,89 @@ struct LBFGSState<T: FloatExt, X: Matrix<T>> {
     x_prev: X,
     x_f: T,
     x_f_prev: T,
     x_df: X,
     x_df_prev: X,
     rho: Vec<T>,
     dx_history: Vec<X>,
     dg_history: Vec<X>,
     dx: X,
     dg: X,
     twoloop_q: X,
     twoloop_alpha: Vec<T>,
     iteration: usize,
     counter_f_tol: usize,
     s: X,
-    alpha: T
+    alpha: T,
 }
 
 impl<T: FloatExt> FirstOrderOptimizer<T> for LBFGS<T> {
-    fn optimize<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(&self, f: &F<T, X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<T, X> {
+    fn optimize<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(
+        &self,
+        f: &F<T, X>,
+        df: &'a DF<X>,
+        x0: &X,
+        ls: &'a LS,
+    ) -> OptimizerResult<T, X> {
         let mut state = self.init_state(x0);
 
         df(&mut state.x_df, &x0);
 
         let g_converged = state.x_df.norm(T::infinity()) < self.g_atol;
         let mut converged = g_converged;
         let stopped = false;
 
         while !converged && !stopped && state.iteration < self.max_iter {
             self.update_state(f, df, ls, &mut state);
 
             converged = self.assess_convergence(&mut state);
 
             if !converged {
                 self.update_hessian(df, &mut state);
             }
 
             state.iteration += 1;
         }
 
-        OptimizerResult{
+        OptimizerResult {
             x: state.x,
             f_x: state.x_f,
-            iterations: state.iteration
+            iterations: state.iteration,
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::linalg::naive::dense_matrix::*;
     use crate::optimization::line_search::Backtracking;
     use crate::optimization::FunctionOrder;
 
     #[test]
     fn lbfgs() {
         let x0 = DenseMatrix::vector_from_array(&[0., 0.]);
         let f = |x: &DenseMatrix<f64>| {
             (1.0 - x.get(0, 0)).powf(2.) + 100.0 * (x.get(0, 1) - x.get(0, 0).powf(2.)).powf(2.)
         };
 
         let df = |g: &mut DenseMatrix<f64>, x: &DenseMatrix<f64>| {
-            g.set(0, 0, -2. * (1. - x.get(0, 0)) - 400. * (x.get(0, 1) - x.get(0, 0).powf(2.)) * x.get(0, 0));
+            g.set(
+                0,
+                0,
+                -2. * (1. - x.get(0, 0))
+                    - 400. * (x.get(0, 1) - x.get(0, 0).powf(2.)) * x.get(0, 0),
+            );
             g.set(0, 1, 200. * (x.get(0, 1) - x.get(0, 0).powf(2.)));
         };
         let mut ls: Backtracking<f64> = Default::default();
         ls.order = FunctionOrder::THIRD;
         let optimizer: LBFGS<f64> = Default::default();
 
         let result = optimizer.optimize(&f, &df, &x0, &ls);
 
         assert!((result.f_x - 0.0).abs() < std::f64::EPSILON);
         assert!((result.x.get(0, 0) - 1.0).abs() < 1e-8);
         assert!((result.x.get(0, 1) - 1.0).abs() < 1e-8);
         assert!(result.iterations <= 24);
     }
 }
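
These hunks reformat an L-BFGS optimizer. `two_loops` is the standard L-BFGS two-loop recursion (Nocedal & Wright, Algorithm 7.4): it multiplies the current gradient by an implicit inverse-Hessian approximation built from the last m stored (dx, dg) pairs, scales by gamma = s.y / y.y (the `scaling` line), and negates the result to get a descent direction. A standalone sketch over `Vec<f64>`, with illustrative names rather than the crate's `Matrix` API:

```rust
fn dot(a: &[f64], b: &[f64]) -> f64 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

/// Two-loop recursion: returns -H_k * g for gradient `g`, given the stored
/// step/gradient-difference pairs (`s_hist`/`y_hist`, newest last).
fn two_loop_direction(g: &[f64], s_hist: &[Vec<f64>], y_hist: &[Vec<f64>]) -> Vec<f64> {
    let m = s_hist.len();
    let mut q: Vec<f64> = g.to_vec();
    let rho: Vec<f64> = s_hist.iter().zip(y_hist).map(|(s, y)| 1.0 / dot(y, s)).collect();
    let mut alpha = vec![0.0; m];

    // First loop: newest pair to oldest.
    for i in (0..m).rev() {
        alpha[i] = rho[i] * dot(&s_hist[i], &q);
        for (qj, yj) in q.iter_mut().zip(&y_hist[i]) {
            *qj -= alpha[i] * yj;
        }
    }

    // Initial Hessian scaling gamma = s.y / y.y (the `scaling` line above).
    if let (Some(s), Some(y)) = (s_hist.last(), y_hist.last()) {
        let gamma = dot(s, y) / dot(y, y);
        for qj in q.iter_mut() {
            *qj *= gamma;
        }
    }

    // Second loop: oldest pair to newest.
    for i in 0..m {
        let beta = rho[i] * dot(&y_hist[i], &q);
        for (qj, sj) in q.iter_mut().zip(&s_hist[i]) {
            *qj += (alpha[i] - beta) * sj;
        }
    }

    q.iter().map(|v| -v).collect() // negate: descent direction
}

fn main() {
    // One stored pair from f(x) = x0^2 + x1^2 (gradient 2x): for a quadratic,
    // a single pair already recovers the exact Newton direction -0.5 * g.
    let s = vec![0.5, 0.5]; // x1 - x0
    let y = vec![1.0, 1.0]; // g(x1) - g(x0) = 2 * s
    let d = two_loop_direction(&[2.0, 2.0], &[s], &[y]);
    println!("direction = {:?}", d); // [-1.0, -1.0]
}
```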
@@ -1,22 +1,27 @@
-pub mod lbfgs;
 pub mod gradient_descent;
+pub mod lbfgs;
 
 use std::clone::Clone;
 use std::fmt::Debug;
 
-use crate::math::num::FloatExt;
 use crate::linalg::Matrix;
+use crate::math::num::FloatExt;
 use crate::optimization::line_search::LineSearchMethod;
-use crate::optimization::{F, DF};
+use crate::optimization::{DF, F};
 
 pub trait FirstOrderOptimizer<T: FloatExt> {
-    fn optimize<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(&self, f: &F<T, X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<T, X>;
+    fn optimize<'a, X: Matrix<T>, LS: LineSearchMethod<T>>(
+        &self,
+        f: &F<T, X>,
+        df: &'a DF<X>,
+        x0: &X,
+        ls: &'a LS,
+    ) -> OptimizerResult<T, X>;
 }
 
 #[derive(Debug, Clone)]
-pub struct OptimizerResult<T: FloatExt, X: Matrix<T>>
-{
+pub struct OptimizerResult<T: FloatExt, X: Matrix<T>> {
     pub x: X,
     pub f_x: T,
-    pub iterations: usize
+    pub iterations: usize,
 }
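
This hunk is the `first_order` module root: the `FirstOrderOptimizer` trait and the `OptimizerResult` struct. The `F`/`DF` aliases it imports (the last hunk below shows `DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a`) pass the objective and gradient as trait objects, with the gradient written into a caller-owned buffer instead of being returned. A self-contained sketch of that calling convention, using slices in place of the crate's `Matrix` trait (names here are illustrative):

```rust
// Objective: &x -> f(x). Gradient: writes grad f(x) into its first argument.
type F<'a> = dyn Fn(&[f64]) -> f64 + 'a;
type DF<'a> = dyn for<'b> Fn(&'b mut [f64], &'b [f64]) + 'a;

fn eval(f: &F, df: &DF, x: &[f64]) -> (f64, Vec<f64>) {
    let mut g = vec![0.0; x.len()];
    df(&mut g, x); // gradient written in place: no allocation inside df
    (f(x), g)
}

fn main() {
    // f(x) = x0^2 + x1^2, grad f = (2 x0, 2 x1)
    let f = |x: &[f64]| x[0] * x[0] + x[1] * x[1];
    let df = |g: &mut [f64], x: &[f64]| {
        g[0] = 2.0 * x[0];
        g[1] = 2.0 * x[1];
    };
    let (fx, g) = eval(&f, &df, &[1.0, 2.0]);
    println!("f = {fx}, grad = {g:?}"); // f = 5, grad = [2.0, 4.0]
}
```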
@@ -1,14 +1,21 @@
-use num_traits::Float;
 use crate::optimization::FunctionOrder;
+use num_traits::Float;
 
 pub trait LineSearchMethod<T: Float> {
-    fn search<'a>(&self, f: &(dyn Fn(T) -> T), df: &(dyn Fn(T) -> T), alpha: T, f0: T, df0: T) -> LineSearchResult<T>;
+    fn search<'a>(
+        &self,
+        f: &(dyn Fn(T) -> T),
+        df: &(dyn Fn(T) -> T),
+        alpha: T,
+        f0: T,
+        df0: T,
+    ) -> LineSearchResult<T>;
 }
 
 #[derive(Debug, Clone)]
 pub struct LineSearchResult<T: Float> {
     pub alpha: T,
-    pub f_x: T
+    pub f_x: T,
 }
 
 pub struct Backtracking<T: Float> {
@@ -17,31 +24,36 @@ pub struct Backtracking<T: Float> {
     pub max_infinity_iterations: usize,
     pub phi: T,
     pub plo: T,
-    pub order: FunctionOrder
+    pub order: FunctionOrder,
 }
 
 impl<T: Float> Default for Backtracking<T> {
     fn default() -> Self {
         Backtracking {
             c1: T::from(1e-4).unwrap(),
             max_iterations: 1000,
             max_infinity_iterations: (-T::epsilon().log2()).to_usize().unwrap(),
             phi: T::from(0.5).unwrap(),
             plo: T::from(0.1).unwrap(),
-            order: FunctionOrder::SECOND
+            order: FunctionOrder::SECOND,
         }
     }
 }
 
 impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
-    fn search<'a>(&self, f: &(dyn Fn(T) -> T), _: &(dyn Fn(T) -> T), alpha: T, f0: T, df0: T) -> LineSearchResult<T> {
+    fn search<'a>(
+        &self,
+        f: &(dyn Fn(T) -> T),
+        _: &(dyn Fn(T) -> T),
+        alpha: T,
+        f0: T,
+        df0: T,
+    ) -> LineSearchResult<T> {
         let two = T::from(2.).unwrap();
         let three = T::from(3.).unwrap();
 
         let (mut a1, mut a2) = (alpha, alpha);
         let (mut fx0, mut fx1) = (f0, f(a1));
 
         let mut iterfinite = 0;
         while !fx1.is_finite() && iterfinite < self.max_infinity_iterations {
@@ -52,7 +64,7 @@ impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
             fx1 = f(a2);
         }
 
         let mut iteration = 0;
 
         while fx1 > f0 + self.c1 * a2 * df0 {
             if iteration > self.max_iterations {
@@ -62,66 +74,61 @@ impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
             let a_tmp;
 
             if self.order == FunctionOrder::SECOND || iteration == 0 {
-                a_tmp = - (df0 * a2.powf(two)) / (two * (fx1 - f0 - df0*a2))
+                a_tmp = -(df0 * a2.powf(two)) / (two * (fx1 - f0 - df0 * a2))
             } else {
                 let div = T::one() / (a1.powf(two) * a2.powf(two) * (a2 - a1));
-                let a = (a1.powf(two) * (fx1 - f0 - df0*a2) - a2.powf(two)*(fx0 - f0 - df0*a1))*div;
-                let b = (-a1.powf(three) * (fx1 - f0 - df0*a2) + a2.powf(three)*(fx0 - f0 - df0*a1))*div;
+                let a = (a1.powf(two) * (fx1 - f0 - df0 * a2)
+                    - a2.powf(two) * (fx0 - f0 - df0 * a1))
+                    * div;
+                let b = (-a1.powf(three) * (fx1 - f0 - df0 * a2)
+                    + a2.powf(three) * (fx0 - f0 - df0 * a1))
+                    * div;
 
                 if (a - T::zero()).powf(two).sqrt() <= T::epsilon() {
                     a_tmp = df0 / (two * b);
                 } else {
                     let d = T::max(b.powf(two) - three * a * df0, T::zero());
-                    a_tmp = (-b + d.sqrt()) / (three*a); //root of quadratic equation
+                    a_tmp = (-b + d.sqrt()) / (three * a); //root of quadratic equation
                 }
             }
 
             a1 = a2;
-            a2 = T::max(T::min(a_tmp, a2*self.phi), a2*self.plo);
+            a2 = T::max(T::min(a_tmp, a2 * self.phi), a2 * self.plo);
 
             fx0 = fx1;
             fx1 = f(a2);
 
             iteration += 1;
         }
 
         LineSearchResult {
             alpha: a2,
-            f_x: fx1
+            f_x: fx1,
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[test]
     fn backtracking() {
-        let f = |x: f64| -> f64 {
-            x.powf(2.) + x
-        };
+        let f = |x: f64| -> f64 { x.powf(2.) + x };
 
-        let df = |x: f64| -> f64 {
-            2. * x + 1.
-        };
+        let df = |x: f64| -> f64 { 2. * x + 1. };
 
         let ls: Backtracking<f64> = Default::default();
 
         let mut x = -3.;
         let mut alpha = 1.;
 
         for _ in 0..10 {
             let result = ls.search(&f, &df, alpha, f(x), df(x));
             alpha = result.alpha;
             x += alpha;
         }
 
         assert!(f(x).abs() < 0.01);
     }
 }
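
The `Backtracking` search above shrinks the trial step until the Armijo sufficient-decrease test passes: the `while fx1 > f0 + self.c1 * a2 * df0` loop is exactly that test, and `a_tmp` minimizes a quadratic (or, for `FunctionOrder::THIRD` after the first iteration, cubic) interpolant of phi(alpha) = f(x + alpha * step). In math form, matching the code (c1 defaults to 1e-4):

```latex
% Armijo sufficient-decrease condition: keep backtracking while
\varphi(\alpha_k) > \varphi(0) + c_1\,\alpha_k\,\varphi'(0)
% Quadratic-interpolation trial step (the SECOND / first-iteration branch):
\alpha_{k+1} = \frac{-\varphi'(0)\,\alpha_k^{2}}
                    {2\bigl(\varphi(\alpha_k) - \varphi(0) - \varphi'(0)\,\alpha_k\bigr)}
% which the code then clamps into [plo * alpha_k, phi * alpha_k].
```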
@@ -8,5 +8,5 @@ pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
 pub enum FunctionOrder {
     FIRST,
     SECOND,
-    THIRD
+    THIRD,
 }
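
`FunctionOrder` in this last hunk selects the interpolation degree `Backtracking` uses: SECOND always fits the quadratic model, while THIRD switches to the cubic model once a previous trial point exists, which is why `search` falls back to quadratic when `iteration == 0`. A minimal standalone sketch of that dispatch (the enum is re-declared here purely for illustration):

```rust
#[derive(PartialEq)]
#[allow(dead_code)]
enum FunctionOrder {
    FIRST,
    SECOND,
    THIRD,
}

fn interpolation(order: &FunctionOrder, iteration: usize) -> &'static str {
    // Cubic interpolation needs two previous trial points, so even THIRD
    // uses the quadratic model on the first iteration.
    if *order == FunctionOrder::SECOND || iteration == 0 {
        "quadratic"
    } else {
        "cubic"
    }
}

fn main() {
    let order = FunctionOrder::THIRD;
    assert_eq!(interpolation(&order, 0), "quadratic");
    assert_eq!(interpolation(&order, 1), "cubic");
}
```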