First version of the optimizer
This commit is contained in:
@@ -8,6 +8,7 @@ edition = "2018"
|
||||
ndarray = "0.12.1"
|
||||
ndarray-linalg = "0.10"
|
||||
num-traits = "0.2"
|
||||
rand = "0.7.2"
|
||||
|
||||
[dev-dependencies]
|
||||
ndarray = "0.12.1"
|
||||
|
||||
@@ -5,3 +5,4 @@ pub mod math;
|
||||
pub mod error;
|
||||
pub mod algorithm;
|
||||
pub mod common;
|
||||
pub mod optimization;
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::ops::Range;
|
||||
use std::fmt::Debug;
|
||||
|
||||
pub mod naive;
|
||||
|
||||
@@ -30,4 +31,56 @@ pub trait Matrix: Into<Vec<f64>> + Clone{
|
||||
|
||||
    /// In-place element-wise addition of `other`.
    fn add_mut(&mut self, other: &Self);

    /// Adds `scalar` to every element in place.
    fn add_scalar_mut(&mut self, scalar: f64);

    /// Subtracts `scalar` from every element in place.
    fn sub_scalar_mut(&mut self, scalar: f64);

    /// Multiplies every element by `scalar` in place.
    fn mul_scalar_mut(&mut self, scalar: f64);

    /// Divides every element by `scalar` in place.
    fn div_scalar_mut(&mut self, scalar: f64);

    /// Returns a new matrix with rows and columns swapped.
    fn transpose(&self) -> Self;

    /// Builds a random positive (semi-)definite matrix.
    /// NOTE(review): implementations return M * M^T of a random
    /// `nrows x ncols` M, so the result is `nrows x nrows` — confirm
    /// the intended meaning of `ncols` here.
    fn generate_positive_definite(nrows: usize, ncols: usize) -> Self;

    /// Builds an `nrows x ncols` matrix with random entries.
    fn rand(nrows: usize, ncols: usize) -> Self;

    /// Frobenius norm: square root of the sum of squared entries.
    fn norm2(&self) -> f64;

    /// Negates every element in place.
    fn negative_mut(&mut self);
|
||||
|
||||
}
|
||||
|
||||
/// Abstraction over a one-dimensional collection of `f64` values used by the
/// optimization routines. Mutating methods return `&Self` to allow chaining.
pub trait Vector: Into<Vec<f64>> + Clone + Debug {

    /// Returns the element at index `i`.
    fn get(&self, i: usize) -> f64;

    /// Overwrites the element at index `i`.
    fn set(&mut self, i: usize, value: f64);

    /// Creates a vector of `size` zeros.
    fn zeros(size: usize) -> Self;

    /// Creates a vector of `size` ones.
    fn ones(size: usize) -> Self;

    /// Creates a vector of `size` copies of `value`.
    fn fill(size: usize, value: f64) -> Self;

    /// Returns the shape as a (rows, columns) pair.
    fn shape(&self) -> (usize, usize);

    /// Euclidean (L2) norm.
    fn norm2(&self) -> f64;

    /// Negates every element in place.
    fn negative_mut(&mut self) -> &Self;

    /// Returns a negated copy.
    fn negative(&self) -> Self;

    /// In-place element-wise addition of `other`.
    fn add_mut(&mut self, other: &Self) -> &Self;

    /// Adds `scalar` to every element in place.
    fn add_scalar_mut(&mut self, scalar: f64) -> &Self;

    /// Subtracts `scalar` from every element in place.
    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self;

    /// Multiplies every element by `scalar` in place.
    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self;

    /// Divides every element by `scalar` in place.
    fn div_scalar_mut(&mut self, scalar: f64) -> &Self;

    /// Dot (inner) product with `other`.
    fn dot(&self, other: &Self) -> f64;

}
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::ops::Range;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math;
|
||||
use rand::prelude::*;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DenseMatrix {
|
||||
@@ -673,6 +674,78 @@ impl Matrix for DenseMatrix {
|
||||
}
|
||||
}
|
||||
|
||||
    /// Builds a random symmetric positive semi-definite matrix as M * M^T.
    ///
    /// NOTE(review): the result is always `nrows x nrows` (product of an
    /// `nrows x ncols` matrix with its transpose); `ncols` only bounds the
    /// rank. Strict positive *definiteness* is not guaranteed — confirm
    /// whether callers rely on it.
    fn generate_positive_definite(nrows: usize, ncols: usize) -> Self {
        let m = DenseMatrix::rand(nrows, ncols);
        m.dot(&m.transpose())
    }
|
||||
|
||||
fn transpose(&self) -> Self {
|
||||
let mut m = DenseMatrix {
|
||||
ncols: self.nrows,
|
||||
nrows: self.ncols,
|
||||
values: vec![0f64; self.ncols * self.nrows]
|
||||
};
|
||||
for c in 0..self.ncols {
|
||||
for r in 0..self.nrows {
|
||||
m.set(c, r, self.get(r, c));
|
||||
}
|
||||
}
|
||||
m
|
||||
|
||||
}
|
||||
|
||||
fn rand(nrows: usize, ncols: usize) -> Self {
|
||||
let mut rng = rand::thread_rng();
|
||||
let values: Vec<f64> = (0..nrows*ncols).map(|_| {
|
||||
rng.gen()
|
||||
}).collect();
|
||||
DenseMatrix {
|
||||
ncols: ncols,
|
||||
nrows: nrows,
|
||||
values: values
|
||||
}
|
||||
}
|
||||
|
||||
fn norm2(&self) -> f64 {
|
||||
let mut norm = 0f64;
|
||||
|
||||
for xi in self.values.iter() {
|
||||
norm += xi * xi;
|
||||
}
|
||||
|
||||
norm.sqrt()
|
||||
}
|
||||
|
||||
fn add_scalar_mut(&mut self, scalar: f64) {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] += scalar;
|
||||
}
|
||||
}
|
||||
|
||||
fn sub_scalar_mut(&mut self, scalar: f64) {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] -= scalar;
|
||||
}
|
||||
}
|
||||
|
||||
fn mul_scalar_mut(&mut self, scalar: f64) {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] *= scalar;
|
||||
}
|
||||
}
|
||||
|
||||
fn div_scalar_mut(&mut self, scalar: f64) {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] /= scalar;
|
||||
}
|
||||
}
|
||||
|
||||
fn negative_mut(&mut self) {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] = -self.values[i];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -799,5 +872,32 @@ mod tests {
|
||||
assert!(!m.approximate_eq(&m_neq, 0.5));
|
||||
}
|
||||
|
||||
    // Entries are random; with probability 1 none of them is exactly zero,
    // so this catches a `rand` that forgot to fill the buffer.
    #[test]
    fn rand() {
        let m = DenseMatrix::rand(3, 3);
        for c in 0..3 {
            for r in 0..3 {
                assert!(m.get(r, c) != 0f64);
            }
        }
    }
|
||||
|
||||
    // Transposing a 2x2 matrix must swap the off-diagonal entries.
    #[test]
    fn transpose() {
        let m = DenseMatrix::from_2d_array(&[&[1.0, 3.0], &[2.0, 4.0]]);
        let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]);
        let m_transposed = m.transpose();
        for c in 0..2 {
            for r in 0..2 {
                assert!(m_transposed.get(r, c) == expected.get(r, c));
            }
        }
    }
|
||||
|
||||
#[test]
|
||||
fn generate_positive_definite() {
|
||||
let m = DenseMatrix::generate_positive_definite(3, 3);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
use crate::linalg::Vector;
|
||||
|
||||
/// A heap-allocated, fixed-size vector of `f64` values.
#[derive(Debug, Clone)]
pub struct DenseVector {

    // Number of elements; kept in sync with `values.len()`.
    size: usize,
    // The elements, stored contiguously.
    values: Vec<f64>

}
|
||||
|
||||
impl DenseVector {
|
||||
|
||||
pub fn from_array(values: &[f64]) -> DenseVector {
|
||||
DenseVector::from_vec(Vec::from(values))
|
||||
}
|
||||
|
||||
pub fn from_vec(values: Vec<f64>) -> DenseVector {
|
||||
DenseVector {
|
||||
size: values.len(),
|
||||
values: values
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Conversion into the underlying storage, consuming the vector.
///
/// Implemented as `From` (rather than a hand-written `Into`, as before):
/// the standard blanket impl then provides `Into<Vec<f64>>` for free, which
/// satisfies the `Vector` trait bound while following the convention that
/// `From` is the trait to implement (clippy `from_over_into`).
impl From<DenseVector> for Vec<f64> {
    fn from(v: DenseVector) -> Vec<f64> {
        v.values
    }
}
|
||||
|
||||
impl Vector for DenseVector {
|
||||
|
||||
fn get(&self, i: usize) -> f64 {
|
||||
self.values[i]
|
||||
}
|
||||
|
||||
fn set(&mut self, i: usize, value: f64) {
|
||||
self.values[i] = value;
|
||||
}
|
||||
|
||||
fn zeros(size: usize) -> Self {
|
||||
DenseVector::fill(size, 0f64)
|
||||
}
|
||||
|
||||
fn ones(size: usize) -> Self {
|
||||
DenseVector::fill(size, 1f64)
|
||||
}
|
||||
|
||||
fn fill(size: usize, value: f64) -> Self {
|
||||
DenseVector::from_vec(vec![value; size])
|
||||
}
|
||||
|
||||
fn shape(&self) -> (usize, usize) {
|
||||
(1, self.size)
|
||||
}
|
||||
|
||||
fn add_mut(&mut self, other: &Self) -> &Self {
|
||||
if self.size != other.size {
|
||||
panic!("A and B should have the same shape");
|
||||
}
|
||||
for i in 0..self.size {
|
||||
self.values[i] += other.values[i];
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
fn dot(&self, other: &Self) -> f64 {
|
||||
if self.size != other.size {
|
||||
panic!("A and B should be of the same size");
|
||||
}
|
||||
|
||||
let mut result = 0f64;
|
||||
for i in 0..self.size {
|
||||
result += self.get(i) * other.get(i);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn norm2(&self) -> f64 {
|
||||
let mut norm = 0f64;
|
||||
|
||||
for xi in self.values.iter() {
|
||||
norm += xi * xi;
|
||||
}
|
||||
|
||||
norm.sqrt()
|
||||
}
|
||||
|
||||
fn add_scalar_mut(&mut self, scalar: f64) -> &Self {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] += scalar;
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn sub_scalar_mut(&mut self, scalar: f64) -> &Self {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] -= scalar;
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn mul_scalar_mut(&mut self, scalar: f64) -> &Self {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] *= scalar;
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn div_scalar_mut(&mut self, scalar: f64) -> &Self {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] /= scalar;
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn negative_mut(&mut self) -> &Self {
|
||||
for i in 0..self.values.len() {
|
||||
self.values[i] = -self.values[i];
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn negative(&self) -> Self {
|
||||
let mut result = DenseVector {
|
||||
size: self.size,
|
||||
values: self.values.clone()
|
||||
};
|
||||
for i in 0..self.values.len() {
|
||||
result.values[i] = -self.values[i];
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1 +1,2 @@
|
||||
pub mod dense_matrix;
|
||||
pub mod dense_vector;
|
||||
@@ -0,0 +1,121 @@
|
||||
use std::default::Default;
|
||||
use crate::math::EPSILON;
|
||||
use crate::linalg::Vector;
|
||||
use crate::optimization::{F, DF};
|
||||
use crate::optimization::line_search::LineSearchMethod;
|
||||
|
||||
/// A first-order (gradient-based) optimization algorithm.
pub trait FirstOrderOptimizer {
    /// Minimizes `f` starting from `x0`, using the gradient function `df`
    /// and the line-search method `ls` to choose step lengths.
    fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
}
|
||||
|
||||
/// Outcome of an optimization run.
#[derive(Debug, Clone)]
pub struct OptimizerResult<X>
where X: Vector
{
    /// Point at which the optimizer stopped.
    pub x: X,
    /// Objective value at `x`.
    pub f_x: f64
}
|
||||
|
||||
/// Configuration for the steepest-descent optimizer.
pub struct GradientDescent {
    /// Hard cap on the number of iterations.
    pub max_iter: usize,
    /// Relative gradient-norm tolerance (scaled by the reference norm
    /// computed at the start of `optimize`).
    pub g_rtol: f64,
    /// Absolute gradient-norm tolerance.
    pub g_atol: f64
}
|
||||
|
||||
impl Default for GradientDescent {
|
||||
fn default() -> Self {
|
||||
GradientDescent {
|
||||
max_iter: 10000,
|
||||
g_rtol: EPSILON.sqrt(),
|
||||
g_atol: EPSILON
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FirstOrderOptimizer for GradientDescent
|
||||
{
|
||||
|
||||
fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
|
||||
|
||||
let mut x = x0.clone();
|
||||
let mut fx = f(&x);
|
||||
|
||||
let mut gvec = x0.clone();
|
||||
let mut gnorm = gvec.norm2();
|
||||
|
||||
let gtol = (gvec.norm2() * self.g_rtol).max(self.g_atol);
|
||||
|
||||
let mut iter = 0;
|
||||
let mut alpha = 1.0;
|
||||
df(&mut gvec, &x);
|
||||
|
||||
while iter < self.max_iter && gnorm > gtol {
|
||||
iter += 1;
|
||||
|
||||
let mut step = gvec.negative();
|
||||
|
||||
let f_alpha = |alpha: f64| -> f64 {
|
||||
let mut dx = step.clone();
|
||||
dx.mul_scalar_mut(alpha);
|
||||
f(&dx.add_mut(&x)) // f(x) = f(x .+ gvec .* alpha)
|
||||
};
|
||||
|
||||
let df_alpha = |alpha: f64| -> f64 {
|
||||
let mut dx = step.clone();
|
||||
let mut dg = gvec.clone();
|
||||
dx.mul_scalar_mut(alpha);
|
||||
df(&mut dg, &dx.add_mut(&x)); //df(x) = df(x .+ gvec .* alpha)
|
||||
gvec.dot(&dg)
|
||||
};
|
||||
|
||||
let df0 = step.dot(&gvec);
|
||||
|
||||
let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0);
|
||||
alpha = ls_r.alpha;
|
||||
fx = ls_r.f_x;
|
||||
x.add_mut(&step.mul_scalar_mut(alpha));
|
||||
df(&mut gvec, &x);
|
||||
gnorm = gvec.norm2();
|
||||
}
|
||||
|
||||
let f_x = f(&x);
|
||||
|
||||
OptimizerResult{
|
||||
x: x,
|
||||
f_x: f_x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_vector::DenseVector;
    use crate::optimization::line_search::Backtracking;
    use crate::optimization::FunctionOrder;

    // Minimizes the Rosenbrock function, whose global minimum f(1, 1) = 0
    // lies in a narrow curved valley — a standard optimizer stress test.
    #[test]
    fn gradient_descent() {

        let x0 = DenseVector::from_array(&[-1., 1.]);
        // Rosenbrock: (1 - x0)^2 + 100 * (x1 - x0^2)^2
        let f = |x: &DenseVector| {
            (1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.)
        };

        // Analytic gradient of the Rosenbrock function, written into `g`.
        let df = |g: &mut DenseVector, x: &DenseVector| {
            g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0));
            g.set(1, 200. * (x.get(1) - x.get(0).powf(2.)));
        };

        // Cubic-interpolation backtracking line search.
        let mut ls: Backtracking = Default::default();
        ls.order = FunctionOrder::THIRD;
        let optimizer: GradientDescent = Default::default();

        let result = optimizer.optimize(&f, &df, &x0, &ls);

        // Must converge to the known minimum at (1, 1) with value 0.
        assert!((result.f_x - 0.0).abs() < EPSILON);
        assert!((result.x.get(0) - 1.0).abs() < EPSILON);
        assert!((result.x.get(1) - 1.0).abs() < EPSILON);

    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
use crate::math::EPSILON;
|
||||
use crate::optimization::FunctionOrder;
|
||||
|
||||
/// A strategy for choosing a step length along a fixed search direction.
pub trait LineSearchMethod {
    /// Finds an acceptable step length starting from the initial guess
    /// `alpha`. `f` evaluates the objective along the direction, `df` its
    /// derivative; `f0` and `df0` are the value and directional derivative
    /// at step 0.
    fn search<'a>(&self, f: &(dyn Fn(f64) -> f64), df: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult;
}
|
||||
|
||||
/// Outcome of a single line search.
#[derive(Debug, Clone)]
pub struct LineSearchResult {
    /// Step length that satisfied the acceptance condition.
    pub alpha: f64,
    /// Objective value at the accepted step.
    pub f_x: f64
}
|
||||
|
||||
/// Parameters for backtracking line search with polynomial interpolation.
pub struct Backtracking {
    /// Sufficient-decrease (Armijo) constant.
    pub c1: f64,
    /// Maximum number of backtracking steps before giving up (panics).
    pub max_iterations: usize,
    /// Upper clamp factor: the next trial step is at most `phi` times the
    /// current one.
    pub phi: f64,
    /// Lower clamp factor: the next trial step is at least `plo` times the
    /// current one.
    pub plo: f64,
    /// Interpolation order used to propose the next trial step.
    pub order: FunctionOrder
}
|
||||
|
||||
impl Default for Backtracking {
|
||||
fn default() -> Self {
|
||||
Backtracking {
|
||||
c1: 1e-4,
|
||||
max_iterations: 1000,
|
||||
phi: 0.5,
|
||||
plo: 0.1,
|
||||
order: FunctionOrder::SECOND
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LineSearchMethod for Backtracking {
|
||||
|
||||
fn search<'a>(&self, f: &(dyn Fn(f64) -> f64), _: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult {
|
||||
|
||||
let (mut a1, mut a2) = (alpha, alpha);
|
||||
let (mut fx0, mut fx1) = (f0, f(a1));
|
||||
|
||||
let mut iteration = 0;
|
||||
|
||||
while fx1 > f0 + self.c1 * a2 * df0 {
|
||||
if iteration > self.max_iterations {
|
||||
panic!("Linesearch failed to converge, reached maximum iterations.");
|
||||
}
|
||||
|
||||
let a_tmp;
|
||||
|
||||
match self.order {
|
||||
|
||||
FunctionOrder::FIRST | FunctionOrder::SECOND => {
|
||||
a_tmp = - (df0 * a2.powf(2.)) / (2. * (fx1 - f0 - df0*a2))
|
||||
},
|
||||
|
||||
FunctionOrder::THIRD => {
|
||||
|
||||
let div = 1. / (a1.powf(2.) * a2.powf(2.) * (a2 - a1));
|
||||
let a = (a1.powf(2.) * (fx1 - f0 - df0*a2) - a2.powf(2.)*(fx0 - f0 - df0*a1))*div;
|
||||
let b = (-a1.powf(3.) * (fx1 - f0 - df0*a2) + a2.powf(3.)*(fx0 - f0 - df0*a1))*div;
|
||||
|
||||
|
||||
|
||||
if (a - 0.).powf(2.).sqrt() <= EPSILON {
|
||||
a_tmp = df0 / (2. * b);
|
||||
} else {
|
||||
let d = f64::max(b.powf(2.) - 3. * a * df0, 0.);
|
||||
a_tmp = (-b + d.sqrt()) / (3.*a); //root of quadratic equation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
a1 = a2;
|
||||
a2 = f64::max(f64::min(a_tmp, a2*self.phi), a2*self.plo);
|
||||
|
||||
fx0 = fx1;
|
||||
fx1 = f(a2);
|
||||
|
||||
iteration += 1;
|
||||
}
|
||||
|
||||
LineSearchResult {
|
||||
alpha: a2,
|
||||
f_x: fx1
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
pub mod first_order;
|
||||
pub mod line_search;
|
||||
|
||||
use crate::linalg::Vector;
|
||||
|
||||
// Objective function type: maps a point `x` to a scalar value.
//
// Trait bounds on type aliases are not enforced by the compiler (the
// original `type F<X: Vector>` only produced a `type_alias_bounds`
// warning), so the bound is dropped here; the `Vector` constraint is
// applied where the aliases are used.
type F<X> = dyn Fn(&X) -> f64;
// Gradient function type: writes the gradient at `x` (second argument)
// into its first argument.
type DF<X> = dyn Fn(&mut X, &X);
|
||||
|
||||
/// Order of the interpolating polynomial used by line-search methods.
///
/// `Clone`/`Copy`/`PartialEq`/`Eq` are derived so the enum can be copied
/// into configurations and compared — backward compatible with the
/// original `Debug`-only derive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FunctionOrder {
    /// Linear model.
    FIRST,
    /// Quadratic model.
    SECOND,
    /// Cubic model.
    THIRD
}
|
||||
@@ -26,7 +26,6 @@ impl<M: Matrix> LinearRegression<M> {
|
||||
panic!("Number of rows of X doesn't match number of rows of Y");
|
||||
}
|
||||
|
||||
// let b = y.v_stack(&M::ones(1, 1));
|
||||
let b = y.clone();
|
||||
let mut a = x.h_stack(&M::ones(x_nrows, 1));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user