First version of the optimizer

Volodymyr Orlov
2019-10-29 08:59:06 -07:00
parent f4aec2b35e
commit 4488cc110e
10 changed files with 521 additions and 5 deletions
Cargo.toml (+1)
@@ -8,6 +8,7 @@ edition = "2018"
ndarray = "0.12.1"
ndarray-linalg = "0.10"
num-traits = "0.2"
rand = "0.7.2"
[dev-dependencies]
ndarray = "0.12.1"
src/lib.rs (+1)
@@ -5,3 +5,4 @@ pub mod math;
pub mod error;
pub mod algorithm;
pub mod common;
pub mod optimization;
src/linalg/mod.rs (+53)
@@ -1,4 +1,5 @@
use std::ops::Range;
use std::fmt::Debug;
pub mod naive;
@@ -30,4 +31,56 @@ pub trait Matrix: Into<Vec<f64>> + Clone{
fn add_mut(&mut self, other: &Self);
fn add_scalar_mut(&mut self, scalar: f64);
fn sub_scalar_mut(&mut self, scalar: f64);
fn mul_scalar_mut(&mut self, scalar: f64);
fn div_scalar_mut(&mut self, scalar: f64);
fn transpose(&self) -> Self;
fn generate_positive_definite(nrows: usize, ncols: usize) -> Self;
fn rand(nrows: usize, ncols: usize) -> Self;
fn norm2(&self) -> f64;
fn negative_mut(&mut self);
}
pub trait Vector: Into<Vec<f64>> + Clone + Debug {
fn get(&self, i: usize) -> f64;
fn set(&mut self, i: usize, value: f64);
fn zeros(size: usize) -> Self;
fn ones(size: usize) -> Self;
fn fill(size: usize, value: f64) -> Self;
fn shape(&self) -> (usize, usize);
fn norm2(&self) -> f64;
fn negative_mut(&mut self) -> &Self;
fn negative(&self) -> Self;
fn add_mut(&mut self, other: &Self) -> &Self;
fn add_scalar_mut(&mut self, scalar: f64) -> &Self;
fn sub_scalar_mut(&mut self, scalar: f64) -> &Self;
fn mul_scalar_mut(&mut self, scalar: f64) -> &Self;
fn div_scalar_mut(&mut self, scalar: f64) -> &Self;
fn dot(&self, other: &Self) -> f64;
}
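The new Vector trait mirrors Matrix, but its in-place mutators return &Self so calls can be nested (the optimizer below relies on this). A minimal usage sketch, assuming the DenseVector implementation added later in this commit:
use crate::linalg::Vector;
use crate::linalg::naive::dense_vector::DenseVector;
let mut v = DenseVector::from_array(&[1.0, 2.0, 3.0]);
v.mul_scalar_mut(2.0); // in place: v is now [2, 4, 6]
let g = v.negative(); // new allocation: g = [-2, -4, -6]
assert!((v.dot(&g) + 56.0).abs() < 1e-12); // 2*(-2) + 4*(-4) + 6*(-6) = -56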
src/linalg/naive/dense_matrix.rs (+100)
@@ -1,6 +1,7 @@
use std::ops::Range;
use crate::linalg::Matrix;
use crate::math;
use rand::prelude::*;
#[derive(Debug, Clone)]
pub struct DenseMatrix {
@@ -673,6 +674,78 @@ impl Matrix for DenseMatrix {
}
}
fn generate_positive_definite(nrows: usize, ncols: usize) -> Self {
let m = DenseMatrix::rand(nrows, ncols);
m.dot(&m.transpose())
}
fn transpose(&self) -> Self {
let mut m = DenseMatrix {
ncols: self.nrows,
nrows: self.ncols,
values: vec![0f64; self.ncols * self.nrows]
};
for c in 0..self.ncols {
for r in 0..self.nrows {
m.set(c, r, self.get(r, c));
}
}
m
}
fn rand(nrows: usize, ncols: usize) -> Self {
let mut rng = rand::thread_rng();
let values: Vec<f64> = (0..nrows*ncols).map(|_| {
rng.gen()
}).collect();
DenseMatrix {
ncols: ncols,
nrows: nrows,
values: values
}
}
fn norm2(&self) -> f64 {
let mut norm = 0f64;
for xi in self.values.iter() {
norm += xi * xi;
}
norm.sqrt()
}
fn add_scalar_mut(&mut self, scalar: f64) {
for i in 0..self.values.len() {
self.values[i] += scalar;
}
}
fn sub_scalar_mut(&mut self, scalar: f64) {
for i in 0..self.values.len() {
self.values[i] -= scalar;
}
}
fn mul_scalar_mut(&mut self, scalar: f64) {
for i in 0..self.values.len() {
self.values[i] *= scalar;
}
}
fn div_scalar_mut(&mut self, scalar: f64) {
for i in 0..self.values.len() {
self.values[i] /= scalar;
}
}
fn negative_mut(&mut self) {
for i in 0..self.values.len() {
self.values[i] = -self.values[i];
}
}
}
#[cfg(test)]
@@ -799,5 +872,32 @@ mod tests {
assert!(!m.approximate_eq(&m_neq, 0.5));
}
#[test]
fn rand() {
let m = DenseMatrix::rand(3, 3);
for c in 0..3 {
for r in 0..3 {
assert!(m.get(r, c) != 0f64);
}
}
}
#[test]
fn transpose() {
let m = DenseMatrix::from_2d_array(&[&[1.0, 3.0], &[2.0, 4.0]]);
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]);
let m_transposed = m.transpose();
for c in 0..2 {
for r in 0..2 {
assert!(m_transposed.get(r, c) == expected.get(r, c));
}
}
}
#[test]
fn generate_positive_definite() {
let m = DenseMatrix::generate_positive_definite(3, 3);
}
}
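A note on generate_positive_definite: for any real matrix A, the product A * A^T is symmetric positive semi-definite, because x^T (A A^T) x = ||A^T x||^2 >= 0 for every x. It is strictly positive definite exactly when A has full row rank, which holds with probability 1 for the uniformly random matrices produced by rand; the test above only checks that construction succeeds.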
src/linalg/naive/dense_vector.rs (+138)
@@ -0,0 +1,138 @@
use crate::linalg::Vector;
#[derive(Debug, Clone)]
pub struct DenseVector {
size: usize,
values: Vec<f64>
}
impl DenseVector {
pub fn from_array(values: &[f64]) -> DenseVector {
DenseVector::from_vec(Vec::from(values))
}
pub fn from_vec(values: Vec<f64>) -> DenseVector {
DenseVector {
size: values.len(),
values: values
}
}
}
impl Into<Vec<f64>> for DenseVector {
fn into(self) -> Vec<f64> {
self.values
}
}
impl Vector for DenseVector {
fn get(&self, i: usize) -> f64 {
self.values[i]
}
fn set(&mut self, i: usize, value: f64) {
self.values[i] = value;
}
fn zeros(size: usize) -> Self {
DenseVector::fill(size, 0f64)
}
fn ones(size: usize) -> Self {
DenseVector::fill(size, 1f64)
}
fn fill(size: usize, value: f64) -> Self {
DenseVector::from_vec(vec![value; size])
}
fn shape(&self) -> (usize, usize) {
(1, self.size)
}
fn add_mut(&mut self, other: &Self) -> &Self {
if self.size != other.size {
panic!("A and B should have the same shape");
}
for i in 0..self.size {
self.values[i] += other.values[i];
}
self
}
fn dot(&self, other: &Self) -> f64 {
if self.size != other.size {
panic!("A and B should be of the same size");
}
let mut result = 0f64;
for i in 0..self.size {
result += self.get(i) * other.get(i);
}
result
}
fn norm2(&self) -> f64 {
let mut norm = 0f64;
for xi in self.values.iter() {
norm += xi * xi;
}
norm.sqrt()
}
fn add_scalar_mut(&mut self, scalar: f64) -> &Self {
for i in 0..self.values.len() {
self.values[i] += scalar;
}
self
}
fn sub_scalar_mut(&mut self, scalar: f64) -> &Self {
for i in 0..self.values.len() {
self.values[i] -= scalar;
}
self
}
fn mul_scalar_mut(&mut self, scalar: f64) -> &Self {
for i in 0..self.values.len() {
self.values[i] *= scalar;
}
self
}
fn div_scalar_mut(&mut self, scalar: f64) -> &Self {
for i in 0..self.values.len() {
self.values[i] /= scalar;
}
self
}
fn negative_mut(&mut self) -> &Self {
for i in 0..self.values.len() {
self.values[i] = -self.values[i];
}
self
}
fn negative(&self) -> Self {
let mut result = DenseVector {
size: self.size,
values: self.values.clone()
};
for i in 0..self.values.len() {
result.values[i] = -self.values[i];
}
result
}
}
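dense_vector.rs lands without a test module; a minimal sanity check one could add, exercising only the methods defined above:
#[test]
fn dense_vector_basics() {
    let mut v = DenseVector::from_array(&[3.0, 4.0]);
    assert_eq!(v.shape(), (1, 2)); // vectors are stored as a single row
    assert!((v.norm2() - 5.0).abs() < 1e-12); // sqrt(3^2 + 4^2)
    v.add_mut(&DenseVector::ones(2)); // v = [4.0, 5.0]
    assert!((v.get(0) - 4.0).abs() < 1e-12);
    assert!((v.get(1) - 5.0).abs() < 1e-12);
}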
src/linalg/naive/mod.rs (+1)
@@ -1 +1,2 @@
pub mod dense_matrix;
pub mod dense_vector;
src/optimization/first_order.rs (+121)
@@ -0,0 +1,121 @@
use std::default::Default;
use crate::math::EPSILON;
use crate::linalg::Vector;
use crate::optimization::{F, DF};
use crate::optimization::line_search::LineSearchMethod;
pub trait FirstOrderOptimizer {
fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X>;
}
#[derive(Debug, Clone)]
pub struct OptimizerResult<X>
where X: Vector
{
pub x: X,
pub f_x: f64
}
pub struct GradientDescent {
pub max_iter: usize,
pub g_rtol: f64,
pub g_atol: f64
}
impl Default for GradientDescent {
fn default() -> Self {
GradientDescent {
max_iter: 10000,
g_rtol: EPSILON.sqrt(),
g_atol: EPSILON
}
}
}
impl FirstOrderOptimizer for GradientDescent
{
fn optimize<'a, X: Vector, LS: LineSearchMethod>(&self, f: &'a F<X>, df: &'a DF<X>, x0: &X, ls: &'a LS) -> OptimizerResult<X> {
let mut x = x0.clone();
let mut fx = f(&x);
let mut gvec = x0.clone();
let mut gnorm = gvec.norm2();
let gtol = (gvec.norm2() * self.g_rtol).max(self.g_atol);
let mut iter = 0;
let mut alpha = 1.0;
df(&mut gvec, &x);
while iter < self.max_iter && gnorm > gtol {
iter += 1;
let mut step = gvec.negative();
let f_alpha = |alpha: f64| -> f64 {
let mut dx = step.clone();
dx.mul_scalar_mut(alpha);
f(&dx.add_mut(&x)) // phi(alpha) = f(x .+ step .* alpha), where step = -gvec
};
let df_alpha = |alpha: f64| -> f64 {
let mut dx = step.clone();
let mut dg = gvec.clone();
dx.mul_scalar_mut(alpha);
df(&mut dg, &dx.add_mut(&x)); // gradient at x .+ step .* alpha
gvec.dot(&dg)
};
let df0 = step.dot(&gvec);
let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0);
alpha = ls_r.alpha;
fx = ls_r.f_x;
x.add_mut(&step.mul_scalar_mut(alpha));
df(&mut gvec, &x);
gnorm = gvec.norm2();
}
let f_x = f(&x);
OptimizerResult{
x: x,
f_x: f_x
}
}
}
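The loop above is standard steepest descent: the search direction is the negative gradient (step = -gvec), the step length alpha comes from the line search, and the update is x <- x + alpha * step. Iteration stops once ||grad f(x)|| <= gtol = max(g_rtol * ||x0||, g_atol) or max_iter is exhausted. Note that the relative part of the tolerance is taken from ||x0|| rather than ||grad f(x0)||: gvec still holds a copy of x0 when gtol is computed, since the first call to df happens afterwards.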
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_vector::DenseVector;
use crate::optimization::line_search::Backtracking;
use crate::optimization::FunctionOrder;
#[test]
fn gradient_descent() {
let x0 = DenseVector::from_array(&[-1., 1.]);
let f = |x: &DenseVector| {
(1.0 - x.get(0)).powf(2.) + 100.0 * (x.get(1) - x.get(0).powf(2.)).powf(2.)
};
let df = |g: &mut DenseVector, x: &DenseVector| {
g.set(0, -2. * (1. - x.get(0)) - 400. * (x.get(1) - x.get(0).powf(2.)) * x.get(0));
g.set(1, 200. * (x.get(1) - x.get(0).powf(2.)));
};
let mut ls: Backtracking = Default::default();
ls.order = FunctionOrder::THIRD;
let optimizer: GradientDescent = Default::default();
let result = optimizer.optimize(&f, &df, &x0, &ls);
assert!((result.f_x - 0.0).abs() < EPSILON);
assert!((result.x.get(0) - 1.0).abs() < EPSILON);
assert!((result.x.get(1) - 1.0).abs() < EPSILON);
}
}
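The test minimizes the Rosenbrock function, a standard optimizer benchmark whose narrow curved valley is difficult for plain gradient descent:
f(x, y) = (1 - x)^2 + 100 * (y - x^2)^2
Its gradient, df/dx = -2 * (1 - x) - 400 * x * (y - x^2) and df/dy = 200 * (y - x^2), is exactly what the df closure computes, and its unique minimum f(1, 1) = 0 is what the assertions check to within EPSILON.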
src/optimization/line_search.rs (+88)
@@ -0,0 +1,88 @@
use crate::math::EPSILON;
use crate::optimization::FunctionOrder;
pub trait LineSearchMethod {
fn search<'a>(&self, f: &(dyn Fn(f64) -> f64), df: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult;
}
#[derive(Debug, Clone)]
pub struct LineSearchResult {
pub alpha: f64,
pub f_x: f64
}
pub struct Backtracking {
pub c1: f64,
pub max_iterations: usize,
pub phi: f64,
pub plo: f64,
pub order: FunctionOrder
}
impl Default for Backtracking {
fn default() -> Self {
Backtracking {
c1: 1e-4,
max_iterations: 1000,
phi: 0.5,
plo: 0.1,
order: FunctionOrder::SECOND
}
}
}
impl LineSearchMethod for Backtracking {
fn search<'a>(&self, f: &(dyn Fn(f64) -> f64), _: &(dyn Fn(f64) -> f64), alpha: f64, f0: f64, df0: f64) -> LineSearchResult {
let (mut a1, mut a2) = (alpha, alpha);
let (mut fx0, mut fx1) = (f0, f(a1));
let mut iteration = 0;
while fx1 > f0 + self.c1 * a2 * df0 {
if iteration > self.max_iterations {
panic!("Linesearch failed to converge, reached maximum iterations.");
}
let a_tmp;
match self.order {
FunctionOrder::FIRST | FunctionOrder::SECOND => {
a_tmp = - (df0 * a2.powf(2.)) / (2. * (fx1 - f0 - df0*a2))
},
FunctionOrder::THIRD => {
let div = 1. / (a1.powf(2.) * a2.powf(2.) * (a2 - a1));
let a = (a1.powf(2.) * (fx1 - f0 - df0*a2) - a2.powf(2.)*(fx0 - f0 - df0*a1))*div;
let b = (-a1.powf(3.) * (fx1 - f0 - df0*a2) + a2.powf(3.)*(fx0 - f0 - df0*a1))*div;
if (a - 0.).powf(2.).sqrt() <= EPSILON {
a_tmp = df0 / (2. * b);
} else {
let d = f64::max(b.powf(2.) - 3. * a * df0, 0.);
a_tmp = (-b + d.sqrt()) / (3.*a); // minimizer of the cubic: root of its derivative, a quadratic
}
}
}
a1 = a2;
a2 = f64::max(f64::min(a_tmp, a2*self.phi), a2*self.plo);
fx0 = fx1;
fx1 = f(a2);
iteration += 1;
}
LineSearchResult {
alpha: a2,
f_x: fx1
}
}
}
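Backtracking accepts the first step length that satisfies the Armijo (sufficient decrease) condition, which is exactly the negation of the loop guard above:
f(x + alpha * p) <= f(x) + c1 * alpha * df0, where df0 = grad f(x)^T p
Each rejected alpha is replaced by the minimizer of a quadratic (FIRST/SECOND) or cubic (THIRD) polynomial that interpolates the function values seen so far (cf. Nocedal & Wright, Numerical Optimization, ch. 3), clamped so that each trial shrinks the previous one by a factor between plo and phi, preventing the step from stalling near its old value or collapsing toward zero.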
src/optimization/mod.rs (+14)
@@ -0,0 +1,14 @@
pub mod first_order;
pub mod line_search;
use crate::linalg::Vector;
type F<X: Vector> = dyn Fn(&X) -> f64;
type DF<X: Vector> = dyn Fn(&mut X, &X);
#[derive(Debug)]
pub enum FunctionOrder {
FIRST,
SECOND,
THIRD
}
linear_regression.rs (-1)
@@ -26,7 +26,6 @@ impl<M: Matrix> LinearRegression<M> {
panic!("Number of rows of X doesn't match number of rows of Y");
}
// let b = y.v_stack(&M::ones(1, 1));
let b = y.clone();
let mut a = x.h_stack(&M::ones(x_nrows, 1));