feat: + cross_validate, trait Predictor, refactoring

This commit is contained in:
Volodymyr Orlov
2020-12-22 15:41:53 -08:00
parent 40dfca702e
commit a2be9e117f
34 changed files with 977 additions and 369 deletions
+4 -1
View File
@@ -93,16 +93,18 @@ impl Kernels {
}
/// Linear Kernel
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct LinearKernel {}
/// Radial basis function (Gaussian) kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct RBFKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
}
/// Polynomial kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct PolynomialKernel<T: RealNumber> {
/// degree of the polynomial
pub degree: T,
@@ -113,6 +115,7 @@ pub struct PolynomialKernel<T: RealNumber> {
}
/// Sigmoid (hyperbolic tangent) kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct SigmoidKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
+57 -37
View File
@@ -57,13 +57,7 @@
//! let y = vec![ 0., 0., 0., 0., 0., 0., 0., 0.,
//! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
//!
//! let svr = SVC::fit(&x, &y,
//! Kernels::linear(),
//! SVCParameters {
//! epoch: 2,
//! c: 200.0,
//! tol: 1e-3,
//! }).unwrap();
//! let svr = SVC::fit(&x, &y, SVCParameters::default().with_c(200.0)).unwrap();
//!
//! let y_hat = svr.predict(&x).unwrap();
//! ```
@@ -84,22 +78,26 @@ use rand::seq::SliceRandom;
use serde::{Deserialize, Serialize};
use crate::base::Predictor;
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::svm::Kernel;
#[derive(Serialize, Deserialize, Debug)]
use crate::svm::{Kernel, Kernels, LinearKernel};
#[derive(Serialize, Deserialize, Debug, Clone)]
/// SVC Parameters
pub struct SVCParameters<T: RealNumber> {
/// Number of epochs
pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
/// Number of epochs.
pub epoch: usize,
/// Regularization parameter.
pub c: T,
/// Tolerance for stopping criterion
/// Tolerance for stopping criterion.
pub tol: T,
/// The kernel function.
pub kernel: K,
/// Unused parameter.
m: PhantomData<M>,
}
#[derive(Serialize, Deserialize, Debug)]
@@ -136,7 +134,7 @@ struct Cache<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
struct Optimizer<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
x: &'a M,
y: &'a M::RowVector,
parameters: &'a SVCParameters<T>,
parameters: &'a SVCParameters<T, M, K>,
svmin: usize,
svmax: usize,
gmin: T,
@@ -147,27 +145,61 @@ struct Optimizer<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
recalculate_minmax_grad: bool,
}
impl<T: RealNumber> Default for SVCParameters<T> {
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVCParameters<T, M, K> {
    /// Sets the number of epochs (full passes over the training data).
    pub fn with_epoch(mut self, epoch: usize) -> Self {
        self.epoch = epoch;
        self
    }
    /// Sets the regularization parameter `c` (penalty on margin violations).
    pub fn with_c(mut self, c: T) -> Self {
        self.c = c;
        self
    }
    /// Sets the tolerance used by the stopping criterion.
    pub fn with_tol(mut self, tol: T) -> Self {
        self.tol = tol;
        self
    }
    /// Replaces the kernel function.
    ///
    /// Returns a new parameter set whose kernel type parameter is `KK`
    /// (it may differ from `K`), copying the remaining fields over.
    pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVCParameters<T, M, KK> {
        SVCParameters {
            epoch: self.epoch,
            c: self.c,
            tol: self.tol,
            kernel,
            m: PhantomData,
        }
    }
}
impl<T: RealNumber, M: Matrix<T>> Default for SVCParameters<T, M, LinearKernel> {
    /// Default SVC parameters: 2 epochs, `c = 1`, `tol = 1e-3`, linear kernel.
    fn default() -> Self {
        Self {
            kernel: Kernels::linear(),
            epoch: 2,
            c: T::one(),
            tol: T::from_f64(1e-3).unwrap(),
            m: PhantomData,
        }
    }
}
/// `Predictor` implementation so `SVC` can be driven by generic model-selection
/// routines (e.g. the new `cross_validate` mentioned in this commit).
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector> for SVC<T, M, K> {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
// Delegates to the inherent `SVC::predict`; inherent methods take
// precedence over trait methods, so this call does not recurse.
self.predict(x)
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
/// Fits SVC to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
/// * `y` - class labels
/// * `kernel` - the kernel function
/// * `parameters` - optional parameters, use `Default::default()` to set parameters to default values.
pub fn fit(
x: &M,
y: &M::RowVector,
kernel: K,
parameters: SVCParameters<T>,
parameters: SVCParameters<T, M, K>,
) -> Result<SVC<T, M, K>, Failed> {
let (n, _) = x.shape();
@@ -198,13 +230,13 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
}
}
let optimizer = Optimizer::new(x, &y, &kernel, &parameters);
let optimizer = Optimizer::new(x, &y, &parameters.kernel, &parameters);
let (support_vectors, weight, b) = optimizer.optimize();
Ok(SVC {
classes,
kernel,
kernel: parameters.kernel,
instances: support_vectors,
w: weight,
b,
@@ -321,7 +353,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
x: &'a M,
y: &'a M::RowVector,
kernel: &'a K,
parameters: &'a SVCParameters<T>,
parameters: &'a SVCParameters<T, M, K>,
) -> Optimizer<'a, T, M, K> {
let (n, _) = x.shape();
@@ -711,17 +743,10 @@ mod tests {
let y_hat = SVC::fit(
&x,
&y,
Kernels::linear(),
SVCParameters {
epoch: 2,
c: 200.0,
tol: 1e-3,
},
SVCParameters::default().with_c(200.0).with_kernel(Kernels::linear()),
)
.and_then(|lr| lr.predict(&x))
.unwrap();
println!("{:?}", y_hat);
.unwrap();
assert!(accuracy(&y_hat, &y) >= 0.9);
}
@@ -759,12 +784,7 @@ mod tests {
let y_hat = SVC::fit(
&x,
&y,
Kernels::rbf(0.7),
SVCParameters {
epoch: 2,
c: 1.0,
tol: 1e-3,
},
SVCParameters::default().with_c(1.0).with_kernel(Kernels::rbf(0.7)),
)
.and_then(|lr| lr.predict(&x))
.unwrap();
@@ -801,7 +821,7 @@ mod tests {
-1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let svr = SVC::fit(&x, &y, Kernels::linear(), Default::default()).unwrap();
let svr = SVC::fit(&x, &y, Default::default()).unwrap();
let deserialized_svr: SVC<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
+56 -27
View File
@@ -49,13 +49,7 @@
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
//!
//! let svr = SVR::fit(&x, &y,
//! LinearKernel {},
//! SVRParameters {
//! eps: 2.0,
//! c: 10.0,
//! tol: 1e-3,
//! }).unwrap();
//! let svr = SVR::fit(&x, &y, SVRParameters::default().with_eps(2.0).with_c(10.0)).unwrap();
//!
//! let y_hat = svr.predict(&x).unwrap();
//! ```
@@ -72,25 +66,30 @@
use std::cell::{Ref, RefCell};
use std::fmt::Debug;
use std::marker::PhantomData;
use serde::{Deserialize, Serialize};
use crate::base::Predictor;
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::svm::Kernel;
#[derive(Serialize, Deserialize, Debug)]
use crate::svm::{Kernel, Kernels, LinearKernel};
#[derive(Serialize, Deserialize, Debug, Clone)]
/// SVR Parameters
pub struct SVRParameters<T: RealNumber> {
/// Epsilon in the epsilon-SVR model
pub struct SVRParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
/// Epsilon in the epsilon-SVR model.
pub eps: T,
/// Regularization parameter.
pub c: T,
/// Tolerance for stopping criterion
/// Tolerance for stopping criterion.
pub tol: T,
/// The kernel function.
pub kernel: K,
/// Unused parameter.
m: PhantomData<M>,
}
#[derive(Serialize, Deserialize, Debug)]
@@ -135,16 +134,52 @@ struct Cache<T: Clone> {
data: Vec<RefCell<Option<Vec<T>>>>,
}
impl<T: RealNumber> Default for SVRParameters<T> {
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVRParameters<T, M, K> {
    /// Sets epsilon, the width of the insensitive tube in the epsilon-SVR model.
    pub fn with_eps(mut self, eps: T) -> Self {
        self.eps = eps;
        self
    }
    /// Sets the regularization parameter `c` (penalty on errors outside the tube).
    pub fn with_c(mut self, c: T) -> Self {
        self.c = c;
        self
    }
    /// Sets the tolerance used by the stopping criterion.
    pub fn with_tol(mut self, tol: T) -> Self {
        self.tol = tol;
        self
    }
    /// Replaces the kernel function.
    ///
    /// Returns a new parameter set whose kernel type parameter is `KK`
    /// (it may differ from `K`), copying the remaining fields over.
    pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVRParameters<T, M, KK> {
        SVRParameters {
            eps: self.eps,
            c: self.c,
            tol: self.tol,
            kernel,
            m: PhantomData,
        }
    }
}
impl<T: RealNumber, M: Matrix<T>> Default for SVRParameters<T, M, LinearKernel> {
    /// Default SVR parameters: `eps = 0.1`, `c = 1`, `tol = 1e-3`, linear kernel.
    fn default() -> Self {
        Self {
            kernel: Kernels::linear(),
            eps: T::from_f64(0.1).unwrap(),
            c: T::one(),
            tol: T::from_f64(1e-3).unwrap(),
            m: PhantomData,
        }
    }
}
/// `Predictor` implementation so `SVR` can be driven by generic model-selection
/// routines (e.g. the new `cross_validate` mentioned in this commit).
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector> for SVR<T, M, K> {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
// Delegates to the inherent `SVR::predict`; inherent methods take
// precedence over trait methods, so this call does not recurse.
self.predict(x)
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
/// Fits SVR to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -153,9 +188,8 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
/// * `parameters` - optional parameters, use `Default::default()` to set parameters to default values.
pub fn fit(
x: &M,
y: &M::RowVector,
kernel: K,
parameters: SVRParameters<T>,
y: &M::RowVector,
parameters: SVRParameters<T, M, K>,
) -> Result<SVR<T, M, K>, Failed> {
let (n, _) = x.shape();
@@ -165,12 +199,12 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
));
}
let optimizer = Optimizer::new(x, y, &kernel, &parameters);
let optimizer = Optimizer::new(x, y, &parameters.kernel, &parameters);
let (support_vectors, weight, b) = optimizer.smo();
Ok(SVR {
kernel,
kernel: parameters.kernel,
instances: support_vectors,
w: weight,
b,
@@ -243,7 +277,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
x: &M,
y: &M::RowVector,
kernel: &'a K,
parameters: &SVRParameters<T>,
parameters: &SVRParameters<T, M, K>,
) -> Optimizer<'a, T, M, K> {
let (n, _) = x.shape();
@@ -513,12 +547,7 @@ mod tests {
let y_hat = SVR::fit(
&x,
&y,
LinearKernel {},
SVRParameters {
eps: 2.0,
c: 10.0,
tol: 1e-3,
},
SVRParameters::default().with_eps(2.0).with_c(10.0),
)
.and_then(|lr| lr.predict(&x))
.unwrap();
@@ -552,7 +581,7 @@ mod tests {
114.2, 115.7, 116.9,
];
let svr = SVR::fit(&x, &y, LinearKernel {}, Default::default()).unwrap();
let svr = SVR::fit(&x, &y, Default::default()).unwrap();
let deserialized_svr: SVR<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();