From cf4f658f015405481d5c67e828acf0eca5c95709 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Wed, 28 Oct 2020 17:10:17 -0700 Subject: [PATCH] feat: adds 3 more SVM kernels, linalg refactoring --- src/linalg/mod.rs | 70 +++++++++++++++ src/linalg/naive/dense_matrix.rs | 105 ++++++++++++++++++++++ src/linalg/nalgebra_bindings.rs | 85 ++++++++++++++++++ src/linalg/ndarray_bindings.rs | 81 +++++++++++++++++ src/svm/mod.rs | 147 ++++++++++++++++++++++++++++++- src/svm/svc.rs | 89 +++++++++++++++++-- 6 files changed, 568 insertions(+), 9 deletions(-) diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index a637bdf..29c7a89 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -91,6 +91,76 @@ pub trait BaseVector: Clone + Debug { /// Returns True if matrices are element-wise equal within a tolerance `error`. fn approximate_eq(&self, other: &Self, error: T) -> bool; + + /// Returns [L2 norm] of the vector(https://en.wikipedia.org/wiki/Matrix_norm). + fn norm2(&self) -> T; + + /// Returns [vectors norm](https://en.wikipedia.org/wiki/Matrix_norm) of order `p`. + fn norm(&self, p: T) -> T; + + /// Divide single element of the vector by `x`, write result to original vector. + fn div_element_mut(&mut self, pos: usize, x: T); + + /// Multiply single element of the vector by `x`, write result to original vector. + fn mul_element_mut(&mut self, pos: usize, x: T); + + /// Add single element of the vector to `x`, write result to original vector. + fn add_element_mut(&mut self, pos: usize, x: T); + + /// Subtract `x` from single element of the vector, write result to original vector. + fn sub_element_mut(&mut self, pos: usize, x: T); + + /// Add vectors, element-wise, overriding original vector with result. + fn add_mut(&mut self, other: &Self) -> &Self; + + /// Subtract vectors, element-wise, overriding original vector with result. + fn sub_mut(&mut self, other: &Self) -> &Self; + + /// Multiply vectors, element-wise, overriding original vector with result. 
+    fn mul_mut(&mut self, other: &Self) -> &Self;
+
+    /// Divide vectors, element-wise, overriding original vector with result.
+    fn div_mut(&mut self, other: &Self) -> &Self;
+
+    /// Add vectors, element-wise
+    fn add(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.add_mut(other);
+        r
+    }
+
+    /// Subtract vectors, element-wise
+    fn sub(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.sub_mut(other);
+        r
+    }
+
+    /// Multiply vectors, element-wise
+    fn mul(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.mul_mut(other);
+        r
+    }
+
+    /// Divide vectors, element-wise
+    fn div(&self, other: &Self) -> Self {
+        let mut r = self.clone();
+        r.div_mut(other);
+        r
+    }
+
+    /// Calculates sum of all elements of the vector.
+    fn sum(&self) -> T;
+
+    /// Returns unique values from the vector.
+    /// ```
+    /// use smartcore::linalg::naive::dense_matrix::*;
+    /// let a = vec!(1., 2., 2., -2., -6., -7., 2., 3., 4.);
+    ///
+    /// assert_eq!(a.unique(), vec![-7., -6., -2., 1., 2., 3., 4.]);
+    /// ```
+    fn unique(&self) -> Vec<T>;
 }
 
 /// Generic matrix type.
diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index ab6bf43..cf29061 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -58,6 +58,96 @@ impl BaseVector for Vec { result } + fn norm2(&self) -> T { + let mut norm = T::zero(); + + for xi in self.iter() { + norm = norm + *xi * *xi; + } + + norm.sqrt() + } + + fn norm(&self, p: T) -> T { + if p.is_infinite() && p.is_sign_positive() { + self.iter() + .map(|x| x.abs()) + .fold(T::neg_infinity(), |a, b| a.max(b)) + } else if p.is_infinite() && p.is_sign_negative() { + self.iter() + .map(|x| x.abs()) + .fold(T::infinity(), |a, b| a.min(b)) + } else { + let mut norm = T::zero(); + + for xi in self.iter() { + norm = norm + xi.abs().powf(p); + } + + norm.powf(T::one() / p) + } + } + + fn div_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] / x; + } + + fn mul_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] * x; + } + + fn add_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] + x + } + + fn sub_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] - x; + } + + fn add_mut(&mut self, other: &Self) -> &Self { + if self.len() != other.len() { + panic!("A and B should have the same shape"); + } + for i in 0..self.len() { + self.add_element_mut(i, other.get(i)); + } + + self + } + + fn sub_mut(&mut self, other: &Self) -> &Self { + if self.len() != other.len() { + panic!("A and B should have the same shape"); + } + for i in 0..self.len() { + self.sub_element_mut(i, other.get(i)); + } + + self + } + + fn mul_mut(&mut self, other: &Self) -> &Self { + if self.len() != other.len() { + panic!("A and B should have the same shape"); + } + for i in 0..self.len() { + self.mul_element_mut(i, other.get(i)); + } + + self + } + + fn div_mut(&mut self, other: &Self) -> &Self { + if self.len() != other.len() { + panic!("A and B should have the same shape"); + } + for i in 0..self.len() { + self.div_element_mut(i, 
other.get(i)); + } + + self + } + fn approximate_eq(&self, other: &Self, error: T) -> bool { if self.len() != other.len() { false @@ -70,6 +160,21 @@ impl BaseVector for Vec { true } } + + fn sum(&self) -> T { + let mut sum = T::zero(); + for i in 0..self.len() { + sum = sum + self[i]; + } + sum + } + + fn unique(&self) -> Vec { + let mut result = self.clone(); + result.sort_by(|a, b| a.partial_cmp(b).unwrap()); + result.dedup(); + result + } } /// Column-major, dense matrix. See [Simple Dense Matrix](../index.html). diff --git a/src/linalg/nalgebra_bindings.rs b/src/linalg/nalgebra_bindings.rs index badd8c4..3596899 100644 --- a/src/linalg/nalgebra_bindings.rs +++ b/src/linalg/nalgebra_bindings.rs @@ -84,6 +84,76 @@ impl BaseVector for MatrixMN { self.dot(other) } + fn norm2(&self) -> T { + self.iter().map(|x| *x * *x).sum::().sqrt() + } + + fn norm(&self, p: T) -> T { + if p.is_infinite() && p.is_sign_positive() { + self.iter().fold(T::neg_infinity(), |f, &val| { + let v = val.abs(); + if f > v { + f + } else { + v + } + }) + } else if p.is_infinite() && p.is_sign_negative() { + self.iter().fold(T::infinity(), |f, &val| { + let v = val.abs(); + if f < v { + f + } else { + v + } + }) + } else { + let mut norm = T::zero(); + + for xi in self.iter() { + norm = norm + xi.abs().powf(p); + } + + norm.powf(T::one() / p) + } + } + + fn div_element_mut(&mut self, pos: usize, x: T) { + *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() / x; + } + + fn mul_element_mut(&mut self, pos: usize, x: T) { + *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() * x; + } + + fn add_element_mut(&mut self, pos: usize, x: T) { + *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() + x; + } + + fn sub_element_mut(&mut self, pos: usize, x: T) { + *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() - x; + } + + fn add_mut(&mut self, other: &Self) -> &Self { + *self += other; + self + } + + fn sub_mut(&mut self, other: &Self) -> &Self { + *self -= other; + self + } + + fn 
mul_mut(&mut self, other: &Self) -> &Self { + self.component_mul_assign(other); + self + } + + fn div_mut(&mut self, other: &Self) -> &Self { + self.component_div_assign(other); + self + } + fn approximate_eq(&self, other: &Self, error: T) -> bool { if self.shape() != other.shape() { false @@ -93,6 +163,21 @@ impl BaseVector for MatrixMN { .all(|(a, b)| (*a - *b).abs() <= error) } } + + fn sum(&self) -> T { + let mut sum = T::zero(); + for v in self.iter() { + sum += *v; + } + sum + } + + fn unique(&self) -> Vec { + let mut result: Vec = self.iter().map(|v| *v).collect(); + result.sort_by(|a, b| a.partial_cmp(b).unwrap()); + result.dedup(); + result + } } impl diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs index 9cfb6d7..9f911f5 100644 --- a/src/linalg/ndarray_bindings.rs +++ b/src/linalg/ndarray_bindings.rs @@ -89,9 +89,90 @@ impl BaseVector for ArrayBase, Ix self.dot(other) } + fn norm2(&self) -> T { + self.iter().map(|x| *x * *x).sum::().sqrt() + } + + fn norm(&self, p: T) -> T { + if p.is_infinite() && p.is_sign_positive() { + self.iter().fold(T::neg_infinity(), |f, &val| { + let v = val.abs(); + if f > v { + f + } else { + v + } + }) + } else if p.is_infinite() && p.is_sign_negative() { + self.iter().fold(T::infinity(), |f, &val| { + let v = val.abs(); + if f < v { + f + } else { + v + } + }) + } else { + let mut norm = T::zero(); + + for xi in self.iter() { + norm = norm + xi.abs().powf(p); + } + + norm.powf(T::one() / p) + } + } + + fn div_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] / x; + } + + fn mul_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] * x; + } + + fn add_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] + x; + } + + fn sub_element_mut(&mut self, pos: usize, x: T) { + self[pos] = self[pos] - x; + } + fn approximate_eq(&self, other: &Self, error: T) -> bool { (self - other).iter().all(|v| v.abs() <= error) } + + fn add_mut(&mut self, other: &Self) -> &Self { 
+ *self += other; + self + } + + fn sub_mut(&mut self, other: &Self) -> &Self { + *self -= other; + self + } + + fn mul_mut(&mut self, other: &Self) -> &Self { + *self *= other; + self + } + + fn div_mut(&mut self, other: &Self) -> &Self { + *self /= other; + self + } + + fn sum(&self) -> T { + self.sum() + } + + fn unique(&self) -> Vec { + let mut result = self.clone().into_raw_vec(); + result.sort_by(|a, b| a.partial_cmp(b).unwrap()); + result.dedup(); + result + } } impl diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 6b31683..0605907 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -1,5 +1,7 @@ //! # Support Vector Machines //! +//! +//! pub mod svc; pub mod svr; @@ -9,18 +11,161 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; use crate::math::num::RealNumber; -/// Kernel +/// Defines a kernel function pub trait Kernel> { /// Apply kernel function to x_i and x_j fn apply(&self, x_i: &V, x_j: &V) -> T; } +/// Pre-defined kernel functions +pub struct Kernels {} + +impl Kernels { + /// Linear kernel + pub fn linear() -> LinearKernel { + LinearKernel {} + } + + /// Radial basis function kernel (Gaussian) + pub fn rbf(gamma: T) -> RBFKernel { + RBFKernel { gamma: gamma } + } + + /// Polynomial kernel + /// * `degree` - degree of the polynomial + /// * `gamma` - kernel coefficient + /// * `coef0` - independent term in kernel function + pub fn polynomial(degree: T, gamma: T, coef0: T) -> PolynomialKernel { + PolynomialKernel { + degree: degree, + gamma: gamma, + coef0: coef0, + } + } + + /// Polynomial kernel + /// * `degree` - degree of the polynomial + /// * `n_features` - number of features in vector + pub fn polynomial_with_degree( + degree: T, + n_features: usize, + ) -> PolynomialKernel { + let coef0 = T::one(); + let gamma = T::one() / T::from_usize(n_features).unwrap(); + Kernels::polynomial(degree, gamma, coef0) + } + + /// Sigmoid kernel + /// * `gamma` - kernel coefficient + /// * `coef0` - independent term in kernel function + 
pub fn sigmoid(gamma: T, coef0: T) -> SigmoidKernel { + SigmoidKernel { + gamma: gamma, + coef0: coef0, + } + } + + /// Sigmoid kernel + /// * `gamma` - kernel coefficient + pub fn sigmoid_with_gamma(gamma: T) -> SigmoidKernel { + SigmoidKernel { + gamma: gamma, + coef0: T::one(), + } + } +} + /// Linear Kernel #[derive(Serialize, Deserialize, Debug)] pub struct LinearKernel {} +/// Radial basis function (Gaussian) kernel +pub struct RBFKernel { + /// kernel coefficient + pub gamma: T, +} + +/// Polynomial kernel +pub struct PolynomialKernel { + /// degree of the polynomial + pub degree: T, + /// kernel coefficient + pub gamma: T, + /// independent term in kernel function + pub coef0: T, +} + +/// Sigmoid (hyperbolic tangent) kernel +pub struct SigmoidKernel { + /// kernel coefficient + pub gamma: T, + /// independent term in kernel function + pub coef0: T, +} + impl> Kernel for LinearKernel { fn apply(&self, x_i: &V, x_j: &V) -> T { x_i.dot(x_j) } } + +impl> Kernel for RBFKernel { + fn apply(&self, x_i: &V, x_j: &V) -> T { + let v_diff = x_i.sub(x_j); + (-self.gamma * v_diff.mul(&v_diff).sum()).exp() + } +} + +impl> Kernel for PolynomialKernel { + fn apply(&self, x_i: &V, x_j: &V) -> T { + let dot = x_i.dot(x_j); + (self.gamma * dot + self.coef0).powf(self.degree) + } +} + +impl> Kernel for SigmoidKernel { + fn apply(&self, x_i: &V, x_j: &V) -> T { + let dot = x_i.dot(x_j); + (self.gamma * dot + self.coef0).tanh() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn linear_kernel() { + let v1 = vec![1., 2., 3.]; + let v2 = vec![4., 5., 6.]; + + assert_eq!(32f64, Kernels::linear().apply(&v1, &v2)); + } + + #[test] + fn rbf_kernel() { + let v1 = vec![1., 2., 3.]; + let v2 = vec![4., 5., 6.]; + + assert!((0.2265f64 - Kernels::rbf(0.055).apply(&v1, &v2)).abs() < 1e-4); + } + + #[test] + fn polynomial_kernel() { + let v1 = vec![1., 2., 3.]; + let v2 = vec![4., 5., 6.]; + + assert!( + (4913f64 - Kernels::polynomial(3.0, 0.5, 1.0).apply(&v1, &v2)).abs() + 
< std::f64::EPSILON + ); + } + + #[test] + fn sigmoid_kernel() { + let v1 = vec![1., 2., 3.]; + let v2 = vec![4., 5., 6.]; + + assert!((0.3969f64 - Kernels::sigmoid(0.01, 0.1).apply(&v1, &v2)).abs() < 1e-4); + } +} diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 22623b4..46625a9 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -5,7 +5,7 @@ //! ``` //! use smartcore::linalg::naive::dense_matrix::*; //! use smartcore::linear::linear_regression::*; -//! use smartcore::svm::LinearKernel; +//! use smartcore::svm::Kernels; //! use smartcore::svm::svc::{SVC, SVCParameters}; //! //! // Iris dataset @@ -31,11 +31,11 @@ //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], //! ]); -//! let y = vec![ -1., -1., -1., -1., -1., -1., -1., -1., +//! let y = vec![ 0., 0., 0., 0., 0., 0., 0., 0., //! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]; //! //! let svr = SVC::fit(&x, &y, -//! LinearKernel {}, +//! Kernels::linear(), //! SVCParameters { //! epoch: 2, //! c: 200.0, @@ -83,6 +83,7 @@ pub struct SVCParameters { ))] /// Support Vector Classifier pub struct SVC, K: Kernel> { + classes: Vec, kernel: K, instances: Vec, w: Vec, @@ -150,11 +151,32 @@ impl, K: Kernel> SVC { ))); } - let optimizer = Optimizer::new(x, y, &kernel, ¶meters); + let classes = y.unique(); + + if classes.len() != 2 { + return Err(Failed::fit(&format!( + "Incorrect number of classes {}", classes.len() + ))); + } + + // Make sure class labels are either 1 or -1 + let mut y = y.clone(); + for i in 0..y.len() { + let y_v = y.get(i); + if y_v != -T::one() || y_v != T::one() { + match y_v == classes[0] { + true => y.set(i, -T::one()), + false => y.set(i, T::one()) + } + } + } + + let optimizer = Optimizer::new(x, &y, &kernel, ¶meters); let (support_vectors, weight, b) = optimizer.optimize(); Ok(SVC { + classes: classes, kernel: kernel, instances: support_vectors, w: weight, @@ -170,7 +192,11 @@ impl, K: Kernel> SVC { let mut y_hat = M::RowVector::zeros(n); for i in 0..n { - y_hat.set(i, 
self.predict_for_row(x.get_row(i))); + let cls_idx = match self.predict_for_row(x.get_row(i)) == T::one() { + false => self.classes[0], + true => self.classes[1] + }; + y_hat.set(i, cls_idx); } Ok(y_hat) @@ -647,13 +673,13 @@ mod tests { ]); let y: Vec = vec![ - -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., ]; let y_hat = SVC::fit( &x, &y, - LinearKernel {}, + Kernels::linear(), SVCParameters { epoch: 2, c: 200.0, @@ -663,6 +689,53 @@ mod tests { .and_then(|lr| lr.predict(&x)) .unwrap(); + println!("{:?}", y_hat); + + assert!(accuracy(&y_hat, &y) >= 0.9); + } + + #[test] + fn svc_fit_predict_rbf() { + let x = DenseMatrix::from_2d_array(&[ + &[5.1, 3.5, 1.4, 0.2], + &[4.9, 3.0, 1.4, 0.2], + &[4.7, 3.2, 1.3, 0.2], + &[4.6, 3.1, 1.5, 0.2], + &[5.0, 3.6, 1.4, 0.2], + &[5.4, 3.9, 1.7, 0.4], + &[4.6, 3.4, 1.4, 0.3], + &[5.0, 3.4, 1.5, 0.2], + &[4.4, 2.9, 1.4, 0.2], + &[4.9, 3.1, 1.5, 0.1], + &[7.0, 3.2, 4.7, 1.4], + &[6.4, 3.2, 4.5, 1.5], + &[6.9, 3.1, 4.9, 1.5], + &[5.5, 2.3, 4.0, 1.3], + &[6.5, 2.8, 4.6, 1.5], + &[5.7, 2.8, 4.5, 1.3], + &[6.3, 3.3, 4.7, 1.6], + &[4.9, 2.4, 3.3, 1.0], + &[6.6, 2.9, 4.6, 1.3], + &[5.2, 2.7, 3.9, 1.4], + ]); + + let y: Vec = vec![ + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + ]; + + let y_hat = SVC::fit( + &x, + &y, + Kernels::rbf(0.7), + SVCParameters { + epoch: 2, + c: 1.0, + tol: 1e-3, + }, + ) + .and_then(|lr| lr.predict(&x)) + .unwrap(); + assert!(accuracy(&y_hat, &y) >= 0.9); } @@ -695,7 +768,7 @@ mod tests { -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., ]; - let svr = SVC::fit(&x, &y, LinearKernel {}, Default::default()).unwrap(); + let svr = SVC::fit(&x, &y, Kernels::linear(), Default::default()).unwrap(); let deserialized_svr: SVC, LinearKernel> = serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();