feat: adds 3 more SVM kernels, linalg refactoring

This commit is contained in:
Volodymyr Orlov
2020-10-28 17:10:17 -07:00
parent 1773ed0e6e
commit cf4f658f01
6 changed files with 568 additions and 9 deletions
+70
View File
@@ -91,6 +91,76 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
/// Returns `true` if the vectors are element-wise equal within a tolerance `error`.
fn approximate_eq(&self, other: &Self, error: T) -> bool;
/// Returns the [L2 norm](https://en.wikipedia.org/wiki/Matrix_norm) of the vector.
fn norm2(&self) -> T;
/// Returns the [vector norm](https://en.wikipedia.org/wiki/Matrix_norm) of order `p`.
/// `p` may be infinite: positive infinity selects the maximum absolute value,
/// negative infinity the minimum absolute value.
fn norm(&self, p: T) -> T;
/// Divide a single element of the vector by `x`, writing the result back into the vector.
fn div_element_mut(&mut self, pos: usize, x: T);
/// Multiply a single element of the vector by `x`, writing the result back into the vector.
fn mul_element_mut(&mut self, pos: usize, x: T);
/// Add `x` to a single element of the vector, writing the result back into the vector.
fn add_element_mut(&mut self, pos: usize, x: T);
/// Subtract `x` from a single element of the vector, writing the result back into the vector.
fn sub_element_mut(&mut self, pos: usize, x: T);
/// Add vectors, element-wise, overriding the original vector with the result.
fn add_mut(&mut self, other: &Self) -> &Self;
/// Subtract vectors, element-wise, overriding the original vector with the result.
fn sub_mut(&mut self, other: &Self) -> &Self;
/// Multiply vectors, element-wise, overriding the original vector with the result.
fn mul_mut(&mut self, other: &Self) -> &Self;
/// Divide vectors, element-wise, overriding the original vector with the result.
fn div_mut(&mut self, other: &Self) -> &Self;
/// Add vectors, element-wise, returning the result as a new vector.
fn add(&self, other: &Self) -> Self {
    let mut result = self.clone();
    result.add_mut(other);
    result
}
/// Subtract vectors, element-wise, returning the result as a new vector.
fn sub(&self, other: &Self) -> Self {
    let mut result = self.clone();
    result.sub_mut(other);
    result
}
/// Multiply vectors, element-wise, returning the result as a new vector.
fn mul(&self, other: &Self) -> Self {
    let mut result = self.clone();
    result.mul_mut(other);
    result
}
/// Divide vectors, element-wise, returning the result as a new vector.
fn div(&self, other: &Self) -> Self {
    let mut result = self.clone();
    result.div_mut(other);
    result
}
/// Calculates the sum of all elements of the vector.
fn sum(&self) -> T;
/// Returns unique values from the vector, in ascending order.
/// ```
/// use smartcore::linalg::naive::dense_matrix::*;
/// let a = vec!(1., 2., 2., -2., -6., -7., 2., 3., 4.);
///
/// assert_eq!(a.unique(), vec![-7., -6., -2., 1., 2., 3., 4.]);
/// ```
fn unique(&self) -> Vec<T>;
}
/// Generic matrix type.
+105
View File
@@ -58,6 +58,96 @@ impl<T: RealNumber> BaseVector<T> for Vec<T> {
result
}
// Euclidean (L2) norm: square root of the sum of squared elements.
fn norm2(&self) -> T {
    let sum_sq = self.iter().fold(T::zero(), |acc, x| acc + *x * *x);
    sum_sq.sqrt()
}
// p-norm of the vector; +infinity and -infinity orders reduce to the
// maximum / minimum absolute value respectively.
fn norm(&self, p: T) -> T {
    if p.is_infinite() {
        let abs_values = self.iter().map(|x| x.abs());
        if p.is_sign_positive() {
            abs_values.fold(T::neg_infinity(), |acc, v| acc.max(v))
        } else {
            abs_values.fold(T::infinity(), |acc, v| acc.min(v))
        }
    } else {
        // General case: (sum |x_i|^p)^(1/p).
        let total = self
            .iter()
            .fold(T::zero(), |acc, x| acc + x.abs().powf(p));
        total.powf(T::one() / p)
    }
}
// Divides the element at `pos` by `x` in place; panics if `pos` is out of bounds.
fn div_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] / x;
}
// Multiplies the element at `pos` by `x` in place; panics if `pos` is out of bounds.
fn mul_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] * x;
}
// Adds `x` to the element at `pos` in place; panics if `pos` is out of bounds.
fn add_element_mut(&mut self, pos: usize, x: T) {
    // Trailing semicolon added for consistency with the sibling *_element_mut
    // methods (the assignment already evaluates to `()`, so behavior is unchanged).
    self[pos] = self[pos] + x;
}
// Subtracts `x` from the element at `pos` in place; panics if `pos` is out of bounds.
fn sub_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] - x;
}
// Element-wise in-place addition; panics when the two vectors differ in length.
fn add_mut(&mut self, other: &Self) -> &Self {
    if self.len() != other.len() {
        panic!("A and B should have the same shape");
    }
    for pos in 0..other.len() {
        self.add_element_mut(pos, other.get(pos));
    }
    self
}
// Element-wise in-place subtraction; panics when the two vectors differ in length.
fn sub_mut(&mut self, other: &Self) -> &Self {
    if self.len() != other.len() {
        panic!("A and B should have the same shape");
    }
    for pos in 0..other.len() {
        self.sub_element_mut(pos, other.get(pos));
    }
    self
}
// Element-wise in-place multiplication; panics when the two vectors differ in length.
fn mul_mut(&mut self, other: &Self) -> &Self {
    if self.len() != other.len() {
        panic!("A and B should have the same shape");
    }
    for pos in 0..other.len() {
        self.mul_element_mut(pos, other.get(pos));
    }
    self
}
// Element-wise in-place division; panics when the two vectors differ in length.
fn div_mut(&mut self, other: &Self) -> &Self {
    if self.len() != other.len() {
        panic!("A and B should have the same shape");
    }
    for pos in 0..other.len() {
        self.div_element_mut(pos, other.get(pos));
    }
    self
}
fn approximate_eq(&self, other: &Self, error: T) -> bool {
if self.len() != other.len() {
false
@@ -70,6 +160,21 @@ impl<T: RealNumber> BaseVector<T> for Vec<T> {
true
}
}
// Sums all elements. Uses the iterator instead of an index loop
// (idiomatic, and avoids a per-element bounds check); the accumulation
// order is identical to the original left-to-right loop.
fn sum(&self) -> T {
    self.iter().fold(T::zero(), |acc, x| acc + *x)
}
// Returns the distinct values of the vector in ascending order.
fn unique(&self) -> Vec<T> {
    let mut result = self.clone();
    // `sort_unstable_by`: faster and allocation-free; equal values are
    // interchangeable here, so sort stability is irrelevant to the result.
    // NOTE(review): `partial_cmp(..).unwrap()` panics on NaN input —
    // presumably callers never pass NaN; confirm before use on raw data.
    result.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    result.dedup();
    result
}
}
/// Column-major, dense matrix. See [Simple Dense Matrix](../index.html).
+85
View File
@@ -84,6 +84,76 @@ impl<T: RealNumber + 'static> BaseVector<T> for MatrixMN<T, U1, Dynamic> {
self.dot(other)
}
fn norm2(&self) -> T {
self.iter().map(|x| *x * *x).sum::<T>().sqrt()
}
// p-norm; +infinity / -infinity orders select the max / min absolute value.
fn norm(&self, p: T) -> T {
    if p.is_infinite() && p.is_sign_positive() {
        let mut result = T::neg_infinity();
        for v in self.iter() {
            let a = v.abs();
            // Keep the running value only when it compares strictly greater,
            // exactly mirroring the original `if f > v { f } else { v }` fold.
            if !(result > a) {
                result = a;
            }
        }
        result
    } else if p.is_infinite() && p.is_sign_negative() {
        let mut result = T::infinity();
        for v in self.iter() {
            let a = v.abs();
            if !(result < a) {
                result = a;
            }
        }
        result
    } else {
        // General case: (sum |x_i|^p)^(1/p).
        let total = self.iter().fold(T::zero(), |acc, x| acc + x.abs().powf(p));
        total.powf(T::one() / p)
    }
}
// Divides the entry at linear position `pos` by `x` in place;
// the `unwrap` panics if `pos` is out of bounds.
fn div_element_mut(&mut self, pos: usize, x: T) {
    *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() / x;
}
// Multiplies the entry at linear position `pos` by `x` in place;
// the `unwrap` panics if `pos` is out of bounds.
fn mul_element_mut(&mut self, pos: usize, x: T) {
    *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() * x;
}
// Adds `x` to the entry at linear position `pos` in place;
// the `unwrap` panics if `pos` is out of bounds.
fn add_element_mut(&mut self, pos: usize, x: T) {
    *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() + x;
}
// Subtracts `x` from the entry at linear position `pos` in place;
// the `unwrap` panics if `pos` is out of bounds.
fn sub_element_mut(&mut self, pos: usize, x: T) {
    *self.get_mut(pos).unwrap() = *self.get(pos).unwrap() - x;
}
// Element-wise in-place addition, delegated to nalgebra's `+=` operator.
fn add_mut(&mut self, other: &Self) -> &Self {
    *self += other;
    self
}
// Element-wise in-place subtraction, delegated to nalgebra's `-=` operator.
fn sub_mut(&mut self, other: &Self) -> &Self {
    *self -= other;
    self
}
// Element-wise (Hadamard) in-place multiplication via nalgebra's
// `component_mul_assign`.
fn mul_mut(&mut self, other: &Self) -> &Self {
    self.component_mul_assign(other);
    self
}
// Element-wise in-place division via nalgebra's `component_div_assign`.
fn div_mut(&mut self, other: &Self) -> &Self {
    self.component_div_assign(other);
    self
}
fn approximate_eq(&self, other: &Self, error: T) -> bool {
if self.shape() != other.shape() {
false
@@ -93,6 +163,21 @@ impl<T: RealNumber + 'static> BaseVector<T> for MatrixMN<T, U1, Dynamic> {
.all(|(a, b)| (*a - *b).abs() <= error)
}
}
// Sum of all entries, accumulated left to right.
fn sum(&self) -> T {
    self.iter().fold(T::zero(), |acc, v| acc + *v)
}
// Returns the distinct entries in ascending order.
fn unique(&self) -> Vec<T> {
    // `.copied()` replaces the manual `.map(|v| *v)` (clippy: map_clone);
    // `sort_unstable_by` avoids a stable sort's allocation — equal values
    // are interchangeable, so stability does not affect the result.
    // NOTE(review): `partial_cmp(..).unwrap()` panics on NaN input.
    let mut result: Vec<T> = self.iter().copied().collect();
    result.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    result.dedup();
    result
}
}
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
+81
View File
@@ -89,9 +89,90 @@ impl<T: RealNumber + ScalarOperand> BaseVector<T> for ArrayBase<OwnedRepr<T>, Ix
self.dot(other)
}
fn norm2(&self) -> T {
self.iter().map(|x| *x * *x).sum::<T>().sqrt()
}
// p-norm; +infinity / -infinity orders select the max / min absolute value.
fn norm(&self, p: T) -> T {
    if p.is_infinite() && p.is_sign_positive() {
        let mut result = T::neg_infinity();
        for v in self.iter() {
            let a = v.abs();
            // Keep the running value only when it compares strictly greater,
            // exactly mirroring the original `if f > v { f } else { v }` fold.
            if !(result > a) {
                result = a;
            }
        }
        result
    } else if p.is_infinite() && p.is_sign_negative() {
        let mut result = T::infinity();
        for v in self.iter() {
            let a = v.abs();
            if !(result < a) {
                result = a;
            }
        }
        result
    } else {
        // General case: (sum |x_i|^p)^(1/p).
        let total = self.iter().fold(T::zero(), |acc, x| acc + x.abs().powf(p));
        total.powf(T::one() / p)
    }
}
// Divides the element at `pos` by `x` in place; panics if `pos` is out of bounds.
fn div_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] / x;
}
// Multiplies the element at `pos` by `x` in place; panics if `pos` is out of bounds.
fn mul_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] * x;
}
// Adds `x` to the element at `pos` in place; panics if `pos` is out of bounds.
fn add_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] + x;
}
// Subtracts `x` from the element at `pos` in place; panics if `pos` is out of bounds.
fn sub_element_mut(&mut self, pos: usize, x: T) {
    self[pos] = self[pos] - x;
}
// Returns `true` when every element-wise difference is within `error`.
fn approximate_eq(&self, other: &Self, error: T) -> bool {
    // Treat a length mismatch as "not equal", consistent with the Vec
    // implementation of BaseVector, instead of letting `self - other`
    // panic on mismatched shapes.
    if self.len() != other.len() {
        return false;
    }
    (self - other).iter().all(|v| v.abs() <= error)
}
// Element-wise in-place addition, delegated to ndarray's `+=` operator.
fn add_mut(&mut self, other: &Self) -> &Self {
    *self += other;
    self
}
// Element-wise in-place subtraction, delegated to ndarray's `-=` operator.
fn sub_mut(&mut self, other: &Self) -> &Self {
    *self -= other;
    self
}
// Element-wise in-place multiplication, delegated to ndarray's `*=` operator.
fn mul_mut(&mut self, other: &Self) -> &Self {
    *self *= other;
    self
}
// Element-wise in-place division, delegated to ndarray's `/=` operator.
fn div_mut(&mut self, other: &Self) -> &Self {
    *self /= other;
    self
}
fn sum(&self) -> T {
    // Delegates to ndarray's inherent `sum`; inherent methods take
    // precedence over trait methods in Rust, so this call does not recurse.
    self.sum()
}
// Returns the distinct elements in ascending order.
fn unique(&self) -> Vec<T> {
    // Copy the elements directly instead of cloning the whole array and
    // unwrapping it (`self.clone().into_raw_vec()` allocated an
    // intermediate array for nothing).
    // NOTE(review): `partial_cmp(..).unwrap()` panics on NaN input.
    let mut result: Vec<T> = self.iter().copied().collect();
    result.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    result.dedup();
    result
}
}
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
+146 -1
View File
@@ -1,5 +1,7 @@
//! # Support Vector Machines
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
pub mod svc;
pub mod svr;
@@ -9,18 +11,161 @@ use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Defines a kernel function: a similarity measure K(x_i, x_j) between two vectors.
pub trait Kernel<T: RealNumber, V: BaseVector<T>> {
    /// Apply the kernel function to vectors `x_i` and `x_j`.
    fn apply(&self, x_i: &V, x_j: &V) -> T;
}
/// Pre-defined kernel functions
pub struct Kernels {}

impl Kernels {
    /// Linear kernel
    pub fn linear() -> LinearKernel {
        LinearKernel {}
    }

    /// Radial basis function kernel (Gaussian)
    /// * `gamma` - kernel coefficient
    pub fn rbf<T: RealNumber>(gamma: T) -> RBFKernel<T> {
        // Field-init shorthand replaces the redundant `gamma: gamma`
        // (clippy: redundant_field_names).
        RBFKernel { gamma }
    }

    /// Polynomial kernel
    /// * `degree` - degree of the polynomial
    /// * `gamma` - kernel coefficient
    /// * `coef0` - independent term in kernel function
    pub fn polynomial<T: RealNumber>(degree: T, gamma: T, coef0: T) -> PolynomialKernel<T> {
        PolynomialKernel {
            degree,
            gamma,
            coef0,
        }
    }

    /// Polynomial kernel with defaults `gamma = 1 / n_features`, `coef0 = 1`
    /// * `degree` - degree of the polynomial
    /// * `n_features` - number of features in vector
    pub fn polynomial_with_degree<T: RealNumber>(
        degree: T,
        n_features: usize,
    ) -> PolynomialKernel<T> {
        let coef0 = T::one();
        // `from_usize` conversion is expected to succeed for float types.
        let gamma = T::one() / T::from_usize(n_features).unwrap();
        Kernels::polynomial(degree, gamma, coef0)
    }

    /// Sigmoid kernel
    /// * `gamma` - kernel coefficient
    /// * `coef0` - independent term in kernel function
    pub fn sigmoid<T: RealNumber>(gamma: T, coef0: T) -> SigmoidKernel<T> {
        SigmoidKernel { gamma, coef0 }
    }

    /// Sigmoid kernel with default `coef0 = 1`
    /// * `gamma` - kernel coefficient
    pub fn sigmoid_with_gamma<T: RealNumber>(gamma: T) -> SigmoidKernel<T> {
        SigmoidKernel {
            gamma,
            coef0: T::one(),
        }
    }
}
/// Linear Kernel: K(x_i, x_j) = <x_i, x_j>
#[derive(Serialize, Deserialize, Debug)]
pub struct LinearKernel {}
/// Radial basis function (Gaussian) kernel
pub struct RBFKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
}
/// Polynomial kernel
pub struct PolynomialKernel<T: RealNumber> {
/// degree of the polynomial
pub degree: T,
/// kernel coefficient
pub gamma: T,
/// independent term in kernel function
pub coef0: T,
}
/// Sigmoid (hyperbolic tangent) kernel
pub struct SigmoidKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
/// independent term in kernel function
pub coef0: T,
}
impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for LinearKernel {
    // K(x_i, x_j) = <x_i, x_j>: the plain dot product of the two vectors.
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        x_i.dot(x_j)
    }
}
impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for RBFKernel<T> {
    // K(x_i, x_j) = exp(-gamma * ||x_i - x_j||^2)
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        let diff = x_i.sub(x_j);
        let squared_distance = diff.mul(&diff).sum();
        (-self.gamma * squared_distance).exp()
    }
}
impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for PolynomialKernel<T> {
    // K(x_i, x_j) = (gamma * <x_i, x_j> + coef0)^degree
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        (self.gamma * x_i.dot(x_j) + self.coef0).powf(self.degree)
    }
}
impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for SigmoidKernel<T> {
    // K(x_i, x_j) = tanh(gamma * <x_i, x_j> + coef0)
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        (self.gamma * x_i.dot(x_j) + self.coef0).tanh()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn linear_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];
        // <v1, v2> = 4 + 10 + 18 = 32
        assert_eq!(32f64, Kernels::linear().apply(&v1, &v2));
    }

    #[test]
    fn rbf_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];
        // exp(-0.055 * 27) ~= 0.2265
        assert!((0.2265f64 - Kernels::rbf(0.055).apply(&v1, &v2)).abs() < 1e-4);
    }

    #[test]
    fn polynomial_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];
        // (0.5 * 32 + 1)^3 = 17^3 = 4913
        assert!(
            (4913f64 - Kernels::polynomial(3.0, 0.5, 1.0).apply(&v1, &v2)).abs()
                < std::f64::EPSILON
        );
    }

    #[test]
    fn sigmoid_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];
        // tanh(0.01 * 32 + 0.1) ~= 0.3969
        assert!((0.3969f64 - Kernels::sigmoid(0.01, 0.1).apply(&v1, &v2)).abs() < 1e-4);
    }
}
+81 -8
View File
@@ -5,7 +5,7 @@
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::linear::linear_regression::*;
//! use smartcore::svm::LinearKernel;
//! use smartcore::svm::Kernels;
//! use smartcore::svm::svc::{SVC, SVCParameters};
//!
//! // Iris dataset
@@ -31,11 +31,11 @@
//! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//! let y = vec![ 0., 0., 0., 0., 0., 0., 0., 0.,
//! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
//!
//! let svr = SVC::fit(&x, &y,
//! LinearKernel {},
//! Kernels::linear(),
//! SVCParameters {
//! epoch: 2,
//! c: 200.0,
@@ -83,6 +83,7 @@ pub struct SVCParameters<T: RealNumber> {
))]
/// Support Vector Classifier
pub struct SVC<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
classes: Vec<T>,
kernel: K,
instances: Vec<M::RowVector>,
w: Vec<T>,
@@ -150,11 +151,32 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
)));
}
let optimizer = Optimizer::new(x, y, &kernel, &parameters);
let classes = y.unique();
if classes.len() != 2 {
return Err(Failed::fit(&format!(
"Incorrect number of classes {}", classes.len()
)));
}
// Make sure class labels are either 1 or -1
let mut y = y.clone();
for i in 0..y.len() {
let y_v = y.get(i);
if y_v != -T::one() || y_v != T::one() {
match y_v == classes[0] {
true => y.set(i, -T::one()),
false => y.set(i, T::one())
}
}
}
let optimizer = Optimizer::new(x, &y, &kernel, &parameters);
let (support_vectors, weight, b) = optimizer.optimize();
Ok(SVC {
classes: classes,
kernel: kernel,
instances: support_vectors,
w: weight,
@@ -170,7 +192,11 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
let mut y_hat = M::RowVector::zeros(n);
for i in 0..n {
y_hat.set(i, self.predict_for_row(x.get_row(i)));
let cls_idx = match self.predict_for_row(x.get_row(i)) == T::one() {
false => self.classes[0],
true => self.classes[1]
};
y_hat.set(i, cls_idx);
}
Ok(y_hat)
@@ -647,13 +673,13 @@ mod tests {
]);
let y: Vec<f64> = vec![
-1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let y_hat = SVC::fit(
&x,
&y,
LinearKernel {},
Kernels::linear(),
SVCParameters {
epoch: 2,
c: 200.0,
@@ -663,6 +689,53 @@ mod tests {
.and_then(|lr| lr.predict(&x))
.unwrap();
println!("{:?}", y_hat);
assert!(accuracy(&y_hat, &y) >= 0.9);
}
#[test]
fn svc_fit_predict_rbf() {
    // 20 samples with 4 features each; the first ten are labeled -1,
    // the last ten +1.
    let x = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[4.7, 3.2, 1.3, 0.2],
        &[4.6, 3.1, 1.5, 0.2],
        &[5.0, 3.6, 1.4, 0.2],
        &[5.4, 3.9, 1.7, 0.4],
        &[4.6, 3.4, 1.4, 0.3],
        &[5.0, 3.4, 1.5, 0.2],
        &[4.4, 2.9, 1.4, 0.2],
        &[4.9, 3.1, 1.5, 0.1],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
        &[6.9, 3.1, 4.9, 1.5],
        &[5.5, 2.3, 4.0, 1.3],
        &[6.5, 2.8, 4.6, 1.5],
        &[5.7, 2.8, 4.5, 1.3],
        &[6.3, 3.3, 4.7, 1.6],
        &[4.9, 2.4, 3.3, 1.0],
        &[6.6, 2.9, 4.6, 1.3],
        &[5.2, 2.7, 3.9, 1.4],
    ]);
    let y: Vec<f64> = vec![
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
    ];
    // Fit an SVC with a Gaussian (RBF) kernel, predict on the training set,
    // and require at least 90% training accuracy.
    let y_hat = SVC::fit(
        &x,
        &y,
        Kernels::rbf(0.7),
        SVCParameters {
            epoch: 2,
            c: 1.0,
            tol: 1e-3,
        },
    )
    .and_then(|lr| lr.predict(&x))
    .unwrap();
    assert!(accuracy(&y_hat, &y) >= 0.9);
}
@@ -695,7 +768,7 @@ mod tests {
-1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let svr = SVC::fit(&x, &y, LinearKernel {}, Default::default()).unwrap();
let svr = SVC::fit(&x, &y, Kernels::linear(), Default::default()).unwrap();
let deserialized_svr: SVC<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();