//! # Support Vector Machines
//!
//! Support Vector Machines (SVM) are among the most performant off-the-shelf machine learning algorithms.
//! SVM is based on the [Vapnik–Chervonenkis theory](https://en.wikipedia.org/wiki/Vapnik%E2%80%93Chervonenkis_theory) that was developed by Vladimir Vapnik and Alexey Chervonenkis between 1960 and 1990.
//!
//! SVM separates data into two sets using a maximal-margin decision boundary, \\(f(x)\\). For regression, the algorithm uses the value of the function \\(f(x)\\) to predict a target value.
//! To classify a new point, the algorithm calculates the sign of the decision function to determine on which side of the boundary the new point lies.
//!
//! SVM is memory efficient since it uses only a subset of the training data to find the decision boundary. The training samples in this subset are called support vectors.
//!
//! In SVM, the similarity between a data point and the support vectors is defined by the kernel function.
//! SmartCore supports multiple kernel functions, and you can always define a new kernel function by implementing the `Kernel` trait. Not every function can serve as a kernel:
//! building a new kernel requires a good mathematical understanding of [Mercer's theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem),
//! which gives the necessary and sufficient conditions for a function to be a valid kernel function.
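//!
//! As a minimal sketch (not part of SmartCore itself), a hypothetical Laplacian kernel, \\( K(x, x') = e^{-\gamma \lVert x - x' \rVert_1} \\), could be implemented like this, assuming only the `get` and `len` accessors of `BaseVector`:
//!
//! ```
//! use smartcore::linalg::BaseVector;
//! use smartcore::math::num::RealNumber;
//! use smartcore::svm::Kernel;
//!
//! /// Hypothetical Laplacian kernel with coefficient `gamma` (illustration only)
//! #[derive(Debug, Clone)]
//! struct LaplacianKernel<T: RealNumber> {
//!     gamma: T,
//! }
//!
//! impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for LaplacianKernel<T> {
//!     fn apply(&self, x_i: &V, x_j: &V) -> T {
//!         // L1 (Manhattan) distance between x_i and x_j
//!         let mut l1 = T::zero();
//!         for k in 0..x_i.len() {
//!             l1 = l1 + (x_i.get(k) - x_j.get(k)).abs();
//!         }
//!         (-self.gamma * l1).exp()
//!     }
//! }
//! ```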
//!
//! Pre-defined kernel functions:
//!
//! * *Linear*, \\( K(x, x') = \langle x, x' \rangle\\)
//! * *Polynomial*, \\( K(x, x') = (\gamma\langle x, x' \rangle + r)^d\\), where \\(d\\) is the polynomial degree, \\(\gamma\\) is the kernel coefficient and \\(r\\) is the independent term in the kernel function.
//! * *RBF (Gaussian)*, \\( K(x, x') = e^{-\gamma \lVert x - x' \rVert ^2} \\), where \\(\gamma\\) is the kernel coefficient.
//! * *Sigmoid (hyperbolic tangent)*, \\( K(x, x') = \tanh ( \gamma \langle x, x' \rangle + r ) \\), where \\(\gamma\\) is the kernel coefficient and \\(r\\) is the independent term in the kernel function.
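//!
//! For example, since `Vec<f64>` implements `BaseVector`, the pre-defined kernels can be applied to a pair of vectors directly (the numbers in the comments mirror this module's unit tests):
//!
//! ```
//! use smartcore::svm::{Kernel, Kernels};
//!
//! let v1 = vec![1., 2., 3.];
//! let v2 = vec![4., 5., 6.];
//!
//! let linear = Kernels::linear().apply(&v1, &v2); // <v1, v2> = 32
//! let rbf = Kernels::rbf(0.055).apply(&v1, &v2); // e^(-0.055 * 27) ~ 0.2265
//! ```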
pub mod svc;
pub mod svr;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Defines a kernel function
pub trait Kernel<T: RealNumber, V: BaseVector<T>>: Clone {
    /// Apply kernel function to `x_i` and `x_j`
    fn apply(&self, x_i: &V, x_j: &V) -> T;
}
/// Pre-defined kernel functions
pub struct Kernels {}

impl Kernels {
    /// Linear kernel
    pub fn linear() -> LinearKernel {
        LinearKernel {}
    }

    /// Radial basis function kernel (Gaussian)
    /// * `gamma` - kernel coefficient
    pub fn rbf<T: RealNumber>(gamma: T) -> RBFKernel<T> {
        RBFKernel { gamma }
    }

    /// Polynomial kernel
    /// * `degree` - degree of the polynomial
    /// * `gamma` - kernel coefficient
    /// * `coef0` - independent term in kernel function
    pub fn polynomial<T: RealNumber>(degree: T, gamma: T, coef0: T) -> PolynomialKernel<T> {
        PolynomialKernel {
            degree,
            gamma,
            coef0,
        }
    }

    /// Polynomial kernel with `gamma = 1 / n_features` and `coef0 = 1`
    /// * `degree` - degree of the polynomial
    /// * `n_features` - number of features in vector
    pub fn polynomial_with_degree<T: RealNumber>(
        degree: T,
        n_features: usize,
    ) -> PolynomialKernel<T> {
        let coef0 = T::one();
        let gamma = T::one() / T::from_usize(n_features).unwrap();
        Kernels::polynomial(degree, gamma, coef0)
    }

    /// Sigmoid kernel
    /// * `gamma` - kernel coefficient
    /// * `coef0` - independent term in kernel function
    pub fn sigmoid<T: RealNumber>(gamma: T, coef0: T) -> SigmoidKernel<T> {
        SigmoidKernel { gamma, coef0 }
    }

    /// Sigmoid kernel with `coef0 = 1`
    /// * `gamma` - kernel coefficient
    pub fn sigmoid_with_gamma<T: RealNumber>(gamma: T) -> SigmoidKernel<T> {
        SigmoidKernel {
            gamma,
            coef0: T::one(),
        }
    }
}
/// Linear Kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinearKernel {}

/// Radial basis function (Gaussian) kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RBFKernel<T: RealNumber> {
    /// kernel coefficient
    pub gamma: T,
}
/// Polynomial kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PolynomialKernel<T: RealNumber> {
    /// degree of the polynomial
    pub degree: T,
    /// kernel coefficient
    pub gamma: T,
    /// independent term in kernel function
    pub coef0: T,
}
/// Sigmoid (hyperbolic tangent) kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SigmoidKernel<T: RealNumber> {
    /// kernel coefficient
    pub gamma: T,
    /// independent term in kernel function
    pub coef0: T,
}
impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for LinearKernel {
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        // K(x, x') = <x, x'>
        x_i.dot(x_j)
    }
}

impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for RBFKernel<T> {
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        // K(x, x') = e^(-gamma * ||x - x'||^2)
        let v_diff = x_i.sub(x_j);
        (-self.gamma * v_diff.mul(&v_diff).sum()).exp()
    }
}

impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for PolynomialKernel<T> {
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        // K(x, x') = (gamma * <x, x'> + coef0)^degree
        let dot = x_i.dot(x_j);
        (self.gamma * dot + self.coef0).powf(self.degree)
    }
}

impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for SigmoidKernel<T> {
    fn apply(&self, x_i: &V, x_j: &V) -> T {
        // K(x, x') = tanh(gamma * <x, x'> + coef0)
        let dot = x_i.dot(x_j);
        (self.gamma * dot + self.coef0).tanh()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn linear_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];

        assert_eq!(32f64, Kernels::linear().apply(&v1, &v2));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rbf_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];

        assert!((0.2265f64 - Kernels::rbf(0.055).apply(&v1, &v2)).abs() < 1e-4);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn polynomial_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];

        assert!(
            (4913f64 - Kernels::polynomial(3.0, 0.5, 1.0).apply(&v1, &v2)).abs() < f64::EPSILON
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn sigmoid_kernel() {
        let v1 = vec![1., 2., 3.];
        let v2 = vec![4., 5., 6.];

        assert!((0.3969f64 - Kernels::sigmoid(0.01, 0.1).apply(&v1, &v2)).abs() < 1e-4);
    }
}