//! # Support Vector Classifier. //! //! Example //! //! ``` //! use smartcore::linalg::naive::dense_matrix::*; //! use smartcore::linear::linear_regression::*; //! use smartcore::svm::LinearKernel; //! use smartcore::svm::svc::{SVC, SVCParameters}; //! //! // Iris dataset //! let x = DenseMatrix::from_2d_array(&[ //! &[5.1, 3.5, 1.4, 0.2], //! &[4.9, 3.0, 1.4, 0.2], //! &[4.7, 3.2, 1.3, 0.2], //! &[4.6, 3.1, 1.5, 0.2], //! &[5.0, 3.6, 1.4, 0.2], //! &[5.4, 3.9, 1.7, 0.4], //! &[4.6, 3.4, 1.4, 0.3], //! &[5.0, 3.4, 1.5, 0.2], //! &[4.4, 2.9, 1.4, 0.2], //! &[4.9, 3.1, 1.5, 0.1], //! &[7.0, 3.2, 4.7, 1.4], //! &[6.4, 3.2, 4.5, 1.5], //! &[6.9, 3.1, 4.9, 1.5], //! &[5.5, 2.3, 4.0, 1.3], //! &[6.5, 2.8, 4.6, 1.5], //! &[5.7, 2.8, 4.5, 1.3], //! &[6.3, 3.3, 4.7, 1.6], //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], //! ]); //! let y = vec![ -1., -1., -1., -1., -1., -1., -1., -1., //! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]; //! //! let svr = SVC::fit(&x, &y, //! LinearKernel {}, //! SVCParameters { //! epoch: 2, //! c: 200.0, //! tol: 1e-3, //! }).unwrap(); //! //! let y_hat = svr.predict(&x).unwrap(); //! ``` //! //! ## References: //! //! * ["Support Vector Machines" Kowalczyk A., 2017](https://www.svm-tutorial.com/2017/10/support-vector-machines-succinctly-released/) //! * ["Fast Kernel Classifiers with Online and Active Learning", Bordes A., Ertekin S., Weston J., Bottou L., 2005](https://www.jmlr.org/papers/volume6/bordes05a/bordes05a.pdf) use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::marker::PhantomData; use rand::seq::SliceRandom; use serde::{Deserialize, Serialize}; use crate::error::Failed; use crate::linalg::BaseVector; use crate::linalg::Matrix; use crate::math::num::RealNumber; use crate::svm::Kernel; #[derive(Serialize, Deserialize, Debug)] /// SVC Parameters pub struct SVCParameters { /// Number of epochs pub epoch: usize, /// Regularization parameter. pub c: T, /// Tolerance for stopping criterion pub tol: T, } #[derive(Serialize, Deserialize, Debug)] #[serde(bound( serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize", deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>", ))] /// Support Vector Classifier pub struct SVC, K: Kernel> { kernel: K, instances: Vec, w: Vec, b: T, } #[derive(Serialize, Deserialize, Debug)] struct SupportVector> { index: usize, x: V, alpha: T, grad: T, cmin: T, cmax: T, k: T, } struct Cache<'a, T: RealNumber, M: Matrix, K: Kernel> { kernel: &'a K, data: HashMap<(usize, usize), T>, phantom: PhantomData, } struct Optimizer<'a, T: RealNumber, M: Matrix, K: Kernel> { x: &'a M, y: &'a M::RowVector, parameters: &'a SVCParameters, svmin: usize, svmax: usize, gmin: T, gmax: T, tau: T, sv: Vec>, kernel: &'a K, recalculate_minmax_grad: bool, } impl Default for SVCParameters { fn default() -> Self { SVCParameters { epoch: 2, c: T::one(), tol: T::from_f64(1e-3).unwrap(), } } } impl, K: Kernel> SVC { /// Fits SVC to your data. /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation. /// * `y` - class labels /// * `kernel` - the kernel function /// * `parameters` - optional parameters, use `Default::default()` to set parameters to default values. pub fn fit( x: &M, y: &M::RowVector, kernel: K, parameters: SVCParameters, ) -> Result, Failed> { let (n, _) = x.shape(); if n != y.len() { return Err(Failed::fit(&format!( "Number of rows of X doesn't match number of rows of Y" ))); } let optimizer = Optimizer::new(x, y, &kernel, ¶meters); let (support_vectors, weight, b) = optimizer.optimize(); Ok(SVC { kernel: kernel, instances: support_vectors, w: weight, b: b, }) } /// Predicts estimated class labels from `x` /// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features. pub fn predict(&self, x: &M) -> Result { let (n, _) = x.shape(); let mut y_hat = M::RowVector::zeros(n); for i in 0..n { y_hat.set(i, self.predict_for_row(x.get_row(i))); } Ok(y_hat) } fn predict_for_row(&self, x: M::RowVector) -> T { let mut f = self.b; for i in 0..self.instances.len() { f += self.w[i] * self.kernel.apply(&x, &self.instances[i]); } if f > T::zero() { T::one() } else { -T::one() } } } impl, K: Kernel> PartialEq for SVC { fn eq(&self, other: &Self) -> bool { if self.b != other.b || self.w.len() != other.w.len() || self.instances.len() != other.instances.len() { return false; } else { for i in 0..self.w.len() { if (self.w[i] - other.w[i]).abs() > T::epsilon() { return false; } } for i in 0..self.instances.len() { if !self.instances[i].approximate_eq(&other.instances[i], T::epsilon()) { return false; } } return true; } } } impl> SupportVector { fn new>(i: usize, x: V, y: T, g: T, c: T, k: &K) -> SupportVector { let k_v = k.apply(&x, &x); let (cmin, cmax) = if y > T::zero() { (T::zero(), c) } else { (-c, T::zero()) }; SupportVector { index: i, x: x, grad: g, k: k_v, alpha: T::zero(), cmin: cmin, cmax: cmax, } } } impl<'a, T: RealNumber, M: Matrix, K: Kernel> Cache<'a, T, M, K> { fn new(kernel: &'a K) -> Cache<'a, T, M, K> { Cache { kernel: kernel, data: HashMap::new(), phantom: PhantomData, } } fn get(&mut self, i: &SupportVector, j: &SupportVector) -> T { let idx_i = i.index; let idx_j = j.index; if !self.data.contains_key(&(idx_i, idx_j)) { let v = self.kernel.apply(&i.x, &j.x); self.data.insert((idx_i, idx_j), v); } *self.data.get(&(idx_i, idx_j)).unwrap() } fn insert(&mut self, key: (usize, usize), value: T) { self.data.insert(key, value); } fn drop(&mut self, idxs_to_drop: HashSet) { self.data.retain(|k, _| !idxs_to_drop.contains(&k.0)); } } impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, T, M, K> { fn new( x: &'a M, y: &'a M::RowVector, kernel: &'a K, parameters: &'a SVCParameters, ) -> Optimizer<'a, T, M, K> { let (n, _) = x.shape(); Optimizer { x: x, y: y, parameters: ¶meters, svmin: 0, svmax: 0, gmin: T::max_value(), gmax: T::min_value(), tau: T::from_f64(1e-12).unwrap(), sv: Vec::with_capacity(n), kernel: kernel, recalculate_minmax_grad: true, } } fn optimize(mut self) -> (Vec, Vec, T) { let (n, _) = self.x.shape(); let mut cache = Cache::new(self.kernel); self.initialize(&mut cache); let tol = self.parameters.tol; let good_enough = T::from_i32(1000).unwrap(); for _ in 0..self.parameters.epoch { for i in Self::permutate(n) { self.process(i, self.x.get_row(i), self.y.get(i), &mut cache); loop { self.reprocess(tol, &mut cache); self.find_min_max_gradient(); if self.gmax - self.gmin < good_enough { break; } } } } self.finish(&mut cache); let mut support_vectors: Vec = Vec::new(); let mut w: Vec = Vec::new(); let b = (self.gmax + self.gmin) / T::two(); for v in self.sv { support_vectors.push(v.x); w.push(v.alpha); } (support_vectors, w, b) } fn initialize(&mut self, cache: &mut Cache) { let (n, _) = self.x.shape(); let few = 5; let mut cp = 0; let mut cn = 0; for i in Self::permutate(n) { if self.y.get(i) == T::one() && cp < few { if self.process(i, self.x.get_row(i), self.y.get(i), cache) { cp += 1; } } else if self.y.get(i) == -T::one() && cn < few { if self.process(i, self.x.get_row(i), self.y.get(i), cache) { cn += 1; } } if cp >= few && cn >= few { break; } } } fn process(&mut self, i: usize, x: M::RowVector, y: T, cache: &mut Cache) -> bool { for j in 0..self.sv.len() { if self.sv[j].index == i { return true; } } let mut g = y; let mut cache_values: Vec<((usize, usize), T)> = Vec::new(); for v in self.sv.iter() { let k = self.kernel.apply(&v.x, &x); cache_values.push(((i, v.index), k)); g -= v.alpha * k; } self.find_min_max_gradient(); if self.gmin < self.gmax { if (y > T::zero() && g < self.gmin) || (y < T::zero() && g > self.gmax) { return false; } } for v in cache_values { cache.insert(v.0, v.1); } self.sv.insert( 0, SupportVector::new(i, x, y, g, self.parameters.c, self.kernel), ); if y > T::zero() { self.smo(None, Some(0), T::zero(), cache); } else { self.smo(Some(0), None, T::zero(), cache); } true } fn reprocess(&mut self, tol: T, cache: &mut Cache) -> bool { let status = self.smo(None, None, tol, cache); self.clean(cache); status } fn finish(&mut self, cache: &mut Cache) { let mut max_iter = self.sv.len(); while self.smo(None, None, self.parameters.tol, cache) && max_iter > 0 { max_iter -= 1; } self.clean(cache); } fn find_min_max_gradient(&mut self) { if !self.recalculate_minmax_grad { return; } self.gmin = T::max_value(); self.gmax = T::min_value(); for i in 0..self.sv.len() { let v = &self.sv[i]; let g = v.grad; let a = v.alpha; if g < self.gmin && a > v.cmin { self.gmin = g; self.svmin = i; } if g > self.gmax && a < v.cmax { self.gmax = g; self.svmax = i; } } self.recalculate_minmax_grad = false } fn clean(&mut self, cache: &mut Cache) { self.find_min_max_gradient(); let gmax = self.gmax; let gmin = self.gmin; let mut idxs_to_drop: HashSet = HashSet::new(); self.sv.retain(|v| { if v.alpha == T::zero() { if (v.grad >= gmax && T::zero() >= v.cmax) || (v.grad <= gmin && T::zero() <= v.cmin) { idxs_to_drop.insert(v.index); return false; } }; true }); cache.drop(idxs_to_drop); self.recalculate_minmax_grad = true; } fn permutate(n: usize) -> Vec { let mut rng = rand::thread_rng(); let mut range: Vec = (0..n).collect(); range.shuffle(&mut rng); range } fn select_pair(&mut self, idx_1: Option, idx_2: Option, cache: &mut Cache) -> Option<(usize, usize, T)> { let mut idx_1 = idx_1; let mut idx_2 = idx_2; let mut k_v_12: Option = None; if idx_1.is_none() && idx_2.is_none() { self.find_min_max_gradient(); if self.gmax > -self.gmin { idx_2 = Some(self.svmax); } else { idx_1 = Some(self.svmin); } } if idx_2.is_none() { let idx_1 = &self.sv[idx_1.unwrap()]; let km = idx_1.k; let gm = idx_1.grad; let mut best = T::zero(); for i in 0..self.sv.len() { let v = &self.sv[i]; let z = v.grad - gm; let k = cache.get(idx_1, &v); let mut curv = km + v.k - T::two() * k; if curv <= T::zero() { curv = self.tau; } let mu = z / curv; if (mu > T::zero() && v.alpha < v.cmax) || (mu < T::zero() && v.alpha > v.cmin) { let gain = z * mu; if gain > best { best = gain; idx_2 = Some(i); k_v_12 = Some(k); } } } } if idx_1.is_none() { let idx_2 = &self.sv[idx_2.unwrap()]; let km = idx_2.k; let gm = idx_2.grad; let mut best = T::zero(); for i in 0..self.sv.len() { let v = &self.sv[i]; let z = gm - v.grad; let k = cache.get(idx_2, v); let mut curv = km + v.k - T::two() * k; if curv <= T::zero() { curv = self.tau; } let mu = z / curv; if (mu > T::zero() && v.alpha > v.cmin) || (mu < T::zero() && v.alpha < v.cmax) { let gain = z * mu; if gain > best { best = gain; idx_1 = Some(i); k_v_12 = Some(k); } } } } if idx_1.is_none() || idx_2.is_none() { None } else { let idx_1 = idx_1.unwrap(); let idx_2 = idx_2.unwrap(); if k_v_12.is_none() { k_v_12 = Some(self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x)); } Some((idx_1, idx_2, k_v_12.unwrap())) } } fn smo( &mut self, idx_1: Option, idx_2: Option, tol: T, cache: &mut Cache, ) -> bool { match self.select_pair(idx_1, idx_2, cache) { Some((idx_1, idx_2, k_v_12)) => { let mut curv = self.sv[idx_1].k + self.sv[idx_2].k - T::two() * k_v_12; if curv <= T::zero() { curv = self.tau; } let mut step = (self.sv[idx_2].grad - self.sv[idx_1].grad) / curv; if step >= T::zero() { let mut ostep = self.sv[idx_1].alpha - self.sv[idx_1].cmin; if ostep < step { step = ostep; } ostep = self.sv[idx_2].cmax - self.sv[idx_2].alpha; if ostep < step { step = ostep; } } else { let mut ostep = self.sv[idx_2].cmin - self.sv[idx_2].alpha; if ostep > step { step = ostep; } ostep = self.sv[idx_1].alpha - self.sv[idx_1].cmax; if ostep > step { step = ostep; } } self.update(idx_1, idx_2, step, cache); return self.gmax - self.gmin > tol; }, None => false } } fn update(&mut self, v1: usize, v2: usize, step: T, cache: &mut Cache) { self.sv[v1].alpha -= step; self.sv[v2].alpha += step; for i in 0..self.sv.len() { let k2 = cache.get(&self.sv[v2], &self.sv[i]); let k1 = cache.get(&self.sv[v1], &self.sv[i]); self.sv[i].grad -= step * (k2 - k1); } self.recalculate_minmax_grad = true; self.find_min_max_gradient(); } } #[cfg(test)] mod tests { use super::*; use crate::linalg::naive::dense_matrix::*; use crate::metrics::accuracy; use crate::svm::*; #[test] fn svc_fit_predict() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], &[4.7, 3.2, 1.3, 0.2], &[4.6, 3.1, 1.5, 0.2], &[5.0, 3.6, 1.4, 0.2], &[5.4, 3.9, 1.7, 0.4], &[4.6, 3.4, 1.4, 0.3], &[5.0, 3.4, 1.5, 0.2], &[4.4, 2.9, 1.4, 0.2], &[4.9, 3.1, 1.5, 0.1], &[7.0, 3.2, 4.7, 1.4], &[6.4, 3.2, 4.5, 1.5], &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], &[5.7, 2.8, 4.5, 1.3], &[6.3, 3.3, 4.7, 1.6], &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], ]); let y: Vec = vec![ -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., ]; let y_hat = SVC::fit( &x, &y, LinearKernel {}, SVCParameters { epoch: 2, c: 200.0, tol: 1e-3, }, ) .and_then(|lr| lr.predict(&x)) .unwrap(); assert!(accuracy(&y_hat, &y) >= 0.9); } #[test] fn svc_serde() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], &[4.7, 3.2, 1.3, 0.2], &[4.6, 3.1, 1.5, 0.2], &[5.0, 3.6, 1.4, 0.2], &[5.4, 3.9, 1.7, 0.4], &[4.6, 3.4, 1.4, 0.3], &[5.0, 3.4, 1.5, 0.2], &[4.4, 2.9, 1.4, 0.2], &[4.9, 3.1, 1.5, 0.1], &[7.0, 3.2, 4.7, 1.4], &[6.4, 3.2, 4.5, 1.5], &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], &[5.7, 2.8, 4.5, 1.3], &[6.3, 3.3, 4.7, 1.6], &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], ]); let y: Vec = vec![ -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., ]; let svr = SVC::fit(&x, &y, LinearKernel {}, Default::default()).unwrap(); let deserialized_svr: SVC, LinearKernel> = serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap(); assert_eq!(svr, deserialized_svr); } }