From e5b412451ffde1f94033b0b7d552a04c96a66066 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Thu, 27 Aug 2020 14:17:18 -0700 Subject: [PATCH] feat: adds KNN Regressor --- src/lib.rs | 4 +- src/neighbors/{knn.rs => knn_classifier.rs} | 39 +----- src/neighbors/knn_regressor.rs | 139 ++++++++++++++++++++ src/neighbors/mod.rs | 48 ++++++- 4 files changed, 189 insertions(+), 41 deletions(-) rename src/neighbors/{knn.rs => knn_classifier.rs} (78%) create mode 100644 src/neighbors/knn_regressor.rs diff --git a/src/lib.rs b/src/lib.rs index cc550a1..c0e0171 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,14 +36,14 @@ //! //! Each category is assigned to a separate module. //! -//! For example, KNN classifier is defined in [smartcore::neighbors::knn](neighbors/knn/index.html). To train and run it using standard Rust vectors you will +//! For example, KNN classifier is defined in [smartcore::neighbors::knn_classifier](neighbors/knn_classifier/index.html). To train and run it using standard Rust vectors you will //! run this code: //! //! ``` //! // DenseMatrix defenition //! use smartcore::linalg::naive::dense_matrix::*; //! // KNNClassifier -//! use smartcore::neighbors::knn::*; +//! use smartcore::neighbors::knn_classifier::*; //! // Various distance metrics //! use smartcore::math::distance::*; //! diff --git a/src/neighbors/knn.rs b/src/neighbors/knn_classifier.rs similarity index 78% rename from src/neighbors/knn.rs rename to src/neighbors/knn_classifier.rs index 8cbdd70..e22c897 100644 --- a/src/neighbors/knn.rs +++ b/src/neighbors/knn_classifier.rs @@ -1,17 +1,10 @@ use serde::{Deserialize, Serialize}; -use crate::algorithm::neighbour::cover_tree::CoverTree; -use crate::algorithm::neighbour::linear_search::LinearKNNSearch; +use crate::neighbors::{KNNAlgorithmName, KNNAlgorithm}; use crate::linalg::{row_iter, Matrix}; use crate::math::distance::Distance; use crate::math::num::FloatExt; -#[derive(Serialize, Deserialize, Debug)] -pub enum KNNAlgorithmName { - LinearSearch, - CoverTree, -} - #[derive(Serialize, Deserialize, Debug)] pub struct KNNClassifierParameters { pub algorithm: KNNAlgorithmName, @@ -26,12 +19,6 @@ pub struct KNNClassifier, T>> { k: usize, } -#[derive(Serialize, Deserialize, Debug)] -enum KNNAlgorithm, T>> { - LinearSearch(LinearKNNSearch, T, D>), - CoverTree(CoverTree, T, D>), -} - impl Default for KNNClassifierParameters { fn default() -> Self { KNNClassifierParameters { @@ -41,30 +28,6 @@ impl Default for KNNClassifierParameters { } } -impl KNNAlgorithmName { - fn fit, T>>( - &self, - data: Vec>, - distance: D, - ) -> KNNAlgorithm { - match *self { - KNNAlgorithmName::LinearSearch => { - KNNAlgorithm::LinearSearch(LinearKNNSearch::new(data, distance)) - } - KNNAlgorithmName::CoverTree => KNNAlgorithm::CoverTree(CoverTree::new(data, distance)), - } - } -} - -impl, T>> KNNAlgorithm { - fn find(&self, from: &Vec, k: usize) -> Vec { - match *self { - KNNAlgorithm::LinearSearch(ref linear) => linear.find(from, k), - KNNAlgorithm::CoverTree(ref cover) => cover.find(from, k), - } - } -} - impl, T>> PartialEq for KNNClassifier { fn eq(&self, other: &Self) -> bool { if self.classes.len() != other.classes.len() diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs new file mode 100644 index 0000000..2d183f4 --- /dev/null +++ b/src/neighbors/knn_regressor.rs @@ -0,0 +1,139 @@ +use serde::{Deserialize, Serialize}; + +use crate::neighbors::{KNNAlgorithmName, KNNAlgorithm}; +use crate::linalg::{row_iter, BaseVector, Matrix}; +use crate::math::distance::Distance; +use crate::math::num::FloatExt; + + +#[derive(Serialize, Deserialize, Debug)] +pub struct KNNRegressorParameters { + pub algorithm: KNNAlgorithmName, + pub k: usize, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct KNNRegressor, T>> { + y: Vec, + knn_algorithm: KNNAlgorithm, + k: usize, +} + +impl Default for KNNRegressorParameters { + fn default() -> Self { + KNNRegressorParameters { + algorithm: KNNAlgorithmName::CoverTree, + k: 3, + } + } +} + +impl, T>> PartialEq for KNNRegressor { + fn eq(&self, other: &Self) -> bool { + if self.k != other.k || self.y.len() != other.y.len(){ + return false; + } else { + for i in 0..self.y.len() { + if (self.y[i] - other.y[i]).abs() > T::epsilon() { + return false; + } + } + true + } + } +} + +impl, T>> KNNRegressor { + pub fn fit>( + x: &M, + y: &M::RowVector, + distance: D, + parameters: KNNRegressorParameters, + ) -> KNNRegressor { + let y_m = M::from_row_vector(y.clone()); + + let (_, y_n) = y_m.shape(); + let (x_n, _) = x.shape(); + + let data = row_iter(x).collect(); + + assert!( + x_n == y_n, + format!( + "Size of x should equal size of y; |x|=[{}], |y|=[{}]", + x_n, y_n + ) + ); + + assert!( + parameters.k > 1, + format!("k should be > 1, k=[{}]", parameters.k) + ); + + KNNRegressor { + y: y.to_vec(), + k: parameters.k, + knn_algorithm: parameters.algorithm.fit(data, distance), + } + } + + pub fn predict>(&self, x: &M) -> M::RowVector { + let mut result = M::zeros(1, x.shape().0); + + row_iter(x) + .enumerate() + .for_each(|(i, x)| result.set(0, i, self.predict_for_row(x))); + + result.to_row_vector() + } + + fn predict_for_row(&self, x: Vec) -> T { + let idxs = self.knn_algorithm.find(&x, self.k); + let mut result = T::zero(); + for i in idxs { + result = result + self.y[i]; + } + + result / T::from_usize(self.k).unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::DenseMatrix; + use crate::math::distance::Distances; + + #[test] + fn knn_fit_predict() { + let x = DenseMatrix::from_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + let y: Vec = vec![1., 2., 3., 4., 5.]; + let y_exp = vec![2., 2., 3., 4., 4.]; + let knn = KNNRegressor::fit( + &x, + &y, + Distances::euclidian(), + KNNRegressorParameters { + k: 3, + algorithm: KNNAlgorithmName::LinearSearch, + }, + ); + let y_hat = knn.predict(&x); + assert_eq!(5, Vec::len(&y_hat)); + for i in 0..y_hat.len() { + assert!((y_hat[i] - y_exp[i]).abs() < std::f64::EPSILON); + } + } + + #[test] + fn serde() { + let x = DenseMatrix::from_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + let y = vec![1., 2., 3., 4., 5.]; + + let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()); + + let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap(); + + assert_eq!(knn, deserialized_knn); + } +} diff --git a/src/neighbors/mod.rs b/src/neighbors/mod.rs index 7fb9d74..b68b204 100644 --- a/src/neighbors/mod.rs +++ b/src/neighbors/mod.rs @@ -1 +1,47 @@ -pub mod knn; +//! # Nearest Neighbors + +use serde::{Deserialize, Serialize}; +use crate::algorithm::neighbour::cover_tree::CoverTree; +use crate::algorithm::neighbour::linear_search::LinearKNNSearch; +use crate::math::distance::Distance; +use crate::math::num::FloatExt; + +/// +pub mod knn_classifier; +pub mod knn_regressor; + +#[derive(Serialize, Deserialize, Debug)] +pub enum KNNAlgorithmName { + LinearSearch, + CoverTree, +} + +#[derive(Serialize, Deserialize, Debug)] +enum KNNAlgorithm, T>> { + LinearSearch(LinearKNNSearch, T, D>), + CoverTree(CoverTree, T, D>), +} + +impl KNNAlgorithmName { + fn fit, T>>( + &self, + data: Vec>, + distance: D, + ) -> KNNAlgorithm { + match *self { + KNNAlgorithmName::LinearSearch => { + KNNAlgorithm::LinearSearch(LinearKNNSearch::new(data, distance)) + } + KNNAlgorithmName::CoverTree => KNNAlgorithm::CoverTree(CoverTree::new(data, distance)), + } + } +} + +impl, T>> KNNAlgorithm { + fn find(&self, from: &Vec, k: usize) -> Vec { + match *self { + KNNAlgorithm::LinearSearch(ref linear) => linear.find(from, k), + KNNAlgorithm::CoverTree(ref cover) => cover.find(from, k), + } + } +}