fix: refactors knn and distance functions

This commit is contained in:
Volodymyr Orlov
2020-02-21 18:54:50 -08:00
parent 0e89113297
commit fe50509d3b
8 changed files with 101 additions and 154 deletions
+58 -62
View File
@@ -1,61 +1,67 @@
use super::Classifier;
use std::collections::HashSet;
use crate::linalg::Matrix;
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
use crate::algorithm::neighbour::linear_search::LinearKNNSearch;
use crate::algorithm::neighbour::cover_tree::CoverTree;
use crate::common::Nominal;
use ndarray::{ArrayBase, Data, Ix1, Ix2};
use std::fmt::Debug;
type F<X> = dyn Fn(&X, &X) -> f64;
type F = dyn Fn(&Vec<f64>, &Vec<f64>) -> f64;
pub struct KNNClassifier<'a, X, Y>
where
Y: Nominal,
X: Debug
{
classes: Vec<Y>,
pub struct KNNClassifier<'a> {
classes: Vec<f64>,
y: Vec<usize>,
knn_algorithm: Box<dyn KNNAlgorithm<X> + 'a>,
knn_algorithm: Box<dyn KNNAlgorithm<Vec<f64>> + 'a>,
k: usize,
}
impl<'a, X, Y> KNNClassifier<'a, X, Y>
where
Y: Nominal,
X: Debug
{
impl<'a> KNNClassifier<'a> {
pub fn fit(x: Vec<X>, y: Vec<Y>, k: usize, distance: &'a F<X>, algorithm: KNNAlgorithmName) -> KNNClassifier<X, Y> {
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, k: usize, distance: &'a F, algorithm: KNNAlgorithmName) -> KNNClassifier<'a> {
assert!(Vec::len(&x) == Vec::len(&y), format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", Vec::len(&x), Vec::len(&y)));
let y_m = M::from_row_vector(y.clone());
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
let c_hash: HashSet<Y> = y.clone().into_iter().collect();
let classes: Vec<Y> = c_hash.into_iter().collect();
let y_i:Vec<usize> = y.into_iter().map(|y| classes.iter().position(|yy| yy == &y).unwrap()).collect();
let (_, y_n) = y_m.shape();
let (x_n, _) = x.shape();
let knn_algorithm: Box<dyn KNNAlgorithm<X> + 'a> = match algorithm {
KNNAlgorithmName::CoverTree => Box::new(CoverTree::<X>::new(x, distance)),
KNNAlgorithmName::LinearSearch => Box::new(LinearKNNSearch::<X>::new(x, distance))
let data = x.to_vector();
let mut yi: Vec<usize> = vec![0; y_n];
let classes = y_m.unique();
for i in 0..y_n {
let yc = y_m.get(0, i);
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
}
assert!(x_n == y_n, format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", x_n, y_n));
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
let knn_algorithm: Box<dyn KNNAlgorithm<Vec<f64>> + 'a> = match algorithm {
KNNAlgorithmName::CoverTree => Box::new(CoverTree::<Vec<f64>>::new(data, distance)),
KNNAlgorithmName::LinearSearch => Box::new(LinearKNNSearch::<Vec<f64>>::new(data, distance))
};
KNNClassifier{classes:classes, y: y_i, k: k, knn_algorithm: knn_algorithm}
KNNClassifier{classes:classes, y: yi, k: k, knn_algorithm: knn_algorithm}
}
}
impl<'a, X, Y> Classifier<X, Y> for KNNClassifier<'a, X, Y>
where
Y: Nominal,
X: Debug
{
pub fn predict<M: Matrix>(&self, x: &M) -> M::RowVector {
let mut result = M::zeros(1, x.shape().0);
fn predict(&self, x: &X) -> Y {
let idxs = self.knn_algorithm.find(x, self.k);
let (n, _) = x.shape();
for i in 0..n {
result.set(0, i, self.classes[self.predict_for_row(x, i)]);
}
result.to_row_vector()
}
pub(in crate) fn predict_for_row<M: Matrix>(&self, x: &M, row: usize) -> usize {
let idxs = self.knn_algorithm.find(&x.get_row_as_vec(row), self.k);
let mut c = vec![0; self.classes.len()];
let mut max_c = 0;
let mut max_i = 0;
@@ -65,41 +71,31 @@ where
max_c = c[self.y[i]];
max_i = self.y[i];
}
}
}
max_i
self.classes[max_i].clone()
}
}
/// Field-less type used only as a namespace for the `ndarray` conversion
/// helper defined in its `impl` block.
pub struct NDArrayUtils {
}
impl NDArrayUtils {
/// Converts a 2-D array into a `Vec` of 1-D arrays.
///
/// Walks `x` with `outer_iter()` (one view per index of the outermost axis),
/// copies each view into an owned array with `to_owned()`, and collects the
/// results in iteration order.
///
/// NOTE(review): the items produced are `ArrayBase<OwnedRepr<E>, Ix1>`, while
/// the return type is spelled `Vec<ArrayBase<S, Ix1>>`; the `FromIterator`
/// bound in the `where` clause is what lets `collect()` bridge the two.
/// Presumably this is only satisfiable when `S` is the owned representation —
/// confirm against callers.
pub fn array2_to_vec<E, S>(x: &ArrayBase<S, Ix2>) -> Vec<ArrayBase<S, Ix1>>
where
E: Nominal,
S: Data<Elem = E>,
// Required so the owned rows yielded below can be collected into the return type.
std::vec::Vec<ArrayBase<S, Ix1>>: std::iter::FromIterator<ndarray::ArrayBase<ndarray::OwnedRepr<E>, Ix1>>{
// Each `x` in the closure is a 1-D view into the input; `to_owned()` copies it.
let x_vec: Vec<ArrayBase<S, Ix1>> = x.outer_iter().map(|x| x.to_owned()).collect();
x_vec
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::math::distance::Distance;
use ndarray::{arr1, arr2, Array1};
use super::*;
use crate::math::distance::euclidian;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[test]
fn knn_fit_predict() {
let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]);
let y = arr1(&[2, 2, 2, 3, 3]);
let knn = KNNClassifier::fit(NDArrayUtils::array2_to_vec(&x), y.to_vec(), 3, &Array1::distance, KNNAlgorithmName::LinearSearch);
let r = knn.predict_vec(&NDArrayUtils::array2_to_vec(&x));
let x = DenseMatrix::from_array(&[
&[1., 2.],
&[3., 4.],
&[5., 6.],
&[7., 8.],
&[9., 10.]]);
let y = vec![2., 2., 2., 3., 3.];
let knn = KNNClassifier::fit(&x, &y, 3, &euclidian::distance, KNNAlgorithmName::LinearSearch);
let r = knn.predict(&x);
assert_eq!(5, Vec::len(&r));
assert_eq!(y.to_vec(), r);
}