fix: refactors knn and distance functions
This commit is contained in:
+58
-62
@@ -1,61 +1,67 @@
|
||||
use super::Classifier;
|
||||
use std::collections::HashSet;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
|
||||
use crate::algorithm::neighbour::linear_search::LinearKNNSearch;
|
||||
use crate::algorithm::neighbour::cover_tree::CoverTree;
|
||||
use crate::common::Nominal;
|
||||
use ndarray::{ArrayBase, Data, Ix1, Ix2};
|
||||
use std::fmt::Debug;
|
||||
|
||||
|
||||
// Distance callback over a generic sample type `X`: borrows two samples and
// returns an `f64`.
type F<X> = dyn Fn(&X, &X) -> f64;
|
||||
// Distance callback fixed to `Vec<f64>` samples. It shares its name with the
// alias above; presumably the two are the before/after sides of this diff.
type F = dyn Fn(&Vec<f64>, &Vec<f64>) -> f64;
|
||||
|
||||
// State of a fitted k-nearest-neighbours classifier.
// Two headers with the same name appear below: one generic over the sample type
// `X` and label type `Y`, one with `f64` labels and `Vec<f64>` samples.
// Presumably these are the before/after sides of the diff this listing shows.
pub struct KNNClassifier<'a, X, Y>
|
||||
where
|
||||
Y: Nominal,
|
||||
X: Debug
|
||||
{
|
||||
// Distinct class labels; `y` below stores positions into this vector.
classes: Vec<Y>,
|
||||
pub struct KNNClassifier<'a> {
|
||||
// Distinct class labels; `y` below stores positions into this vector.
classes: Vec<f64>,
|
||||
// For each training label, its position in `classes` (filled in by `fit`).
y: Vec<usize>,
|
||||
// Boxed neighbour-search backend; prediction queries it through `find`.
knn_algorithm: Box<dyn KNNAlgorithm<X> + 'a>,
|
||||
// Boxed neighbour-search backend; prediction queries it through `find`.
knn_algorithm: Box<dyn KNNAlgorithm<Vec<f64>> + 'a>,
|
||||
// Neighbour count handed to `find` for every query.
k: usize,
|
||||
}
|
||||
|
||||
impl<'a, X, Y> KNNClassifier<'a, X, Y>
|
||||
where
|
||||
Y: Nominal,
|
||||
X: Debug
|
||||
{
|
||||
impl<'a> KNNClassifier<'a> {
|
||||
|
||||
// Builds a `KNNClassifier` from training samples `x`, labels `y`, a neighbour
// count `k`, a `distance` callback and the chosen search `algorithm`
// (`CoverTree` or `LinearSearch`).
//
// Two variants are interleaved below: one taking `Vec<X>` / `Vec<Y>`, and one
// taking `&M` / `&M::RowVector` for `M: Matrix`. Both do the same three things:
//   * collect the distinct labels into `classes`,
//   * translate every label into its position in `classes`,
//   * hand the samples and `distance` to the selected search structure.
//
// Panics (via `assert!`) when the number of samples differs from the number of
// labels, or when `k` is not greater than 1.
//
// NOTE(review): `assert!(cond, format!(..))` passes a non-literal panic message.
// The Rust 2021 edition rejects that form; the format string and its arguments
// can be given to `assert!` directly.
pub fn fit(x: Vec<X>, y: Vec<Y>, k: usize, distance: &'a F<X>, algorithm: KNNAlgorithmName) -> KNNClassifier<X, Y> {
|
||||
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, k: usize, distance: &'a F, algorithm: KNNAlgorithmName) -> KNNClassifier<'a> {
|
||||
|
||||
// Precondition: one label per sample.
assert!(Vec::len(&x) == Vec::len(&y), format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", Vec::len(&x), Vec::len(&y)));
|
||||
// Wrap the label row vector in a matrix so `shape`, `get` and `unique` can be used on it.
let y_m = M::from_row_vector(y.clone());
|
||||
|
||||
// Precondition: `k == 1` is rejected as well as `k == 0`.
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
|
||||
|
||||
// De-duplicate the labels through a `HashSet`, then freeze them into a vector.
let c_hash: HashSet<Y> = y.clone().into_iter().collect();
|
||||
let classes: Vec<Y> = c_hash.into_iter().collect();
|
||||
// Replace each label by its position in `classes`; `unwrap` relies on every
// label having been collected into `classes` just above.
let y_i:Vec<usize> = y.into_iter().map(|y| classes.iter().position(|yy| yy == &y).unwrap()).collect();
|
||||
// Second component of the label matrix shape is used as the label count.
let (_, y_n) = y_m.shape();
|
||||
// First component of the sample matrix shape is used as the sample count.
let (x_n, _) = x.shape();
|
||||
|
||||
// Select the neighbour-search backend requested by the caller.
let knn_algorithm: Box<dyn KNNAlgorithm<X> + 'a> = match algorithm {
|
||||
KNNAlgorithmName::CoverTree => Box::new(CoverTree::<X>::new(x, distance)),
|
||||
KNNAlgorithmName::LinearSearch => Box::new(LinearKNNSearch::<X>::new(x, distance))
|
||||
// Samples in the form the search structures are built from below
// (presumably one `Vec<f64>` per row — confirm against `Matrix::to_vector`).
let data = x.to_vector();
|
||||
|
||||
// One class index per label, filled in by the loop below.
let mut yi: Vec<usize> = vec![0; y_n];
|
||||
let classes = y_m.unique();
|
||||
|
||||
for i in 0..y_n {
|
||||
let yc = y_m.get(0, i);
|
||||
// `unwrap` relies on `classes` containing every label read from `y_m`.
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
|
||||
}
|
||||
|
||||
// NOTE(review): in this variant both precondition checks run only after
// `x.to_vector()` and the label indexing above have already been done.
assert!(x_n == y_n, format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", x_n, y_n));
|
||||
|
||||
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
|
||||
|
||||
// Select the neighbour-search backend requested by the caller.
let knn_algorithm: Box<dyn KNNAlgorithm<Vec<f64>> + 'a> = match algorithm {
|
||||
KNNAlgorithmName::CoverTree => Box::new(CoverTree::<Vec<f64>>::new(data, distance)),
|
||||
KNNAlgorithmName::LinearSearch => Box::new(LinearKNNSearch::<Vec<f64>>::new(data, distance))
|
||||
};
|
||||
|
||||
KNNClassifier{classes:classes, y: y_i, k: k, knn_algorithm: knn_algorithm}
|
||||
KNNClassifier{classes:classes, y: yi, k: k, knn_algorithm: knn_algorithm}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<'a, X, Y> Classifier<X, Y> for KNNClassifier<'a, X, Y>
|
||||
where
|
||||
Y: Nominal,
|
||||
X: Debug
|
||||
{
|
||||
// Predicts a class label for every row of `x`.
// Allocates a 1 x n result (n = first component of `x.shape()`), stores
// `self.classes[predict_for_row(x, i)]` at column `i`, and returns the result
// as a row vector.
pub fn predict<M: Matrix>(&self, x: &M) -> M::RowVector {
|
||||
let mut result = M::zeros(1, x.shape().0);
|
||||
|
||||
// Single-sample variant: looks up the `k` nearest neighbours of `x` and
// returns a clone of the winning label (see `self.classes[max_i].clone()` below).
fn predict(&self, x: &X) -> Y {
|
||||
let idxs = self.knn_algorithm.find(x, self.k);
|
||||
let (n, _) = x.shape();
|
||||
|
||||
for i in 0..n {
|
||||
result.set(0, i, self.classes[self.predict_for_row(x, i)]);
|
||||
}
|
||||
|
||||
result.to_row_vector()
|
||||
}
|
||||
|
||||
// Returns the position in `self.classes` of the class chosen for row `row` of `x`.
pub(in crate) fn predict_for_row<M: Matrix>(&self, x: &M, row: usize) -> usize {
|
||||
|
||||
// Result of the neighbour search for this row with `self.k` neighbours.
let idxs = self.knn_algorithm.find(&x.get_row_as_vec(row), self.k);
|
||||
// One counter per class, plus the best count and its class position seen so far.
let mut c = vec![0; self.classes.len()];
|
||||
let mut max_c = 0;
|
||||
let mut max_i = 0;
|
||||
// NOTE(review): the hunk header below hides a few lines of the original file.
// Presumably they open the loop over `idxs` and the comparison against `max_c`
// — confirm against the full source.
@@ -65,41 +71,31 @@ where
|
||||
// Record the new best count and the class position it belongs to.
max_c = c[self.y[i]];
|
||||
max_i = self.y[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `predict_for_row` yields the class position itself ...
max_i
|
||||
|
||||
// ... while the single-sample `predict` yields a clone of the label at that position.
self.classes[max_i].clone()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Field-less namespace type for ndarray conversion helpers.
pub struct NDArrayUtils {
|
||||
|
||||
}
|
||||
|
||||
impl NDArrayUtils {
|
||||
|
||||
// Converts a two-dimensional array into a `Vec` of one-dimensional arrays by
// taking an owned copy (`to_owned`) of every item yielded by `x.outer_iter()`.
// The `FromIterator` bound in the `where` clause is what lets the owned
// copies be collected into `Vec<ArrayBase<S, Ix1>>`.
pub fn array2_to_vec<E, S>(x: &ArrayBase<S, Ix2>) -> Vec<ArrayBase<S, Ix1>>
|
||||
where
|
||||
E: Nominal,
|
||||
S: Data<Elem = E>,
|
||||
std::vec::Vec<ArrayBase<S, Ix1>>: std::iter::FromIterator<ndarray::ArrayBase<ndarray::OwnedRepr<E>, Ix1>>{
|
||||
let x_vec: Vec<ArrayBase<S, Ix1>> = x.outer_iter().map(|x| x.to_owned()).collect();
|
||||
x_vec
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::math::distance::Distance;
|
||||
use ndarray::{arr1, arr2, Array1};
|
||||
use super::*;
|
||||
use crate::math::distance::euclidian;
|
||||
use crate::linalg::naive::dense_matrix::DenseMatrix;
|
||||
|
||||
// Fits a 3-neighbour classifier with linear search on five two-feature samples
// labelled 2, 2, 2, 3, 3, predicts on the same samples, and expects the
// training labels back. Two set-ups are interleaved below: one built from
// ndarray values and one built from a `DenseMatrix`.
#[test]
|
||||
fn knn_fit_predict() {
|
||||
// ndarray-based set-up: integer samples and labels, converted with `NDArrayUtils::array2_to_vec`.
let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]);
|
||||
let y = arr1(&[2, 2, 2, 3, 3]);
|
||||
let knn = KNNClassifier::fit(NDArrayUtils::array2_to_vec(&x), y.to_vec(), 3, &Array1::distance, KNNAlgorithmName::LinearSearch);
|
||||
let r = knn.predict_vec(&NDArrayUtils::array2_to_vec(&x));
|
||||
// `DenseMatrix`-based set-up: the same samples and labels as `f64` values.
let x = DenseMatrix::from_array(&[
|
||||
&[1., 2.],
|
||||
&[3., 4.],
|
||||
&[5., 6.],
|
||||
&[7., 8.],
|
||||
&[9., 10.]]);
|
||||
let y = vec![2., 2., 2., 3., 3.];
|
||||
let knn = KNNClassifier::fit(&x, &y, 3, &euclidian::distance, KNNAlgorithmName::LinearSearch);
|
||||
let r = knn.predict(&x);
|
||||
// One prediction per sample, each equal to the label the sample was trained with.
assert_eq!(5, Vec::len(&r));
|
||||
assert_eq!(y.to_vec(), r);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user