Semi-ready implementation of Simple KNN
This commit is contained in:
+2
-3
@@ -2,8 +2,7 @@
|
||||
extern crate criterion;
|
||||
extern crate smartcore;
|
||||
extern crate ndarray;
|
||||
use ndarray::Array;
|
||||
use smartcore::math::distance::euclidian::EuclidianDistance;
|
||||
use ndarray::{Array, Array1};
|
||||
use smartcore::math::distance::Distance;
|
||||
|
||||
use criterion::Criterion;
|
||||
@@ -12,7 +11,7 @@ use criterion::black_box;
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
let a = Array::from_vec(vec![1., 2., 3.]);
|
||||
|
||||
c.bench_function("Euclidean Distance", move |b| b.iter(|| EuclidianDistance::distance(black_box(&a), black_box(&a))));
|
||||
c.bench_function("Euclidean Distance", move |b| b.iter(|| Array1::distance(black_box(&a), black_box(&a))));
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
|
||||
+68
-65
@@ -1,85 +1,92 @@
|
||||
use super::Classifier;
|
||||
use std::collections::HashSet;
|
||||
use crate::algorithm::sort::heap_select::HeapSelect;
|
||||
use crate::common::AnyNumber;
|
||||
use ndarray::prelude::*;
|
||||
use crate::common::Nominal;
|
||||
use ndarray::{ArrayBase, Data, Ix1, Ix2};
|
||||
use num_traits::{Float};
|
||||
use std::cmp::{Ordering, PartialOrd};
|
||||
use ndarray::arr1;
|
||||
|
||||
pub struct KNNClassifier<X, Y, F>
|
||||
|
||||
type F<X> = Fn(&X, &X) -> f64;
|
||||
|
||||
pub struct KNNClassifier<X, Y>
|
||||
where
|
||||
X: AnyNumber,
|
||||
Y: AnyNumber,
|
||||
F: Fn(&Array1<X>, &Array1<X>) -> f64
|
||||
Y: Nominal
|
||||
{
|
||||
y: Vec<Y>,
|
||||
distance: F,
|
||||
classes: Vec<Y>,
|
||||
y: Vec<usize>,
|
||||
data: Vec<X>,
|
||||
distance: Box<F<X>>,
|
||||
k: usize,
|
||||
knn_algorithm: Box<KNNAlgorithm<Array1<X>, F>>
|
||||
}
|
||||
|
||||
impl<X, Y, F> KNNClassifier<X, Y, F>
|
||||
impl<X, Y> KNNClassifier<X, Y>
|
||||
where
|
||||
X: AnyNumber,
|
||||
Y: AnyNumber,
|
||||
F: Fn(&Array1<X>, &Array1<X>) -> f64
|
||||
Y: Nominal
|
||||
{
|
||||
|
||||
pub fn fit<SX: Data<Elem = X>, SY: Data<Elem = Y>>(x: &ArrayBase<SX, Ix2>, y: &ArrayBase<SY, Ix1>, k: usize, distance: F) -> KNNClassifier<X, Y, F> {
|
||||
pub fn fit(x: Vec<X>, y: Vec<Y>, k: usize, distance: &'static F<X>) -> KNNClassifier<X, Y> {
|
||||
|
||||
assert!(ArrayBase::shape(x)[0] == ArrayBase::shape(y)[0], format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", ArrayBase::shape(x)[0], ArrayBase::shape(y)[0]));
|
||||
assert!(Vec::len(&x) == Vec::len(&y), format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", Vec::len(&x), Vec::len(&y)));
|
||||
|
||||
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
|
||||
|
||||
let v: Vec<Array1<X>> = x.outer_iter().map(|x| x.to_owned()).collect();
|
||||
let c_hash: HashSet<Y> = y.clone().into_iter().collect();
|
||||
let classes: Vec<Y> = c_hash.into_iter().collect();
|
||||
let y_i:Vec<usize> = y.into_iter().map(|y| classes.iter().position(|yy| yy == &y).unwrap()).collect();
|
||||
|
||||
let knn = Box::new(SimpleKNNAlgorithm{
|
||||
data: v
|
||||
});
|
||||
|
||||
KNNClassifier{y: y.to_owned().to_vec(), k: k, distance: distance, knn_algorithm: knn}
|
||||
}
|
||||
KNNClassifier{classes:classes, y: y_i, data: x, k: k, distance: Box::new(distance)}
|
||||
}
|
||||
|
||||
impl<X, Y, SX, F> Classifier<X, Y, SX> for KNNClassifier<X, Y, F>
|
||||
}
|
||||
|
||||
impl<X, Y> Classifier<X, Y> for KNNClassifier<X, Y>
|
||||
where
|
||||
X: AnyNumber,
|
||||
Y: AnyNumber,
|
||||
SX: Data<Elem = X>,
|
||||
F: Fn(&Array1<X>, &Array1<X>) -> f64
|
||||
Y: Nominal
|
||||
{
|
||||
|
||||
fn predict(&self, x: &ArrayBase<SX, Ix2>) -> Array1<Y> {
|
||||
let mut result = Vec::new();
|
||||
for x in x.outer_iter() {
|
||||
let idxs = self.knn_algorithm.find(&x.to_owned(), self.k, &self.distance);
|
||||
let mut sum: Y = Y::zero();
|
||||
let mut count = 0;
|
||||
fn predict(&self, x: &X) -> Y {
|
||||
let idxs = self.data.find(x, self.k, &self.distance);
|
||||
let mut c = vec![0; self.classes.len()];
|
||||
let mut max_c = 0;
|
||||
let mut max_i = 0;
|
||||
for i in idxs {
|
||||
sum = sum + self.y[i].to_owned();
|
||||
count += 1;
|
||||
c[self.y[i]] += 1;
|
||||
if c[self.y[i]] > max_c {
|
||||
max_c = c[self.y[i]];
|
||||
max_i = self.y[i];
|
||||
}
|
||||
result.push(sum / Y::from_u64(count).unwrap());
|
||||
}
|
||||
arr1(&result)
|
||||
|
||||
self.classes[max_i].clone()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
pub trait KNNAlgorithm<T: Clone, F: Fn(&T, &T) -> f64>{
|
||||
fn find(&self, from: &T, k: usize, d: &F) -> Vec<usize>;
|
||||
pub struct NDArrayUtils {
|
||||
|
||||
}
|
||||
|
||||
pub struct SimpleKNNAlgorithm<T>
|
||||
impl NDArrayUtils {
|
||||
|
||||
pub fn array2_to_vec<E, S>(x: &ArrayBase<S, Ix2>) -> Vec<ArrayBase<S, Ix1>>
|
||||
where
|
||||
E: Nominal,
|
||||
S: Data<Elem = E>,
|
||||
std::vec::Vec<ArrayBase<S, Ix1>>: std::iter::FromIterator<ndarray::ArrayBase<ndarray::OwnedRepr<E>, Ix1>>{
|
||||
let x_vec: Vec<ArrayBase<S, Ix1>> = x.outer_iter().map(|x| x.to_owned()).collect();
|
||||
x_vec
|
||||
}
|
||||
}
|
||||
|
||||
pub trait KNNAlgorithm<T>{
|
||||
fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec<usize>;
|
||||
}
|
||||
|
||||
impl<T> KNNAlgorithm<T> for Vec<T>
|
||||
{
|
||||
data: Vec<T>
|
||||
}
|
||||
|
||||
impl<T: Clone, F: Fn(&T, &T) -> f64> KNNAlgorithm<T, F> for SimpleKNNAlgorithm<T>
|
||||
{
|
||||
fn find(&self, from: &T, k: usize, d: &F) -> Vec<usize> {
|
||||
if k < 1 || k > self.data.len() {
|
||||
fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec<usize> {
|
||||
if k < 1 || k > self.len() {
|
||||
panic!("k should be >= 1 and <= length(data)");
|
||||
}
|
||||
|
||||
@@ -92,9 +99,9 @@ impl<T: Clone, F: Fn(&T, &T) -> f64> KNNAlgorithm<T, F> for SimpleKNNAlgorithm<T
|
||||
});
|
||||
}
|
||||
|
||||
for i in 0..self.data.len() {
|
||||
for i in 0..self.len() {
|
||||
|
||||
let d = d(&from, &self.data[i]);
|
||||
let d = d(&from, &self[i]);
|
||||
let datum = heap.peek_mut();
|
||||
if d < datum.distance {
|
||||
datum.distance = d;
|
||||
@@ -133,11 +140,11 @@ impl Eq for KNNPoint {}
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::math::distance::Distance;
|
||||
use crate::math::distance::euclidian::EuclidianDistance;
|
||||
use ndarray::{arr1, arr2, Array1};
|
||||
|
||||
struct SimpleDistance{}
|
||||
|
||||
impl Distance<i32> for SimpleDistance {
|
||||
impl SimpleDistance {
|
||||
/// Absolute difference between two integers, returned as an `f64` distance.
///
/// The subtraction is performed in `f64`, which represents every `i32`
/// exactly, so operands that are far apart (for example `i32::MAX` and
/// `i32::MIN`) cannot overflow the way an `i32` subtraction followed by
/// `abs()` would.
fn distance(a: &i32, b: &i32) -> f64 {
    (f64::from(*a) - f64::from(*b)).abs()
}
|
||||
@@ -146,26 +153,22 @@ mod tests {
|
||||
#[test]
|
||||
fn knn_fit_predict() {
|
||||
let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]);
|
||||
let y = arr1(&[1, 2, 3, 4, 5]);
|
||||
let knn = KNNClassifier::fit(&x, &y, 3, EuclidianDistance::distance);
|
||||
let r = knn.predict(&x);
|
||||
assert_eq!(5, ArrayBase::len(&r));
|
||||
assert_eq!(arr1(&[2, 2, 3, 4, 4]), r);
|
||||
let y = arr1(&[2, 2, 2, 3, 3]);
|
||||
let knn = KNNClassifier::fit(NDArrayUtils::array2_to_vec(&x), y.to_vec(), 3, &Array1::distance);
|
||||
let r = knn.predict_vec(&NDArrayUtils::array2_to_vec(&x));
|
||||
assert_eq!(5, Vec::len(&r));
|
||||
assert_eq!(y.to_vec(), r);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn knn_find() {
|
||||
let simple_knn = SimpleKNNAlgorithm{
|
||||
data: vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
|
||||
};
|
||||
let data1 = vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
|
||||
|
||||
assert_eq!(vec!(1, 2, 0), simple_knn.find(&2, 3, &SimpleDistance::distance));
|
||||
assert_eq!(vec!(1, 2, 0), data1.find(&2, 3, &SimpleDistance::distance));
|
||||
|
||||
let knn2 = SimpleKNNAlgorithm{
|
||||
data: vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5]))
|
||||
};
|
||||
let data2 = vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5]));
|
||||
|
||||
assert_eq!(vec!(2, 3, 1), knn2.find(&arr1(&[3, 3]), 3, &EuclidianDistance::distance));
|
||||
assert_eq!(vec!(2, 3, 1), data2.find(&arr1(&[3, 3]), 3, &Array1::distance));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
use crate::common::AnyNumber;
|
||||
use ndarray::{Array1, ArrayBase, Data, Ix2};
|
||||
use crate::common::Nominal;
|
||||
|
||||
pub mod knn;
|
||||
|
||||
pub trait Classifier<X, Y, SX>
|
||||
pub trait Classifier<X, Y>
|
||||
where
|
||||
X: AnyNumber,
|
||||
Y: AnyNumber,
|
||||
SX: Data<Elem = X>
|
||||
Y: Nominal
|
||||
{
|
||||
|
||||
fn predict(&self, x: &ArrayBase<SX, Ix2>) -> Array1<Y>;
|
||||
fn predict(&self, x: &X) -> Y;
|
||||
|
||||
/// Predicts a label for every sample in `x` by calling `predict` on each
/// element in turn. The returned vector holds one prediction per input
/// sample, in the same order as the input.
fn predict_vec(&self, x: &Vec<X>) -> Vec<Y> {
    x.iter().map(|sample| self.predict(sample)).collect()
}
|
||||
|
||||
}
|
||||
+7
-1
@@ -1,7 +1,13 @@
|
||||
use num_traits::{Num, ToPrimitive, FromPrimitive};
|
||||
use num_traits::{Num, ToPrimitive, FromPrimitive, Zero, One};
|
||||
use ndarray::{ScalarOperand};
|
||||
use std::hash::Hash;
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// Marker trait (it declares no methods) that groups the bounds
/// `Num + ScalarOperand + ToPrimitive + FromPrimitive` under a single name,
/// so generic code can write `T: AnyNumber` instead of repeating the list.
pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive{}
|
||||
|
||||
/// Marker trait (it declares no methods) that groups the bounds listed below
/// under a single name: equality and hashing (`PartialEq`, `Eq`, `Hash`),
/// `Zero`/`One`, primitive conversions (`ToPrimitive`, `FromPrimitive`),
/// `Debug`, `Clone` and a `'static` lifetime.
pub trait Nominal: PartialEq + Zero + One + Eq + Hash + ToPrimitive + FromPrimitive + Debug + 'static + Clone{}
|
||||
|
||||
|
||||
/// Blanket implementation: every type that satisfies the listed bounds is
/// automatically `AnyNumber`; no manual implementation is needed.
impl<T> AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
|
||||
|
||||
/// Blanket implementation: every type that satisfies the listed bounds is
/// automatically `Nominal`; no manual implementation is needed.
impl<T> Nominal for T where T: PartialEq + Zero + One + Eq + Hash + ToPrimitive + Debug + FromPrimitive + 'static + Clone {}
|
||||
@@ -1,39 +1,51 @@
|
||||
use super::Distance;
|
||||
use crate::math::distance::Distance;
|
||||
use ndarray::{ArrayBase, Data, Dimension};
|
||||
use crate::common::AnyNumber;
|
||||
|
||||
pub struct EuclidianDistance{}
|
||||
|
||||
impl<A, S, D> Distance<ArrayBase<S, D>> for EuclidianDistance
|
||||
impl<A, S1, S2, D> Distance<ArrayBase<S2, D>> for ArrayBase<S1, D>
|
||||
where
|
||||
A: AnyNumber,
|
||||
S: Data<Elem = A>,
|
||||
S1: Data<Elem = A>,
|
||||
S2: Data<Elem = A>,
|
||||
D: Dimension
|
||||
{
|
||||
fn distance_to(&self, other: &Self) -> f64
|
||||
{
|
||||
Self::distance(self, other)
|
||||
}
|
||||
|
||||
fn distance(a: &ArrayBase<S, D>, b: &ArrayBase<S, D>) -> f64 {
|
||||
fn distance(a: &Self, b: &ArrayBase<S2, D>) -> f64
|
||||
{
|
||||
if a.len() != b.len() {
|
||||
panic!("vectors a and b have different length");
|
||||
} else {
|
||||
((a - b)*(a - b)).sum().to_f64().unwrap().sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use ndarray::arr1;
|
||||
use ndarray::{Array1, ArrayView1, arr1};
|
||||
|
||||
#[test]
|
||||
fn measure_simple_euclidian_distance() {
|
||||
let a = arr1(&[1, 2, 3]);
|
||||
let b = arr1(&[4, 5, 6]);
|
||||
|
||||
let d_arr = EuclidianDistance::distance(&a, &b);
|
||||
let d_view = EuclidianDistance::distance(&a.view(), &b.view());
|
||||
// let r1 = a.distance_to(&b);
|
||||
// let r2 = a.view().distance_to(&b.view());
|
||||
let d_arr = Array1::distance(&a, &b);
|
||||
let d_view = ArrayView1::distance(&a.view(), &b.view());
|
||||
|
||||
|
||||
|
||||
// assert!((r1 - 5.19615242).abs() < 1e-8);
|
||||
// assert!((r2 - 5.19615242).abs() < 1e-8);
|
||||
assert!((d_arr - 5.19615242).abs() < 1e-8);
|
||||
assert!((d_view - 5.19615242).abs() < 1e-8);
|
||||
}
|
||||
@@ -43,7 +55,7 @@ mod tests {
|
||||
let a = arr1(&[-2.1968219, -0.9559913, -0.0431738, 1.0567679, 0.3853515]);
|
||||
let b = arr1(&[-1.7781325, -0.6659839, 0.9526148, -0.9460919, -0.3925300]);
|
||||
|
||||
let d = EuclidianDistance::distance(&a, &b);
|
||||
let d = Array1::distance(&a, &b);
|
||||
|
||||
assert!((d - 2.422302).abs() < 1e-6);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
pub mod euclidian;
|
||||
|
||||
use num_traits::Float;
|
||||
pub trait Distance<T> {
|
||||
|
||||
fn distance_to(&self, other: &Self) -> f64;
|
||||
|
||||
fn distance(a: &Self, b: &T) -> f64;
|
||||
|
||||
pub trait Distance<T>
|
||||
{
|
||||
fn distance(a: &T, b: &T) -> f64;
|
||||
}
|
||||
Reference in New Issue
Block a user