Work-in-progress implementation of a simple k-nearest-neighbors (KNN) classifier

This commit is contained in:
Volodymyr Orlov
2019-09-16 08:52:12 -07:00
parent 9c5f6eb307
commit 3efd078034
6 changed files with 125 additions and 99 deletions
+2 -3
View File
@@ -2,8 +2,7 @@
extern crate criterion;
extern crate smartcore;
extern crate ndarray;
use ndarray::Array;
use smartcore::math::distance::euclidian::EuclidianDistance;
use ndarray::{Array, Array1};
use smartcore::math::distance::Distance;
use criterion::Criterion;
@@ -12,7 +11,7 @@ use criterion::black_box;
fn criterion_benchmark(c: &mut Criterion) {
let a = Array::from_vec(vec![1., 2., 3.]);
c.bench_function("Euclidean Distance", move |b| b.iter(|| EuclidianDistance::distance(black_box(&a), black_box(&a))));
c.bench_function("Euclidean Distance", move |b| b.iter(|| Array1::distance(black_box(&a), black_box(&a))));
}
criterion_group!(benches, criterion_benchmark);
+68 -65
View File
@@ -1,85 +1,92 @@
use super::Classifier;
use std::collections::HashSet;
use crate::algorithm::sort::heap_select::HeapSelect;
use crate::common::AnyNumber;
use ndarray::prelude::*;
use crate::common::Nominal;
use ndarray::{ArrayBase, Data, Ix1, Ix2};
use num_traits::{Float};
use std::cmp::{Ordering, PartialOrd};
use ndarray::arr1;
pub struct KNNClassifier<X, Y, F>
type F<X> = Fn(&X, &X) -> f64;
pub struct KNNClassifier<X, Y>
where
X: AnyNumber,
Y: AnyNumber,
F: Fn(&Array1<X>, &Array1<X>) -> f64
Y: Nominal
{
y: Vec<Y>,
distance: F,
classes: Vec<Y>,
y: Vec<usize>,
data: Vec<X>,
distance: Box<F<X>>,
k: usize,
knn_algorithm: Box<KNNAlgorithm<Array1<X>, F>>
}
impl<X, Y, F> KNNClassifier<X, Y, F>
impl<X, Y> KNNClassifier<X, Y>
where
X: AnyNumber,
Y: AnyNumber,
F: Fn(&Array1<X>, &Array1<X>) -> f64
Y: Nominal
{
pub fn fit<SX: Data<Elem = X>, SY: Data<Elem = Y>>(x: &ArrayBase<SX, Ix2>, y: &ArrayBase<SY, Ix1>, k: usize, distance: F) -> KNNClassifier<X, Y, F> {
pub fn fit(x: Vec<X>, y: Vec<Y>, k: usize, distance: &'static F<X>) -> KNNClassifier<X, Y> {
assert!(ArrayBase::shape(x)[0] == ArrayBase::shape(y)[0], format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", ArrayBase::shape(x)[0], ArrayBase::shape(y)[0]));
assert!(Vec::len(&x) == Vec::len(&y), format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", Vec::len(&x), Vec::len(&y)));
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
let v: Vec<Array1<X>> = x.outer_iter().map(|x| x.to_owned()).collect();
let c_hash: HashSet<Y> = y.clone().into_iter().collect();
let classes: Vec<Y> = c_hash.into_iter().collect();
let y_i:Vec<usize> = y.into_iter().map(|y| classes.iter().position(|yy| yy == &y).unwrap()).collect();
let knn = Box::new(SimpleKNNAlgorithm{
data: v
});
KNNClassifier{y: y.to_owned().to_vec(), k: k, distance: distance, knn_algorithm: knn}
}
KNNClassifier{classes:classes, y: y_i, data: x, k: k, distance: Box::new(distance)}
}
impl<X, Y, SX, F> Classifier<X, Y, SX> for KNNClassifier<X, Y, F>
}
impl<X, Y> Classifier<X, Y> for KNNClassifier<X, Y>
where
X: AnyNumber,
Y: AnyNumber,
SX: Data<Elem = X>,
F: Fn(&Array1<X>, &Array1<X>) -> f64
Y: Nominal
{
fn predict(&self, x: &ArrayBase<SX, Ix2>) -> Array1<Y> {
let mut result = Vec::new();
for x in x.outer_iter() {
let idxs = self.knn_algorithm.find(&x.to_owned(), self.k, &self.distance);
let mut sum: Y = Y::zero();
let mut count = 0;
fn predict(&self, x: &X) -> Y {
let idxs = self.data.find(x, self.k, &self.distance);
let mut c = vec![0; self.classes.len()];
let mut max_c = 0;
let mut max_i = 0;
for i in idxs {
sum = sum + self.y[i].to_owned();
count += 1;
c[self.y[i]] += 1;
if c[self.y[i]] > max_c {
max_c = c[self.y[i]];
max_i = self.y[i];
}
result.push(sum / Y::from_u64(count).unwrap());
}
arr1(&result)
self.classes[max_i].clone()
}
}
pub trait KNNAlgorithm<T: Clone, F: Fn(&T, &T) -> f64>{
fn find(&self, from: &T, k: usize, d: &F) -> Vec<usize>;
pub struct NDArrayUtils {
}
pub struct SimpleKNNAlgorithm<T>
impl NDArrayUtils {
pub fn array2_to_vec<E, S>(x: &ArrayBase<S, Ix2>) -> Vec<ArrayBase<S, Ix1>>
where
E: Nominal,
S: Data<Elem = E>,
std::vec::Vec<ArrayBase<S, Ix1>>: std::iter::FromIterator<ndarray::ArrayBase<ndarray::OwnedRepr<E>, Ix1>>{
let x_vec: Vec<ArrayBase<S, Ix1>> = x.outer_iter().map(|x| x.to_owned()).collect();
x_vec
}
}
pub trait KNNAlgorithm<T>{
fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec<usize>;
}
impl<T> KNNAlgorithm<T> for Vec<T>
{
data: Vec<T>
}
impl<T: Clone, F: Fn(&T, &T) -> f64> KNNAlgorithm<T, F> for SimpleKNNAlgorithm<T>
{
fn find(&self, from: &T, k: usize, d: &F) -> Vec<usize> {
if k < 1 || k > self.data.len() {
fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec<usize> {
if k < 1 || k > self.len() {
panic!("k should be >= 1 and <= length(data)");
}
@@ -92,9 +99,9 @@ impl<T: Clone, F: Fn(&T, &T) -> f64> KNNAlgorithm<T, F> for SimpleKNNAlgorithm<T
});
}
for i in 0..self.data.len() {
for i in 0..self.len() {
let d = d(&from, &self.data[i]);
let d = d(&from, &self[i]);
let datum = heap.peek_mut();
if d < datum.distance {
datum.distance = d;
@@ -133,11 +140,11 @@ impl Eq for KNNPoint {}
mod tests {
use super::*;
use crate::math::distance::Distance;
use crate::math::distance::euclidian::EuclidianDistance;
use ndarray::{arr1, arr2, Array1};
struct SimpleDistance{}
impl Distance<i32> for SimpleDistance {
impl SimpleDistance {
fn distance(a: &i32, b: &i32) -> f64 {
(a - b).abs() as f64
}
@@ -146,26 +153,22 @@ mod tests {
#[test]
fn knn_fit_predict() {
let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]);
let y = arr1(&[1, 2, 3, 4, 5]);
let knn = KNNClassifier::fit(&x, &y, 3, EuclidianDistance::distance);
let r = knn.predict(&x);
assert_eq!(5, ArrayBase::len(&r));
assert_eq!(arr1(&[2, 2, 3, 4, 4]), r);
let y = arr1(&[2, 2, 2, 3, 3]);
let knn = KNNClassifier::fit(NDArrayUtils::array2_to_vec(&x), y.to_vec(), 3, &Array1::distance);
let r = knn.predict_vec(&NDArrayUtils::array2_to_vec(&x));
assert_eq!(5, Vec::len(&r));
assert_eq!(y.to_vec(), r);
}
#[test]
fn knn_find() {
let simple_knn = SimpleKNNAlgorithm{
data: vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
};
let data1 = vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
assert_eq!(vec!(1, 2, 0), simple_knn.find(&2, 3, &SimpleDistance::distance));
assert_eq!(vec!(1, 2, 0), data1.find(&2, 3, &SimpleDistance::distance));
let knn2 = SimpleKNNAlgorithm{
data: vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5]))
};
let data2 = vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5]));
assert_eq!(vec!(2, 3, 1), knn2.find(&arr1(&[3, 3]), 3, &EuclidianDistance::distance));
assert_eq!(vec!(2, 3, 1), data2.find(&arr1(&[3, 3]), 3, &Array1::distance));
}
#[test]
+12 -7
View File
@@ -1,15 +1,20 @@
use crate::common::AnyNumber;
use ndarray::{Array1, ArrayBase, Data, Ix2};
use crate::common::Nominal;
pub mod knn;
pub trait Classifier<X, Y, SX>
pub trait Classifier<X, Y>
where
X: AnyNumber,
Y: AnyNumber,
SX: Data<Elem = X>
Y: Nominal
{
fn predict(&self, x: &ArrayBase<SX, Ix2>) -> Array1<Y>;
fn predict(&self, x: &X) -> Y;
fn predict_vec(&self, x: &Vec<X>) -> Vec<Y>{
let mut result = Vec::new();
for xv in x.iter() {
result.push(self.predict(xv));
}
result
}
}
+7 -1
View File
@@ -1,7 +1,13 @@
use num_traits::{Num, ToPrimitive, FromPrimitive};
use num_traits::{Num, ToPrimitive, FromPrimitive, Zero, One};
use ndarray::{ScalarOperand};
use std::hash::Hash;
use std::fmt::Debug;
pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive{}
pub trait Nominal: PartialEq + Zero + One + Eq + Hash + ToPrimitive + FromPrimitive + Debug + 'static + Clone{}
impl<T> AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
impl<T> Nominal for T where T: PartialEq + Zero + One + Eq + Hash + ToPrimitive + Debug + FromPrimitive + 'static + Clone {}
+22 -10
View File
@@ -1,39 +1,51 @@
use super::Distance;
use crate::math::distance::Distance;
use ndarray::{ArrayBase, Data, Dimension};
use crate::common::AnyNumber;
pub struct EuclidianDistance{}
impl<A, S, D> Distance<ArrayBase<S, D>> for EuclidianDistance
impl<A, S1, S2, D> Distance<ArrayBase<S2, D>> for ArrayBase<S1, D>
where
A: AnyNumber,
S: Data<Elem = A>,
S1: Data<Elem = A>,
S2: Data<Elem = A>,
D: Dimension
{
fn distance_to(&self, other: &Self) -> f64
{
Self::distance(self, other)
}
fn distance(a: &ArrayBase<S, D>, b: &ArrayBase<S, D>) -> f64 {
fn distance(a: &Self, b: &ArrayBase<S2, D>) -> f64
{
if a.len() != b.len() {
panic!("vectors a and b have different length");
} else {
((a - b)*(a - b)).sum().to_f64().unwrap().sqrt()
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::arr1;
use ndarray::{Array1, ArrayView1, arr1};
#[test]
fn measure_simple_euclidian_distance() {
let a = arr1(&[1, 2, 3]);
let b = arr1(&[4, 5, 6]);
let d_arr = EuclidianDistance::distance(&a, &b);
let d_view = EuclidianDistance::distance(&a.view(), &b.view());
// let r1 = a.distance_to(&b);
// let r2 = a.view().distance_to(&b.view());
let d_arr = Array1::distance(&a, &b);
let d_view = ArrayView1::distance(&a.view(), &b.view());
// assert!((r1 - 5.19615242).abs() < 1e-8);
// assert!((r2 - 5.19615242).abs() < 1e-8);
assert!((d_arr - 5.19615242).abs() < 1e-8);
assert!((d_view - 5.19615242).abs() < 1e-8);
}
@@ -43,7 +55,7 @@ mod tests {
let a = arr1(&[-2.1968219, -0.9559913, -0.0431738, 1.0567679, 0.3853515]);
let b = arr1(&[-1.7781325, -0.6659839, 0.9526148, -0.9460919, -0.3925300]);
let d = EuclidianDistance::distance(&a, &b);
let d = Array1::distance(&a, &b);
assert!((d - 2.422302).abs() < 1e-6);
}
+5 -4
View File
@@ -1,8 +1,9 @@
pub mod euclidian;
use num_traits::Float;
pub trait Distance<T> {
fn distance_to(&self, other: &Self) -> f64;
fn distance(a: &Self, b: &T) -> f64;
pub trait Distance<T>
{
fn distance(a: &T, b: &T) -> f64;
}