Semi-ready implementation of Simple KNN
This commit is contained in:
+2
-3
@@ -2,8 +2,7 @@
|
|||||||
extern crate criterion;
|
extern crate criterion;
|
||||||
extern crate smartcore;
|
extern crate smartcore;
|
||||||
extern crate ndarray;
|
extern crate ndarray;
|
||||||
use ndarray::Array;
|
use ndarray::{Array, Array1};
|
||||||
use smartcore::math::distance::euclidian::EuclidianDistance;
|
|
||||||
use smartcore::math::distance::Distance;
|
use smartcore::math::distance::Distance;
|
||||||
|
|
||||||
use criterion::Criterion;
|
use criterion::Criterion;
|
||||||
@@ -12,7 +11,7 @@ use criterion::black_box;
|
|||||||
fn criterion_benchmark(c: &mut Criterion) {
|
fn criterion_benchmark(c: &mut Criterion) {
|
||||||
let a = Array::from_vec(vec![1., 2., 3.]);
|
let a = Array::from_vec(vec![1., 2., 3.]);
|
||||||
|
|
||||||
c.bench_function("Euclidean Distance", move |b| b.iter(|| EuclidianDistance::distance(black_box(&a), black_box(&a))));
|
c.bench_function("Euclidean Distance", move |b| b.iter(|| Array1::distance(black_box(&a), black_box(&a))));
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, criterion_benchmark);
|
criterion_group!(benches, criterion_benchmark);
|
||||||
|
|||||||
+66
-63
@@ -1,85 +1,92 @@
|
|||||||
use super::Classifier;
|
use super::Classifier;
|
||||||
|
use std::collections::HashSet;
|
||||||
use crate::algorithm::sort::heap_select::HeapSelect;
|
use crate::algorithm::sort::heap_select::HeapSelect;
|
||||||
use crate::common::AnyNumber;
|
use crate::common::Nominal;
|
||||||
use ndarray::prelude::*;
|
|
||||||
use ndarray::{ArrayBase, Data, Ix1, Ix2};
|
use ndarray::{ArrayBase, Data, Ix1, Ix2};
|
||||||
use num_traits::{Float};
|
use num_traits::{Float};
|
||||||
use std::cmp::{Ordering, PartialOrd};
|
use std::cmp::{Ordering, PartialOrd};
|
||||||
use ndarray::arr1;
|
|
||||||
|
|
||||||
pub struct KNNClassifier<X, Y, F>
|
|
||||||
|
type F<X> = Fn(&X, &X) -> f64;
|
||||||
|
|
||||||
|
pub struct KNNClassifier<X, Y>
|
||||||
where
|
where
|
||||||
X: AnyNumber,
|
Y: Nominal
|
||||||
Y: AnyNumber,
|
|
||||||
F: Fn(&Array1<X>, &Array1<X>) -> f64
|
|
||||||
{
|
{
|
||||||
y: Vec<Y>,
|
classes: Vec<Y>,
|
||||||
distance: F,
|
y: Vec<usize>,
|
||||||
|
data: Vec<X>,
|
||||||
|
distance: Box<F<X>>,
|
||||||
k: usize,
|
k: usize,
|
||||||
knn_algorithm: Box<KNNAlgorithm<Array1<X>, F>>
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<X, Y, F> KNNClassifier<X, Y, F>
|
impl<X, Y> KNNClassifier<X, Y>
|
||||||
where
|
where
|
||||||
X: AnyNumber,
|
Y: Nominal
|
||||||
Y: AnyNumber,
|
|
||||||
F: Fn(&Array1<X>, &Array1<X>) -> f64
|
|
||||||
{
|
{
|
||||||
|
|
||||||
pub fn fit<SX: Data<Elem = X>, SY: Data<Elem = Y>>(x: &ArrayBase<SX, Ix2>, y: &ArrayBase<SY, Ix1>, k: usize, distance: F) -> KNNClassifier<X, Y, F> {
|
pub fn fit(x: Vec<X>, y: Vec<Y>, k: usize, distance: &'static F<X>) -> KNNClassifier<X, Y> {
|
||||||
|
|
||||||
assert!(ArrayBase::shape(x)[0] == ArrayBase::shape(y)[0], format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", ArrayBase::shape(x)[0], ArrayBase::shape(y)[0]));
|
assert!(Vec::len(&x) == Vec::len(&y), format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", Vec::len(&x), Vec::len(&y)));
|
||||||
|
|
||||||
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
|
assert!(k > 1, format!("k should be > 1, k=[{}]", k));
|
||||||
|
|
||||||
let v: Vec<Array1<X>> = x.outer_iter().map(|x| x.to_owned()).collect();
|
let c_hash: HashSet<Y> = y.clone().into_iter().collect();
|
||||||
|
let classes: Vec<Y> = c_hash.into_iter().collect();
|
||||||
|
let y_i:Vec<usize> = y.into_iter().map(|y| classes.iter().position(|yy| yy == &y).unwrap()).collect();
|
||||||
|
|
||||||
let knn = Box::new(SimpleKNNAlgorithm{
|
KNNClassifier{classes:classes, y: y_i, data: x, k: k, distance: Box::new(distance)}
|
||||||
data: v
|
|
||||||
});
|
|
||||||
|
|
||||||
KNNClassifier{y: y.to_owned().to_vec(), k: k, distance: distance, knn_algorithm: knn}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<X, Y, SX, F> Classifier<X, Y, SX> for KNNClassifier<X, Y, F>
|
impl<X, Y> Classifier<X, Y> for KNNClassifier<X, Y>
|
||||||
where
|
where
|
||||||
X: AnyNumber,
|
Y: Nominal
|
||||||
Y: AnyNumber,
|
|
||||||
SX: Data<Elem = X>,
|
|
||||||
F: Fn(&Array1<X>, &Array1<X>) -> f64
|
|
||||||
{
|
{
|
||||||
|
|
||||||
fn predict(&self, x: &ArrayBase<SX, Ix2>) -> Array1<Y> {
|
fn predict(&self, x: &X) -> Y {
|
||||||
let mut result = Vec::new();
|
let idxs = self.data.find(x, self.k, &self.distance);
|
||||||
for x in x.outer_iter() {
|
let mut c = vec![0; self.classes.len()];
|
||||||
let idxs = self.knn_algorithm.find(&x.to_owned(), self.k, &self.distance);
|
let mut max_c = 0;
|
||||||
let mut sum: Y = Y::zero();
|
let mut max_i = 0;
|
||||||
let mut count = 0;
|
for i in idxs {
|
||||||
for i in idxs {
|
c[self.y[i]] += 1;
|
||||||
sum = sum + self.y[i].to_owned();
|
if c[self.y[i]] > max_c {
|
||||||
count += 1;
|
max_c = c[self.y[i]];
|
||||||
|
max_i = self.y[i];
|
||||||
}
|
}
|
||||||
result.push(sum / Y::from_u64(count).unwrap());
|
|
||||||
}
|
}
|
||||||
arr1(&result)
|
|
||||||
|
self.classes[max_i].clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait KNNAlgorithm<T: Clone, F: Fn(&T, &T) -> f64>{
|
pub struct NDArrayUtils {
|
||||||
fn find(&self, from: &T, k: usize, d: &F) -> Vec<usize>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct SimpleKNNAlgorithm<T>
|
impl NDArrayUtils {
|
||||||
{
|
|
||||||
data: Vec<T>
|
pub fn array2_to_vec<E, S>(x: &ArrayBase<S, Ix2>) -> Vec<ArrayBase<S, Ix1>>
|
||||||
|
where
|
||||||
|
E: Nominal,
|
||||||
|
S: Data<Elem = E>,
|
||||||
|
std::vec::Vec<ArrayBase<S, Ix1>>: std::iter::FromIterator<ndarray::ArrayBase<ndarray::OwnedRepr<E>, Ix1>>{
|
||||||
|
let x_vec: Vec<ArrayBase<S, Ix1>> = x.outer_iter().map(|x| x.to_owned()).collect();
|
||||||
|
x_vec
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Clone, F: Fn(&T, &T) -> f64> KNNAlgorithm<T, F> for SimpleKNNAlgorithm<T>
|
pub trait KNNAlgorithm<T>{
|
||||||
|
fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec<usize>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> KNNAlgorithm<T> for Vec<T>
|
||||||
{
|
{
|
||||||
fn find(&self, from: &T, k: usize, d: &F) -> Vec<usize> {
|
fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec<usize> {
|
||||||
if k < 1 || k > self.data.len() {
|
if k < 1 || k > self.len() {
|
||||||
panic!("k should be >= 1 and <= length(data)");
|
panic!("k should be >= 1 and <= length(data)");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -92,9 +99,9 @@ impl<T: Clone, F: Fn(&T, &T) -> f64> KNNAlgorithm<T, F> for SimpleKNNAlgorithm<T
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in 0..self.data.len() {
|
for i in 0..self.len() {
|
||||||
|
|
||||||
let d = d(&from, &self.data[i]);
|
let d = d(&from, &self[i]);
|
||||||
let datum = heap.peek_mut();
|
let datum = heap.peek_mut();
|
||||||
if d < datum.distance {
|
if d < datum.distance {
|
||||||
datum.distance = d;
|
datum.distance = d;
|
||||||
@@ -133,11 +140,11 @@ impl Eq for KNNPoint {}
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::math::distance::Distance;
|
use crate::math::distance::Distance;
|
||||||
use crate::math::distance::euclidian::EuclidianDistance;
|
use ndarray::{arr1, arr2, Array1};
|
||||||
|
|
||||||
struct SimpleDistance{}
|
struct SimpleDistance{}
|
||||||
|
|
||||||
impl Distance<i32> for SimpleDistance {
|
impl SimpleDistance {
|
||||||
fn distance(a: &i32, b: &i32) -> f64 {
|
fn distance(a: &i32, b: &i32) -> f64 {
|
||||||
(a - b).abs() as f64
|
(a - b).abs() as f64
|
||||||
}
|
}
|
||||||
@@ -146,26 +153,22 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn knn_fit_predict() {
|
fn knn_fit_predict() {
|
||||||
let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]);
|
let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]);
|
||||||
let y = arr1(&[1, 2, 3, 4, 5]);
|
let y = arr1(&[2, 2, 2, 3, 3]);
|
||||||
let knn = KNNClassifier::fit(&x, &y, 3, EuclidianDistance::distance);
|
let knn = KNNClassifier::fit(NDArrayUtils::array2_to_vec(&x), y.to_vec(), 3, &Array1::distance);
|
||||||
let r = knn.predict(&x);
|
let r = knn.predict_vec(&NDArrayUtils::array2_to_vec(&x));
|
||||||
assert_eq!(5, ArrayBase::len(&r));
|
assert_eq!(5, Vec::len(&r));
|
||||||
assert_eq!(arr1(&[2, 2, 3, 4, 4]), r);
|
assert_eq!(y.to_vec(), r);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn knn_find() {
|
fn knn_find() {
|
||||||
let simple_knn = SimpleKNNAlgorithm{
|
let data1 = vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
|
||||||
data: vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(vec!(1, 2, 0), simple_knn.find(&2, 3, &SimpleDistance::distance));
|
assert_eq!(vec!(1, 2, 0), data1.find(&2, 3, &SimpleDistance::distance));
|
||||||
|
|
||||||
let knn2 = SimpleKNNAlgorithm{
|
let data2 = vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5]));
|
||||||
data: vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5]))
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(vec!(2, 3, 1), knn2.find(&arr1(&[3, 3]), 3, &EuclidianDistance::distance));
|
assert_eq!(vec!(2, 3, 1), data2.find(&arr1(&[3, 3]), 3, &Array1::distance));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -1,15 +1,20 @@
|
|||||||
use crate::common::AnyNumber;
|
use crate::common::Nominal;
|
||||||
use ndarray::{Array1, ArrayBase, Data, Ix2};
|
|
||||||
|
|
||||||
pub mod knn;
|
pub mod knn;
|
||||||
|
|
||||||
pub trait Classifier<X, Y, SX>
|
pub trait Classifier<X, Y>
|
||||||
where
|
where
|
||||||
X: AnyNumber,
|
Y: Nominal
|
||||||
Y: AnyNumber,
|
|
||||||
SX: Data<Elem = X>
|
|
||||||
{
|
{
|
||||||
|
|
||||||
fn predict(&self, x: &ArrayBase<SX, Ix2>) -> Array1<Y>;
|
fn predict(&self, x: &X) -> Y;
|
||||||
|
|
||||||
|
fn predict_vec(&self, x: &Vec<X>) -> Vec<Y>{
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for xv in x.iter() {
|
||||||
|
result.push(self.predict(xv));
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
+7
-1
@@ -1,7 +1,13 @@
|
|||||||
use num_traits::{Num, ToPrimitive, FromPrimitive};
|
use num_traits::{Num, ToPrimitive, FromPrimitive, Zero, One};
|
||||||
use ndarray::{ScalarOperand};
|
use ndarray::{ScalarOperand};
|
||||||
|
use std::hash::Hash;
|
||||||
|
use std::fmt::Debug;
|
||||||
|
|
||||||
pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive{}
|
pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive{}
|
||||||
|
|
||||||
|
pub trait Nominal: PartialEq + Zero + One + Eq + Hash + ToPrimitive + FromPrimitive + Debug + 'static + Clone{}
|
||||||
|
|
||||||
|
|
||||||
impl<T> AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
|
impl<T> AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
|
||||||
|
|
||||||
|
impl<T> Nominal for T where T: PartialEq + Zero + One + Eq + Hash + ToPrimitive + Debug + FromPrimitive + 'static + Clone {}
|
||||||
@@ -1,39 +1,51 @@
|
|||||||
use super::Distance;
|
use crate::math::distance::Distance;
|
||||||
use ndarray::{ArrayBase, Data, Dimension};
|
use ndarray::{ArrayBase, Data, Dimension};
|
||||||
use crate::common::AnyNumber;
|
use crate::common::AnyNumber;
|
||||||
|
|
||||||
pub struct EuclidianDistance{}
|
impl<A, S1, S2, D> Distance<ArrayBase<S2, D>> for ArrayBase<S1, D>
|
||||||
|
|
||||||
impl<A, S, D> Distance<ArrayBase<S, D>> for EuclidianDistance
|
|
||||||
where
|
where
|
||||||
A: AnyNumber,
|
A: AnyNumber,
|
||||||
S: Data<Elem = A>,
|
S1: Data<Elem = A>,
|
||||||
D: Dimension
|
S2: Data<Elem = A>,
|
||||||
|
D: Dimension
|
||||||
{
|
{
|
||||||
|
fn distance_to(&self, other: &Self) -> f64
|
||||||
|
{
|
||||||
|
Self::distance(self, other)
|
||||||
|
}
|
||||||
|
|
||||||
fn distance(a: &ArrayBase<S, D>, b: &ArrayBase<S, D>) -> f64 {
|
fn distance(a: &Self, b: &ArrayBase<S2, D>) -> f64
|
||||||
|
{
|
||||||
if a.len() != b.len() {
|
if a.len() != b.len() {
|
||||||
panic!("vectors a and b have different length");
|
panic!("vectors a and b have different length");
|
||||||
} else {
|
} else {
|
||||||
((a - b)*(a - b)).sum().to_f64().unwrap().sqrt()
|
((a - b)*(a - b)).sum().to_f64().unwrap().sqrt()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use ndarray::arr1;
|
use ndarray::{Array1, ArrayView1, arr1};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn measure_simple_euclidian_distance() {
|
fn measure_simple_euclidian_distance() {
|
||||||
let a = arr1(&[1, 2, 3]);
|
let a = arr1(&[1, 2, 3]);
|
||||||
let b = arr1(&[4, 5, 6]);
|
let b = arr1(&[4, 5, 6]);
|
||||||
|
|
||||||
let d_arr = EuclidianDistance::distance(&a, &b);
|
// let r1 = a.distance_to(&b);
|
||||||
let d_view = EuclidianDistance::distance(&a.view(), &b.view());
|
// let r2 = a.view().distance_to(&b.view());
|
||||||
|
let d_arr = Array1::distance(&a, &b);
|
||||||
|
let d_view = ArrayView1::distance(&a.view(), &b.view());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// assert!((r1 - 5.19615242).abs() < 1e-8);
|
||||||
|
// assert!((r2 - 5.19615242).abs() < 1e-8);
|
||||||
assert!((d_arr - 5.19615242).abs() < 1e-8);
|
assert!((d_arr - 5.19615242).abs() < 1e-8);
|
||||||
assert!((d_view - 5.19615242).abs() < 1e-8);
|
assert!((d_view - 5.19615242).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
@@ -43,7 +55,7 @@ mod tests {
|
|||||||
let a = arr1(&[-2.1968219, -0.9559913, -0.0431738, 1.0567679, 0.3853515]);
|
let a = arr1(&[-2.1968219, -0.9559913, -0.0431738, 1.0567679, 0.3853515]);
|
||||||
let b = arr1(&[-1.7781325, -0.6659839, 0.9526148, -0.9460919, -0.3925300]);
|
let b = arr1(&[-1.7781325, -0.6659839, 0.9526148, -0.9460919, -0.3925300]);
|
||||||
|
|
||||||
let d = EuclidianDistance::distance(&a, &b);
|
let d = Array1::distance(&a, &b);
|
||||||
|
|
||||||
assert!((d - 2.422302).abs() < 1e-6);
|
assert!((d - 2.422302).abs() < 1e-6);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
pub mod euclidian;
|
pub mod euclidian;
|
||||||
|
|
||||||
use num_traits::Float;
|
pub trait Distance<T> {
|
||||||
|
|
||||||
|
fn distance_to(&self, other: &Self) -> f64;
|
||||||
|
|
||||||
|
fn distance(a: &Self, b: &T) -> f64;
|
||||||
|
|
||||||
pub trait Distance<T>
|
|
||||||
{
|
|
||||||
fn distance(a: &T, b: &T) -> f64;
|
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user