diff --git a/benches/distance.rs b/benches/distance.rs index 13ee0c0..407b4db 100644 --- a/benches/distance.rs +++ b/benches/distance.rs @@ -2,8 +2,7 @@ extern crate criterion; extern crate smartcore; extern crate ndarray; -use ndarray::Array; -use smartcore::math::distance::euclidian::EuclidianDistance; +use ndarray::{Array, Array1}; use smartcore::math::distance::Distance; use criterion::Criterion; @@ -12,7 +11,7 @@ use criterion::black_box; fn criterion_benchmark(c: &mut Criterion) { let a = Array::from_vec(vec![1., 2., 3.]); - c.bench_function("Euclidean Distance", move |b| b.iter(|| EuclidianDistance::distance(black_box(&a), black_box(&a)))); + c.bench_function("Euclidean Distance", move |b| b.iter(|| Array1::distance(black_box(&a), black_box(&a)))); } criterion_group!(benches, criterion_benchmark); diff --git a/src/classification/knn.rs b/src/classification/knn.rs index 1dad6d5..a8518fd 100644 --- a/src/classification/knn.rs +++ b/src/classification/knn.rs @@ -1,85 +1,92 @@ use super::Classifier; +use std::collections::HashSet; use crate::algorithm::sort::heap_select::HeapSelect; -use crate::common::AnyNumber; -use ndarray::prelude::*; +use crate::common::Nominal; use ndarray::{ArrayBase, Data, Ix1, Ix2}; use num_traits::{Float}; use std::cmp::{Ordering, PartialOrd}; -use ndarray::arr1; -pub struct KNNClassifier + +type F = Fn(&X, &X) -> f64; + +pub struct KNNClassifier where - X: AnyNumber, - Y: AnyNumber, - F: Fn(&Array1, &Array1) -> f64 + Y: Nominal { - y: Vec, - distance: F, - k: usize, - knn_algorithm: Box, F>> + classes: Vec, + y: Vec, + data: Vec, + distance: Box>, + k: usize, } -impl KNNClassifier +impl KNNClassifier where - X: AnyNumber, - Y: AnyNumber, - F: Fn(&Array1, &Array1) -> f64 + Y: Nominal { - pub fn fit, SY: Data>(x: &ArrayBase, y: &ArrayBase, k: usize, distance: F) -> KNNClassifier { + pub fn fit(x: Vec, y: Vec, k: usize, distance: &'static F) -> KNNClassifier { - assert!(ArrayBase::shape(x)[0] == ArrayBase::shape(y)[0], format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", ArrayBase::shape(x)[0], ArrayBase::shape(y)[0])); + assert!(Vec::len(&x) == Vec::len(&y), format!("Size of x should equal size of y; |x|=[{}], |y|=[{}]", Vec::len(&x), Vec::len(&y))); - assert!(k > 1, format!("k should be > 1, k=[{}]", k)); - - let v: Vec> = x.outer_iter().map(|x| x.to_owned()).collect(); + assert!(k > 1, format!("k should be > 1, k=[{}]", k)); + + let c_hash: HashSet = y.clone().into_iter().collect(); + let classes: Vec = c_hash.into_iter().collect(); + let y_i:Vec = y.into_iter().map(|y| classes.iter().position(|yy| yy == &y).unwrap()).collect(); - let knn = Box::new(SimpleKNNAlgorithm{ - data: v - }); - - KNNClassifier{y: y.to_owned().to_vec(), k: k, distance: distance, knn_algorithm: knn} + KNNClassifier{classes:classes, y: y_i, data: x, k: k, distance: Box::new(distance)} } + } -impl Classifier for KNNClassifier +impl Classifier for KNNClassifier where - X: AnyNumber, - Y: AnyNumber, - SX: Data, - F: Fn(&Array1, &Array1) -> f64 + Y: Nominal { - fn predict(&self, x: &ArrayBase) -> Array1 { - let mut result = Vec::new(); - for x in x.outer_iter() { - let idxs = self.knn_algorithm.find(&x.to_owned(), self.k, &self.distance); - let mut sum: Y = Y::zero(); - let mut count = 0; - for i in idxs { - sum = sum + self.y[i].to_owned(); - count += 1; - } - result.push(sum / Y::from_u64(count).unwrap()); - } - arr1(&result) + fn predict(&self, x: &X) -> Y { + let idxs = self.data.find(x, self.k, &self.distance); + let mut c = vec![0; self.classes.len()]; + let mut max_c = 0; + let mut max_i = 0; + for i in idxs { + c[self.y[i]] += 1; + if c[self.y[i]] > max_c { + max_c = c[self.y[i]]; + max_i = self.y[i]; + } + } + + self.classes[max_i].clone() } } -pub trait KNNAlgorithm f64>{ - fn find(&self, from: &T, k: usize, d: &F) -> Vec; +pub struct NDArrayUtils { + } -pub struct SimpleKNNAlgorithm -{ - data: Vec +impl NDArrayUtils { + + pub fn array2_to_vec(x: &ArrayBase) -> Vec> + where + E: Nominal, + S: Data, + std::vec::Vec>: std::iter::FromIterator, Ix1>>{ + let x_vec: Vec> = x.outer_iter().map(|x| x.to_owned()).collect(); + x_vec + } } -impl f64> KNNAlgorithm for SimpleKNNAlgorithm +pub trait KNNAlgorithm{ + fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec; +} + +impl KNNAlgorithm for Vec { - fn find(&self, from: &T, k: usize, d: &F) -> Vec { - if k < 1 || k > self.data.len() { + fn find(&self, from: &T, k: usize, d: &Fn(&T, &T) -> f64) -> Vec { + if k < 1 || k > self.len() { panic!("k should be >= 1 and <= length(data)"); } @@ -92,9 +99,9 @@ impl f64> KNNAlgorithm for SimpleKNNAlgorithm for SimpleDistance { + impl SimpleDistance { fn distance(a: &i32, b: &i32) -> f64 { (a - b).abs() as f64 } - } + } #[test] fn knn_fit_predict() { - let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]); - let y = arr1(&[1, 2, 3, 4, 5]); - let knn = KNNClassifier::fit(&x, &y, 3, EuclidianDistance::distance); - let r = knn.predict(&x); - assert_eq!(5, ArrayBase::len(&r)); - assert_eq!(arr1(&[2, 2, 3, 4, 4]), r); + let x = arr2(&[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]); + let y = arr1(&[2, 2, 2, 3, 3]); + let knn = KNNClassifier::fit(NDArrayUtils::array2_to_vec(&x), y.to_vec(), 3, &Array1::distance); + let r = knn.predict_vec(&NDArrayUtils::array2_to_vec(&x)); + assert_eq!(5, Vec::len(&r)); + assert_eq!(y.to_vec(), r); } #[test] fn knn_find() { - let simple_knn = SimpleKNNAlgorithm{ - data: vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) - }; + let data1 = vec!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - assert_eq!(vec!(1, 2, 0), simple_knn.find(&2, 3, &SimpleDistance::distance)); + assert_eq!(vec!(1, 2, 0), data1.find(&2, 3, &SimpleDistance::distance)); - let knn2 = SimpleKNNAlgorithm{ - data: vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5])) - }; + let data2 = vec!(arr1(&[1, 1]), arr1(&[2, 2]), arr1(&[3, 3]), arr1(&[4, 4]), arr1(&[5, 5])); - assert_eq!(vec!(2, 3, 1), knn2.find(&arr1(&[3, 3]), 3, &EuclidianDistance::distance)); + assert_eq!(vec!(2, 3, 1), data2.find(&arr1(&[3, 3]), 3, &Array1::distance)); } #[test] diff --git a/src/classification/mod.rs b/src/classification/mod.rs index b3747d2..89f03b5 100644 --- a/src/classification/mod.rs +++ b/src/classification/mod.rs @@ -1,15 +1,20 @@ -use crate::common::AnyNumber; -use ndarray::{Array1, ArrayBase, Data, Ix2}; +use crate::common::Nominal; pub mod knn; -pub trait Classifier +pub trait Classifier where - X: AnyNumber, - Y: AnyNumber, - SX: Data + Y: Nominal { - fn predict(&self, x: &ArrayBase) -> Array1; + fn predict(&self, x: &X) -> Y; + + fn predict_vec(&self, x: &Vec) -> Vec{ + let mut result = Vec::new(); + for xv in x.iter() { + result.push(self.predict(xv)); + } + result + } } \ No newline at end of file diff --git a/src/common/mod.rs b/src/common/mod.rs index 773c328..f40105d 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,7 +1,13 @@ -use num_traits::{Num, ToPrimitive, FromPrimitive}; +use num_traits::{Num, ToPrimitive, FromPrimitive, Zero, One}; use ndarray::{ScalarOperand}; +use std::hash::Hash; +use std::fmt::Debug; pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive{} +pub trait Nominal: PartialEq + Zero + One + Eq + Hash + ToPrimitive + FromPrimitive + Debug + 'static + Clone{} -impl AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {} \ No newline at end of file + +impl AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {} + +impl Nominal for T where T: PartialEq + Zero + One + Eq + Hash + ToPrimitive + Debug + FromPrimitive + 'static + Clone {} \ No newline at end of file diff --git a/src/math/distance/euclidian.rs b/src/math/distance/euclidian.rs index 31bf785..89894e1 100644 --- a/src/math/distance/euclidian.rs +++ b/src/math/distance/euclidian.rs @@ -1,39 +1,51 @@ -use super::Distance; +use crate::math::distance::Distance; use ndarray::{ArrayBase, Data, Dimension}; use crate::common::AnyNumber; -pub struct EuclidianDistance{} - -impl Distance> for EuclidianDistance +impl Distance> for ArrayBase where - A: AnyNumber, - S: Data, - D: Dimension + A: AnyNumber, + S1: Data, + S2: Data, + D: Dimension { + fn distance_to(&self, other: &Self) -> f64 + { + Self::distance(self, other) + } - fn distance(a: &ArrayBase, b: &ArrayBase) -> f64 { + fn distance(a: &Self, b: &ArrayBase) -> f64 + { if a.len() != b.len() { panic!("vectors a and b have different length"); } else { ((a - b)*(a - b)).sum().to_f64().unwrap().sqrt() } } + + } #[cfg(test)] mod tests { use super::*; - use ndarray::arr1; + use ndarray::{Array1, ArrayView1, arr1}; #[test] fn measure_simple_euclidian_distance() { let a = arr1(&[1, 2, 3]); let b = arr1(&[4, 5, 6]); - let d_arr = EuclidianDistance::distance(&a, &b); - let d_view = EuclidianDistance::distance(&a.view(), &b.view()); + // let r1 = a.distance_to(&b); + // let r2 = a.view().distance_to(&b.view()); + let d_arr = Array1::distance(&a, &b); + let d_view = ArrayView1::distance(&a.view(), &b.view()); + + + // assert!((r1 - 5.19615242).abs() < 1e-8); + // assert!((r2 - 5.19615242).abs() < 1e-8); assert!((d_arr - 5.19615242).abs() < 1e-8); assert!((d_view - 5.19615242).abs() < 1e-8); } @@ -43,7 +55,7 @@ mod tests { let a = arr1(&[-2.1968219, -0.9559913, -0.0431738, 1.0567679, 0.3853515]); let b = arr1(&[-1.7781325, -0.6659839, 0.9526148, -0.9460919, -0.3925300]); - let d = EuclidianDistance::distance(&a, &b); + let d = Array1::distance(&a, &b); assert!((d - 2.422302).abs() < 1e-6); } diff --git a/src/math/distance/mod.rs b/src/math/distance/mod.rs index b58dfea..7b3104e 100644 --- a/src/math/distance/mod.rs +++ b/src/math/distance/mod.rs @@ -1,8 +1,9 @@ pub mod euclidian; -use num_traits::Float; +pub trait Distance { + + fn distance_to(&self, other: &Self) -> f64; + + fn distance(a: &Self, b: &T) -> f64; -pub trait Distance -{ - fn distance(a: &T, b: &T) -> f64; } \ No newline at end of file