From f4d3a80490d8376d1190e02280fa987518f37317 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Tue, 28 May 2019 17:46:03 -0700 Subject: [PATCH] Initial commit --- .gitignore | 8 ++++ Cargo.toml | 18 ++++++++ benches/distance.rs | 19 ++++++++ smartcore.iml | 9 ++++ src/algorithm/mod.rs | 1 + src/algorithm/sort/heap_select.rs | 64 ++++++++++++++++++++++++++ src/algorithm/sort/mod.rs | 1 + src/classification/knn.rs | 76 +++++++++++++++++++++++++++++++ src/classification/mod.rs | 14 ++++++ src/error.rs | 12 +++++ src/lib.rs | 4 ++ src/math/distance/euclidian.rs | 48 +++++++++++++++++++ src/math/distance/mod.rs | 10 ++++ src/math/mod.rs | 1 + 14 files changed, 285 insertions(+) create mode 100644 Cargo.toml create mode 100644 benches/distance.rs create mode 100644 smartcore.iml create mode 100644 src/algorithm/mod.rs create mode 100644 src/algorithm/sort/heap_select.rs create mode 100644 src/algorithm/sort/mod.rs create mode 100644 src/classification/knn.rs create mode 100644 src/classification/mod.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/math/distance/euclidian.rs create mode 100644 src/math/distance/mod.rs create mode 100644 src/math/mod.rs diff --git a/.gitignore b/.gitignore index 088ba6b..d770de9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,11 @@ Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk + +# IDE +.idea +.project +.vscode + +# OS +.DS_Store diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8c87ed2 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "smartcore" +version = "0.1.0" +authors = ["Vlad Orlov"] +edition = "2018" + +[dependencies] +ndarray = "0.12.1" +ndarray-linalg = "0.10" +num-traits = "0.2" + +[dev-dependencies] +ndarray = "0.12.1" +criterion = "0.2" + +[[bench]] +name = "distance" +harness = false \ No newline at end of file diff --git a/benches/distance.rs b/benches/distance.rs new file mode 100644 index 0000000..13ee0c0 --- /dev/null +++ b/benches/distance.rs @@ -0,0 +1,19 @@ +#[macro_use] +extern crate criterion; +extern crate smartcore; +extern crate ndarray; +use ndarray::Array; +use smartcore::math::distance::euclidian::EuclidianDistance; +use smartcore::math::distance::Distance; + +use criterion::Criterion; +use criterion::black_box; + +fn criterion_benchmark(c: &mut Criterion) { + let a = Array::from_vec(vec![1., 2., 3.]); + + c.bench_function("Euclidean Distance", move |b| b.iter(|| EuclidianDistance::distance(black_box(&a), black_box(&a)))); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); \ No newline at end of file diff --git a/smartcore.iml b/smartcore.iml new file mode 100644 index 0000000..8021953 --- /dev/null +++ b/smartcore.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/src/algorithm/mod.rs b/src/algorithm/mod.rs new file mode 100644 index 0000000..20ae7d2 --- /dev/null +++ b/src/algorithm/mod.rs @@ -0,0 +1 @@ +pub mod sort; \ No newline at end of file diff --git a/src/algorithm/sort/heap_select.rs b/src/algorithm/sort/heap_select.rs new file mode 100644 index 0000000..0fb7677 --- /dev/null +++ b/src/algorithm/sort/heap_select.rs @@ -0,0 +1,64 @@ +use std::cmp::Ordering; + +pub struct HeapSelect { + + k: usize, + n: usize, + sorted: bool, + heap: Vec + + +} + +impl HeapSelect { + + pub fn from_vec(vec: Vec) -> HeapSelect { + HeapSelect{ + k: vec.len(), + n: 0, + sorted: false, + heap: vec + } + } + + pub fn add(&mut self, element: T) { + self.sorted = false; + if self.n < self.k { + self.heap[self.n] = element; + self.n += 1; + if self.n == self.k { + self.heapify(); + } + } else { + self.n += 1; + if element.cmp(&self.heap[0]) == Ordering::Less { + self.heap[0] = element; + } + } + } + + pub fn heapify(&mut self){ + + } + +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_vec() { + let heap = HeapSelect::from_vec(vec!(1, 2, 3)); + assert_eq!(3, heap.k); + } + + #[test] + fn test_add() { + let mut heap = HeapSelect::from_vec(Vec::::new()); + heap.add(1); + heap.add(2); + heap.add(3); + assert_eq!(3, heap.n); + } +} \ No newline at end of file diff --git a/src/algorithm/sort/mod.rs b/src/algorithm/sort/mod.rs new file mode 100644 index 0000000..7984c1d --- /dev/null +++ b/src/algorithm/sort/mod.rs @@ -0,0 +1 @@ +pub mod heap_select; \ No newline at end of file diff --git a/src/classification/knn.rs b/src/classification/knn.rs new file mode 100644 index 0000000..0a00d9d --- /dev/null +++ b/src/classification/knn.rs @@ -0,0 +1,76 @@ +use super::Classifier; +use super::super::math::distance::Distance; +use super::super::math::distance::euclidian::EuclidianDistance; +use ndarray::prelude::*; +use num_traits::Signed; +use num_traits::Float; +use std::marker::PhantomData; + +pub struct KNNClassifier { + y: Option> +} + +pub trait KNNAlgorithm{ + fn find(&self, from: &T, k: i32) -> &Vec; +} + +pub struct SimpleKNNAlgorithm +where + A: Float, + D: Distance +{ + data: Vec, + distance: D, + __phantom: PhantomData +} + +impl KNNAlgorithm for SimpleKNNAlgorithm +where + A: Float, + D: Distance +{ + fn find(&self, from: &T, k: i32) -> &Vec { + &self.data + } +} + +impl Classifier for KNNClassifier +where + A2: Signed + Clone, + { + fn fit(&mut self, x: &Array2, y: &Array1){ + self.y = Some(Array1::::zeros(ArrayBase::len(y))); + } + + fn predict(&self, x: &Array2) -> Array1{ + let array = Array1::::zeros(ArrayBase::len(self.y.as_ref().unwrap())); + array + } + +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn knn_fit_predict() { + let mut knn = KNNClassifier{y: None}; + let x = arr2(&[[1, 2, 3],[4, 5, 6]]); + let y = arr1(&[1, 2]); + knn.fit(&x, &y); + let r = knn.predict(&x); + assert_eq!(2, ArrayBase::len(&r)); + } + + #[test] + fn knn_find() { + let sKnn = SimpleKNNAlgorithm{ + data: vec!(arr1(&[1., 2.]), arr1(&[1., 2.]), arr1(&[1., 2.])), + distance: EuclidianDistance{}, + __phantom: PhantomData + }; + + assert_eq!(&vec!(arr1(&[1., 2.]), arr1(&[1., 2.]), arr1(&[1., 2.])), sKnn.find(&arr1(&[1., 2.]), 3)); + } +} \ No newline at end of file diff --git a/src/classification/mod.rs b/src/classification/mod.rs new file mode 100644 index 0000000..1f94636 --- /dev/null +++ b/src/classification/mod.rs @@ -0,0 +1,14 @@ +use ndarray::prelude::*; +use ndarray::{arr1, arr2}; +use ndarray::FixedInitializer; + +pub mod knn; + +pub trait Classifier +{ + + fn fit(&mut self, x: &Array2, y: &Array1); + + fn predict(&self, x: &Array2) -> Array1; + +} \ No newline at end of file diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..42f7632 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,12 @@ +use std::fmt; + +#[derive(Debug)] +pub struct IllegalArgumentError { + pub message: String, +} + +impl fmt::Display for IllegalArgumentError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.message) + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..cc6a7c1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,4 @@ +pub mod classification; +pub mod math; +pub mod error; +pub mod algorithm; \ No newline at end of file diff --git a/src/math/distance/euclidian.rs b/src/math/distance/euclidian.rs new file mode 100644 index 0000000..84e5707 --- /dev/null +++ b/src/math/distance/euclidian.rs @@ -0,0 +1,48 @@ +use super::Distance; +use ndarray::{ArrayBase, Data, Dimension}; +use num_traits::Float; + +pub struct EuclidianDistance{} + +impl Distance, A> for EuclidianDistance +where + A: Float, + S: Data, + D: Dimension +{ + + fn distance(a: &ArrayBase, b: &ArrayBase) -> A { + if a.len() != b.len() { + panic!("vectors a and b have different length"); + } else { + ((a - b)*(a - b)).sum().sqrt() + } + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use ndarray::{arr1, Array}; + + #[test] + fn measure_simple_euclidian_distance() { + let a = Array::from_vec(vec![1., 2., 3.]); + let b = Array::from_vec(vec![4., 5., 6.]); + + let d = EuclidianDistance::distance(&a, &b); + + assert!((d - 5.19615242).abs() < 1e-8); + } + + #[test] + fn measure_simple_euclidian_distance_static() { + let a = arr1(&[-2.1968219, -0.9559913, -0.0431738, 1.0567679, 0.3853515]); + let b = arr1(&[-1.7781325, -0.6659839, 0.9526148, -0.9460919, -0.3925300]); + + let d = EuclidianDistance::distance(&a, &b); + + assert!((d - 2.422302).abs() < 1e-6); + } +} \ No newline at end of file diff --git a/src/math/distance/mod.rs b/src/math/distance/mod.rs new file mode 100644 index 0000000..3967638 --- /dev/null +++ b/src/math/distance/mod.rs @@ -0,0 +1,10 @@ +pub mod euclidian; + +use num_traits::Float; + +pub trait Distance +where + A: Float +{ + fn distance(a: &T, b: &T) -> A; +} \ No newline at end of file diff --git a/src/math/mod.rs b/src/math/mod.rs new file mode 100644 index 0000000..4567f3e --- /dev/null +++ b/src/math/mod.rs @@ -0,0 +1 @@ +pub mod distance; \ No newline at end of file