From fa0918cee3364ef084408ab1654fea0126bfae71 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Sat, 29 Aug 2020 20:17:01 -0700 Subject: [PATCH] fix: renames FloatExt to RealNumber --- src/algorithm/neighbour/bbd_tree.rs | 10 +++--- src/algorithm/neighbour/cover_tree.rs | 6 ++-- src/algorithm/neighbour/linear_search.rs | 14 ++++---- src/cluster/kmeans.rs | 8 ++--- src/decomposition/pca.rs | 8 ++--- src/ensemble/random_forest_classifier.rs | 8 ++--- src/ensemble/random_forest_regressor.rs | 8 ++--- src/lib.rs | 2 +- src/linalg/evd.rs | 24 ++++++------- src/linalg/lu.rs | 8 ++--- src/linalg/mod.rs | 14 ++++---- src/linalg/naive/dense_matrix.rs | 34 +++++++++---------- src/linalg/nalgebra_bindings.rs | 16 ++++----- src/linalg/ndarray_bindings.rs | 16 ++++----- src/linalg/qr.rs | 8 ++--- src/linalg/svd.rs | 8 ++--- src/linear/linear_regression.rs | 8 ++--- src/linear/logistic_regression.rs | 18 +++++----- src/math/distance/euclidian.rs | 6 ++-- src/math/distance/hamming.rs | 4 +-- src/math/distance/mahalanobis.rs | 8 ++--- src/math/distance/manhattan.rs | 4 +-- src/math/distance/minkowski.rs | 24 +++++++------ src/math/distance/mod.rs | 33 ++++++++++++++++-- src/math/mod.rs | 3 +- src/math/num.rs | 6 ++-- src/metrics/accuracy.rs | 4 +-- src/metrics/auc.rs | 4 +-- src/metrics/f1.rs | 4 +-- src/metrics/mean_absolute_error.rs | 4 +-- src/metrics/mean_squared_error.rs | 4 +-- src/metrics/mod.rs | 18 +++++----- src/metrics/precision.rs | 4 +-- src/metrics/r2.rs | 4 +-- src/metrics/recall.rs | 4 +-- src/neighbors/knn_classifier.rs | 8 ++--- src/neighbors/knn_regressor.rs | 8 ++--- src/neighbors/mod.rs | 10 +++--- .../first_order/gradient_descent.rs | 8 ++--- src/optimization/first_order/lbfgs.rs | 12 +++---- src/optimization/first_order/mod.rs | 6 ++-- src/tree/decision_tree_classifier.rs | 20 +++++------ src/tree/decision_tree_regressor.rs | 18 +++++----- 43 files changed, 238 insertions(+), 208 deletions(-) diff --git a/src/algorithm/neighbour/bbd_tree.rs 
b/src/algorithm/neighbour/bbd_tree.rs index 684e5bc..4194930 100644 --- a/src/algorithm/neighbour/bbd_tree.rs +++ b/src/algorithm/neighbour/bbd_tree.rs @@ -2,17 +2,17 @@ use std::fmt::Debug; use crate::linalg::Matrix; use crate::math::distance::euclidian::*; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Debug)] -pub struct BBDTree { +pub struct BBDTree { nodes: Vec>, index: Vec, root: usize, } #[derive(Debug)] -struct BBDTreeNode { +struct BBDTreeNode { count: usize, index: usize, center: Vec, @@ -23,7 +23,7 @@ struct BBDTreeNode { upper: Option, } -impl BBDTreeNode { +impl BBDTreeNode { fn new(d: usize) -> BBDTreeNode { BBDTreeNode { count: 0, @@ -38,7 +38,7 @@ impl BBDTreeNode { } } -impl BBDTree { +impl BBDTree { pub fn new>(data: &M) -> BBDTree { let nodes = Vec::new(); diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index be57006..08e06cc 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -30,11 +30,11 @@ use serde::{Deserialize, Serialize}; use crate::algorithm::sort::heap_select::HeapSelect; use crate::math::distance::Distance; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; /// Implements Cover Tree algorithm #[derive(Serialize, Deserialize, Debug)] -pub struct CoverTree> { +pub struct CoverTree> { base: F, max_level: i8, min_level: i8, @@ -42,7 +42,7 @@ pub struct CoverTree> { nodes: Vec>, } -impl> CoverTree { +impl> CoverTree { /// Construct a cover tree. /// * `data` - vector of data points to search for. /// * `distance` - distance metric to use for searching. This function should extend [`Distance`](../algorithm/neighbour/index.html) interface. 
diff --git a/src/algorithm/neighbour/linear_search.rs b/src/algorithm/neighbour/linear_search.rs index 39e1b40..2164e48 100644 --- a/src/algorithm/neighbour/linear_search.rs +++ b/src/algorithm/neighbour/linear_search.rs @@ -27,17 +27,17 @@ use std::marker::PhantomData; use crate::algorithm::sort::heap_select::HeapSelect; use crate::math::distance::Distance; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; /// Implements Linear Search algorithm, see [KNN algorithms](../index.html) #[derive(Serialize, Deserialize, Debug)] -pub struct LinearKNNSearch> { +pub struct LinearKNNSearch> { distance: D, data: Vec, f: PhantomData, } -impl> LinearKNNSearch { +impl> LinearKNNSearch { /// Initializes algorithm. /// * `data` - vector of data points to search for. /// * `distance` - distance metric to use for searching. This function should extend [`Distance`](../algorithm/neighbour/index.html) interface. @@ -86,24 +86,24 @@ impl> LinearKNNSearch { } #[derive(Debug)] -struct KNNPoint { +struct KNNPoint { distance: F, index: Option, } -impl PartialOrd for KNNPoint { +impl PartialOrd for KNNPoint { fn partial_cmp(&self, other: &Self) -> Option { self.distance.partial_cmp(&other.distance) } } -impl PartialEq for KNNPoint { +impl PartialEq for KNNPoint { fn eq(&self, other: &Self) -> bool { self.distance == other.distance } } -impl Eq for KNNPoint {} +impl Eq for KNNPoint {} #[cfg(test)] mod tests { diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 67e966c..9dd0dc3 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -9,10 +9,10 @@ use serde::{Deserialize, Serialize}; use crate::algorithm::neighbour::bbd_tree::BBDTree; use crate::linalg::Matrix; use crate::math::distance::euclidian::*; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] -pub struct KMeans { +pub struct KMeans { k: usize, y: Vec, size: Vec, @@ -20,7 +20,7 @@ pub struct KMeans { centroids: Vec>, } -impl PartialEq for 
KMeans { +impl PartialEq for KMeans { fn eq(&self, other: &Self) -> bool { if self.k != other.k || self.size != other.size @@ -55,7 +55,7 @@ impl Default for KMeansParameters { } } -impl KMeans { +impl KMeans { pub fn new>(data: &M, k: usize, parameters: KMeansParameters) -> KMeans { let bbd = BBDTree::new(data); diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs index a2c7126..e1f7027 100644 --- a/src/decomposition/pca.rs +++ b/src/decomposition/pca.rs @@ -3,10 +3,10 @@ use std::fmt::Debug; use serde::{Deserialize, Serialize}; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] -pub struct PCA> { +pub struct PCA> { eigenvectors: M, eigenvalues: Vec, projection: M, @@ -14,7 +14,7 @@ pub struct PCA> { pmu: Vec, } -impl> PartialEq for PCA { +impl> PartialEq for PCA { fn eq(&self, other: &Self) -> bool { if self.eigenvectors != other.eigenvectors || self.eigenvalues.len() != other.eigenvalues.len() @@ -44,7 +44,7 @@ impl Default for PCAParameters { } } -impl> PCA { +impl> PCA { pub fn new(data: &M, n_components: usize, parameters: PCAParameters) -> PCA { let (m, n) = data.shape(); diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index 172c5ac..b99a0da 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -7,7 +7,7 @@ use rand::Rng; use serde::{Deserialize, Serialize}; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::tree::decision_tree_classifier::{ which_max, DecisionTreeClassifier, DecisionTreeClassifierParameters, SplitCriterion, }; @@ -23,13 +23,13 @@ pub struct RandomForestClassifierParameters { } #[derive(Serialize, Deserialize, Debug)] -pub struct RandomForestClassifier { +pub struct RandomForestClassifier { parameters: RandomForestClassifierParameters, trees: Vec>, classes: Vec, } -impl PartialEq for 
RandomForestClassifier { +impl PartialEq for RandomForestClassifier { fn eq(&self, other: &Self) -> bool { if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() { return false; @@ -62,7 +62,7 @@ impl Default for RandomForestClassifierParameters { } } -impl RandomForestClassifier { +impl RandomForestClassifier { pub fn fit>( x: &M, y: &M::RowVector, diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index 9decd8f..d651fb7 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -7,7 +7,7 @@ use rand::Rng; use serde::{Deserialize, Serialize}; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::tree::decision_tree_regressor::{ DecisionTreeRegressor, DecisionTreeRegressorParameters, }; @@ -22,7 +22,7 @@ pub struct RandomForestRegressorParameters { } #[derive(Serialize, Deserialize, Debug)] -pub struct RandomForestRegressor { +pub struct RandomForestRegressor { parameters: RandomForestRegressorParameters, trees: Vec>, } @@ -39,7 +39,7 @@ impl Default for RandomForestRegressorParameters { } } -impl PartialEq for RandomForestRegressor { +impl PartialEq for RandomForestRegressor { fn eq(&self, other: &Self) -> bool { if self.trees.len() != other.trees.len() { return false; @@ -54,7 +54,7 @@ impl PartialEq for RandomForestRegressor { } } -impl RandomForestRegressor { +impl RandomForestRegressor { pub fn fit>( x: &M, y: &M::RowVector, diff --git a/src/lib.rs b/src/lib.rs index 59b236e..45f562d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,7 +76,7 @@ pub mod ensemble; pub mod linalg; /// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables. 
pub mod linear; -/// Multitude of helper methods and classes, including definitions of distance metrics +/// Helper methods and classes, including definitions of distance metrics pub mod math; /// Functions for assessing prediction error. pub mod metrics; diff --git a/src/linalg/evd.rs b/src/linalg/evd.rs index 32e6d88..c79b17d 100644 --- a/src/linalg/evd.rs +++ b/src/linalg/evd.rs @@ -1,24 +1,24 @@ #![allow(non_snake_case)] use crate::linalg::BaseMatrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use num::complex::Complex; use std::fmt::Debug; #[derive(Debug, Clone)] -pub struct EVD> { +pub struct EVD> { pub d: Vec, pub e: Vec, pub V: M, } -impl> EVD { +impl> EVD { pub fn new(V: M, d: Vec, e: Vec) -> EVD { EVD { d: d, e: e, V: V } } } -pub trait EVDDecomposableMatrix: BaseMatrix { +pub trait EVDDecomposableMatrix: BaseMatrix { fn evd(&self, symmetric: bool) -> EVD { self.clone().evd_mut(symmetric) } @@ -58,7 +58,7 @@ pub trait EVDDecomposableMatrix: BaseMatrix { } } -fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec) { +fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec) { let (n, _) = V.shape(); for i in 0..n { d[i] = V.get(n - 1, i); @@ -161,7 +161,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec>(V: &mut M, d: &mut Vec, e: &mut Vec) { +fn tql2>(V: &mut M, d: &mut Vec, e: &mut Vec) { let (n, _) = V.shape(); for i in 1..n { e[i - 1] = e[i]; @@ -277,7 +277,7 @@ fn tql2>(V: &mut M, d: &mut Vec, e: &mut Vec } } -fn balance>(A: &mut M) -> Vec { +fn balance>(A: &mut M) -> Vec { let radix = T::two(); let sqrdx = radix * radix; @@ -330,7 +330,7 @@ fn balance>(A: &mut M) -> Vec { return scale; } -fn elmhes>(A: &mut M) -> Vec { +fn elmhes>(A: &mut M) -> Vec { let (n, _) = A.shape(); let mut perm = vec![0; n]; @@ -376,7 +376,7 @@ fn elmhes>(A: &mut M) -> Vec { return perm; } -fn eltran>(A: &M, V: &mut M, perm: &Vec) { +fn eltran>(A: &M, V: &mut M, perm: &Vec) { let (n, _) = A.shape(); for mp in (1..n - 1).rev() { for k in mp + 1..n { @@ -393,7 +393,7 @@ 
fn eltran>(A: &M, V: &mut M, perm: &Vec) { } } -fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e: &mut Vec) { +fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e: &mut Vec) { let (n, _) = A.shape(); let mut z = T::zero(); let mut s = T::zero(); @@ -748,7 +748,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e: } } -fn balbak>(V: &mut M, scale: &Vec) { +fn balbak>(V: &mut M, scale: &Vec) { let (n, _) = V.shape(); for i in 0..n { for j in 0..n { @@ -757,7 +757,7 @@ fn balbak>(V: &mut M, scale: &Vec) { } } -fn sort>(d: &mut Vec, e: &mut Vec, V: &mut M) { +fn sort>(d: &mut Vec, e: &mut Vec, V: &mut M) { let n = d.len(); let mut temp = vec![T::zero(); n]; for j in 1..n { diff --git a/src/linalg/lu.rs b/src/linalg/lu.rs index 48fd894..914bc71 100644 --- a/src/linalg/lu.rs +++ b/src/linalg/lu.rs @@ -4,10 +4,10 @@ use std::fmt::Debug; use std::marker::PhantomData; use crate::linalg::BaseMatrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Debug, Clone)] -pub struct LU> { +pub struct LU> { LU: M, pivot: Vec, pivot_sign: i8, @@ -15,7 +15,7 @@ pub struct LU> { phantom: PhantomData, } -impl> LU { +impl> LU { pub fn new(LU: M, pivot: Vec, pivot_sign: i8) -> LU { let (_, n) = LU.shape(); @@ -153,7 +153,7 @@ impl> LU { } } -pub trait LUDecomposableMatrix: BaseMatrix { +pub trait LUDecomposableMatrix: BaseMatrix { fn lu(&self) -> LU { self.clone().lu_mut() } diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 8fbc339..cf6c222 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -12,13 +12,13 @@ use std::fmt::{Debug, Display}; use std::marker::PhantomData; use std::ops::Range; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use evd::EVDDecomposableMatrix; use lu::LUDecomposableMatrix; use qr::QRDecomposableMatrix; use svd::SVDDecomposableMatrix; -pub trait BaseVector: Clone + Debug { +pub trait BaseVector: Clone + Debug { fn get(&self, i: usize) -> T; fn set(&mut self, i: usize, x: T); @@ -28,7 +28,7 @@ pub trait BaseVector: 
Clone + Debug { fn to_vec(&self) -> Vec; } -pub trait BaseMatrix: Clone + Debug { +pub trait BaseMatrix: Clone + Debug { type RowVector: BaseVector + Clone + Debug; fn from_row_vector(vec: Self::RowVector) -> Self; @@ -190,7 +190,7 @@ pub trait BaseMatrix: Clone + Debug { fn cov(&self) -> Self; } -pub trait Matrix: +pub trait Matrix: BaseMatrix + SVDDecomposableMatrix + EVDDecomposableMatrix @@ -201,7 +201,7 @@ pub trait Matrix: { } -pub fn row_iter>(m: &M) -> RowIter { +pub fn row_iter>(m: &M) -> RowIter { RowIter { m: m, pos: 0, @@ -210,14 +210,14 @@ pub fn row_iter>(m: &M) -> RowIter { } } -pub struct RowIter<'a, T: FloatExt, M: BaseMatrix> { +pub struct RowIter<'a, T: RealNumber, M: BaseMatrix> { m: &'a M, pos: usize, max_pos: usize, phantom: PhantomData<&'a T>, } -impl<'a, T: FloatExt, M: BaseMatrix> Iterator for RowIter<'a, T, M> { +impl<'a, T: RealNumber, M: BaseMatrix> Iterator for RowIter<'a, T, M> { type Item = Vec; fn next(&mut self) -> Option> { diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index f807140..a5d88dc 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -14,9 +14,9 @@ use crate::linalg::qr::QRDecomposableMatrix; use crate::linalg::svd::SVDDecomposableMatrix; use crate::linalg::Matrix; pub use crate::linalg::{BaseMatrix, BaseVector}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; -impl BaseVector for Vec { +impl BaseVector for Vec { fn get(&self, i: usize) -> T { self[i] } @@ -35,13 +35,13 @@ impl BaseVector for Vec { } #[derive(Debug, Clone)] -pub struct DenseMatrix { +pub struct DenseMatrix { ncols: usize, nrows: usize, values: Vec, } -impl fmt::Display for DenseMatrix { +impl fmt::Display for DenseMatrix { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut rows: Vec> = Vec::new(); for r in 0..self.nrows { @@ -56,7 +56,7 @@ impl fmt::Display for DenseMatrix { } } -impl DenseMatrix { +impl DenseMatrix { fn new(nrows: usize, ncols: usize, 
values: Vec) -> Self { DenseMatrix { ncols: ncols, @@ -115,7 +115,7 @@ impl DenseMatrix { } } -impl<'de, T: FloatExt + fmt::Debug + Deserialize<'de>> Deserialize<'de> for DenseMatrix { +impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for DenseMatrix { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -128,11 +128,11 @@ impl<'de, T: FloatExt + fmt::Debug + Deserialize<'de>> Deserialize<'de> for Dens Values, } - struct DenseMatrixVisitor { + struct DenseMatrixVisitor { t: PhantomData, } - impl<'a, T: FloatExt + fmt::Debug + Deserialize<'a>> Visitor<'a> for DenseMatrixVisitor { + impl<'a, T: RealNumber + fmt::Debug + Deserialize<'a>> Visitor<'a> for DenseMatrixVisitor { type Value = DenseMatrix; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { @@ -200,7 +200,7 @@ impl<'de, T: FloatExt + fmt::Debug + Deserialize<'de>> Deserialize<'de> for Dens } } -impl Serialize for DenseMatrix { +impl Serialize for DenseMatrix { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -214,17 +214,17 @@ impl Serialize for DenseMatrix { } } -impl SVDDecomposableMatrix for DenseMatrix {} +impl SVDDecomposableMatrix for DenseMatrix {} -impl EVDDecomposableMatrix for DenseMatrix {} +impl EVDDecomposableMatrix for DenseMatrix {} -impl QRDecomposableMatrix for DenseMatrix {} +impl QRDecomposableMatrix for DenseMatrix {} -impl LUDecomposableMatrix for DenseMatrix {} +impl LUDecomposableMatrix for DenseMatrix {} -impl Matrix for DenseMatrix {} +impl Matrix for DenseMatrix {} -impl PartialEq for DenseMatrix { +impl PartialEq for DenseMatrix { fn eq(&self, other: &Self) -> bool { if self.ncols != other.ncols || self.nrows != other.nrows { return false; @@ -247,13 +247,13 @@ impl PartialEq for DenseMatrix { } } -impl Into> for DenseMatrix { +impl Into> for DenseMatrix { fn into(self) -> Vec { self.values } } -impl BaseMatrix for DenseMatrix { +impl BaseMatrix for DenseMatrix { type RowVector = Vec; fn 
from_row_vector(vec: Self::RowVector) -> Self { diff --git a/src/linalg/nalgebra_bindings.rs b/src/linalg/nalgebra_bindings.rs index 88a14f4..29519c2 100644 --- a/src/linalg/nalgebra_bindings.rs +++ b/src/linalg/nalgebra_bindings.rs @@ -9,9 +9,9 @@ use crate::linalg::qr::QRDecomposableMatrix; use crate::linalg::svd::SVDDecomposableMatrix; use crate::linalg::Matrix as SmartCoreMatrix; use crate::linalg::{BaseMatrix, BaseVector}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; -impl BaseVector for MatrixMN { +impl BaseVector for MatrixMN { fn get(&self, i: usize) -> T { *self.get((0, i)).unwrap() } @@ -28,7 +28,7 @@ impl BaseVector for MatrixMN { } } -impl +impl BaseMatrix for Matrix> { type RowVector = MatrixMN; @@ -340,27 +340,27 @@ impl +impl SVDDecomposableMatrix for Matrix> { } -impl +impl EVDDecomposableMatrix for Matrix> { } -impl +impl QRDecomposableMatrix for Matrix> { } -impl +impl LUDecomposableMatrix for Matrix> { } -impl +impl SmartCoreMatrix for Matrix> { } diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs index f0bacfd..122673a 100644 --- a/src/linalg/ndarray_bindings.rs +++ b/src/linalg/ndarray_bindings.rs @@ -14,9 +14,9 @@ use crate::linalg::qr::QRDecomposableMatrix; use crate::linalg::svd::SVDDecomposableMatrix; use crate::linalg::Matrix; use crate::linalg::{BaseMatrix, BaseVector}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; -impl BaseVector for ArrayBase, Ix1> { +impl BaseVector for ArrayBase, Ix1> { fn get(&self, i: usize) -> T { self[i] } @@ -33,7 +33,7 @@ impl BaseVector for ArrayBase, Ix1> { } } -impl +impl BaseMatrix for ArrayBase, Ix2> { type RowVector = ArrayBase, Ix1>; @@ -308,27 +308,27 @@ impl +impl SVDDecomposableMatrix for ArrayBase, Ix2> { } -impl +impl EVDDecomposableMatrix for ArrayBase, Ix2> { } -impl +impl QRDecomposableMatrix for ArrayBase, Ix2> { } -impl +impl LUDecomposableMatrix for ArrayBase, Ix2> { } -impl Matrix +impl Matrix for ArrayBase, Ix2> { } diff 
--git a/src/linalg/qr.rs b/src/linalg/qr.rs index dbec3ab..9a9a955 100644 --- a/src/linalg/qr.rs +++ b/src/linalg/qr.rs @@ -3,16 +3,16 @@ use std::fmt::Debug; use crate::linalg::BaseMatrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Debug, Clone)] -pub struct QR> { +pub struct QR> { QR: M, tau: Vec, singular: bool, } -impl> QR { +impl> QR { pub fn new(QR: M, tau: Vec) -> QR { let mut singular = false; for j in 0..tau.len() { @@ -112,7 +112,7 @@ impl> QR { } } -pub trait QRDecomposableMatrix: BaseMatrix { +pub trait QRDecomposableMatrix: BaseMatrix { fn qr(&self) -> QR { self.clone().qr_mut() } diff --git a/src/linalg/svd.rs b/src/linalg/svd.rs index 1b4c791..3b5e590 100644 --- a/src/linalg/svd.rs +++ b/src/linalg/svd.rs @@ -1,11 +1,11 @@ #![allow(non_snake_case)] use crate::linalg::BaseMatrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use std::fmt::Debug; #[derive(Debug, Clone)] -pub struct SVD> { +pub struct SVD> { pub U: M, pub V: M, pub s: Vec, @@ -15,7 +15,7 @@ pub struct SVD> { tol: T, } -pub trait SVDDecomposableMatrix: BaseMatrix { +pub trait SVDDecomposableMatrix: BaseMatrix { fn svd_solve_mut(self, b: Self) -> Self { self.svd_mut().solve(b) } @@ -367,7 +367,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { } } -impl> SVD { +impl> SVD { pub fn new(U: M, V: M, s: Vec) -> SVD { let m = U.shape().0; let n = V.shape().0; diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index e07a182..78cab25 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -3,7 +3,7 @@ use std::fmt::Debug; use serde::{Deserialize, Serialize}; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub enum LinearRegressionSolverName { @@ -17,7 +17,7 @@ pub struct LinearRegressionParameters { } #[derive(Serialize, Deserialize, Debug)] -pub struct LinearRegression> { +pub struct 
LinearRegression> { coefficients: M, intercept: T, solver: LinearRegressionSolverName, @@ -31,14 +31,14 @@ impl Default for LinearRegressionParameters { } } -impl> PartialEq for LinearRegression { +impl> PartialEq for LinearRegression { fn eq(&self, other: &Self) -> bool { self.coefficients == other.coefficients && (self.intercept - other.intercept).abs() <= T::epsilon() } } -impl> LinearRegression { +impl> LinearRegression { pub fn fit( x: &M, y: &M::RowVector, diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 2dac0bc..f4e893b 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -4,21 +4,21 @@ use std::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::optimization::first_order::lbfgs::LBFGS; use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::Backtracking; use crate::optimization::FunctionOrder; #[derive(Serialize, Deserialize, Debug)] -pub struct LogisticRegression> { +pub struct LogisticRegression> { weights: M, classes: Vec, num_attributes: usize, num_classes: usize, } -trait ObjectiveFunction> { +trait ObjectiveFunction> { fn f(&self, w_bias: &M) -> T; fn df(&self, g: &mut M, w_bias: &M); @@ -33,13 +33,13 @@ trait ObjectiveFunction> { } } -struct BinaryObjectiveFunction<'a, T: FloatExt, M: Matrix> { +struct BinaryObjectiveFunction<'a, T: RealNumber, M: Matrix> { x: &'a M, y: Vec, phantom: PhantomData<&'a T>, } -impl> PartialEq for LogisticRegression { +impl> PartialEq for LogisticRegression { fn eq(&self, other: &Self) -> bool { if self.num_classes != other.num_classes || self.num_attributes != other.num_attributes @@ -58,7 +58,7 @@ impl> PartialEq for LogisticRegression { } } -impl<'a, T: FloatExt, M: Matrix> ObjectiveFunction for BinaryObjectiveFunction<'a, T, M> { +impl<'a, T: RealNumber, M: Matrix> 
ObjectiveFunction for BinaryObjectiveFunction<'a, T, M> { fn f(&self, w_bias: &M) -> T { let mut f = T::zero(); let (n, _) = self.x.shape(); @@ -88,14 +88,14 @@ impl<'a, T: FloatExt, M: Matrix> ObjectiveFunction for BinaryObjectiveF } } -struct MultiClassObjectiveFunction<'a, T: FloatExt, M: Matrix> { +struct MultiClassObjectiveFunction<'a, T: RealNumber, M: Matrix> { x: &'a M, y: Vec, k: usize, phantom: PhantomData<&'a T>, } -impl<'a, T: FloatExt, M: Matrix> ObjectiveFunction +impl<'a, T: RealNumber, M: Matrix> ObjectiveFunction for MultiClassObjectiveFunction<'a, T, M> { fn f(&self, w_bias: &M) -> T { @@ -147,7 +147,7 @@ impl<'a, T: FloatExt, M: Matrix> ObjectiveFunction } } -impl> LogisticRegression { +impl> LogisticRegression { pub fn fit(x: &M, y: &M::RowVector) -> LogisticRegression { let y_m = M::from_row_vector(y.clone()); let (x_nrows, num_attributes) = x.shape(); diff --git a/src/math/distance/euclidian.rs b/src/math/distance/euclidian.rs index c7b9598..66ec531 100644 --- a/src/math/distance/euclidian.rs +++ b/src/math/distance/euclidian.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use super::Distance; @@ -8,7 +8,7 @@ use super::Distance; pub struct Euclidian {} impl Euclidian { - pub fn squared_distance(x: &Vec, y: &Vec) -> T { + pub fn squared_distance(x: &Vec, y: &Vec) -> T { if x.len() != y.len() { panic!("Input vector sizes are different."); } @@ -22,7 +22,7 @@ impl Euclidian { } } -impl Distance, T> for Euclidian { +impl Distance, T> for Euclidian { fn distance(&self, x: &Vec, y: &Vec) -> T { Euclidian::squared_distance(x, y).sqrt() } diff --git a/src/math/distance/hamming.rs b/src/math/distance/hamming.rs index e62391e..0f83be2 100644 --- a/src/math/distance/hamming.rs +++ b/src/math/distance/hamming.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use super::Distance; #[derive(Serialize, 
Deserialize, Debug)] pub struct Hamming {} -impl Distance, F> for Hamming { +impl Distance, F> for Hamming { fn distance(&self, x: &Vec, y: &Vec) -> F { if x.len() != y.len() { panic!("Input vector sizes are different"); diff --git a/src/math/distance/mahalanobis.rs b/src/math/distance/mahalanobis.rs index 66fef08..7c26ae1 100644 --- a/src/math/distance/mahalanobis.rs +++ b/src/math/distance/mahalanobis.rs @@ -4,19 +4,19 @@ use std::marker::PhantomData; use serde::{Deserialize, Serialize}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use super::Distance; use crate::linalg::Matrix; #[derive(Serialize, Deserialize, Debug)] -pub struct Mahalanobis> { +pub struct Mahalanobis> { pub sigma: M, pub sigmaInv: M, t: PhantomData, } -impl> Mahalanobis { +impl> Mahalanobis { pub fn new(data: &M) -> Mahalanobis { let sigma = data.cov(); let sigmaInv = sigma.lu().inverse(); @@ -38,7 +38,7 @@ impl> Mahalanobis { } } -impl> Distance, T> for Mahalanobis { +impl> Distance, T> for Mahalanobis { fn distance(&self, x: &Vec, y: &Vec) -> T { let (nrows, ncols) = self.sigma.shape(); if x.len() != nrows { diff --git a/src/math/distance/manhattan.rs b/src/math/distance/manhattan.rs index fe56cc8..13ab983 100644 --- a/src/math/distance/manhattan.rs +++ b/src/math/distance/manhattan.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use super::Distance; #[derive(Serialize, Deserialize, Debug)] pub struct Manhattan {} -impl Distance, T> for Manhattan { +impl Distance, T> for Manhattan { fn distance(&self, x: &Vec, y: &Vec) -> T { if x.len() != y.len() { panic!("Input vector sizes are different"); diff --git a/src/math/distance/minkowski.rs b/src/math/distance/minkowski.rs index 01e1dff..fdf83ed 100644 --- a/src/math/distance/minkowski.rs +++ b/src/math/distance/minkowski.rs @@ -1,30 +1,32 @@ use serde::{Deserialize, Serialize}; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; 
use super::Distance; #[derive(Serialize, Deserialize, Debug)] -pub struct Minkowski { - pub p: T, +pub struct Minkowski { + pub p: u16, } -impl Distance, T> for Minkowski { +impl Distance, T> for Minkowski { fn distance(&self, x: &Vec, y: &Vec) -> T { if x.len() != y.len() { panic!("Input vector sizes are different"); } - if self.p < T::one() { + if self.p < 1 { panic!("p must be at least 1"); } let mut dist = T::zero(); + let p_t = T::from_u16(self.p).unwrap(); + for i in 0..x.len() { let d = (x[i] - y[i]).abs(); - dist = dist + d.powf(self.p); + dist = dist + d.powf(p_t); } - dist.powf(T::one() / self.p) + dist.powf(T::one() / p_t) } } @@ -37,9 +39,9 @@ mod tests { let a = vec![1., 2., 3.]; let b = vec![4., 5., 6.]; - let l1: f64 = Minkowski { p: 1.0 }.distance(&a, &b); - let l2: f64 = Minkowski { p: 2.0 }.distance(&a, &b); - let l3: f64 = Minkowski { p: 3.0 }.distance(&a, &b); + let l1: f64 = Minkowski { p: 1 }.distance(&a, &b); + let l2: f64 = Minkowski { p: 2 }.distance(&a, &b); + let l3: f64 = Minkowski { p: 3 }.distance(&a, &b); assert!((l1 - 9.0).abs() < 1e-8); assert!((l2 - 5.19615242).abs() < 1e-8); @@ -52,6 +54,6 @@ mod tests { let a = vec![1., 2., 3.]; let b = vec![4., 5., 6.]; - let _: f64 = Minkowski { p: 0.0 }.distance(&a, &b); + let _: f64 = Minkowski { p: 0 }.distance(&a, &b); } } diff --git a/src/math/distance/mod.rs b/src/math/distance/mod.rs index b9b34a9..d7c8527 100644 --- a/src/math/distance/mod.rs +++ b/src/math/distance/mod.rs @@ -1,30 +1,57 @@ +//! # Collection of Distance Functions +//! +//! Many algorithms in machine learning require a measure of distance between data points. Distance metric (or metric) is a function that defines a distance between a pair of point elements of a set. +//! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions: +//! 1. \\( d(x, y) = 0 \\) if and only if \\( x = y \\), positive definiteness +//! 1. \\( d(x, y) = d(y, x) \\), symmetry +//! 1.
\\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality +//! +//! for all \\(x, y, z \in Z \\) +//! +//! A good distance metric helps to improve the performance of classification, clustering and information retrieval algorithms significantly. +//! +//! + +/// Euclidean Distance is the straight-line distance between two points in Euclidean space that represents the shortest distance between these points. pub mod euclidian; +/// Hamming Distance between two strings is the number of positions at which the corresponding symbols are different. pub mod hamming; +/// The Mahalanobis distance is the distance between two points in multivariate space. pub mod mahalanobis; +/// Also known as rectilinear distance, city block distance, taxicab metric. pub mod manhattan; +/// A generalization of both the Euclidean distance and the Manhattan distance. pub mod minkowski; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; -pub trait Distance { +/// Distance metric, a function that calculates distance between two points +pub trait Distance { + /// Calculates distance between _a_ and _b_ fn distance(&self, a: &T, b: &T) -> F; } +/// Multitude of distance metric functions pub struct Distances {} impl Distances { + /// Euclidian distance pub fn euclidian() -> euclidian::Euclidian { euclidian::Euclidian {} } - pub fn minkowski(p: T) -> minkowski::Minkowski { + /// Minkowski distance + /// * `p` - function order.
Should be >= 1 + pub fn minkowski(p: u16) -> minkowski::Minkowski { minkowski::Minkowski { p: p } } + /// Manhattan distance pub fn manhattan() -> manhattan::Manhattan { manhattan::Manhattan {} } + /// Hamming distance pub fn hamming() -> hamming::Hamming { hamming::Hamming {} } diff --git a/src/math/mod.rs b/src/math/mod.rs index 2c6e226..7093dd7 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -1,2 +1,3 @@ +/// Multitude of distance metrics are defined here pub mod distance; -pub(crate) mod num; +pub mod num; diff --git a/src/math/num.rs b/src/math/num.rs index a0b690f..c87a73d 100644 --- a/src/math/num.rs +++ b/src/math/num.rs @@ -3,7 +3,7 @@ use rand::prelude::*; use std::fmt::{Debug, Display}; use std::iter::{Product, Sum}; -pub trait FloatExt: Float + FromPrimitive + Debug + Display + Copy + Sum + Product { +pub trait RealNumber: Float + FromPrimitive + Debug + Display + Copy + Sum + Product { fn copysign(self, sign: Self) -> Self; fn ln_1pe(self) -> Self; @@ -21,7 +21,7 @@ pub trait FloatExt: Float + FromPrimitive + Debug + Display + Copy + Sum + Produ } } -impl FloatExt for f64 { +impl RealNumber for f64 { fn copysign(self, sign: Self) -> Self { self.copysign(sign) } @@ -58,7 +58,7 @@ impl FloatExt for f64 { } } -impl FloatExt for f32 { +impl RealNumber for f32 { fn copysign(self, sign: Self) -> Self { self.copysign(sign) } diff --git a/src/metrics/accuracy.rs b/src/metrics/accuracy.rs index a1695d0..135440b 100644 --- a/src/metrics/accuracy.rs +++ b/src/metrics/accuracy.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct Accuracy {} impl Accuracy { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs 
index cf34e2c..3daf2b8 100644 --- a/src/metrics/auc.rs +++ b/src/metrics/auc.rs @@ -4,13 +4,13 @@ use serde::{Deserialize, Serialize}; use crate::algorithm::sort::quick_sort::QuickArgSort; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct AUC {} impl AUC { - pub fn get_score>(&self, y_true: &V, y_pred_prob: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred_prob: &V) -> T { let mut pos = T::zero(); let mut neg = T::zero(); diff --git a/src/metrics/f1.rs b/src/metrics/f1.rs index a1af664..caec7d2 100644 --- a/src/metrics/f1.rs +++ b/src/metrics/f1.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::metrics::precision::Precision; use crate::metrics::recall::Recall; @@ -9,7 +9,7 @@ use crate::metrics::recall::Recall; pub struct F1 {} impl F1 { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/metrics/mean_absolute_error.rs b/src/metrics/mean_absolute_error.rs index 6448ea7..a5332e5 100644 --- a/src/metrics/mean_absolute_error.rs +++ b/src/metrics/mean_absolute_error.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct MeanAbsoluteError {} impl MeanAbsoluteError { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/metrics/mean_squared_error.rs b/src/metrics/mean_squared_error.rs index 37ecf73..81bd0f7 100644 --- a/src/metrics/mean_squared_error.rs +++ 
b/src/metrics/mean_squared_error.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct MeanSquareError {} impl MeanSquareError { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 2ae6464..69235d3 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -8,7 +8,7 @@ pub mod r2; pub mod recall; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; pub struct ClassificationMetrics {} @@ -50,34 +50,34 @@ impl RegressionMetrics { } } -pub fn accuracy>(y_true: &V, y_pred: &V) -> T { +pub fn accuracy>(y_true: &V, y_pred: &V) -> T { ClassificationMetrics::accuracy().get_score(y_true, y_pred) } -pub fn recall>(y_true: &V, y_pred: &V) -> T { +pub fn recall>(y_true: &V, y_pred: &V) -> T { ClassificationMetrics::recall().get_score(y_true, y_pred) } -pub fn precision>(y_true: &V, y_pred: &V) -> T { +pub fn precision>(y_true: &V, y_pred: &V) -> T { ClassificationMetrics::precision().get_score(y_true, y_pred) } -pub fn f1>(y_true: &V, y_pred: &V) -> T { +pub fn f1>(y_true: &V, y_pred: &V) -> T { ClassificationMetrics::f1().get_score(y_true, y_pred) } -pub fn roc_auc_score>(y_true: &V, y_pred_probabilities: &V) -> T { +pub fn roc_auc_score>(y_true: &V, y_pred_probabilities: &V) -> T { ClassificationMetrics::roc_auc_score().get_score(y_true, y_pred_probabilities) } -pub fn mean_squared_error>(y_true: &V, y_pred: &V) -> T { +pub fn mean_squared_error>(y_true: &V, y_pred: &V) -> T { RegressionMetrics::mean_squared_error().get_score(y_true, y_pred) } -pub fn mean_absolute_error>(y_true: &V, y_pred: &V) -> T { +pub fn mean_absolute_error>(y_true: &V, y_pred: &V) -> T { 
RegressionMetrics::mean_absolute_error().get_score(y_true, y_pred) } -pub fn r2>(y_true: &V, y_pred: &V) -> T { +pub fn r2>(y_true: &V, y_pred: &V) -> T { RegressionMetrics::r2().get_score(y_true, y_pred) } diff --git a/src/metrics/precision.rs b/src/metrics/precision.rs index b3e6c72..602aecc 100644 --- a/src/metrics/precision.rs +++ b/src/metrics/precision.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct Precision {} impl Precision { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/metrics/r2.rs b/src/metrics/r2.rs index b823bc6..6ba3979 100644 --- a/src/metrics/r2.rs +++ b/src/metrics/r2.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct R2 {} impl R2 { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/metrics/recall.rs b/src/metrics/recall.rs index 14e91ee..63101b6 100644 --- a/src/metrics/recall.rs +++ b/src/metrics/recall.rs @@ -1,13 +1,13 @@ use serde::{Deserialize, Serialize}; use crate::linalg::BaseVector; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct Recall {} impl Recall { - pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { + pub fn get_score>(&self, y_true: &V, y_pred: &V) -> T { if y_true.len() != y_pred.len() { panic!( "The vector sizes don't match: {} != {}", diff --git a/src/neighbors/knn_classifier.rs 
b/src/neighbors/knn_classifier.rs index f7de583..80945f1 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -36,7 +36,7 @@ use serde::{Deserialize, Serialize}; use crate::linalg::{row_iter, Matrix}; use crate::math::distance::Distance; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::neighbors::{KNNAlgorithm, KNNAlgorithmName, KNNWeightFunction}; /// `KNNClassifier` parameters. Use `Default::default()` for default values. @@ -52,7 +52,7 @@ pub struct KNNClassifierParameters { /// K Nearest Neighbors Classifier #[derive(Serialize, Deserialize, Debug)] -pub struct KNNClassifier, T>> { +pub struct KNNClassifier, T>> { classes: Vec, y: Vec, knn_algorithm: KNNAlgorithm, @@ -70,7 +70,7 @@ impl Default for KNNClassifierParameters { } } -impl, T>> PartialEq for KNNClassifier { +impl, T>> PartialEq for KNNClassifier { fn eq(&self, other: &Self) -> bool { if self.classes.len() != other.classes.len() || self.k != other.k @@ -93,7 +93,7 @@ impl, T>> PartialEq for KNNClassifier { } } -impl, T>> KNNClassifier { +impl, T>> KNNClassifier { /// Fits KNN classifier to a NxM matrix where N is number of samples and M is number of features. /// * `x` - training data /// * `y` - vector with target values (classes) of length N diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index 5724f87..577152d 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -38,7 +38,7 @@ use serde::{Deserialize, Serialize}; use crate::linalg::{row_iter, BaseVector, Matrix}; use crate::math::distance::Distance; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::neighbors::{KNNAlgorithm, KNNAlgorithmName, KNNWeightFunction}; /// `KNNRegressor` parameters. Use `Default::default()` for default values. 
@@ -54,7 +54,7 @@ pub struct KNNRegressorParameters { /// K Nearest Neighbors Regressor #[derive(Serialize, Deserialize, Debug)] -pub struct KNNRegressor, T>> { +pub struct KNNRegressor, T>> { y: Vec, knn_algorithm: KNNAlgorithm, weight: KNNWeightFunction, @@ -71,7 +71,7 @@ impl Default for KNNRegressorParameters { } } -impl, T>> PartialEq for KNNRegressor { +impl, T>> PartialEq for KNNRegressor { fn eq(&self, other: &Self) -> bool { if self.k != other.k || self.y.len() != other.y.len() { return false; @@ -86,7 +86,7 @@ impl, T>> PartialEq for KNNRegressor { } } -impl, T>> KNNRegressor { +impl, T>> KNNRegressor { /// Fits KNN regressor to a NxM matrix where N is number of samples and M is number of features. /// * `x` - training data /// * `y` - vector with real values diff --git a/src/neighbors/mod.rs b/src/neighbors/mod.rs index 37cd310..ec3a71a 100644 --- a/src/neighbors/mod.rs +++ b/src/neighbors/mod.rs @@ -34,7 +34,7 @@ use crate::algorithm::neighbour::cover_tree::CoverTree; use crate::algorithm::neighbour::linear_search::LinearKNNSearch; use crate::math::distance::Distance; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use serde::{Deserialize, Serialize}; /// K Nearest Neighbors Classifier @@ -62,13 +62,13 @@ pub enum KNNWeightFunction { } #[derive(Serialize, Deserialize, Debug)] -enum KNNAlgorithm, T>> { +enum KNNAlgorithm, T>> { LinearSearch(LinearKNNSearch, T, D>), CoverTree(CoverTree, T, D>), } impl KNNWeightFunction { - fn calc_weights(&self, distances: Vec) -> std::vec::Vec { + fn calc_weights(&self, distances: Vec) -> std::vec::Vec { match *self { KNNWeightFunction::Distance => { // if there are any points that has zero distance from one or more training points, @@ -88,7 +88,7 @@ impl KNNWeightFunction { } impl KNNAlgorithmName { - fn fit, T>>( + fn fit, T>>( &self, data: Vec>, distance: D, @@ -102,7 +102,7 @@ impl KNNAlgorithmName { } } -impl, T>> KNNAlgorithm { +impl, T>> KNNAlgorithm { fn find(&self, from: &Vec, k: usize) -> 
Vec<(usize, T)> { match *self { KNNAlgorithm::LinearSearch(ref linear) => linear.find(from, k), diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs index 39f7e72..dfd4d31 100644 --- a/src/optimization/first_order/gradient_descent.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -1,18 +1,18 @@ use std::default::Default; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -pub struct GradientDescent { +pub struct GradientDescent { pub max_iter: usize, pub g_rtol: T, pub g_atol: T, } -impl Default for GradientDescent { +impl Default for GradientDescent { fn default() -> Self { GradientDescent { max_iter: 10000, @@ -22,7 +22,7 @@ impl Default for GradientDescent { } } -impl FirstOrderOptimizer for GradientDescent { +impl FirstOrderOptimizer for GradientDescent { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, f: &'a F, diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs index 975d98e..a1301dc 100644 --- a/src/optimization/first_order/lbfgs.rs +++ b/src/optimization/first_order/lbfgs.rs @@ -2,12 +2,12 @@ use std::default::Default; use std::fmt::Debug; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -pub struct LBFGS { +pub struct LBFGS { pub max_iter: usize, pub g_rtol: T, pub g_atol: T, @@ -19,7 +19,7 @@ pub struct LBFGS { pub m: usize, } -impl Default for LBFGS { +impl Default for LBFGS { fn default() -> Self { LBFGS { max_iter: 1000, @@ -35,7 +35,7 @@ impl Default for LBFGS { } } -impl LBFGS { +impl LBFGS { fn two_loops>(&self, 
state: &mut LBFGSState) { let lower = state.iteration.max(self.m) - self.m; let upper = state.iteration; @@ -175,7 +175,7 @@ impl LBFGS { } #[derive(Debug)] -struct LBFGSState> { +struct LBFGSState> { x: X, x_prev: X, x_f: T, @@ -195,7 +195,7 @@ struct LBFGSState> { alpha: T, } -impl FirstOrderOptimizer for LBFGS { +impl FirstOrderOptimizer for LBFGS { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, f: &F, diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs index 9bdeb33..d1c628f 100644 --- a/src/optimization/first_order/mod.rs +++ b/src/optimization/first_order/mod.rs @@ -5,11 +5,11 @@ use std::clone::Clone; use std::fmt::Debug; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -pub trait FirstOrderOptimizer { +pub trait FirstOrderOptimizer { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, f: &F, @@ -20,7 +20,7 @@ pub trait FirstOrderOptimizer { } #[derive(Debug, Clone)] -pub struct OptimizerResult> { +pub struct OptimizerResult> { pub x: X, pub f_x: T, pub iterations: usize, diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 51db52f..391303c 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use crate::algorithm::sort::quick_sort::QuickArgSort; use crate::linalg::Matrix; -use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct DecisionTreeClassifierParameters { @@ -18,7 +18,7 @@ pub struct DecisionTreeClassifierParameters { } #[derive(Serialize, Deserialize, Debug)] -pub struct DecisionTreeClassifier { +pub struct DecisionTreeClassifier { nodes: Vec>, parameters: DecisionTreeClassifierParameters, num_classes: usize, @@ -34,7 +34,7 @@ pub enum SplitCriterion { } #[derive(Serialize, 
Deserialize, Debug)] -pub struct Node { +pub struct Node { index: usize, output: usize, split_feature: usize, @@ -44,7 +44,7 @@ pub struct Node { false_child: Option, } -impl PartialEq for DecisionTreeClassifier { +impl PartialEq for DecisionTreeClassifier { fn eq(&self, other: &Self) -> bool { if self.depth != other.depth || self.num_classes != other.num_classes @@ -67,7 +67,7 @@ impl PartialEq for DecisionTreeClassifier { } } -impl PartialEq for Node { +impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { self.output == other.output && self.split_feature == other.split_feature @@ -95,7 +95,7 @@ impl Default for DecisionTreeClassifierParameters { } } -impl Node { +impl Node { fn new(index: usize, output: usize) -> Self { Node { index: index, @@ -109,7 +109,7 @@ impl Node { } } -struct NodeVisitor<'a, T: FloatExt, M: Matrix> { +struct NodeVisitor<'a, T: RealNumber, M: Matrix> { x: &'a M, y: &'a Vec, node: usize, @@ -121,7 +121,7 @@ struct NodeVisitor<'a, T: FloatExt, M: Matrix> { phantom: PhantomData<&'a T>, } -fn impurity(criterion: &SplitCriterion, count: &Vec, n: usize) -> T { +fn impurity(criterion: &SplitCriterion, count: &Vec, n: usize) -> T { let mut impurity = T::zero(); match criterion { @@ -156,7 +156,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: usiz return impurity; } -impl<'a, T: FloatExt, M: Matrix> NodeVisitor<'a, T, M> { +impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { fn new( node_id: usize, samples: Vec, @@ -193,7 +193,7 @@ pub(in crate) fn which_max(x: &Vec) -> usize { return which; } -impl DecisionTreeClassifier { +impl DecisionTreeClassifier { pub fn fit>( x: &M, y: &M::RowVector, diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index d0802ee..c5bdba7 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use crate::algorithm::sort::quick_sort::QuickArgSort; use crate::linalg::Matrix; 
-use crate::math::num::FloatExt; +use crate::math::num::RealNumber; #[derive(Serialize, Deserialize, Debug)] pub struct DecisionTreeRegressorParameters { @@ -16,14 +16,14 @@ pub struct DecisionTreeRegressorParameters { } #[derive(Serialize, Deserialize, Debug)] -pub struct DecisionTreeRegressor { +pub struct DecisionTreeRegressor { nodes: Vec>, parameters: DecisionTreeRegressorParameters, depth: u16, } #[derive(Serialize, Deserialize, Debug)] -pub struct Node { +pub struct Node { index: usize, output: T, split_feature: usize, @@ -43,7 +43,7 @@ impl Default for DecisionTreeRegressorParameters { } } -impl Node { +impl Node { fn new(index: usize, output: T) -> Self { Node { index: index, @@ -57,7 +57,7 @@ impl Node { } } -impl PartialEq for Node { +impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { (self.output - other.output).abs() < T::epsilon() && self.split_feature == other.split_feature @@ -74,7 +74,7 @@ impl PartialEq for Node { } } -impl PartialEq for DecisionTreeRegressor { +impl PartialEq for DecisionTreeRegressor { fn eq(&self, other: &Self) -> bool { if self.depth != other.depth || self.nodes.len() != other.nodes.len() { return false; @@ -89,7 +89,7 @@ impl PartialEq for DecisionTreeRegressor { } } -struct NodeVisitor<'a, T: FloatExt, M: Matrix> { +struct NodeVisitor<'a, T: RealNumber, M: Matrix> { x: &'a M, y: &'a M, node: usize, @@ -100,7 +100,7 @@ struct NodeVisitor<'a, T: FloatExt, M: Matrix> { level: u16, } -impl<'a, T: FloatExt, M: Matrix> NodeVisitor<'a, T, M> { +impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { fn new( node_id: usize, samples: Vec, @@ -122,7 +122,7 @@ impl<'a, T: FloatExt, M: Matrix> NodeVisitor<'a, T, M> { } } -impl DecisionTreeRegressor { +impl DecisionTreeRegressor { pub fn fit>( x: &M, y: &M::RowVector,