fix: code cleanup, documentation

2020-08-27 11:37:14 -07:00
parent 7a7b0d6875
commit aa458d22fa
10 changed files with 147 additions and 64 deletions
@@ -13,6 +13,7 @@ Cargo.lock
 .idea
 .project
 .vscode
 smartcore.code-workspace
 # OS
 .DS_Store
@@ -1,12 +1,12 @@
 [package]
 name = "smartcore"
 version = "0.1.0"
-authors = ["Vlad Orlov"]
+authors = ["SmartCore Developers"]
 edition = "2018"
 [dependencies]
 ndarray = "0.13"
-nalgebra = "0.21.1"
+nalgebra = "0.22.0"
 num-traits = "0.2.12"
 num = "0.3.0"
 rand = "0.7.3"
@@ -66,7 +66,7 @@ impl<T: Debug, F: FloatExt, D: Distance<T, F>> CoverTree<T, F, D> {
        }
    }
-    pub fn new_node(&mut self, parent: Option<NodeId>, data: T) -> NodeId {
+    fn new_node(&mut self, parent: Option<NodeId>, data: T) -> NodeId {
        let next_index = self.nodes.len();
        let node_id = NodeId { index: next_index };
        self.nodes.push(Node {
@@ -300,7 +300,7 @@ impl<T: Debug, F: FloatExt, D: Distance<T, F>> CoverTree<T, F, D> {
 }
 #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
-pub struct NodeId {
+struct NodeId {
    index: usize,
 }
@@ -1,3 +1,3 @@
-pub mod bbd_tree;
+pub(crate) mod bbd_tree;
 pub mod cover_tree;
 pub mod linear_search;
@@ -1,18 +0,0 @@
 use ndarray::ScalarOperand;
 use num_traits::{FromPrimitive, Num, One, ToPrimitive, Zero};
 use std::fmt::Debug;
 use std::hash::Hash;
 pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
 pub trait Nominal:
    PartialEq + Zero + One + Eq + Hash + ToPrimitive + FromPrimitive + Debug + 'static + Clone
 {
 }
 impl<T> AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
 impl<T> Nominal for T where
    T: PartialEq + Zero + One + Eq + Hash + ToPrimitive + Debug + FromPrimitive + 'static + Clone
 {
 }
@@ -1,12 +1,87 @@
 #![warn(missing_docs)]
 #![warn(missing_doc_code_examples)]
 //! # SmartCore
 //! 
 //! Welcome to the SmartCore library, the most complete machine learning library for Rust!
 //! 
 //! In SmartCore you will find implementations of these ML algorithms:
 //! * Regression: Linear Regression (OLS), Decision Tree Regressor, Random Forest Regressor
 //! * Classification: Logistic Regression, Decision Tree Classifier, Random Forest Classifier, K-Nearest Neighbors (KNN)
 //! * Clustering: K-Means
 //! * Matrix decomposition: PCA, LU, QR, SVD, EVD
 //! * Distance Metrics: Euclidean, Minkowski, Manhattan, Hamming, Mahalanobis
 //! * Evaluation Metrics: Accuracy, AUC, Recall, Precision, F1, Mean Absolute Error, Mean Squared Error, R2
 //! 
 //! Most of the algorithms implemented in SmartCore operate on n-dimensional arrays. While you can use Rust vectors with all functions defined in this library,
 //! we recommend using one of the popular linear algebra libraries available in Rust. At the moment we support these packages:
 //! * [ndarray](https://docs.rs/ndarray)
 //! * [nalgebra](https://docs.rs/nalgebra/)
 //! 
 //! ## Getting Started
 //! 
 //! To start using SmartCore simply add the following to your Cargo.toml file:
 //! ```ignore
 //! [dependencies]
 //! smartcore = "0.1.0"
 //! ```
 //! 
 //! All ML algorithms in SmartCore are grouped into these generic categories:
 //! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data. 
 //! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
 //! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables
 //! * [Ensemble Models](ensemble/index.html), variety of regression and classification ensemble models
 //! * [Tree-based Models](tree/index.html), classification and regression trees
 //! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression
 //! 
 //! Each category is assigned to a separate module. 
 //! 
 //! For example, the KNN classifier is defined in [smartcore::neighbors::knn](neighbors/knn/index.html). To train and run it using standard Rust vectors,
 //! run this code:
 //! 
 //! ```
 //! // DenseMatrix definition
 //! use smartcore::linalg::naive::dense_matrix::*;
 //! // KNNClassifier
 //! use smartcore::neighbors::knn::*;
 //! // Various distance metrics
 //! use smartcore::math::distance::*;
 //! 
 //! // Turn Rust vectors with samples into a matrix
 //! let x = DenseMatrix::from_array(&[
 //!    &[1., 2.], 
 //!    &[3., 4.], 
 //!    &[5., 6.], 
 //!    &[7., 8.], 
 //!    &[9., 10.]]);
 //! // Our classes are defined as a Vector
 //! let y = vec![2., 2., 2., 3., 3.];
 //! 
 //! // Train classifier
 //! let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default());
 //! 
 //! // Predict classes
 //! let y_hat = knn.predict(&x);
 //! ```
 /// Various algorithms and helper methods that are used elsewhere in SmartCore
 pub mod algorithm;
 /// Algorithms for clustering of unlabeled data
 pub mod cluster;
-pub mod common;
+/// Matrix decomposition algorithms
 pub mod decomposition;
 /// Ensemble methods, including Random Forest classifier and regressor
 pub mod ensemble;
 /// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms
 pub mod linalg;
 /// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables.
 pub mod linear;
 /// Multitude of helper methods and classes, including definitions of distance metrics
 pub mod math;
 /// Functions for assessing prediction error.
 pub mod metrics;
 /// Supervised neighbors-based learning methods
 pub mod neighbors;
-pub mod optimization;
+pub(crate) mod optimization;
 /// Supervised tree-based learning methods
 pub mod tree;
@@ -6,16 +6,29 @@ use crate::linalg::Matrix;
 use crate::math::num::FloatExt;
 #[derive(Serialize, Deserialize, Debug)]
-pub enum LinearRegressionSolver {
+pub enum LinearRegressionSolverName {
    QR,
    SVD,
 }
 #[derive(Serialize, Deserialize, Debug)]
 pub struct LinearRegressionParameters {    
    solver: LinearRegressionSolverName,
 }
 #[derive(Serialize, Deserialize, Debug)]
 pub struct LinearRegression<T: FloatExt, M: Matrix<T>> {
    coefficients: M,
    intercept: T,
-    solver: LinearRegressionSolver,
+    solver: LinearRegressionSolverName,
 }
 impl Default for LinearRegressionParameters {
    fn default() -> Self {
        LinearRegressionParameters {            
            solver: LinearRegressionSolverName::SVD
        }
    }
 }
 impl<T: FloatExt, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
@@ -26,7 +39,7 @@ impl<T: FloatExt, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
 }
 impl<T: FloatExt, M: Matrix<T>> LinearRegression<T, M> {
-    pub fn fit(x: &M, y: &M::RowVector, solver: LinearRegressionSolver) -> LinearRegression<T, M> {
+    pub fn fit(x: &M, y: &M::RowVector, parameters: LinearRegressionParameters) -> LinearRegression<T, M> {
        let y_m = M::from_row_vector(y.clone());
        let b = y_m.transpose();
        let (x_nrows, num_attributes) = x.shape();
@@ -38,9 +51,9 @@ impl<T: FloatExt, M: Matrix<T>> LinearRegression<T, M> {
        let a = x.v_stack(&M::ones(x_nrows, 1));
-        let w = match solver {
+        let w = match parameters.solver {
-            LinearRegressionSolver::QR => a.qr_solve_mut(b),
+            LinearRegressionSolverName::QR => a.qr_solve_mut(b),
-            LinearRegressionSolver::SVD => a.svd_solve_mut(b),
+            LinearRegressionSolverName::SVD => a.svd_solve_mut(b),
        };
        let wights = w.slice(0..num_attributes, 0..1);
@@ -48,7 +61,7 @@ impl<T: FloatExt, M: Matrix<T>> LinearRegression<T, M> {
        LinearRegression {
            intercept: w.get(num_attributes, 0),
            coefficients: wights,
-            solver: solver,
+            solver: parameters.solver,
        }
    }
@@ -90,9 +103,9 @@ mod tests {
            114.2, 115.7, 116.9,
        ]);
-        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR).predict(&x);
+        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionParameters{solver: LinearRegressionSolverName::QR}).predict(&x);
-        let y_hat_svd = LinearRegression::fit(&x, &y, LinearRegressionSolver::SVD).predict(&x);
+        let y_hat_svd = LinearRegression::fit(&x, &y, Default::default()).predict(&x);
        assert!(y
            .iter()
@@ -130,9 +143,9 @@ mod tests {
            114.2, 115.7, 116.9,
        ];
-        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR).predict(&x);
+        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionParameters{solver: LinearRegressionSolverName::QR}).predict(&x);
-        let y_hat_svd = LinearRegression::fit(&x, &y, LinearRegressionSolver::SVD).predict(&x);
+        let y_hat_svd = LinearRegression::fit(&x, &y, Default::default()).predict(&x);
        assert!(y
            .iter()
@@ -170,7 +183,7 @@ mod tests {
            114.2, 115.7, 116.9,
        ];
-        let lr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR);
+        let lr = LinearRegression::fit(&x, &y, Default::default());
        let deserialized_lr: LinearRegression<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
@@ -1,2 +1,2 @@
 pub mod distance;
-pub mod num;
+pub(crate) mod num;
@@ -7,44 +7,60 @@ use crate::math::distance::Distance;
 use crate::math::num::FloatExt;
 #[derive(Serialize, Deserialize, Debug)]
 pub struct KNNClassifier<T: FloatExt, D: Distance<Vec<T>, T>> {
    classes: Vec<T>,
    y: Vec<usize>,
    knn_algorithm: KNNAlgorithmV<T, D>,
    k: usize,
 }
 pub enum KNNAlgorithmName {
    LinearSearch,
    CoverTree,
 }
 #[derive(Serialize, Deserialize, Debug)]
-pub enum KNNAlgorithmV<T: FloatExt, D: Distance<Vec<T>, T>> {
+pub struct KNNClassifierParameters {    
    pub algorithm: KNNAlgorithmName,
    pub k: usize
 }
 #[derive(Serialize, Deserialize, Debug)]
 pub struct KNNClassifier<T: FloatExt, D: Distance<Vec<T>, T>> {
    classes: Vec<T>,
    y: Vec<usize>,
    knn_algorithm: KNNAlgorithm<T, D>,
    k: usize,
 }
 #[derive(Serialize, Deserialize, Debug)]
 enum KNNAlgorithm<T: FloatExt, D: Distance<Vec<T>, T>> {
    LinearSearch(LinearKNNSearch<Vec<T>, T, D>),
    CoverTree(CoverTree<Vec<T>, T, D>),
 }
 impl Default for KNNClassifierParameters {
    fn default() -> Self {
        KNNClassifierParameters {            
            algorithm: KNNAlgorithmName::CoverTree,
            k: 3
        }
    }
 }
 impl KNNAlgorithmName {
    fn fit<T: FloatExt, D: Distance<Vec<T>, T>>(
        &self,
        data: Vec<Vec<T>>,
        distance: D,
-    ) -> KNNAlgorithmV<T, D> {
+    ) -> KNNAlgorithm<T, D> {
        match *self {
            KNNAlgorithmName::LinearSearch => {
-                KNNAlgorithmV::LinearSearch(LinearKNNSearch::new(data, distance))
+                KNNAlgorithm::LinearSearch(LinearKNNSearch::new(data, distance))
            }
-            KNNAlgorithmName::CoverTree => KNNAlgorithmV::CoverTree(CoverTree::new(data, distance)),
+            KNNAlgorithmName::CoverTree => KNNAlgorithm::CoverTree(CoverTree::new(data, distance)),
        }
    }
 }
-impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNAlgorithmV<T, D> {
+impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNAlgorithm<T, D> {
    fn find(&self, from: &Vec<T>, k: usize) -> Vec<usize> {
        match *self {
-            KNNAlgorithmV::LinearSearch(ref linear) => linear.find(from, k),
+            KNNAlgorithm::LinearSearch(ref linear) => linear.find(from, k),
-            KNNAlgorithmV::CoverTree(ref cover) => cover.find(from, k),
+            KNNAlgorithm::CoverTree(ref cover) => cover.find(from, k),
        }
    }
 }
@@ -76,9 +92,8 @@ impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
        k: usize,
        distance: D,
-        algorithm: KNNAlgorithmName,
+        parameters: KNNClassifierParameters
    ) -> KNNClassifier<T, D> {
        let y_m = M::from_row_vector(y.clone());
@@ -103,13 +118,13 @@ impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
            )
        );
-        assert!(k > 1, format!("k should be > 1, k=[{}]", k));
+        assert!(parameters.k > 1, format!("k should be > 1, k=[{}]", parameters.k));
        KNNClassifier {
            classes: classes,
            y: yi,
-            k: k,
+            k: parameters.k,
-            knn_algorithm: algorithm.fit(data, distance),
+            knn_algorithm: parameters.algorithm.fit(data, distance),
        }
    }
@@ -153,9 +168,8 @@ mod tests {
        let knn = KNNClassifier::fit(
            &x,
            &y,
            3,
            Distances::euclidian(),
-            KNNAlgorithmName::LinearSearch,
+            KNNClassifierParameters{k: 3, algorithm: KNNAlgorithmName::LinearSearch}            
        );
        let r = knn.predict(&x);
        assert_eq!(5, Vec::len(&r));
@@ -169,10 +183,9 @@ mod tests {
        let knn = KNNClassifier::fit(
            &x,
-            &y,
+            &y,            
            3,
            Distances::euclidian(),
-            KNNAlgorithmName::CoverTree,
+            Default::default()
        );
        let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap();
@@ -5,8 +5,7 @@ pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a;
 pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
 #[derive(Debug, PartialEq)]
-pub enum FunctionOrder {
+pub enum FunctionOrder {    
    FIRST,
    SECOND,
    THIRD,
 }
`@@ -1,2 +1,2 @@`
	`pub mod distance;`	`pub mod distance;`
	`pub mod num;`	`pub(crate) mod num;`