Merge branch 'development' into prdct-prb
@@ -41,4 +41,4 @@ jobs:
       - name: Upload to codecov.io
         uses: codecov/codecov-action@v2
         with:
-          fail_ci_if_error: true
+          fail_ci_if_error: false
@@ -2,7 +2,7 @@
 name = "smartcore"
 description = "Machine Learning in Rust."
 homepage = "https://smartcorelib.org"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["smartcore Developers"]
 edition = "2021"
 license = "Apache-2.0"
@@ -18,4 +18,4 @@
 -----
 [![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)

-To start getting familiar with the new smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
+To start getting familiar with the new smartcore v0.3 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
@@ -49,20 +49,15 @@ pub mod linear_search;
 /// Both, KNN classifier and regressor benefits from underlying search algorithms that helps to speed up queries.
 /// `KNNAlgorithmName` maintains a list of supported search algorithms, see [KNN algorithms](../algorithm/neighbour/index.html)
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub enum KNNAlgorithmName {
     /// Heap Search algorithm, see [`LinearSearch`](../algorithm/neighbour/linear_search/index.html)
     LinearSearch,
     /// Cover Tree Search algorithm, see [`CoverTree`](../algorithm/neighbour/cover_tree/index.html)
+    #[default]
     CoverTree,
 }

-impl Default for KNNAlgorithmName {
-    fn default() -> Self {
-        KNNAlgorithmName::CoverTree
-    }
-}
-
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug)]
 pub(crate) enum KNNAlgorithm<T: Number, D: Distance<Vec<T>>> {
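Several hunks in this merge swap a hand-written `impl Default` block for the derived form; `#[derive(Default)]` on an enum with a `#[default]` variant attribute has been stable since Rust 1.62. A minimal standalone sketch of the pattern (the enum here is illustrative, not a smartcore type):

```rust
// Illustrative only: a hypothetical enum showing the derive-based Default.
#[derive(Debug, Clone, Default)]
pub enum SearchStrategy {
    /// Plain linear scan.
    Linear,
    /// Tree-backed search; marked as the default variant.
    #[default]
    Tree,
}

fn main() {
    // The derived impl returns the #[default] variant.
    assert!(matches!(SearchStrategy::default(), SearchStrategy::Tree));
}
```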
@@ -18,7 +18,7 @@
 //!
 //! Example:
 //!
-//! ```
+//! ```ignore
 //! use smartcore::linalg::basic::matrix::DenseMatrix;
 //! use smartcore::linalg::basic::arrays::Array2;
 //! use smartcore::cluster::dbscan::*;
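For context: the `ignore` attribute on a doctest fence keeps the snippet in the rendered docs but stops `cargo test` from compiling and running it, while `no_run` compiles without executing. A small illustration, not from the smartcore sources:

```rust
/// Doctest fence attributes (illustrative names only):
///
/// ```ignore
/// // rendered in docs, never compiled or run by `cargo test`
/// let labels = dbscan.predict(&x);
/// ```
///
/// ```no_run
/// // compiled by `cargo test`, but not executed
/// let big = vec![0u8; 1_000_000_000];
/// ```
pub fn annotated() {}
```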
@@ -454,8 +454,12 @@ impl<TX: FloatNumber + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
         y: &Y,
         parameters: RandomForestClassifierParameters,
     ) -> Result<RandomForestClassifier<TX, TY, X, Y>, Failed> {
-        let (_, num_attributes) = x.shape();
+        let (x_nrows, num_attributes) = x.shape();
         let y_ncols = y.shape();
+        if x_nrows != y_ncols {
+            return Err(Failed::fit("Number of rows in X should = len(y)"));
+        }
+
         let mut yi: Vec<usize> = vec![0; y_ncols];
         let classes = y.unique();
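With the guard in place, a shape mismatch surfaces as a `Failed` error from `fit` instead of failing deeper in training. A hedged usage sketch, assuming the smartcore 0.3 API shown in this diff (`from_2d_array` returning the matrix directly, parameters implementing `Default`):

```rust
use smartcore::ensemble::random_forest_classifier::RandomForestClassifier;
use smartcore::linalg::basic::matrix::DenseMatrix;

fn main() {
    // 3 rows in X, but only 2 labels: fit should return Err(..).
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 2.0],
        &[3.0, 4.0],
        &[5.0, 6.0],
    ]);
    let y: Vec<u32> = vec![0, 1];

    match RandomForestClassifier::fit(&x, &y, Default::default()) {
        Ok(_) => println!("unexpected: fit succeeded"),
        Err(e) => println!("rejected as expected: {e}"),
    }
}
```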
@@ -710,6 +714,30 @@ mod tests {
         assert!(accuracy(&y, &classifier.predict(&x).unwrap()) >= 0.95);
     }

+    #[test]
+    fn test_random_matrix_with_wrong_rownum() {
+        let x_rand: DenseMatrix<f64> = DenseMatrix::<f64>::rand(21, 200);
+
+        let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
+
+        let fail = RandomForestClassifier::fit(
+            &x_rand,
+            &y,
+            RandomForestClassifierParameters {
+                criterion: SplitCriterion::Gini,
+                max_depth: Option::None,
+                min_samples_leaf: 1,
+                min_samples_split: 2,
+                n_trees: 100,
+                m: Option::None,
+                keep_samples: false,
+                seed: 87,
+            },
+        );
+
+        assert!(fail.is_err());
+    }
+
     #[cfg_attr(
         all(target_arch = "wasm32", not(target_os = "wasi")),
         wasm_bindgen_test::wasm_bindgen_test
@@ -399,6 +399,10 @@ impl<TX: Number + FloatNumber + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
     ) -> Result<RandomForestRegressor<TX, TY, X, Y>, Failed> {
         let (n_rows, num_attributes) = x.shape();

+        if n_rows != y.shape() {
+            return Err(Failed::fit("Number of rows in X should = len(y)"));
+        }
+
         let mtry = parameters
             .m
             .unwrap_or((num_attributes as f64).sqrt().floor() as usize);
@@ -595,6 +599,32 @@ mod tests {
         assert!(mean_absolute_error(&y, &y_hat) < 1.0);
     }

+    #[test]
+    fn test_random_matrix_with_wrong_rownum() {
+        let x_rand: DenseMatrix<f64> = DenseMatrix::<f64>::rand(17, 200);
+
+        let y = vec![
+            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
+            114.2, 115.7, 116.9,
+        ];
+
+        let fail = RandomForestRegressor::fit(
+            &x_rand,
+            &y,
+            RandomForestRegressorParameters {
+                max_depth: Option::None,
+                min_samples_leaf: 1,
+                min_samples_split: 2,
+                n_trees: 1000,
+                m: Option::None,
+                keep_samples: false,
+                seed: 87,
+            },
+        );
+
+        assert!(fail.is_err());
+    }
+
     #[cfg_attr(
         all(target_arch = "wasm32", not(target_os = "wasi")),
         wasm_bindgen_test::wasm_bindgen_test
@@ -30,7 +30,7 @@ pub enum FailedError {
     DecompositionFailed,
     /// Can't solve for x
     SolutionFailed,
-    /// Erro in input
+    /// Error in input parameters
     ParametersError,
 }
@@ -71,19 +71,14 @@ use crate::optimization::line_search::Backtracking;
 use crate::optimization::FunctionOrder;

 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Debug, Clone, Eq, PartialEq, Default)]
 /// Solver options for Logistic regression. Right now only LBFGS solver is supported.
 pub enum LogisticRegressionSolverName {
     /// Limited-memory Broyden–Fletcher–Goldfarb–Shanno method, see [LBFGS paper](http://users.iems.northwestern.edu/~nocedal/lbfgsb.html)
+    #[default]
     LBFGS,
 }

-impl Default for LogisticRegressionSolverName {
-    fn default() -> Self {
-        LogisticRegressionSolverName::LBFGS
-    }
-}
-
 /// Logistic Regression parameters
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
@@ -71,21 +71,16 @@ use crate::numbers::basenum::Number;
 use crate::numbers::realnum::RealNumber;

 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Debug, Clone, Eq, PartialEq, Default)]
 /// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable.
 pub enum RidgeRegressionSolverName {
     /// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html)
+    #[default]
     Cholesky,
     /// SVD decomposition, see [SVD](../../linalg/svd/index.html)
     SVD,
 }

-impl Default for RidgeRegressionSolverName {
-    fn default() -> Self {
-        RidgeRegressionSolverName::Cholesky
-    }
-}
-
 /// Ridge Regression parameters
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
@@ -49,20 +49,15 @@ pub type KNNAlgorithmName = crate::algorithm::neighbour::KNNAlgorithmName;

 /// Weight function that is used to determine estimated value.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub enum KNNWeightFunction {
     /// All k nearest points are weighted equally
+    #[default]
     Uniform,
     /// k nearest points are weighted by the inverse of their distance. Closer neighbors will have a greater influence than neighbors which are further away.
     Distance,
 }

-impl Default for KNNWeightFunction {
-    fn default() -> Self {
-        KNNWeightFunction::Uniform
-    }
-}
-
 impl KNNWeightFunction {
     fn calc_weights(&self, distances: Vec<f64>) -> std::vec::Vec<f64> {
         match *self {
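On the usage side, the derived defaults behave exactly like the removed hand-written impls. A hedged sketch, assuming the smartcore 0.3 module layout shown above (both enums exported from `smartcore::neighbors`):

```rust
use smartcore::neighbors::{KNNAlgorithmName, KNNWeightFunction};

fn main() {
    // Derived Defaults replace the impl blocks deleted in the hunks above.
    let w = KNNWeightFunction::default(); // KNNWeightFunction::Uniform
    let a = KNNAlgorithmName::default();  // KNNAlgorithmName::CoverTree
    println!("{:?} {:?}", w, a);
}
```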
@@ -2,9 +2,13 @@
 //! Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
 //! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module.

+use rand::rngs::SmallRng;
+use rand::{Rng, SeedableRng};
+
 use num_traits::Float;

 use crate::numbers::basenum::Number;
+use crate::rand_custom::get_rng_impl;

 /// Defines real number
 /// <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
@@ -63,8 +67,12 @@ impl RealNumber for f64 {
     }

     fn rand() -> f64 {
-        // TODO: to be implemented, see issue smartcore#214
-        1.0
+        let mut small_rng = get_rng_impl(None);
+
+        let mut rngs: Vec<SmallRng> = (0..3)
+            .map(|_| SmallRng::from_rng(&mut small_rng).unwrap())
+            .collect();
+        rngs[0].gen::<f64>()
     }

     fn two() -> Self {
@@ -108,7 +116,12 @@ impl RealNumber for f32 {
     }

     fn rand() -> f32 {
-        1.0
+        let mut small_rng = get_rng_impl(None);
+
+        let mut rngs: Vec<SmallRng> = (0..3)
+            .map(|_| SmallRng::from_rng(&mut small_rng).unwrap())
+            .collect();
+        rngs[0].gen::<f32>()
     }

     fn two() -> Self {
@@ -149,4 +162,14 @@ mod tests {
     fn f64_from_string() {
         assert_eq!(f64::from_str("1.111111111").unwrap(), 1.111111111)
     }
+
+    #[test]
+    fn f64_rand() {
+        f64::rand();
+    }
+
+    #[test]
+    fn f32_rand() {
+        f32::rand();
+    }
 }
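The new `rand()` replaces the hard-coded `1.0` stub (issue smartcore#214) with a draw from a `SmallRng` seeded via the crate's `get_rng_impl` helper; note the committed code builds three child generators but only samples from the first. A standalone sketch of the same draw using plain `rand` 0.8 (assuming the `small_rng` feature, no smartcore helpers):

```rust
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};

fn rand_f64() -> f64 {
    // Seed a small, fast RNG from OS entropy; smartcore's
    // get_rng_impl(None) plays this role in the diff above.
    let mut seeder = SmallRng::from_entropy();

    // Derive a child generator and take one uniform draw in [0, 1).
    let mut child = SmallRng::from_rng(&mut seeder).unwrap();
    child.gen::<f64>()
}

fn main() {
    println!("{}", rand_f64());
}
```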
@@ -137,16 +137,17 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
         self.classes.as_ref()
     }
     /// Get depth of tree
-    fn depth(&self) -> u16 {
+    pub fn depth(&self) -> u16 {
         self.depth
     }
 }

 /// The function to measure the quality of a split.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub enum SplitCriterion {
     /// [Gini index](../decision_tree_classifier/index.html)
+    #[default]
     Gini,
     /// [Entropy](../decision_tree_classifier/index.html)
     Entropy,
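Making `depth` public lets callers inspect a fitted tree directly. A hedged usage sketch, assuming the 0.3 fit API shown elsewhere in this diff:

```rust
use smartcore::linalg::basic::matrix::DenseMatrix;
use smartcore::tree::decision_tree_classifier::DecisionTreeClassifier;

fn main() {
    // Tiny linearly separable dataset, enough for one split.
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 1.0], &[1.0, 2.0], &[2.0, 1.0], &[2.0, 2.0],
    ]);
    let y: Vec<u32> = vec![0, 0, 1, 1];

    let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
    // depth() is callable from outside the crate now that it is `pub`.
    println!("tree depth: {}", tree.depth());
}
```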
@@ -154,12 +155,6 @@ pub enum SplitCriterion {
     ClassificationError,
 }

-impl Default for SplitCriterion {
-    fn default() -> Self {
-        SplitCriterion::Gini
-    }
-}
-
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 struct Node {
@@ -543,6 +538,10 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
         parameters: DecisionTreeClassifierParameters,
     ) -> Result<DecisionTreeClassifier<TX, TY, X, Y>, Failed> {
         let (x_nrows, num_attributes) = x.shape();
+        if x_nrows != y.shape() {
+            return Err(Failed::fit("Size of x should equal size of y"));
+        }
+
         let samples = vec![1; x_nrows];
         DecisionTreeClassifier::fit_weak_learner(x, y, samples, num_attributes, parameters)
     }
@@ -968,6 +967,17 @@ mod tests {
         );
     }

+    #[test]
+    fn test_random_matrix_with_wrong_rownum() {
+        let x_rand: DenseMatrix<f64> = DenseMatrix::<f64>::rand(21, 200);
+
+        let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
+
+        let fail = DecisionTreeClassifier::fit(&x_rand, &y, Default::default());
+
+        assert!(fail.is_err());
+    }
+
     #[cfg_attr(
         all(target_arch = "wasm32", not(target_os = "wasi")),
         wasm_bindgen_test::wasm_bindgen_test
@@ -18,7 +18,6 @@
 //! Example:
 //!
 //! ```
-//! use rand::thread_rng;
 //! use smartcore::linalg::basic::matrix::DenseMatrix;
 //! use smartcore::tree::decision_tree_regressor::*;
 //!
@@ -422,6 +421,10 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
         parameters: DecisionTreeRegressorParameters,
     ) -> Result<DecisionTreeRegressor<TX, TY, X, Y>, Failed> {
         let (x_nrows, num_attributes) = x.shape();
+        if x_nrows != y.shape() {
+            return Err(Failed::fit("Size of x should equal size of y"));
+        }
+
         let samples = vec![1; x_nrows];
         DecisionTreeRegressor::fit_weak_learner(x, y, samples, num_attributes, parameters)
 }