Merge potential next release v0.4 (#187) Breaking Changes

* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters in dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
2022-10-31 10:44:57 +00:00
parent bb71656137
commit 52eb6ce023
110 changed files with 10327 additions and 9107 deletions
@@ -1,8 +1,11 @@
+// TODO: missing documentation
+
 use crate::{
    api::{Predictor, SupervisedEstimator},
    error::{Failed, FailedError},
-    linalg::Matrix,
-    math::num::RealNumber,
+    linalg::basic::arrays::{Array2, Array1},
+    numbers::realnum::RealNumber,
+    numbers::basenum::Number,
 };

 use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
@@ -10,8 +13,8 @@ use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
 /// Parameters for GridSearchCV
 #[derive(Debug)]
 pub struct GridSearchCVParameters<
-    T: RealNumber,
-    M: Matrix<T>,
+    T: Number,
+    M: Array2<T>,
    C: Clone,
    I: Iterator<Item = C>,
    E: Predictor<M, M::RowVector>,
@@ -29,7 +32,7 @@ pub struct GridSearchCVParameters<

 impl<
        T: RealNumber,
-        M: Matrix<T>,
+        M: Array2<T>,
        C: Clone,
        I: Iterator<Item = C>,
        E: Predictor<M, M::RowVector>,
@@ -51,7 +54,7 @@ impl<
 }
 /// Exhaustive search over specified parameter values for an estimator.
 #[derive(Debug)]
-pub struct GridSearchCV<T: RealNumber, M: Matrix<T>, C: Clone, E: Predictor<M, M::RowVector>> {
+pub struct GridSearchCV<T: RealNumber, M: Array2<T>, C: Clone, E: Predictor<M, M::RowVector>> {
    _phantom: std::marker::PhantomData<(T, M)>,
    predictor: E,
    /// Cross validation results.
@@ -60,7 +63,7 @@ pub struct GridSearchCV<T: RealNumber, M: Matrix<T>, C: Clone, E: Predictor<M, M
    pub best_parameter: C,
 }

-impl<T: RealNumber, M: Matrix<T>, E: Predictor<M, M::RowVector>, C: Clone>
+impl<T: RealNumber, M: Array2<T>, E: Predictor<M, M::RowVector>, C: Clone>
    GridSearchCV<T, M, C, E>
 {
    ///  Search for the best estimator by testing all possible combinations with cross-validation using given metric.
@@ -130,7 +133,7 @@ impl<T: RealNumber, M: Matrix<T>, E: Predictor<M, M::RowVector>, C: Clone>

 impl<
        T: RealNumber,
-        M: Matrix<T>,
+        M: Array2<T>,
        C: Clone,
        I: Iterator<Item = C>,
        E: Predictor<M, M::RowVector>,
@@ -149,7 +152,7 @@ impl<
    }
 }

-impl<T: RealNumber, M: Matrix<T>, C: Clone, E: Predictor<M, M::RowVector>>
+impl<T: RealNumber, M: Array2<T>, C: Clone, E: Predictor<M, M::RowVector>>
    Predictor<M, M::RowVector> for GridSearchCV<T, M, C, E>
 {
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
@@ -1,11 +1,11 @@
 //! # KFold
 //!
 //! Defines k-fold cross validator.
+use std::fmt::{Debug, Display};

-use crate::linalg::Matrix;
-use crate::math::num::RealNumber;
+use crate::linalg::basic::arrays::Array2;
 use crate::model_selection::BaseKFold;
-use crate::rand::get_rng_impl;
+use crate::rand_custom::get_rng_impl;
 use rand::seq::SliceRandom;

 /// K-Folds cross-validator
@@ -20,7 +20,10 @@ pub struct KFold {
 }

 impl KFold {
-    fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>> {
+    fn test_indices<T: Debug + Display + Copy + Sized, M: Array2<T>>(
+        &self,
+        x: &M,
+    ) -> Vec<Vec<usize>> {
        // number of samples (rows) in the matrix
        let n_samples: usize = x.shape().0;

@@ -51,7 +54,7 @@ impl KFold {
        return_values
    }

-    fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>> {
+    fn test_masks<T: Debug + Display + Copy + Sized, M: Array2<T>>(&self, x: &M) -> Vec<Vec<bool>> {
        let mut return_values: Vec<Vec<bool>> = Vec::with_capacity(self.n_splits);
        for test_index in self.test_indices(x).drain(..) {
            // init mask
@@ -71,7 +74,7 @@ impl Default for KFold {
        KFold {
            n_splits: 3,
            shuffle: true,
-            seed: None,
+            seed: Option::None,
        }
    }
 }
@@ -134,7 +137,7 @@ impl BaseKFold for KFold {
        self.n_splits
    }

-    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output {
+    fn split<T: Debug + Display + Copy + Sized, M: Array2<T>>(&self, x: &M) -> Self::Output {
        if self.n_splits < 2 {
            panic!("Number of splits is too small: {}", self.n_splits);
        }
@@ -154,7 +157,7 @@ impl BaseKFold for KFold {
 mod tests {

    use super::*;
-    use crate::linalg::naive::dense_matrix::*;
+    use crate::linalg::basic::matrix::DenseMatrix;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
@@ -162,7 +165,7 @@ mod tests {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
-            seed: None,
+            seed: Option::None,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(33, 100);
        let test_indices = k.test_indices(&x);
@@ -178,7 +181,7 @@ mod tests {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
-            seed: None,
+            seed: Option::None,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(34, 100);
        let test_indices = k.test_indices(&x);
@@ -194,7 +197,7 @@ mod tests {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
-            seed: None,
+            seed: Option::None,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let test_masks = k.test_masks(&x);
@@ -221,7 +224,7 @@ mod tests {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
-            seed: None,
+            seed: Option::None,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let train_test_splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
@@ -254,7 +257,7 @@ mod tests {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
-            seed: None,
+            seed: Option::None,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
@@ -10,9 +10,9 @@
 //! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
 //!
 //! ```
-//! use crate::smartcore::linalg::BaseMatrix;
-//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
+//! use smartcore::linalg::basic::matrix::DenseMatrix;
 //! use smartcore::model_selection::train_test_split;
+//! use smartcore::linalg::basic::arrays::Array;
 //!
 //! //Iris data
 //! let x = DenseMatrix::from_2d_array(&[
@@ -55,10 +55,12 @@
 //! The simplest way to run cross-validation is to use the [cross_validate](./fn.cross_validate.html) helper function on your estimator and the dataset.
 //!
 //! ```
-//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
+//! use smartcore::linalg::basic::matrix::DenseMatrix;
 //! use smartcore::model_selection::{KFold, cross_validate};
 //! use smartcore::metrics::accuracy;
 //! use smartcore::linear::logistic_regression::LogisticRegression;
+//! use smartcore::api::SupervisedEstimator;
+//! use smartcore::linalg::basic::arrays::Array;
 //!
 //! //Iris data
 //! let x = DenseMatrix::from_2d_array(&[
@@ -83,17 +85,18 @@
 //!           &[6.6, 2.9, 4.6, 1.3],
 //!           &[5.2, 2.7, 3.9, 1.4],
 //!           ]);
-//! let y: Vec<f64> = vec![
-//!           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
+//! let y: Vec<i32> = vec![
+//!           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 //! ];
 //!
 //! let cv = KFold::default().with_n_splits(3);
 //!
-//! let results = cross_validate(LogisticRegression::fit,   //estimator
-//!                                 &x, &y,                 //data
-//!                                 &Default::default(),     //hyperparameters
-//!                                 &cv,                     //cross validation split
-//!                                 &accuracy).unwrap();    //metric
+//! let results = cross_validate(
+//!     LogisticRegression::new(),   //estimator
+//!     &x, &y,                 //data
+//!     Default::default(),     //hyperparameters
+//!     &cv,                     //cross validation split
+//!     &accuracy).unwrap();    //metric
 //!
 //! println!("Training accuracy: {}, test accuracy: {}",
 //!     results.mean_train_score(), results.mean_test_score());
@@ -102,18 +105,22 @@
 //! The function [cross_val_predict](./fn.cross_val_predict.html) has a similar interface to `cross_validate`,
 //! but instead of test error it calculates predictions for all samples in the test set.

-use crate::api::Predictor;
-use crate::error::Failed;
-use crate::linalg::BaseVector;
-use crate::linalg::Matrix;
-use crate::math::num::RealNumber;
-use crate::rand::get_rng_impl;
 use rand::seq::SliceRandom;
+use std::fmt::{Debug, Display};

-pub(crate) mod hyper_tuning;
+#[allow(unused_imports)]
+use crate::api::{Predictor, SupervisedEstimator};
+use crate::error::Failed;
+use crate::linalg::basic::arrays::{Array1, Array2};
+use crate::numbers::basenum::Number;
+use crate::numbers::realnum::RealNumber;
+use crate::rand_custom::get_rng_impl;
+
+// TODO: fix this module
+// pub(crate) mod hyper_tuning;
 pub(crate) mod kfold;

-pub use hyper_tuning::{GridSearchCV, GridSearchCVParameters};
+// pub use hyper_tuning::{GridSearchCV, GridSearchCVParameters};
 pub use kfold::{KFold, KFoldIter};

 /// An interface for the K-Folds cross-validator
@@ -122,7 +129,7 @@ pub trait BaseKFold {
    type Output: Iterator<Item = (Vec<usize>, Vec<usize>)>;
    /// Return a tuple containing the training set indices for that split and
    /// the testing set indices for that split.
-    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output;
+    fn split<T: Number, X: Array2<T>>(&self, x: &X) -> Self::Output;
    /// Returns the number of splits
    fn n_splits(&self) -> usize;
 }
@@ -132,19 +139,23 @@ pub trait BaseKFold {
 /// * `y` - target values, should be of size _N_
 /// * `test_size`, (0, 1] - the proportion of the dataset to include in the test split.
 /// * `shuffle`, - whether or not to shuffle the data before splitting
-/// * `seed` - Controls the shuffling applied to the data before applying the split. Pass an int for reproducible output across multiple function calls
-pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
-    x: &M,
-    y: &M::RowVector,
+pub fn train_test_split<
+    TX: Debug + Display + Copy + Sized,
+    TY: Debug + Display + Copy + Sized,
+    X: Array2<TX>,
+    Y: Array1<TY>,
+>(
+    x: &X,
+    y: &Y,
    test_size: f32,
    shuffle: bool,
    seed: Option<u64>,
-) -> (M, M, M::RowVector, M::RowVector) {
-    if x.shape().0 != y.len() {
+) -> (X, X, Y, Y) {
+    if x.shape().0 != y.shape() {
        panic!(
            "x and y should have the same number of samples. |x|: {}, |y|: {}",
            x.shape().0,
-            y.len()
+            y.shape()
        );
    }
    let mut rng = get_rng_impl(seed);
@@ -153,7 +164,7 @@ pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
        panic!("test_size should be between 0 and 1");
    }

-    let n = y.len();
+    let n = y.shape();

    let n_test = ((n as f32) * test_size) as usize;

@@ -177,21 +188,29 @@ pub fn train_test_split<T: RealNumber, M: Matrix<T>>(

 /// Cross validation results.
 #[derive(Clone, Debug)]
-pub struct CrossValidationResult<T: RealNumber> {
+pub struct CrossValidationResult {
    /// Vector with test scores on each cv split
-    pub test_score: Vec<T>,
+    pub test_score: Vec<f64>,
    /// Vector with training scores on each cv split
-    pub train_score: Vec<T>,
+    pub train_score: Vec<f64>,
 }

-impl<T: RealNumber> CrossValidationResult<T> {
+impl CrossValidationResult {
    /// Average test score
-    pub fn mean_test_score(&self) -> T {
-        self.test_score.sum() / T::from_usize(self.test_score.len()).unwrap()
+    pub fn mean_test_score(&self) -> f64 {
+        let mut sum = 0f64;
+        for s in self.test_score.iter() {
+            sum += *s;
+        }
+        sum / self.test_score.len() as f64
    }
    /// Average training score
-    pub fn mean_train_score(&self) -> T {
-        self.train_score.sum() / T::from_usize(self.train_score.len()).unwrap()
+    pub fn mean_train_score(&self) -> f64 {
+        let mut sum = 0f64;
+        for s in self.train_score.iter() {
+            sum += *s;
+        }
+        sum / self.train_score.len() as f64
    }
 }

@@ -202,26 +221,27 @@ impl<T: RealNumber> CrossValidationResult<T> {
 /// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
 /// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
 /// * `score` - a metric to use for evaluation, see [metrics](../metrics/index.html)
-pub fn cross_validate<T, M, H, E, K, F, S>(
-    fit_estimator: F,
-    x: &M,
-    y: &M::RowVector,
-    parameters: &H,
+pub fn cross_validate<TX, TY, X, Y, H, E, K, S>(
+    _estimator: E, // just an empty placeholder to allow passing `fit()`
+    x: &X,
+    y: &Y,
+    parameters: H,
    cv: &K,
-    score: S,
-) -> Result<CrossValidationResult<T>, Failed>
+    score: &S,
+) -> Result<CrossValidationResult, Failed>
 where
-    T: RealNumber,
-    M: Matrix<T>,
+    TX: Number + RealNumber,
+    TY: Number,
+    X: Array2<TX>,
+    Y: Array1<TY>,
    H: Clone,
-    E: Predictor<M, M::RowVector>,
    K: BaseKFold,
-    F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
-    S: Fn(&M::RowVector, &M::RowVector) -> T,
+    E: SupervisedEstimator<X, Y, H>,
+    S: Fn(&Y, &Y) -> f64,
 {
    let k = cv.n_splits();
-    let mut test_score = Vec::with_capacity(k);
-    let mut train_score = Vec::with_capacity(k);
+    let mut test_score: Vec<f64> = Vec::with_capacity(k);
+    let mut train_score: Vec<f64> = Vec::with_capacity(k);

    for (train_idx, test_idx) in cv.split(x) {
        let train_x = x.take(&train_idx, 0);
@@ -229,10 +249,12 @@ where
        let test_x = x.take(&test_idx, 0);
        let test_y = y.take(&test_idx);

-        let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
+        // NOTE: only the estimator's type ("class") is used here; the value passed in gets dropped
+        let computed =
+            <E as SupervisedEstimator<X, Y, H>>::fit(&train_x, &train_y, parameters.clone())?;

-        train_score.push(score(&train_y, &estimator.predict(&train_x)?));
-        test_score.push(score(&test_y, &estimator.predict(&test_x)?));
+        train_score.push(score(&train_y, &computed.predict(&train_x)?));
+        test_score.push(score(&test_y, &computed.predict(&test_x)?));
    }

    Ok(CrossValidationResult {
@@ -248,33 +270,35 @@ where
 /// * `y` - target values, should be of size _N_
 /// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
 /// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
-pub fn cross_val_predict<T, M, H, E, K, F>(
-    fit_estimator: F,
-    x: &M,
-    y: &M::RowVector,
+pub fn cross_val_predict<TX, TY, X, Y, H, E, K>(
+    _estimator: E, // just an empty placeholder to allow passing `fit()`
+    x: &X,
+    y: &Y,
    parameters: H,
-    cv: K,
-) -> Result<M::RowVector, Failed>
+    cv: &K,
+) -> Result<Y, Failed>
 where
-    T: RealNumber,
-    M: Matrix<T>,
+    TX: Number,
+    TY: Number,
+    X: Array2<TX>,
+    Y: Array1<TY>,
    H: Clone,
-    E: Predictor<M, M::RowVector>,
    K: BaseKFold,
-    F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
+    E: SupervisedEstimator<X, Y, H>,
 {
-    let mut y_hat = M::RowVector::zeros(y.len());
+    let mut y_hat = Y::zeros(y.shape());

    for (train_idx, test_idx) in cv.split(x) {
        let train_x = x.take(&train_idx, 0);
        let train_y = y.take(&train_idx);
        let test_x = x.take(&test_idx, 0);

-        let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
+        let computed =
+            <E as SupervisedEstimator<X, Y, H>>::fit(&train_x, &train_y, parameters.clone())?;

-        let y_test_hat = estimator.predict(&test_x)?;
+        let y_test_hat = computed.predict(&test_x)?;
        for (i, &idx) in test_idx.iter().enumerate() {
-            y_hat.set(idx, y_test_hat.get(i));
+            y_hat.set(idx, *y_test_hat.get(i));
        }
    }

@@ -285,10 +309,17 @@ where
 mod tests {

    use super::*;
-    use crate::linalg::naive::dense_matrix::*;
+    use crate::algorithm::neighbour::KNNAlgorithmName;
+    use crate::api::NoParameters;
+    use crate::linalg::basic::arrays::Array;
+    use crate::linalg::basic::matrix::DenseMatrix;
+    use crate::linear::logistic_regression::LogisticRegression;
+    use crate::metrics::distance::Distances;
    use crate::metrics::{accuracy, mean_absolute_error};
+    use crate::model_selection::cross_validate;
    use crate::model_selection::kfold::KFold;
-    use crate::neighbors::knn_regressor::KNNRegressor;
+    use crate::neighbors::knn_regressor::{KNNRegressor, KNNRegressorParameters};
+    use crate::neighbors::KNNWeightFunction;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
@@ -312,31 +343,33 @@ mod tests {
    }

    #[derive(Clone)]
-    struct NoParameters {}
+    struct BiasedParameters {}
+    impl NoParameters for BiasedParameters {}

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn test_cross_validate_biased() {
        struct BiasedEstimator {}

-        impl BiasedEstimator {
-            fn fit<M: Matrix<f32>>(
-                _: &M,
-                _: &M::RowVector,
-                _: NoParameters,
-            ) -> Result<BiasedEstimator, Failed> {
+        impl<X: Array2<f32>, Y: Array1<u32>, P: NoParameters> SupervisedEstimator<X, Y, P>
+            for BiasedEstimator
+        {
+            fn new() -> Self {
+                Self {}
+            }
+            fn fit(_: &X, _: &Y, _: P) -> Result<BiasedEstimator, Failed> {
                Ok(BiasedEstimator {})
            }
        }

-        impl<M: Matrix<f32>> Predictor<M, M::RowVector> for BiasedEstimator {
-            fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
+        impl<X: Array2<f32>, Y: Array1<u32>> Predictor<X, Y> for BiasedEstimator {
+            fn predict(&self, x: &X) -> Result<Y, Failed> {
                let (n, _) = x.shape();
-                Ok(M::RowVector::zeros(n))
+                Ok(Y::zeros(n))
            }
        }

-        let x = DenseMatrix::from_2d_array(&[
+        let x: DenseMatrix<f32> = DenseMatrix::from_2d_array(&[
            &[5.1, 3.5, 1.4, 0.2],
            &[4.9, 3.0, 1.4, 0.2],
            &[4.7, 3.2, 1.3, 0.2],
@@ -358,9 +391,7 @@ mod tests {
            &[6.6, 2.9, 4.6, 1.3],
            &[5.2, 2.7, 3.9, 1.4],
        ]);
-        let y = vec![
-            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
-        ];
+        let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];

        let cv = KFold {
            n_splits: 5,
@@ -368,10 +399,10 @@ mod tests {
        };

        let results = cross_validate(
-            BiasedEstimator::fit,
+            BiasedEstimator {},
            &x,
            &y,
-            &NoParameters {},
+            BiasedParameters {},
            &cv,
            &accuracy,
        )
@@ -413,10 +444,10 @@ mod tests {
        };

        let results = cross_validate(
-            KNNRegressor::fit,
+            KNNRegressor::new(),
            &x,
            &y,
-            &Default::default(),
+            Default::default(),
            &cv,
            &mean_absolute_error,
        )
@@ -429,7 +460,7 @@ mod tests {
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn test_cross_val_predict_knn() {
-        let x = DenseMatrix::from_2d_array(&[
+        let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
            &[234.289, 235.6, 159., 107.608, 1947., 60.323],
            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
@@ -447,18 +478,69 @@ mod tests {
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
-        let y = vec![
+        let y: Vec<f64> = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];

-        let cv = KFold {
+        let cv: KFold = KFold {
            n_splits: 2,
            ..KFold::default()
        };

-        let y_hat = cross_val_predict(KNNRegressor::fit, &x, &y, Default::default(), cv).unwrap();
+        let y_hat: Vec<f64> = cross_val_predict(
+            KNNRegressor::new(),
+            &x,
+            &y,
+            KNNRegressorParameters::default()
+                .with_k(3)
+                .with_distance(Distances::euclidian())
+                .with_algorithm(KNNAlgorithmName::LinearSearch)
+                .with_weight(KNNWeightFunction::Distance),
+            &cv,
+        )
+        .unwrap();

        assert!(mean_absolute_error(&y, &y_hat) < 10.0);
    }
+
+    #[test]
+    fn test_cross_validation_accuracy() {
+        let x = DenseMatrix::from_2d_array(&[
+            &[5.1, 3.5, 1.4, 0.2],
+            &[4.9, 3.0, 1.4, 0.2],
+            &[4.7, 3.2, 1.3, 0.2],
+            &[4.6, 3.1, 1.5, 0.2],
+            &[5.0, 3.6, 1.4, 0.2],
+            &[5.4, 3.9, 1.7, 0.4],
+            &[4.6, 3.4, 1.4, 0.3],
+            &[5.0, 3.4, 1.5, 0.2],
+            &[4.4, 2.9, 1.4, 0.2],
+            &[4.9, 3.1, 1.5, 0.1],
+            &[7.0, 3.2, 4.7, 1.4],
+            &[6.4, 3.2, 4.5, 1.5],
+            &[6.9, 3.1, 4.9, 1.5],
+            &[5.5, 2.3, 4.0, 1.3],
+            &[6.5, 2.8, 4.6, 1.5],
+            &[5.7, 2.8, 4.5, 1.3],
+            &[6.3, 3.3, 4.7, 1.6],
+            &[4.9, 2.4, 3.3, 1.0],
+            &[6.6, 2.9, 4.6, 1.3],
+            &[5.2, 2.7, 3.9, 1.4],
+        ]);
+        let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
+
+        let cv = KFold::default().with_n_splits(3);
+
+        let results = cross_validate(
+            LogisticRegression::new(),
+            &x,
+            &y,
+            Default::default(),
+            &cv,
+            &accuracy,
+        )
+        .unwrap();
+        println!("{:?}", results);
+    }
 }