Merge pull request #43 from smartcorelib/kfold

Kfold
2020-12-24 15:01:32 -08:00
parent 40dfca702e d22be7d6ae
commit a69fb3aada
37 changed files with 1261 additions and 416 deletions
@@ -6,6 +6,7 @@
 //! use smartcore::algorithm::neighbour::cover_tree::*;
 //! use smartcore::math::distance::Distance;
 //!
 //! #[derive(Clone)]
 //! struct SimpleDistance {} // Our distance function
 //!
 //! impl Distance<i32, f64> for SimpleDistance {
@@ -453,7 +454,7 @@ mod tests {
    use super::*;
    use crate::math::distance::Distances;
-    #[derive(Debug, Serialize, Deserialize)]
+    #[derive(Debug, Serialize, Deserialize, Clone)]
    struct SimpleDistance {}
    impl Distance<i32, f64> for SimpleDistance {
@@ -5,6 +5,7 @@
 //! use smartcore::algorithm::neighbour::linear_search::*;
 //! use smartcore::math::distance::Distance;
 //!
 //! #[derive(Clone)]
 //! struct SimpleDistance {} // Our distance function
 //!
 //! impl Distance<i32, f64> for SimpleDistance {
@@ -137,6 +138,7 @@ mod tests {
    use super::*;
    use crate::math::distance::Distances;
    #[derive(Debug, Serialize, Deserialize, Clone)]
    struct SimpleDistance {}
    impl Distance<i32, f64> for SimpleDistance {
@@ -0,0 +1,10 @@
 //! # Common Interfaces and methods
 //!
 //! This module consolidates the interfaces and the uniform basic API that are used elsewhere in the code.
 use crate::error::Failed;
 /// Implements method predict that offers a way to estimate target value from new data
 ///
 /// `X` is the type of the input data handed to `predict` and `Y` is the type of
 /// the estimated target values it returns.
 pub trait Predictor<X, Y> {
    /// Estimates target values for the data in `x`.
    ///
    /// Returns the estimates on success and a `Failed` error otherwise.
    fn predict(&self, x: &X) -> Result<Y, Failed>;
 }
@@ -15,11 +15,9 @@
 //! let blobs = generator::make_blobs(100, 2, 3);
 //! let x = DenseMatrix::from_vec(blobs.num_samples, blobs.num_features, &blobs.data);
 //! // Fit the algorithm and predict cluster labels
-//! let labels = DBSCAN::fit(&x, Distances::euclidian(), DBSCANParameters{
+//! let labels = DBSCAN::fit(&x, Distances::euclidian(),
-//!     min_samples: 5,
+//!     DBSCANParameters::default().with_eps(3.0)).
-//!     eps: 3.0,
+//!     and_then(|dbscan| dbscan.predict(&x));
-//!     algorithm: KNNAlgorithmName::CoverTree
-//! }).and_then(|dbscan| dbscan.predict(&x));
 //!
 //! println!("{:?}", labels);
 //! ```
@@ -53,14 +51,32 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
 #[derive(Debug, Clone)]
 /// DBSCAN clustering algorithm parameters
 pub struct DBSCANParameters<T: RealNumber> {
-    /// Maximum number of iterations of the k-means algorithm for a single run.
+    /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
    pub min_samples: usize,
-    /// The number of samples in a neighborhood for a point to be considered as a core point.
+    /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
    pub eps: T,
    /// KNN algorithm to use.
    pub algorithm: KNNAlgorithmName,
 }
 impl<T: RealNumber> DBSCANParameters<T> {
    /// Returns these parameters with `min_samples` replaced.
    ///
    /// `min_samples` is the number of samples (or total weight) in a neighborhood
    /// for a point to be considered as a core point.
    pub fn with_min_samples(self, min_samples: usize) -> Self {
        Self {
            min_samples,
            ..self
        }
    }
    /// Returns these parameters with `eps` replaced.
    ///
    /// `eps` is the maximum distance between two samples for one to be considered
    /// as in the neighborhood of the other.
    pub fn with_eps(self, eps: T) -> Self {
        Self { eps, ..self }
    }
    /// Returns these parameters with the KNN `algorithm` replaced.
    pub fn with_algorithm(self, algorithm: KNNAlgorithmName) -> Self {
        Self { algorithm, ..self }
    }
 }
 impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for DBSCAN<T, D> {
    fn eq(&self, other: &Self) -> bool {
        self.cluster_labels.len() == other.cluster_labels.len()
@@ -105,6 +105,14 @@ pub struct KMeansParameters {
    pub max_iter: usize,
 }
 impl KMeansParameters {
    /// Returns these parameters with `max_iter` replaced.
    ///
    /// `max_iter` is the maximum number of iterations of the k-means algorithm for a single run.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        let mut updated = self;
        updated.max_iter = max_iter;
        updated
    }
 }
 impl Default for KMeansParameters {
    fn default() -> Self {
        KMeansParameters { max_iter: 100 }
@@ -88,6 +88,15 @@ pub struct PCAParameters {
    pub use_correlation_matrix: bool,
 }
 impl PCAParameters {
    /// Returns these parameters with `use_correlation_matrix` replaced.
    ///
    /// By default, covariance matrix is used to compute principal components.
    /// Pass `true` if you want to use correlation matrix instead.
    pub fn with_use_correlation_matrix(self, use_correlation_matrix: bool) -> Self {
        let mut updated = self;
        updated.use_correlation_matrix = use_correlation_matrix;
        updated
    }
 }
 impl Default for PCAParameters {
    fn default() -> Self {
        PCAParameters {
@@ -9,7 +9,7 @@
 //!
 //! ```
 //! use smartcore::linalg::naive::dense_matrix::*;
-//! use smartcore::ensemble::random_forest_classifier::*;
+//! use smartcore::ensemble::random_forest_classifier::RandomForestClassifier;
 //!
 //! // Iris dataset
 //! let x = DenseMatrix::from_2d_array(&[
@@ -51,6 +51,7 @@ use std::fmt::Debug;
 use rand::Rng;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
@@ -84,6 +85,39 @@ pub struct RandomForestClassifier<T: RealNumber> {
    classes: Vec<T>,
 }
 impl RandomForestClassifierParameters {
    /// Returns these parameters with the split `criterion` replaced.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_criterion(self, criterion: SplitCriterion) -> Self {
        Self { criterion, ..self }
    }
    /// Returns these parameters with the tree max depth set to `Some(max_depth)`.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// Returns these parameters with the minimum number of samples required to be at a leaf node replaced.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// Returns these parameters with the minimum number of samples required to split an internal node replaced.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
    /// Returns these parameters with the number of trees in the forest replaced.
    pub fn with_n_trees(self, n_trees: u16) -> Self {
        Self { n_trees, ..self }
    }
    /// Returns these parameters with the number of random sample of predictors
    /// to use as split candidates set to `Some(m)`.
    pub fn with_m(self, m: usize) -> Self {
        Self { m: Some(m), ..self }
    }
 }
 impl<T: RealNumber> PartialEq for RandomForestClassifier<T> {
    fn eq(&self, other: &Self) -> bool {
        if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() {
@@ -117,6 +151,12 @@ impl Default for RandomForestClassifierParameters {
    }
 }
 /// Exposes `RandomForestClassifier` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestClassifier<T> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber> RandomForestClassifier<T> {
    /// Build a forest of trees from the training set.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -49,6 +49,7 @@ use std::fmt::Debug;
 use rand::Rng;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
@@ -79,6 +80,34 @@ pub struct RandomForestRegressor<T: RealNumber> {
    trees: Vec<DecisionTreeRegressor<T>>,
 }
 impl RandomForestRegressorParameters {
    // NOTE(review): the links below point at the Decision Tree *Classifier* page even though
    // these are regressor parameters; confirm whether the regressor page was intended.

    /// Returns these parameters with the tree max depth set to `Some(max_depth)`.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// Returns these parameters with the minimum number of samples required to be at a leaf node replaced.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// Returns these parameters with the minimum number of samples required to split an internal node replaced.
    /// See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
    /// Returns these parameters with the number of trees in the forest replaced.
    pub fn with_n_trees(self, n_trees: usize) -> Self {
        Self { n_trees, ..self }
    }
    /// Returns these parameters with the number of random sample of predictors
    /// to use as split candidates set to `Some(m)`.
    pub fn with_m(self, m: usize) -> Self {
        Self { m: Some(m), ..self }
    }
 }
 impl Default for RandomForestRegressorParameters {
    fn default() -> Self {
        RandomForestRegressorParameters {
@@ -106,6 +135,12 @@ impl<T: RealNumber> PartialEq for RandomForestRegressor<T> {
    }
 }
 /// Exposes `RandomForestRegressor` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestRegressor<T> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber> RandomForestRegressor<T> {
    /// Build a forest of trees from the training set.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -63,7 +63,7 @@
 //! let y = vec![2., 2., 2., 3., 3.];
 //!
 //! // Train classifier
-//! let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
 //!
 //! // Predict classes
 //! let y_hat = knn.predict(&x).unwrap();
@@ -71,6 +71,7 @@
 /// Various algorithms and helper methods that are used elsewhere in SmartCore
 pub mod algorithm;
 pub(crate) mod base;
 /// Algorithms for clustering of unlabeled data
 pub mod cluster;
 /// Various datasets
@@ -274,6 +274,19 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
    /// Copies content of `other` vector.
    fn copy_from(&mut self, other: &Self);
    /// Builds a new vector from the elements of `self` found at the positions in `index`.
    ///
    /// The result has `index.len()` elements; element `i` is `self.get(index[i])`,
    /// so positions may repeat and appear in any order.
    fn take(&self, index: &[usize]) -> Self {
        let mut taken = Self::zeros(index.len());
        index
            .iter()
            .enumerate()
            .for_each(|(slot, &source)| taken.set(slot, self.get(source)));
        taken
    }
 }
 /// Generic matrix type.
@@ -611,6 +624,32 @@ pub trait BaseMatrix<T: RealNumber>: Clone + Debug {
    /// Calculates the covariance matrix
    fn cov(&self) -> Self;
    /// Builds a new matrix from the rows or columns of `self` listed in `index`.
    ///
    /// With `axis == 0` the entries of `index` are row numbers and the result has
    /// `index.len()` rows; with any other `axis` value they are column numbers and the
    /// result has `index.len()` columns. Entries may repeat and appear in any order.
    fn take(&self, index: &[usize], axis: u8) -> Self {
        let (n_rows, n_cols) = self.shape();
        if axis == 0 {
            // Row selection: copy every column of each requested row.
            let mut picked = Self::zeros(index.len(), n_cols);
            for (slot, &row) in index.iter().enumerate() {
                for col in 0..n_cols {
                    picked.set(slot, col, self.get(row, col));
                }
            }
            picked
        } else {
            // Column selection: copy every row of each requested column.
            let mut picked = Self::zeros(n_rows, index.len());
            for (slot, &col) in index.iter().enumerate() {
                for row in 0..n_rows {
                    picked.set(row, slot, self.get(row, col));
                }
            }
            picked
        }
    }
 }
 /// Generic matrix with additional mixins like various factorization methods.
@@ -662,6 +701,8 @@ impl<'a, T: RealNumber, M: BaseMatrix<T>> Iterator for RowIter<'a, T, M> {
 #[cfg(test)]
 mod tests {
    use crate::linalg::naive::dense_matrix::DenseMatrix;
    use crate::linalg::BaseMatrix;
    use crate::linalg::BaseVector;
    #[test]
@@ -684,4 +725,35 @@ mod tests {
        assert!((m.var() - 1.25f64).abs() < std::f64::EPSILON);
    }
    #[test]
    fn vec_take() {
        // Repeated positions are allowed and come back in the requested order.
        let source = vec![1., 2., 3., 4., 5.];
        let picked = source.take(&[0, 0, 4, 4]);
        assert_eq!(picked, vec![1., 1., 5., 5.]);
    }
    #[test]
    fn take() {
        let source = DenseMatrix::from_2d_array(&[
            &[1.0, 2.0],
            &[3.0, 4.0],
            &[5.0, 6.0],
            &[7.0, 8.0],
            &[9.0, 10.0],
        ]);

        // axis 0 selects rows; row 1 is requested twice.
        let wanted_rows = DenseMatrix::from_2d_array(&[&[3.0, 4.0], &[3.0, 4.0], &[7.0, 8.0]]);
        let rows = source.take(&[1, 1, 3], 0);
        assert_eq!(rows, wanted_rows);

        // axis 1 selects columns; here the two columns are swapped.
        let wanted_columns = DenseMatrix::from_2d_array(&[
            &[2.0, 1.0],
            &[4.0, 3.0],
            &[6.0, 5.0],
            &[8.0, 7.0],
            &[10.0, 9.0],
        ]);
        let columns = source.take(&[1, 0], 1);
        assert_eq!(columns, wanted_columns);
    }
 }
@@ -36,7 +36,7 @@
 //!             1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
 //!         ]);
 //!
-//! let lr = LogisticRegression::fit(&x, &y).unwrap();
+//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
 //! let y_hat = lr.predict(&x).unwrap();
 //! ```
 use std::iter::Sum;
@@ -917,7 +917,7 @@ mod tests {
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        ]);
-        let lr = LogisticRegression::fit(&x, &y).unwrap();
+        let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
        let y_hat = lr.predict(&x).unwrap();
@@ -58,6 +58,7 @@ use std::fmt::Debug;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
@@ -66,7 +67,7 @@ use crate::math::num::RealNumber;
 use crate::linear::lasso_optimizer::InteriorPointOptimizer;
 /// Elastic net parameters
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct ElasticNetParameters<T: RealNumber> {
    /// Regularization parameter.
    pub alpha: T,
@@ -89,6 +90,36 @@ pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
    intercept: T,
 }
 impl<T: RealNumber> ElasticNetParameters<T> {
    /// Returns these parameters with the regularization parameter `alpha` replaced.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Returns these parameters with the elastic net mixing parameter replaced, with 0 <= l1_ratio <= 1.
    /// For l1_ratio = 0 the penalty is an L2 penalty.
    /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
    pub fn with_l1_ratio(self, l1_ratio: T) -> Self {
        Self { l1_ratio, ..self }
    }
    /// Returns these parameters with `normalize` replaced.
    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
    /// Returns these parameters with the tolerance for the optimization replaced.
    pub fn with_tol(self, tol: T) -> Self {
        Self { tol, ..self }
    }
    /// Returns these parameters with the maximum number of iterations replaced.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
 }
 impl<T: RealNumber> Default for ElasticNetParameters<T> {
    fn default() -> Self {
        ElasticNetParameters {
@@ -108,6 +139,12 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for ElasticNet<T, M> {
    }
 }
 /// Exposes `ElasticNet` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for ElasticNet<T, M> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
    /// Fits elastic net regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -26,6 +26,7 @@ use std::fmt::Debug;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
@@ -33,7 +34,7 @@ use crate::linear::lasso_optimizer::InteriorPointOptimizer;
 use crate::math::num::RealNumber;
 /// Lasso regression parameters
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct LassoParameters<T: RealNumber> {
    /// Controls the strength of the penalty to the loss function.
    pub alpha: T,
@@ -53,6 +54,29 @@ pub struct Lasso<T: RealNumber, M: Matrix<T>> {
    intercept: T,
 }
 impl<T: RealNumber> LassoParameters<T> {
    /// Returns these parameters with the regularization parameter `alpha` replaced.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Returns these parameters with `normalize` replaced.
    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
    /// Returns these parameters with the tolerance for the optimization replaced.
    pub fn with_tol(self, tol: T) -> Self {
        Self { tol, ..self }
    }
    /// Returns these parameters with the maximum number of iterations replaced.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
 }
 impl<T: RealNumber> Default for LassoParameters<T> {
    fn default() -> Self {
        LassoParameters {
@@ -71,6 +95,12 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for Lasso<T, M> {
    }
 }
 /// Exposes `Lasso` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
    /// Fits Lasso regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -45,9 +45,9 @@
 //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
 //!           100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
 //!
-//! let lr = LinearRegression::fit(&x, &y, LinearRegressionParameters {
+//! let lr = LinearRegression::fit(&x, &y,
-//!                        solver: LinearRegressionSolverName::QR, // or SVD
+//!             LinearRegressionParameters::default().
-//!          }).unwrap();
+//!             with_solver(LinearRegressionSolverName::QR)).unwrap();
 //!
 //! let y_hat = lr.predict(&x).unwrap();
 //! ```
@@ -64,11 +64,12 @@ use std::fmt::Debug;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 /// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable.
 pub enum LinearRegressionSolverName {
    /// QR decomposition, see [QR](../../linalg/qr/index.html)
@@ -78,7 +79,7 @@ pub enum LinearRegressionSolverName {
 }
 /// Linear Regression parameters
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct LinearRegressionParameters {
    /// Solver to use for estimation of regression coefficients.
    pub solver: LinearRegressionSolverName,
@@ -92,6 +93,14 @@ pub struct LinearRegression<T: RealNumber, M: Matrix<T>> {
    solver: LinearRegressionSolverName,
 }
 impl LinearRegressionParameters {
    /// Returns these parameters with `solver` replaced.
    ///
    /// `solver` is the solver to use for estimation of regression coefficients.
    pub fn with_solver(self, solver: LinearRegressionSolverName) -> Self {
        let mut updated = self;
        updated.solver = solver;
        updated
    }
 }
 impl Default for LinearRegressionParameters {
    fn default() -> Self {
        LinearRegressionParameters {
@@ -107,6 +116,12 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
    }
 }
 /// Exposes `LinearRegression` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for LinearRegression<T, M> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
    /// Fits Linear Regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -40,7 +40,7 @@
 //!           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 //! ];
 //!
-//! let lr = LogisticRegression::fit(&x, &y).unwrap();
+//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
 //!
 //! let y_hat = lr.predict(&x).unwrap();
 //! ```
@@ -58,6 +58,7 @@ use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
@@ -66,6 +67,10 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
 use crate::optimization::line_search::Backtracking;
 use crate::optimization::FunctionOrder;
 /// Logistic Regression parameters
 ///
 /// The struct currently has no fields, so there is nothing to configure yet;
 /// `Default::default()` is the usual way to obtain a value to pass to `fit`.
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct LogisticRegressionParameters {}
 /// Logistic Regression
 #[derive(Serialize, Deserialize, Debug)]
 pub struct LogisticRegression<T: RealNumber, M: Matrix<T>> {
@@ -97,6 +102,12 @@ struct BinaryObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
    phantom: PhantomData<&'a T>,
 }
 impl Default for LogisticRegressionParameters {
    fn default() -> Self {
        LogisticRegressionParameters {}
    }
 }
 impl<T: RealNumber, M: Matrix<T>> PartialEq for LogisticRegression<T, M> {
    fn eq(&self, other: &Self) -> bool {
        if self.num_classes != other.num_classes
@@ -207,11 +218,22 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
    }
 }
 /// Exposes `LogisticRegression` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for LogisticRegression<T, M> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
    /// Fits Logistic Regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - target class values
-    pub fn fit(x: &M, y: &M::RowVector) -> Result<LogisticRegression<T, M>, Failed> {
+    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.    
    pub fn fit(
        x: &M,
        y: &M::RowVector,
        _parameters: LogisticRegressionParameters,
    ) -> Result<LogisticRegression<T, M>, Failed> {
        let y_m = M::from_row_vector(y.clone());
        let (x_nrows, num_attributes) = x.shape();
        let (_, y_nrows) = y_m.shape();
@@ -461,7 +483,7 @@ mod tests {
        ]);
        let y: Vec<f64> = vec![0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.];
-        let lr = LogisticRegression::fit(&x, &y).unwrap();
+        let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
        assert_eq!(lr.coefficients().shape(), (3, 2));
        assert_eq!(lr.intercept().shape(), (3, 1));
@@ -484,7 +506,7 @@ mod tests {
        let x = DenseMatrix::from_vec(15, 4, &blobs.data);
        let y = blobs.target;
-        let lr = LogisticRegression::fit(&x, &y).unwrap();
+        let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
        let y_hat = lr.predict(&x).unwrap();
@@ -498,7 +520,7 @@ mod tests {
        let x = DenseMatrix::from_vec(20, 4, &blobs.data);
        let y = blobs.target;
-        let lr = LogisticRegression::fit(&x, &y).unwrap();
+        let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
        let y_hat = lr.predict(&x).unwrap();
@@ -526,7 +548,7 @@ mod tests {
        ]);
        let y: Vec<f64> = vec![0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.];
-        let lr = LogisticRegression::fit(&x, &y).unwrap();
+        let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
        let deserialized_lr: LogisticRegression<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
@@ -562,7 +584,7 @@ mod tests {
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        ];
-        let lr = LogisticRegression::fit(&x, &y).unwrap();
+        let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
        let y_hat = lr.predict(&x).unwrap();
@@ -45,11 +45,8 @@
 //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
 //!           100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
 //!
-//! let y_hat = RidgeRegression::fit(&x, &y, RidgeRegressionParameters {
+//! let y_hat = RidgeRegression::fit(&x, &y, RidgeRegressionParameters::default().with_alpha(0.1)).
-//!                        solver: RidgeRegressionSolverName::Cholesky,
+//!                 and_then(|lr| lr.predict(&x)).unwrap();
-//!                        alpha: 0.1,
-//!                        normalize: true
-//! }).and_then(|lr| lr.predict(&x)).unwrap();
 //! ```
 //!
 //! ## References:
@@ -63,12 +60,13 @@ use std::fmt::Debug;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 /// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable.
 pub enum RidgeRegressionSolverName {
    /// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html)
@@ -78,7 +76,7 @@ pub enum RidgeRegressionSolverName {
 }
 /// Ridge Regression parameters
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct RidgeRegressionParameters<T: RealNumber> {
    /// Solver to use for estimation of regression coefficients.
    pub solver: RidgeRegressionSolverName,
@@ -97,6 +95,24 @@ pub struct RidgeRegression<T: RealNumber, M: Matrix<T>> {
    solver: RidgeRegressionSolverName,
 }
 impl<T: RealNumber> RidgeRegressionParameters<T> {
    /// Returns these parameters with the regularization parameter `alpha` replaced.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Returns these parameters with `solver` replaced.
    /// `solver` is the solver to use for estimation of regression coefficients.
    pub fn with_solver(self, solver: RidgeRegressionSolverName) -> Self {
        Self { solver, ..self }
    }
    /// Returns these parameters with `normalize` replaced.
    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
 }
 impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
    fn default() -> Self {
        RidgeRegressionParameters {
@@ -114,6 +130,12 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for RidgeRegression<T, M> {
    }
 }
 /// Exposes `RidgeRegression` through the crate's common `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RidgeRegression<T, M> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably this resolves to an inherent `predict` on the type
        // (inherent methods take precedence over trait methods); confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
    /// Fits ridge regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -25,7 +25,7 @@ use crate::math::num::RealNumber;
 use super::Distance;
 /// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Euclidian {}
 impl Euclidian {
@@ -26,7 +26,7 @@ use crate::math::num::RealNumber;
 use super::Distance;
 /// While comparing two integer-valued vectors of equal length, Hamming distance is the number of bit positions in which the two bits are different
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Hamming {}
 impl<T: PartialEq, F: RealNumber> Distance<Vec<T>, F> for Hamming {
@@ -52,7 +52,7 @@ use super::Distance;
 use crate::linalg::Matrix;
 /// Mahalanobis distance.
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Mahalanobis<T: RealNumber, M: Matrix<T>> {
    /// covariance matrix of the dataset
    pub sigma: M,
@@ -24,7 +24,7 @@ use crate::math::num::RealNumber;
 use super::Distance;
 /// Manhattan distance
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Manhattan {}
 impl<T: RealNumber> Distance<Vec<T>, T> for Manhattan {
@@ -28,7 +28,7 @@ use crate::math::num::RealNumber;
 use super::Distance;
 /// Defines the Minkowski distance of order `p`
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Minkowski {
    /// order, integer
    pub p: u16,
@@ -28,7 +28,7 @@ use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
 /// Distance metric, a function that calculates distance between two points
-pub trait Distance<T, F: RealNumber> {
+pub trait Distance<T, F: RealNumber>: Clone {
    /// Calculates distance between _a_ and _b_
    fn distance(&self, a: &T, b: &T) -> F;
 }
@@ -42,7 +42,7 @@
 //!             0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 //!   ];
 //!
-//! let lr = LogisticRegression::fit(&x, &y).unwrap();
+//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
 //!
 //! let y_hat = lr.predict(&x).unwrap();
 //!
@@ -0,0 +1,269 @@
 //! # KFold
 //!
 //! Defines k-fold cross validator.
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
 use crate::model_selection::BaseKFold;
 use rand::seq::SliceRandom;
 use rand::thread_rng;
 /// K-Folds cross-validator.
 ///
 /// Holds the configuration of the splitter: how many folds to produce and
 /// whether sample indices are shuffled before being assigned to folds.
 #[derive(Debug, Clone)]
 pub struct KFold {
    /// Number of folds. Must be at least 2.
    pub n_splits: usize, // cannot exceed std::usize::MAX
    /// Whether to shuffle the data before splitting into batches
    pub shuffle: bool,
 }
 impl KFold {
    /// Assigns every row index of `x` to exactly one of `n_splits` test folds.
    ///
    /// Indices are optionally shuffled, then cut into consecutive chunks. When the
    /// number of rows is not divisible by `n_splits`, the first
    /// `n_samples % n_splits` folds receive one extra element.
    fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>> {
        // rows of the matrix are the samples
        let (n_samples, _) = x.shape();
        let mut order: Vec<usize> = (0..n_samples).collect();
        if self.shuffle {
            order.shuffle(&mut thread_rng());
        }
        // every fold gets `base` samples; the leading `extra` folds get one more
        let base = n_samples / self.n_splits;
        let extra = n_samples % self.n_splits;
        let mut folds: Vec<Vec<usize>> = Vec::with_capacity(self.n_splits);
        let mut start: usize = 0;
        for fold in 0..self.n_splits {
            let len = if fold < extra { base + 1 } else { base };
            let end = start + len;
            folds.push(order[start..end].to_vec());
            start = end;
        }
        folds
    }
    /// Converts each test fold into a boolean mask with one entry per row of `x`;
    /// `true` marks a row that belongs to that fold's test set.
    fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>> {
        let n_samples = x.shape().0;
        self.test_indices(x)
            .into_iter()
            .map(|fold| {
                let mut mask = vec![false; n_samples];
                for i in fold {
                    mask[i] = true;
                }
                mask
            })
            .collect()
    }
 }
 impl Default for KFold {
    fn default() -> KFold {
        KFold {
            n_splits: 3,
            shuffle: true,
        }
    }
 }
 impl KFold {
    /// Returns the configuration with the number of folds replaced by `n_splits`.
    /// The value must be at least 2 for `split` to succeed.
    pub fn with_n_splits(self, n_splits: usize) -> Self {
        Self { n_splits, ..self }
    }
    /// Returns the configuration with the shuffle flag replaced by `shuffle`:
    /// whether to shuffle the data before splitting into batches.
    pub fn with_shuffle(self, shuffle: bool) -> Self {
        Self { shuffle, ..self }
    }
 }
 /// An iterator over indices that split data into training and test set.
 ///
 /// Each item is a `(train_indices, test_indices)` pair for one fold.
 #[derive(Debug, Clone)]
 pub struct KFoldIter {
    /// One entry per sample; only its length is used when expanding a mask
    /// into index lists.
    indices: Vec<usize>,
    /// Despite the name, these are boolean test *masks* (one per fold still to
    /// be yielded, `true` = sample is in the test set). `next` pops from the
    /// back, so the fold to be yielded first is stored last.
    test_indices: Vec<Vec<bool>>,
 }
 impl Iterator for KFoldIter {
    type Item = (Vec<usize>, Vec<usize>);
    /// Yields the next `(train_indices, test_indices)` pair, or `None` once
    /// every fold has been produced. Both index lists are in ascending order.
    fn next(&mut self) -> Option<(Vec<usize>, Vec<usize>)> {
        let mask = self.test_indices.pop()?;
        // Single pass over the sample positions: positions whose mask entry is
        // `false` go to the training set, the rest to the test set.
        let (train_index, test_index): (Vec<usize>, Vec<usize>) =
            (0..self.indices.len()).partition(|&idx| !mask[idx]);
        Some((train_index, test_index))
    }
    /// The number of folds left is known exactly, which lets `collect`
    /// pre-allocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.test_indices.len();
        (remaining, Some(remaining))
    }
 }
 /// K-fold splitting strategy exposed through the `BaseKFold` interface.
 impl BaseKFold for KFold {
    type Output = KFoldIter;
    /// Number of folds this splitter is configured to produce.
    fn n_splits(&self) -> usize {
        self.n_splits
    }
    /// Builds an iterator over `(train_indices, test_indices)` pairs, one per fold.
    ///
    /// # Panics
    ///
    /// Panics if `n_splits` is smaller than 2.
    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output {
        assert!(
            self.n_splits >= 2,
            "Number of splits is too small: {}",
            self.n_splits
        );
        let (n_samples, _) = x.shape();
        // `KFoldIter::next` pops masks from the back, so store them last-fold-first
        // to have the folds come out in their natural order.
        let test_indices: Vec<Vec<bool>> = self.test_masks(x).into_iter().rev().collect();
        KFoldIter {
            indices: (0..n_samples).collect(),
            test_indices,
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;
    /// Asserts that `train` and `test` are disjoint and together contain every
    /// index in `0..n_samples` exactly once.
    fn assert_partition(train: &[usize], test: &[usize], n_samples: usize) {
        let mut all: Vec<usize> = train.iter().chain(test.iter()).copied().collect();
        all.sort_unstable();
        assert_eq!(all, (0..n_samples).collect::<Vec<usize>>());
    }
    #[test]
    fn run_kfold_return_test_indices_simple() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(33, 100);
        let test_indices = k.test_indices(&x);
        assert_eq!(test_indices.len(), 3);
        assert_eq!(test_indices[0], (0..11).collect::<Vec<usize>>());
        assert_eq!(test_indices[1], (11..22).collect::<Vec<usize>>());
        assert_eq!(test_indices[2], (22..33).collect::<Vec<usize>>());
    }
    #[test]
    fn run_kfold_return_test_indices_odd() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        // 34 is not divisible by 3: the first fold takes the extra sample
        let x: DenseMatrix<f64> = DenseMatrix::rand(34, 100);
        let test_indices = k.test_indices(&x);
        assert_eq!(test_indices.len(), 3);
        assert_eq!(test_indices[0], (0..12).collect::<Vec<usize>>());
        assert_eq!(test_indices[1], (12..23).collect::<Vec<usize>>());
        assert_eq!(test_indices[2], (23..34).collect::<Vec<usize>>());
    }
    #[test]
    fn run_kfold_return_test_mask_simple() {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let test_masks = k.test_masks(&x);
        assert_eq!(test_masks.len(), 2);
        assert!(test_masks[0][0..11].iter().all(|&t| t));
        assert!(test_masks[0][11..22].iter().all(|&t| !t));
        assert!(test_masks[1][0..11].iter().all(|&t| !t));
        assert!(test_masks[1][11..22].iter().all(|&t| t));
    }
    #[test]
    fn run_kfold_return_split_simple() {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let train_test_splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
        assert_eq!(train_test_splits.len(), 2);
        assert_eq!(train_test_splits[0].1, (0..11).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[0].0, (11..22).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[1].0, (0..11).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[1].1, (11..22).collect::<Vec<usize>>());
    }
    #[test]
    fn run_kfold_return_split_simple_shuffle() {
        let k = KFold {
            n_splits: 2,
            ..KFold::default()
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(23, 100);
        let train_test_splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
        assert_eq!(train_test_splits.len(), 2);
        assert_eq!(train_test_splits[0].1.len(), 12_usize);
        assert_eq!(train_test_splits[0].0.len(), 11_usize);
        assert_eq!(train_test_splits[1].0.len(), 12_usize);
        assert_eq!(train_test_splits[1].1.len(), 11_usize);
        // shuffling must not lose or duplicate samples
        for (train, test) in &train_test_splits {
            assert_partition(train, test, 23);
        }
    }
    #[test]
    fn numpy_parity_test() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
            (vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
            (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
            (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
        ];
        // Compare whole vectors: zipping would pass vacuously if `split`
        // produced fewer folds than expected.
        let actual: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
        assert_eq!(actual, expected);
    }
    #[test]
    fn numpy_parity_test_shuffle() {
        let k = KFold {
            n_splits: 3,
            ..KFold::default()
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        // (train size, test size) per fold; contents are random when shuffling
        let expected_sizes: Vec<(usize, usize)> = vec![(6, 4), (7, 3), (7, 3)];
        let splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
        assert_eq!(splits.len(), expected_sizes.len());
        for ((train, test), (n_train, n_test)) in splits.iter().zip(expected_sizes) {
            assert_eq!(test.len(), n_test);
            assert_eq!(train.len(), n_train);
            assert_partition(train, test, 10);
        }
    }
    #[test]
    #[should_panic(expected = "Number of splits is too small")]
    fn split_rejects_fewer_than_two_folds() {
        let k = KFold {
            n_splits: 1,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        let _ = k.split(&x);
    }
 }
@@ -9,21 +9,39 @@
 //!
 //! In SmartCore you can split your data into training and test datasets using `train_test_split` function.
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
 use rand::seq::SliceRandom;
 use rand::thread_rng;
-use rand::Rng;
+
 pub(crate) mod kfold;
 pub use kfold::{KFold, KFoldIter};
 /// An interface for the K-Folds cross-validator
 pub trait BaseKFold {
    /// An iterator over indices that split data into training and test set.
    /// Each item is a `(training indices, testing indices)` tuple.
    type Output: Iterator<Item = (Vec<usize>, Vec<usize>)>;
    /// Returns an iterator that yields, for each split, a tuple containing the
    /// training set indices and the testing set indices for that split.
    /// * `x` - the data matrix to split
    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output;
    /// Returns the number of splits
    fn n_splits(&self) -> usize;
 }
 /// Splits data into 2 disjoint datasets.
 /// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
-/// * `y` - target values, should be of size _M_
+/// * `y` - target values, should be of size _N_
 /// * `test_size`, (0, 1] - the proportion of the dataset to include in the test split.
 /// * `shuffle`, - whether or not to shuffle the data before splitting
 pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
    x: &M,
    y: &M::RowVector,
    test_size: f32,
    shuffle: bool,
 ) -> (M, M, M::RowVector, M::RowVector) {
    if x.shape().0 != y.len() {
        panic!(
@@ -38,155 +56,131 @@ pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
    }
    let n = y.len();
    let m = x.shape().1;
-    let mut rng = rand::thread_rng();
+    let n_test = ((n as f32) * test_size) as usize;
    let mut n_test = 0;
    let mut index = vec![false; n];
-    for index_i in index.iter_mut().take(n) {
+    if n_test < 1 {
-        let p_test: f32 = rng.gen();
+        panic!("number of sample is too small {}", n);
        if p_test <= test_size {
            *index_i = true;
            n_test += 1;
        }
    }
-    let n_train = n - n_test;
+    let mut indices: Vec<usize> = (0..n).collect();
-    let mut x_train = M::zeros(n_train, m);
+    if shuffle {
-    let mut x_test = M::zeros(n_test, m);
+        indices.shuffle(&mut thread_rng());
    let mut y_train = M::RowVector::zeros(n_train);
    let mut y_test = M::RowVector::zeros(n_test);
    let mut r_train = 0;
    let mut r_test = 0;
    for (r, index_r) in index.iter().enumerate().take(n) {
        if *index_r {
            //sample belongs to test
            for c in 0..m {
                x_test.set(r_test, c, x.get(r, c));
                y_test.set(r_test, y.get(r));
            }
            r_test += 1;
        } else {
            for c in 0..m {
                x_train.set(r_train, c, x.get(r, c));
                y_train.set(r_train, y.get(r));
            }
            r_train += 1;
        }
    }
    let x_train = x.take(&indices[n_test..n], 0);
    let x_test = x.take(&indices[0..n_test], 0);
    let y_train = y.take(&indices[n_test..n]);
    let y_test = y.take(&indices[0..n_test]);
    (x_train, x_test, y_train, y_test)
 }
-///
+/// Cross validation results.
-/// KFold Cross-Validation
+#[derive(Clone, Debug)]
-///
+pub struct CrossValidationResult<T: RealNumber> {
-pub trait BaseKFold {
+    /// Vector with test scores on each cv split
-    /// Returns integer indices corresponding to test sets
+    pub test_score: Vec<T>,
-    fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>>;
+    /// Vector with training scores on each cv split
-
+    pub train_score: Vec<T>,
    /// Returns masksk corresponding to test sets
    fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>>;
    /// Return a tuple containing the the training set indices for that split and
    /// the testing set indices for that split.
    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<(Vec<usize>, Vec<usize>)>;
 }
-///
+impl<T: RealNumber> CrossValidationResult<T> {
-/// An implementation of KFold
+    /// Average test score
-///
+    pub fn mean_test_score(&self) -> T {
-pub struct KFold {
+        self.test_score.sum() / T::from_usize(self.test_score.len()).unwrap()
-    n_splits: usize, // cannot exceed std::usize::MAX
+    }
-    shuffle: bool,
+    /// Average training score
-    // TODO: to be implemented later
+    pub fn mean_train_score(&self) -> T {
-    // random_state: i32,
+        self.train_score.sum() / T::from_usize(self.train_score.len()).unwrap()
 }
 impl Default for KFold {
    fn default() -> KFold {
        KFold {
            n_splits: 3_usize,
            shuffle: true,
        }
    }
 }
-///
+/// Evaluate an estimator by cross-validation using given metric.
-/// Abstract class for all KFold functionalities
+/// * `fit_estimator` - a `fit` function of an estimator
-///
+/// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
-impl BaseKFold for KFold {
+/// * `y` - target values, should be of size _N_
-    fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>> {
+/// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
-        // number of samples (rows) in the matrix
+/// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
-        let n_samples: usize = x.shape().0;
+/// * `score` - a metric to use for evaluation, see [metrics](../metrics/index.html)
 pub fn cross_validate<T, M, H, E, K, F, S>(
    fit_estimator: F,
    x: &M,
    y: &M::RowVector,
    parameters: H,
    cv: K,
    score: S,
 ) -> Result<CrossValidationResult<T>, Failed>
 where
    T: RealNumber,
    M: Matrix<T>,
    H: Clone,
    E: Predictor<M, M::RowVector>,
    K: BaseKFold,
    F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
    S: Fn(&M::RowVector, &M::RowVector) -> T,
 {
    let k = cv.n_splits();
    let mut test_score = Vec::with_capacity(k);
    let mut train_score = Vec::with_capacity(k);
-        // initialise indices
+    for (train_idx, test_idx) in cv.split(x) {
-        let mut indices: Vec<usize> = (0..n_samples).collect();
+        let train_x = x.take(&train_idx, 0);
-        if self.shuffle {
+        let train_y = y.take(&train_idx);
-            indices.shuffle(&mut thread_rng());
+        let test_x = x.take(&test_idx, 0);
-        }
+        let test_y = y.take(&test_idx);
        //  return a new array of given shape n_split, filled with each element of n_samples divided by n_splits.
        let mut fold_sizes = vec![n_samples / self.n_splits; self.n_splits];
-        // increment by one if odd
+        let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
        for fold_size in fold_sizes.iter_mut().take(n_samples % self.n_splits) {
            *fold_size += 1;
        }
-        // generate the right array of arrays for test indices
+        train_score.push(score(&train_y, &estimator.predict(&train_x)?));
-        let mut return_values: Vec<Vec<usize>> = Vec::with_capacity(self.n_splits);
+        test_score.push(score(&test_y, &estimator.predict(&test_x)?));
        let mut current: usize = 0;
        for fold_size in fold_sizes.drain(..) {
            let stop = current + fold_size;
            return_values.push(indices[current..stop].to_vec());
            current = stop
        }
        return_values
    }
-    fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>> {
+    Ok(CrossValidationResult {
-        let mut return_values: Vec<Vec<bool>> = Vec::with_capacity(self.n_splits);
+        test_score,
-        for test_index in self.test_indices(x).drain(..) {
+        train_score,
-            // init mask
+    })
-            let mut test_mask = vec![false; x.shape().0];
+}
-            // set mask's indices to true according to test indices
+
-            for i in test_index {
+/// Generate cross-validated estimates for each input data point.
-                test_mask[i] = true; // can be implemented with map()
+/// The data is split according to the cv parameter. Each sample belongs to exactly one test set, and its prediction is computed with an estimator fitted on the corresponding training set.
-            }
+/// * `fit_estimator` - a `fit` function of an estimator
-            return_values.push(test_mask);
+/// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
 /// * `y` - target values, should be of size _N_
 /// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
 /// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
 pub fn cross_val_predict<T, M, H, E, K, F>(
    fit_estimator: F,
    x: &M,
    y: &M::RowVector,
    parameters: H,
    cv: K,
 ) -> Result<M::RowVector, Failed>
 where
    T: RealNumber,
    M: Matrix<T>,
    H: Clone,
    E: Predictor<M, M::RowVector>,
    K: BaseKFold,
    F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
 {
    let mut y_hat = M::RowVector::zeros(y.len());
    for (train_idx, test_idx) in cv.split(x) {
        let train_x = x.take(&train_idx, 0);
        let train_y = y.take(&train_idx);
        let test_x = x.take(&test_idx, 0);
        let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
        let y_test_hat = estimator.predict(&test_x)?;
        for (i, &idx) in test_idx.iter().enumerate() {
            y_hat.set(idx, y_test_hat.get(i));
        }
        return_values
    }
-    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<(Vec<usize>, Vec<usize>)> {
+    Ok(y_hat)
        let n_samples: usize = x.shape().0;
        let indices: Vec<usize> = (0..n_samples).collect();
        let mut return_values: Vec<(Vec<usize>, Vec<usize>)> = Vec::with_capacity(self.n_splits); // TODO: init nested vecs with capacities by getting the length of test_index vecs
        for test_index in self.test_masks(x).drain(..) {
            let train_index = indices
                .clone()
                .iter()
                .enumerate()
                .filter(|&(idx, _)| !test_index[idx])
                .map(|(idx, _)| idx)
                .collect::<Vec<usize>>(); // filter train indices out according to mask
            let test_index = indices
                .iter()
                .enumerate()
                .filter(|&(idx, _)| test_index[idx])
                .map(|(idx, _)| idx)
                .collect::<Vec<usize>>(); // filter tests indices out according to mask
            return_values.push((train_index, test_index))
        }
        return_values
    }
 }
 #[cfg(test)]
@@ -194,14 +188,17 @@ mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;
    use crate::metrics::{accuracy, mean_absolute_error};
    use crate::model_selection::kfold::KFold;
    use crate::neighbors::knn_regressor::KNNRegressor;
    #[test]
    fn run_train_test_split() {
-        let n = 100;
+        let n = 123;
-        let x: DenseMatrix<f64> = DenseMatrix::rand(100, 3);
+        let x: DenseMatrix<f64> = DenseMatrix::rand(n, 3);
-        let y = vec![0f64; 100];
+        let y = vec![0f64; n];
-        let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2);
+        let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2, true);
        assert!(
            x_train.shape().0 > (n as f64 * 0.65) as usize
@@ -215,126 +212,144 @@ mod tests {
        assert_eq!(x_test.shape().0, y_test.len());
    }
-    #[test]
+    #[derive(Clone)]
-    fn run_kfold_return_test_indices_simple() {
+    struct NoParameters {}
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(33, 100);
        let test_indices = k.test_indices(&x);
-        assert_eq!(test_indices[0], (0..11).collect::<Vec<usize>>());
+    #[test]
-        assert_eq!(test_indices[1], (11..22).collect::<Vec<usize>>());
+    fn test_cross_validate_biased() {
-        assert_eq!(test_indices[2], (22..33).collect::<Vec<usize>>());
+        struct BiasedEstimator {}
        impl BiasedEstimator {
            fn fit<M: Matrix<f32>>(
                _: &M,
                _: &M::RowVector,
                _: NoParameters,
            ) -> Result<BiasedEstimator, Failed> {
                Ok(BiasedEstimator {})
            }
        }
        impl<M: Matrix<f32>> Predictor<M, M::RowVector> for BiasedEstimator {
            fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
                let (n, _) = x.shape();
                Ok(M::RowVector::zeros(n))
            }
        }
        let x = DenseMatrix::from_2d_array(&[
            &[5.1, 3.5, 1.4, 0.2],
            &[4.9, 3.0, 1.4, 0.2],
            &[4.7, 3.2, 1.3, 0.2],
            &[4.6, 3.1, 1.5, 0.2],
            &[5.0, 3.6, 1.4, 0.2],
            &[5.4, 3.9, 1.7, 0.4],
            &[4.6, 3.4, 1.4, 0.3],
            &[5.0, 3.4, 1.5, 0.2],
            &[4.4, 2.9, 1.4, 0.2],
            &[4.9, 3.1, 1.5, 0.1],
            &[7.0, 3.2, 4.7, 1.4],
            &[6.4, 3.2, 4.5, 1.5],
            &[6.9, 3.1, 4.9, 1.5],
            &[5.5, 2.3, 4.0, 1.3],
            &[6.5, 2.8, 4.6, 1.5],
            &[5.7, 2.8, 4.5, 1.3],
            &[6.3, 3.3, 4.7, 1.6],
            &[4.9, 2.4, 3.3, 1.0],
            &[6.6, 2.9, 4.6, 1.3],
            &[5.2, 2.7, 3.9, 1.4],
        ]);
        let y = vec![
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        ];
        let cv = KFold {
            n_splits: 5,
            ..KFold::default()
        };
        let results =
            cross_validate(BiasedEstimator::fit, &x, &y, NoParameters {}, cv, &accuracy).unwrap();
        assert_eq!(0.4, results.mean_test_score());
        assert_eq!(0.4, results.mean_train_score());
    }
    #[test]
-    fn run_kfold_return_test_indices_odd() {
+    fn test_cross_validate_knn() {
-        let k = KFold {
+        let x = DenseMatrix::from_2d_array(&[
-            n_splits: 3,
+            &[234.289, 235.6, 159., 107.608, 1947., 60.323],
-            shuffle: false,
+            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
-        };
+            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
-        let x: DenseMatrix<f64> = DenseMatrix::rand(34, 100);
+            &[284.599, 335.1, 165., 110.929, 1950., 61.187],
-        let test_indices = k.test_indices(&x);
+            &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[346.999, 193.2, 359.4, 113.27, 1952., 63.639],
            &[365.385, 187., 354.7, 115.094, 1953., 64.989],
            &[363.112, 357.8, 335., 116.219, 1954., 63.761],
            &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[419.18, 282.2, 285.7, 118.734, 1956., 67.857],
            &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[444.546, 468.1, 263.7, 121.95, 1958., 66.513],
            &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
        let y = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];
-        assert_eq!(test_indices[0], (0..12).collect::<Vec<usize>>());
+        let cv = KFold {
-        assert_eq!(test_indices[1], (12..23).collect::<Vec<usize>>());
+            n_splits: 5,
-        assert_eq!(test_indices[2], (23..34).collect::<Vec<usize>>());
+            ..KFold::default()
        };
        let results = cross_validate(
            KNNRegressor::fit,
            &x,
            &y,
            Default::default(),
            cv,
            &mean_absolute_error,
        )
        .unwrap();
        assert!(results.mean_test_score() < 15.0);
        assert!(results.mean_train_score() < results.mean_test_score());
    }
    #[test]
-    fn run_kfold_return_test_mask_simple() {
+    fn test_cross_val_predict_knn() {
-        let k = KFold {
+        let x = DenseMatrix::from_2d_array(&[
-            n_splits: 2,
+            &[234.289, 235.6, 159., 107.608, 1947., 60.323],
-            shuffle: false,
+            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
-        };
+            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
-        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
+            &[284.599, 335.1, 165., 110.929, 1950., 61.187],
-        let test_masks = k.test_masks(&x);
+            &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[346.999, 193.2, 359.4, 113.27, 1952., 63.639],
            &[365.385, 187., 354.7, 115.094, 1953., 64.989],
            &[363.112, 357.8, 335., 116.219, 1954., 63.761],
            &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[419.18, 282.2, 285.7, 118.734, 1956., 67.857],
            &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[444.546, 468.1, 263.7, 121.95, 1958., 66.513],
            &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
        let y = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];
-        for t in &test_masks[0][0..11] {
+        let cv = KFold {
            // TODO: this can be prob done better
            assert_eq!(*t, true)
        }
        for t in &test_masks[0][11..22] {
            assert_eq!(*t, false)
        }
        for t in &test_masks[1][0..11] {
            assert_eq!(*t, false)
        }
        for t in &test_masks[1][11..22] {
            assert_eq!(*t, true)
        }
    }
    #[test]
    fn run_kfold_return_split_simple() {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let train_test_splits = k.split(&x);
        assert_eq!(train_test_splits[0].1, (0..11).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[0].0, (11..22).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[1].0, (0..11).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[1].1, (11..22).collect::<Vec<usize>>());
    }
    #[test]
    fn run_kfold_return_split_simple_shuffle() {
        let k = KFold {
            n_splits: 2,
            ..KFold::default()
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(23, 100);
        let train_test_splits = k.split(&x);
-        assert_eq!(train_test_splits[0].1.len(), 12_usize);
+        let y_hat = cross_val_predict(KNNRegressor::fit, &x, &y, Default::default(), cv).unwrap();
        assert_eq!(train_test_splits[0].0.len(), 11_usize);
        assert_eq!(train_test_splits[1].0.len(), 12_usize);
        assert_eq!(train_test_splits[1].1.len(), 11_usize);
    }
-    #[test]
+        assert!(mean_absolute_error(&y, &y_hat) < 10.0);
    fn numpy_parity_test() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
            (vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
            (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
            (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
        ];
        for ((train, test), (expected_train, expected_test)) in
            k.split(&x).into_iter().zip(expected)
        {
            assert_eq!(test, expected_test);
            assert_eq!(train, expected_train);
        }
    }
    #[test]
    fn numpy_parity_test_shuffle() {
        let k = KFold {
            n_splits: 3,
            ..KFold::default()
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
            (vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
            (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
            (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
        ];
        for ((train, test), (expected_train, expected_test)) in
            k.split(&x).into_iter().zip(expected)
        {
            assert_eq!(test.len(), expected_test.len());
            assert_eq!(train.len(), expected_train.len());
        }
    }
 }
@@ -33,6 +33,7 @@
 //! ## References:
 //!
 //! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13 ](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::row_iter;
 use crate::linalg::BaseVector;
@@ -87,13 +88,20 @@ pub struct BernoulliNBParameters<T: RealNumber> {
 }
 impl<T: RealNumber> BernoulliNBParameters<T> {
-    /// Create BernoulliNBParameters with specific paramaters.
+    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
-    pub fn new(alpha: T, priors: Option<Vec<T>>, binarize: Option<T>) -> Self {
+    pub fn with_alpha(mut self, alpha: T) -> Self {
-        Self {
+        self.alpha = alpha;
-            alpha,
+        self
-            priors,
+    }
-            binarize,
+    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
-        }
+    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
        self.priors = Some(priors);
        self
    }
    /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
    pub fn with_binarize(mut self, binarize: T) -> Self {
        self.binarize = Some(binarize);
        self
    }
 }
@@ -200,6 +208,12 @@ pub struct BernoulliNB<T: RealNumber, M: Matrix<T>> {
    binarize: Option<T>,
 }
 /// Exposes `BernoulliNB` through the generic `Predictor` interface.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for BernoulliNB<T, M> {
    // NOTE(review): `self.predict(x)` is expected to resolve to an inherent
    // `BernoulliNB::predict` (inherent methods take precedence over trait
    // methods). That method is not visible in this chunk - confirm it exists,
    // otherwise this call would recurse into itself.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> BernoulliNB<T, M> {
    /// Fits BernoulliNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
@@ -30,6 +30,7 @@
 //! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
 //! let y_hat = nb.predict(&x).unwrap();
 //! ```
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
@@ -222,18 +223,13 @@ pub struct CategoricalNBParameters<T: RealNumber> {
 }
 impl<T: RealNumber> CategoricalNBParameters<T> {
-    /// Create CategoricalNBParameters with specific paramaters.
+    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
-    pub fn new(alpha: T) -> Result<Self, Failed> {
+    pub fn with_alpha(mut self, alpha: T) -> Self {
-        if alpha > T::zero() {
+        self.alpha = alpha;
-            Ok(Self { alpha })
+        self
        } else {
            Err(Failed::fit(&format!(
                "alpha should be >= 0, alpha=[{}]",
                alpha
            )))
        }
    }
 }
 impl<T: RealNumber> Default for CategoricalNBParameters<T> {
    fn default() -> Self {
        Self { alpha: T::one() }
@@ -246,6 +242,12 @@ pub struct CategoricalNB<T: RealNumber, M: Matrix<T>> {
    inner: BaseNaiveBayes<T, M, CategoricalNBDistribution<T>>,
 }
 /// `Predictor` implementation for `CategoricalNB`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for CategoricalNB<T, M> {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `CategoricalNB::predict` (inherent
        // methods take precedence over trait methods in method-call lookup) — confirm one
        // exists, otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> CategoricalNB<T, M> {
    /// Fits CategoricalNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
@@ -22,6 +22,7 @@
 //! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
 //! let y_hat = nb.predict(&x).unwrap();
 //! ```
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::row_iter;
 use crate::linalg::BaseVector;
@@ -81,9 +82,10 @@ pub struct GaussianNBParameters<T: RealNumber> {
 }
 impl<T: RealNumber> GaussianNBParameters<T> {
-    /// Create GaussianNBParameters with specific paramaters.
+    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
-    pub fn new(priors: Option<Vec<T>>) -> Self {
+    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
-        Self { priors }
+        self.priors = Some(priors);
        self
    }
 }
@@ -181,6 +183,12 @@ pub struct GaussianNB<T: RealNumber, M: Matrix<T>> {
    inner: BaseNaiveBayes<T, M, GaussianNBDistribution<T>>,
 }
 /// `Predictor` implementation for `GaussianNB`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for GaussianNB<T, M> {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `GaussianNB::predict` (inherent
        // methods take precedence over trait methods in method-call lookup) — confirm one
        // exists, otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> GaussianNB<T, M> {
    /// Fits GaussianNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
@@ -254,7 +262,7 @@ mod tests {
        let y = vec![1., 1., 1., 2., 2., 2.];
        let priors = vec![0.3, 0.7];
-        let parameters = GaussianNBParameters::new(Some(priors.clone()));
+        let parameters = GaussianNBParameters::default().with_priors(priors.clone());
        let gnb = GaussianNB::fit(&x, &y, parameters).unwrap();
        assert_eq!(gnb.inner.distribution.class_priors, priors);
@@ -33,6 +33,7 @@
 //! ## References:
 //!
 //! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13 ](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::row_iter;
 use crate::linalg::BaseVector;
@@ -81,9 +82,15 @@ pub struct MultinomialNBParameters<T: RealNumber> {
 }
 impl<T: RealNumber> MultinomialNBParameters<T> {
-    /// Create MultinomialNBParameters with specific paramaters.
+    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
-    pub fn new(alpha: T, priors: Option<Vec<T>>) -> Self {
+    pub fn with_alpha(mut self, alpha: T) -> Self {
-        Self { alpha, priors }
+        self.alpha = alpha;
        self
    }
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
        self.priors = Some(priors);
        self
    }
 }
@@ -187,6 +194,12 @@ pub struct MultinomialNB<T: RealNumber, M: Matrix<T>> {
    inner: BaseNaiveBayes<T, M, MultinomialNBDistribution<T>>,
 }
 /// `Predictor` implementation for `MultinomialNB`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for MultinomialNB<T, M> {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `MultinomialNB::predict` (inherent
        // methods take precedence over trait methods in method-call lookup) — confirm one
        // exists, otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>> MultinomialNB<T, M> {
    /// Fits MultinomialNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
@@ -25,31 +25,40 @@
 //! &[9., 10.]]);
 //! let y = vec![2., 2., 2., 3., 3.]; //your class labels
 //!
-//! let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
 //! let y_hat = knn.predict(&x).unwrap();
 //! ```
 //!
 //! variable `y_hat` will hold a vector with estimates of class labels
 //!
 use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};
 use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::{row_iter, Matrix};
-use crate::math::distance::Distance;
+use crate::math::distance::euclidian::Euclidian;
 use crate::math::distance::{Distance, Distances};
 use crate::math::num::RealNumber;
 use crate::neighbors::KNNWeightFunction;
 /// `KNNClassifier` parameters. Use `Default::default()` for default values.
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct KNNClassifierParameters {
+pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
    /// a function that defines a distance between each pair of points in the training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    pub distance: D,
    /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
    pub algorithm: KNNAlgorithmName,
    /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
    pub weight: KNNWeightFunction,
    /// number of training samples to consider when estimating class for new point. Default value is 3.
    pub k: usize,
    /// this parameter is not used
    t: PhantomData<T>,
 }
 /// K Nearest Neighbors Classifier
@@ -62,12 +71,47 @@ pub struct KNNClassifier<T: RealNumber, D: Distance<Vec<T>, T>> {
    k: usize,
 }
-impl Default for KNNClassifierParameters {
+impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
    /// number of training samples to consider when estimating class for new point. Default value is 3.
    pub fn with_k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }
    /// a function that defines a distance between each pair of point in training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    pub fn with_distance<DD: Distance<Vec<T>, T>>(
        self,
        distance: DD,
    ) -> KNNClassifierParameters<T, DD> {
        KNNClassifierParameters {
            distance,
            algorithm: self.algorithm,
            weight: self.weight,
            k: self.k,
            t: PhantomData,
        }
    }
    /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
    pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
        self.algorithm = algorithm;
        self
    }
    /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
    pub fn with_weight(mut self, weight: KNNWeightFunction) -> Self {
        self.weight = weight;
        self
    }
 }
 impl<T: RealNumber> Default for KNNClassifierParameters<T, Euclidian> {
    /// Default parameters: Euclidian distance, `CoverTree` search, uniform weights and `k = 3`.
    fn default() -> Self {
        let distance = Distances::euclidian();
        Self {
            distance,
            algorithm: KNNAlgorithmName::CoverTree,
            weight: KNNWeightFunction::Uniform,
            k: 3,
            t: PhantomData,
        }
    }
 }
@@ -95,19 +139,23 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNClassifier<T, D> {
    }
 }
 /// `Predictor` implementation for `KNNClassifier`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
    for KNNClassifier<T, D>
 {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `KNNClassifier::predict` (inherent
        // methods take precedence over trait methods in method-call lookup) — confirm one
        // exists, otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
    /// Fits KNN classifier to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data
    /// * `y` - vector with target values (classes) of length N    
    /// * `distance` - a function that defines a distance between each pair of point in training data.
    ///    This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    ///    See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    /// * `parameters` - additional parameters like search algorithm and k
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
-        distance: D,
+        parameters: KNNClassifierParameters<T, D>,
        parameters: KNNClassifierParameters,
    ) -> Result<KNNClassifier<T, D>, Failed> {
        let y_m = M::from_row_vector(y.clone());
@@ -142,7 +190,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
            classes,
            y: yi,
            k: parameters.k,
-            knn_algorithm: parameters.algorithm.fit(data, distance)?,
+            knn_algorithm: parameters.algorithm.fit(data, parameters.distance)?,
            weight: parameters.weight,
        })
    }
@@ -187,14 +235,13 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
 mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;
    use crate::math::distance::Distances;
    #[test]
    fn knn_fit_predict() {
        let x =
            DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
        let y = vec![2., 2., 2., 3., 3.];
-        let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+        let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
        let y_hat = knn.predict(&x).unwrap();
        assert_eq!(5, Vec::len(&y_hat));
        assert_eq!(y.to_vec(), y_hat);
@@ -207,12 +254,10 @@ mod tests {
        let knn = KNNClassifier::fit(
            &x,
            &y,
-            Distances::euclidian(),
+            KNNClassifierParameters::default()
-            KNNClassifierParameters {
+                .with_k(5)
-                k: 5,
+                .with_algorithm(KNNAlgorithmName::LinearSearch)
-                algorithm: KNNAlgorithmName::LinearSearch,
+                .with_weight(KNNWeightFunction::Distance),
                weight: KNNWeightFunction::Distance,
            },
        )
        .unwrap();
        let y_hat = knn.predict(&DenseMatrix::from_2d_array(&[&[4.1]])).unwrap();
@@ -225,7 +270,7 @@ mod tests {
            DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
        let y = vec![2., 2., 2., 3., 3.];
-        let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+        let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
        let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap();
@@ -27,31 +27,41 @@
 //!     &[5., 5.]]);
 //! let y = vec![1., 2., 3., 4., 5.]; //your target values
 //!
-//! let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
 //! let y_hat = knn.predict(&x).unwrap();
 //! ```
 //!
 //! variable `y_hat` will hold predicted value
 //!
 //!
 use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};
 use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::{row_iter, BaseVector, Matrix};
-use crate::math::distance::Distance;
+use crate::math::distance::euclidian::Euclidian;
 use crate::math::distance::{Distance, Distances};
 use crate::math::num::RealNumber;
 use crate::neighbors::KNNWeightFunction;
 /// `KNNRegressor` parameters. Use `Default::default()` for default values.
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct KNNRegressorParameters {
+pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
    /// a function that defines a distance between each pair of points in the training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    distance: D,
    /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
    pub algorithm: KNNAlgorithmName,
    /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
    pub weight: KNNWeightFunction,
    /// number of training samples to consider when estimating class for new point. Default value is 3.
    pub k: usize,
    /// this parameter is not used
    t: PhantomData<T>,
 }
 /// K Nearest Neighbors Regressor
@@ -63,12 +73,47 @@ pub struct KNNRegressor<T: RealNumber, D: Distance<Vec<T>, T>> {
    k: usize,
 }
-impl Default for KNNRegressorParameters {
+impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
    /// number of training samples to consider when estimating class for new point. Default value is 3.
    pub fn with_k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }
    /// a function that defines a distance between each pair of point in training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    pub fn with_distance<DD: Distance<Vec<T>, T>>(
        self,
        distance: DD,
    ) -> KNNRegressorParameters<T, DD> {
        KNNRegressorParameters {
            distance,
            algorithm: self.algorithm,
            weight: self.weight,
            k: self.k,
            t: PhantomData,
        }
    }
    /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
    pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
        self.algorithm = algorithm;
        self
    }
    /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
    pub fn with_weight(mut self, weight: KNNWeightFunction) -> Self {
        self.weight = weight;
        self
    }
 }
 impl<T: RealNumber> Default for KNNRegressorParameters<T, Euclidian> {
    /// Default parameters: Euclidian distance, `CoverTree` search, uniform weights and `k = 3`.
    fn default() -> Self {
        let distance = Distances::euclidian();
        Self {
            distance,
            algorithm: KNNAlgorithmName::CoverTree,
            weight: KNNWeightFunction::Uniform,
            k: 3,
            t: PhantomData,
        }
    }
 }
@@ -88,19 +133,23 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNRegressor<T, D> {
    }
 }
 /// `Predictor` implementation for `KNNRegressor`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
    for KNNRegressor<T, D>
 {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `KNNRegressor::predict` (inherent
        // methods take precedence over trait methods in method-call lookup) — confirm one
        // exists, otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
    /// Fits KNN regressor to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data
    /// * `y` - vector with real values    
    /// * `distance` - a function that defines a distance between each pair of point in training data.
    ///    This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    ///    See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    /// * `parameters` - additional parameters like search algorithm and k
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
-        distance: D,
+        parameters: KNNRegressorParameters<T, D>,
        parameters: KNNRegressorParameters,
    ) -> Result<KNNRegressor<T, D>, Failed> {
        let y_m = M::from_row_vector(y.clone());
@@ -126,7 +175,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
        Ok(KNNRegressor {
            y: y.to_vec(),
            k: parameters.k,
-            knn_algorithm: parameters.algorithm.fit(data, distance)?,
+            knn_algorithm: parameters.algorithm.fit(data, parameters.distance)?,
            weight: parameters.weight,
        })
    }
@@ -176,12 +225,11 @@ mod tests {
        let knn = KNNRegressor::fit(
            &x,
            &y,
-            Distances::euclidian(),
+            KNNRegressorParameters::default()
-            KNNRegressorParameters {
+                .with_k(3)
-                k: 3,
+                .with_distance(Distances::euclidian())
-                algorithm: KNNAlgorithmName::LinearSearch,
+                .with_algorithm(KNNAlgorithmName::LinearSearch)
-                weight: KNNWeightFunction::Distance,
+                .with_weight(KNNWeightFunction::Distance),
            },
        )
        .unwrap();
        let y_hat = knn.predict(&x).unwrap();
@@ -197,7 +245,7 @@ mod tests {
            DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
        let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
        let y_exp = vec![2., 2., 3., 4., 4.];
-        let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+        let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
        let y_hat = knn.predict(&x).unwrap();
        assert_eq!(5, Vec::len(&y_hat));
        for i in 0..y_hat.len() {
@@ -211,7 +259,7 @@ mod tests {
            DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
        let y = vec![1., 2., 3., 4., 5.];
-        let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
+        let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
        let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap();
@@ -48,7 +48,7 @@ pub mod knn_regressor;
 pub type KNNAlgorithmName = crate::algorithm::neighbour::KNNAlgorithmName;
 /// Weight function that is used to determine estimated value.
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub enum KNNWeightFunction {
    /// All k nearest points are weighted equally
    Uniform,
@@ -93,16 +93,18 @@ impl Kernels {
 }
 /// Linear Kernel
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct LinearKernel {}
 /// Radial basis function (Gaussian) kernel
 ///
 /// Plain parameter holder; derives `Serialize`, `Deserialize`, `Debug` and `Clone`.
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct RBFKernel<T: RealNumber> {
    /// kernel coefficient
    pub gamma: T,
 }
 /// Polynomial kernel
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct PolynomialKernel<T: RealNumber> {
    /// degree of the polynomial
    pub degree: T,
@@ -113,6 +115,7 @@ pub struct PolynomialKernel<T: RealNumber> {
 }
 /// Sigmoid (hyperbolic tangent) kernel
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct SigmoidKernel<T: RealNumber> {
    /// kernel coefficient
    pub gamma: T,
@@ -57,13 +57,7 @@
 //! let y = vec![ 0., 0., 0., 0., 0., 0., 0., 0.,
 //!            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
 //!
-//! let svr = SVC::fit(&x, &y,
+//! let svr = SVC::fit(&x, &y, SVCParameters::default().with_c(200.0)).unwrap();
 //!             Kernels::linear(),
 //!             SVCParameters {
 //!                 epoch: 2,
 //!                 c: 200.0,
 //!                 tol: 1e-3,
 //!             }).unwrap();
 //!
 //! let y_hat = svr.predict(&x).unwrap();
 //! ```
@@ -84,22 +78,26 @@ use rand::seq::SliceRandom;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-use crate::svm::Kernel;
+use crate::svm::{Kernel, Kernels, LinearKernel};
 #[derive(Serialize, Deserialize, Debug)]
 #[derive(Serialize, Deserialize, Debug, Clone)]
 /// SVC Parameters
-pub struct SVCParameters<T: RealNumber> {
+pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
-    /// Number of epochs
+    /// Number of epochs.
    pub epoch: usize,
    /// Regularization parameter.
    pub c: T,
-    /// Tolerance for stopping criterion
+    /// Tolerance for stopping criterion.
    pub tol: T,
    /// The kernel function.
    pub kernel: K,
    /// Unused parameter.
    m: PhantomData<M>,
 }
 #[derive(Serialize, Deserialize, Debug)]
@@ -136,7 +134,7 @@ struct Cache<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
 struct Optimizer<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
    x: &'a M,
    y: &'a M::RowVector,
-    parameters: &'a SVCParameters<T>,
+    parameters: &'a SVCParameters<T, M, K>,
    svmin: usize,
    svmax: usize,
    gmin: T,
@@ -147,27 +145,63 @@ struct Optimizer<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
    recalculate_minmax_grad: bool,
 }
-impl<T: RealNumber> Default for SVCParameters<T> {
+impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVCParameters<T, M, K> {
    /// Number of epochs.
    pub fn with_epoch(mut self, epoch: usize) -> Self {
        self.epoch = epoch;
        self
    }
    /// Regularization parameter.
    pub fn with_c(mut self, c: T) -> Self {
        self.c = c;
        self
    }
    /// Tolerance for stopping criterion.
    pub fn with_tol(mut self, tol: T) -> Self {
        self.tol = tol;
        self
    }
    /// The kernel function.
    ///
    /// Builds a new parameter set typed for kernel `KK`, copying `epoch`, `c` and `tol`
    /// out of `self`. Note that this method borrows `self` (`&self`) rather than consuming
    /// it like the other setters in this impl, so the original parameters stay usable.
    pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVCParameters<T, M, KK> {
        let (epoch, c, tol) = (self.epoch, self.c, self.tol);
        SVCParameters {
            epoch,
            c,
            tol,
            kernel,
            m: PhantomData,
        }
    }
 }
 impl<T: RealNumber, M: Matrix<T>> Default for SVCParameters<T, M, LinearKernel> {
    /// Default parameters: 2 epochs, `c = 1`, `tol = 1e-3` and the linear kernel.
    fn default() -> Self {
        let c = T::one();
        let tol = T::from_f64(1e-3).unwrap();
        let kernel = Kernels::linear();
        Self {
            epoch: 2,
            c,
            tol,
            kernel,
            m: PhantomData,
        }
    }
 }
 /// `Predictor` implementation for `SVC`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector>
    for SVC<T, M, K>
 {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `SVC::predict` (inherent methods
        // take precedence over trait methods in method-call lookup) — confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
    /// Fits SVC to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - class labels
    /// * `kernel` - the kernel function
    /// * `parameters` - optional parameters, use `Default::default()` to set parameters to default values.
    pub fn fit(
        x: &M,
        y: &M::RowVector,
-        kernel: K,
+        parameters: SVCParameters<T, M, K>,
        parameters: SVCParameters<T>,
    ) -> Result<SVC<T, M, K>, Failed> {
        let (n, _) = x.shape();
@@ -198,13 +232,13 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
            }
        }
-        let optimizer = Optimizer::new(x, &y, &kernel, &parameters);
+        let optimizer = Optimizer::new(x, &y, &parameters.kernel, &parameters);
        let (support_vectors, weight, b) = optimizer.optimize();
        Ok(SVC {
            classes,
-            kernel,
+            kernel: parameters.kernel,
            instances: support_vectors,
            w: weight,
            b,
@@ -321,7 +355,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
        x: &'a M,
        y: &'a M::RowVector,
        kernel: &'a K,
-        parameters: &'a SVCParameters<T>,
+        parameters: &'a SVCParameters<T, M, K>,
    ) -> Optimizer<'a, T, M, K> {
        let (n, _) = x.shape();
@@ -711,18 +745,13 @@ mod tests {
        let y_hat = SVC::fit(
            &x,
            &y,
-            Kernels::linear(),
+            SVCParameters::default()
-            SVCParameters {
+                .with_c(200.0)
-                epoch: 2,
+                .with_kernel(Kernels::linear()),
                c: 200.0,
                tol: 1e-3,
            },
        )
        .and_then(|lr| lr.predict(&x))
        .unwrap();
        println!("{:?}", y_hat);
        assert!(accuracy(&y_hat, &y) >= 0.9);
    }
@@ -759,12 +788,9 @@ mod tests {
        let y_hat = SVC::fit(
            &x,
            &y,
-            Kernels::rbf(0.7),
+            SVCParameters::default()
-            SVCParameters {
+                .with_c(1.0)
-                epoch: 2,
+                .with_kernel(Kernels::rbf(0.7)),
                c: 1.0,
                tol: 1e-3,
            },
        )
        .and_then(|lr| lr.predict(&x))
        .unwrap();
@@ -801,7 +827,7 @@ mod tests {
            -1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        ];
-        let svr = SVC::fit(&x, &y, Kernels::linear(), Default::default()).unwrap();
+        let svr = SVC::fit(&x, &y, Default::default()).unwrap();
        let deserialized_svr: SVC<f64, DenseMatrix<f64>, LinearKernel> =
            serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
@@ -49,13 +49,7 @@
 //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
 //!           100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
 //!
-//! let svr = SVR::fit(&x, &y,
+//! let svr = SVR::fit(&x, &y, SVRParameters::default().with_eps(2.0).with_c(10.0)).unwrap();
 //!             LinearKernel {},
 //!             SVRParameters {
 //!                 eps: 2.0,
 //!                 c: 10.0,
 //!                 tol: 1e-3,
 //!             }).unwrap();
 //!
 //! let y_hat = svr.predict(&x).unwrap();
 //! ```
@@ -72,25 +66,30 @@
 use std::cell::{Ref, RefCell};
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use serde::{Deserialize, Serialize};
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::BaseVector;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-use crate::svm::Kernel;
+use crate::svm::{Kernel, Kernels, LinearKernel};
 #[derive(Serialize, Deserialize, Debug)]
 #[derive(Serialize, Deserialize, Debug, Clone)]
 /// SVR Parameters
-pub struct SVRParameters<T: RealNumber> {
+pub struct SVRParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
-    /// Epsilon in the epsilon-SVR model
+    /// Epsilon in the epsilon-SVR model.
    pub eps: T,
    /// Regularization parameter.
    pub c: T,
-    /// Tolerance for stopping criterion
+    /// Tolerance for stopping criterion.
    pub tol: T,
    /// The kernel function.
    pub kernel: K,
    /// Unused parameter.
    m: PhantomData<M>,
 }
 #[derive(Serialize, Deserialize, Debug)]
@@ -135,16 +134,54 @@ struct Cache<T: Clone> {
    data: Vec<RefCell<Option<Vec<T>>>>,
 }
-impl<T: RealNumber> Default for SVRParameters<T> {
+impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVRParameters<T, M, K> {
    /// Epsilon in the epsilon-SVR model.
    pub fn with_eps(mut self, eps: T) -> Self {
        self.eps = eps;
        self
    }
    /// Regularization parameter.
    pub fn with_c(mut self, c: T) -> Self {
        self.c = c;
        self
    }
    /// Tolerance for stopping criterion.
    pub fn with_tol(mut self, tol: T) -> Self {
        self.tol = tol;
        self
    }
    /// The kernel function.
    ///
    /// Builds a new parameter set typed for kernel `KK`, copying `eps`, `c` and `tol`
    /// out of `self`. Note that this method borrows `self` (`&self`) rather than consuming
    /// it like the other setters in this impl, so the original parameters stay usable.
    pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVRParameters<T, M, KK> {
        let (eps, c, tol) = (self.eps, self.c, self.tol);
        SVRParameters {
            eps,
            c,
            tol,
            kernel,
            m: PhantomData,
        }
    }
 }
 impl<T: RealNumber, M: Matrix<T>> Default for SVRParameters<T, M, LinearKernel> {
    /// Default parameters: `eps = 0.1`, `c = 1`, `tol = 1e-3` and the linear kernel.
    fn default() -> Self {
        let eps = T::from_f64(0.1).unwrap();
        let c = T::one();
        let tol = T::from_f64(1e-3).unwrap();
        let kernel = Kernels::linear();
        Self {
            eps,
            c,
            tol,
            kernel,
            m: PhantomData,
        }
    }
 }
 /// `Predictor` implementation for `SVR`: takes a matrix `M` and yields `M::RowVector`.
 impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector>
    for SVR<T, M, K>
 {
    /// Forwards to `self.predict(x)` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): presumably resolves to the inherent `SVR::predict` (inherent methods
        // take precedence over trait methods in method-call lookup) — confirm one exists,
        // otherwise this call would recurse into itself.
        self.predict(x)
    }
 }
 impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
    /// Fits SVR to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -154,8 +191,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
    pub fn fit(
        x: &M,
        y: &M::RowVector,
-        kernel: K,
+        parameters: SVRParameters<T, M, K>,
        parameters: SVRParameters<T>,
    ) -> Result<SVR<T, M, K>, Failed> {
        let (n, _) = x.shape();
@@ -165,12 +201,12 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
            ));
        }
-        let optimizer = Optimizer::new(x, y, &kernel, &parameters);
+        let optimizer = Optimizer::new(x, y, &parameters.kernel, &parameters);
        let (support_vectors, weight, b) = optimizer.smo();
        Ok(SVR {
-            kernel,
+            kernel: parameters.kernel,
            instances: support_vectors,
            w: weight,
            b,
@@ -243,7 +279,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
        x: &M,
        y: &M::RowVector,
        kernel: &'a K,
-        parameters: &SVRParameters<T>,
+        parameters: &SVRParameters<T, M, K>,
    ) -> Optimizer<'a, T, M, K> {
        let (n, _) = x.shape();
@@ -510,18 +546,9 @@ mod tests {
            114.2, 115.7, 116.9,
        ];
-        let y_hat = SVR::fit(
+        let y_hat = SVR::fit(&x, &y, SVRParameters::default().with_eps(2.0).with_c(10.0))
-            &x,
+            .and_then(|lr| lr.predict(&x))
-            &y,
+            .unwrap();
            LinearKernel {},
            SVRParameters {
                eps: 2.0,
                c: 10.0,
                tol: 1e-3,
            },
        )
        .and_then(|lr| lr.predict(&x))
        .unwrap();
        assert!(mean_squared_error(&y_hat, &y) < 2.5);
    }
@@ -552,7 +579,7 @@ mod tests {
            114.2, 115.7, 116.9,
        ];
-        let svr = SVR::fit(&x, &y, LinearKernel {}, Default::default()).unwrap();
+        let svr = SVR::fit(&x, &y, Default::default()).unwrap();
        let deserialized_svr: SVR<f64, DenseMatrix<f64>, LinearKernel> =
            serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
@@ -71,11 +71,12 @@ use rand::seq::SliceRandom;
 use serde::{Deserialize, Serialize};
 use crate::algorithm::sort::quick_sort::QuickArgSort;
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 /// Parameters of Decision Tree
 pub struct DecisionTreeClassifierParameters {
    /// Split criteria to use when building a tree.
@@ -160,6 +161,29 @@ impl<T: RealNumber> PartialEq for Node<T> {
    }
 }
 impl DecisionTreeClassifierParameters {
    /// Split criteria to use when building a tree.
    ///
    /// Returns the parameters with `criterion` replaced; other fields are kept.
    pub fn with_criterion(self, criterion: SplitCriterion) -> Self {
        Self { criterion, ..self }
    }
    /// The maximum depth of the tree.
    ///
    /// Returns the parameters with `max_depth` set to `Some(max_depth)`; other fields are kept.
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// The minimum number of samples required to be at a leaf node.
    ///
    /// Returns the parameters with `min_samples_leaf` replaced; other fields are kept.
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// The minimum number of samples required to split an internal node.
    ///
    /// Returns the parameters with `min_samples_split` replaced; other fields are kept.
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
 }
 impl Default for DecisionTreeClassifierParameters {
    fn default() -> Self {
        DecisionTreeClassifierParameters {
@@ -269,6 +293,12 @@ pub(in crate) fn which_max(x: &[usize]) -> usize {
    which
 }
 /// Exposes `DecisionTreeClassifier` through the common `Predictor` interface, with the
 /// matrix type `M` as input and `M::RowVector` as the predicted output.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for DecisionTreeClassifier<T> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): this presumably resolves to the inherent `predict` on
        // `DecisionTreeClassifier` (inherent methods take precedence over trait methods in
        // method-call syntax). If no inherent method with this name exists, the call would
        // recurse into this function — confirm against the inherent impl.
        self.predict(x)
    }
 }
 impl<T: RealNumber> DecisionTreeClassifier<T> {
    /// Build a decision tree classifier from the training data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -66,11 +66,12 @@ use rand::seq::SliceRandom;
 use serde::{Deserialize, Serialize};
 use crate::algorithm::sort::quick_sort::QuickArgSort;
 use crate::base::Predictor;
 use crate::error::Failed;
 use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 /// Parameters of Regression Tree
 pub struct DecisionTreeRegressorParameters {
    /// The maximum depth of the tree.
@@ -100,6 +101,24 @@ struct Node<T: RealNumber> {
    false_child: Option<usize>,
 }
 impl DecisionTreeRegressorParameters {
    /// Returns these parameters with the maximum depth of the tree set to
    /// `Some(max_depth)`; every other field is carried over unchanged.
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// Returns these parameters with the minimum number of samples required to be
    /// at a leaf node replaced by `min_samples_leaf`.
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// Returns these parameters with the minimum number of samples required to
    /// split an internal node replaced by `min_samples_split`.
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
 }
 impl Default for DecisionTreeRegressorParameters {
    fn default() -> Self {
        DecisionTreeRegressorParameters {
@@ -189,6 +208,12 @@ impl<'a, T: RealNumber, M: Matrix<T>> NodeVisitor<'a, T, M> {
    }
 }
 /// Exposes `DecisionTreeRegressor` through the common `Predictor` interface, with the
 /// matrix type `M` as input and `M::RowVector` as the predicted output.
 impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for DecisionTreeRegressor<T> {
    /// Forwards `x` to `self.predict` and returns its result unchanged.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        // NOTE(review): this presumably resolves to the inherent `predict` on
        // `DecisionTreeRegressor` (inherent methods take precedence over trait methods in
        // method-call syntax). If no inherent method with this name exists, the call would
        // recurse into this function — confirm against the inherent impl.
        self.predict(x)
    }
 }
 impl<T: RealNumber> DecisionTreeRegressor<T> {
    /// Build a decision tree regressor from the training data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.