make default params available to serde (#167)

* add seed param to search params
* make default params available to serde
* lints
* create defaults for enums
* lint
2022-09-21 19:48:31 -07:00
parent 403d3f2348
commit 764309e313
22 changed files with 175 additions and 18 deletions
@@ -59,6 +59,12 @@ pub enum KNNAlgorithmName {
    CoverTree,
 }
 impl Default for KNNAlgorithmName {
    fn default() -> Self {
        KNNAlgorithmName::CoverTree
    }
 }
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug)]
 pub(crate) enum KNNAlgorithm<T: RealNumber, D: Distance<Vec<T>, T>> {
@@ -65,17 +65,22 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
    eps: T,
 }
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// DBSCAN clustering algorithm parameters
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): that is the field *type's* default (e.g. `0` for `min_samples`), whereas the
 // `Default` impl for `DBSCANParameters<T, Euclidian>` shown in this change assigns
 // `min_samples: 5` and `eps: T::half()` — confirm the difference is intended.
 pub struct DBSCANParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// a function that defines a distance between each pair of points in training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    pub distance: D,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
    pub min_samples: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
    pub eps: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// KNN algorithm to use.
    pub algorithm: KNNAlgorithmName,
 }
@@ -113,14 +118,18 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> DBSCANParameters<T, D> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `DBSCANParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct DBSCANSearchParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// a function that defines a distance between each pair of points in training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    pub distance: Vec<D>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
    pub min_samples: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
    pub eps: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// KNN algorithm to use.
    pub algorithm: Vec<KNNAlgorithmName>,
 }
@@ -221,7 +230,7 @@ impl<T: RealNumber> Default for DBSCANParameters<T, Euclidian> {
            distance: Distances::euclidian(),
            min_samples: 5,
            eps: T::half(),
-            algorithm: KNNAlgorithmName::CoverTree,
+            algorithm: KNNAlgorithmName::default(),
        }
    }
 }
@@ -102,13 +102,17 @@ impl<T: RealNumber> PartialEq for KMeans<T> {
    }
 }
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// K-Means clustering algorithm parameters
 pub struct KMeansParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of clusters.
    pub k: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Maximum number of iterations of the k-means algorithm for a single run.
    pub max_iter: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Determines random number generation for centroid initialization.
    /// Use an int to make the randomness deterministic
    pub seed: Option<u64>,
@@ -141,10 +145,13 @@ impl Default for KMeansParameters {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct KMeansSearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of clusters.
    pub k: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Maximum number of iterations of the k-means algorithm for a single run.
    pub max_iter: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Determines random number generation for centroid initialization.
    /// Use an int to make the randomness deterministic
    pub seed: Vec<Option<u64>>,
@@ -83,11 +83,14 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for PCA<T, M> {
    }
 }
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// PCA parameters
 pub struct PCAParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of components to keep.
    pub n_components: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// By default, covariance matrix is used to compute principal components.
    /// Enable this flag if you want to use correlation matrix instead.
    pub use_correlation_matrix: bool,
@@ -120,8 +123,10 @@ impl Default for PCAParameters {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct PCASearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of components to keep.
    pub n_components: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// By default, covariance matrix is used to compute principal components.
    /// Enable this flag if you want to use correlation matrix instead.
    pub use_correlation_matrix: Vec<bool>,
@@ -69,9 +69,11 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for SVD<T, M> {
    }
 }
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// SVD parameters
 ///
 /// With the `serde` feature enabled, a missing `n_components` deserializes to
 /// `usize::default()` (`0`).
 // NOTE(review): this struct's own `Default` impl is not visible here; verify that `0` is the
 // intended fallback.
 pub struct SVDParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of components to keep.
    pub n_components: usize,
 }
@@ -94,6 +96,7 @@ impl SVDParameters {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors `SVDParameters`, with the field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, a missing `n_components` deserializes to an empty `Vec`.
 pub struct SVDSearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of components to keep.
    pub n_components: Vec<usize>,
 }
@@ -67,20 +67,28 @@ use crate::tree::decision_tree_classifier::{
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): the type default (e.g. `0` for `n_trees`, `false` for `keep_samples`) may differ
 // from what this struct's own `Default` impl assigns; that impl is not visible here — verify.
 pub struct RandomForestClassifierParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub criterion: SplitCriterion,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub max_depth: Option<u16>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub min_samples_leaf: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub min_samples_split: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The number of trees in the forest.
    pub n_trees: u16,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of random sample of predictors to use as split candidates.
    pub m: Option<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Whether to keep samples used for tree generation. This is required for OOB prediction.
    pub keep_samples: bool,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Seed used for bootstrap sampling and feature selection for each tree.
    pub seed: u64,
 }
@@ -198,20 +206,28 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestCla
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `RandomForestClassifierParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct RandomForestClassifierSearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub criterion: Vec<SplitCriterion>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub max_depth: Vec<Option<u16>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub min_samples_leaf: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub min_samples_split: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The number of trees in the forest.
    pub n_trees: Vec<u16>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of random sample of predictors to use as split candidates.
    pub m: Vec<Option<usize>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Whether to keep samples used for tree generation. This is required for OOB prediction.
    pub keep_samples: Vec<bool>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Seed used for bootstrap sampling and feature selection for each tree.
    pub seed: Vec<u64>,
 }
@@ -65,18 +65,25 @@ use crate::tree::decision_tree_regressor::{
 /// Parameters of the Random Forest Regressor
 /// Some parameters here are passed directly into base estimator.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): the type default (e.g. `0` for `n_trees`, `false` for `keep_samples`) may differ
 // from what this struct's own `Default` impl assigns; that impl is not visible here — verify.
 pub struct RandomForestRegressorParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub max_depth: Option<u16>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub min_samples_leaf: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub min_samples_split: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The number of trees in the forest.
    pub n_trees: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of random sample of predictors to use as split candidates.
    pub m: Option<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Whether to keep samples used for tree generation. This is required for OOB prediction.
    pub keep_samples: bool,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Seed used for bootstrap sampling and feature selection for each tree.
    pub seed: u64,
 }
@@ -181,18 +188,25 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestReg
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `RandomForestRegressorParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct RandomForestRegressorSearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub max_depth: Vec<Option<u16>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub min_samples_leaf: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub min_samples_split: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The number of trees in the forest.
    pub n_trees: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of random sample of predictors to use as split candidates.
    pub m: Vec<Option<usize>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Whether to keep samples used for tree generation. This is required for OOB prediction.
    pub keep_samples: Vec<bool>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Seed used for bootstrap sampling and feature selection for each tree.
    pub seed: Vec<u64>,
 }
@@ -71,16 +71,21 @@ use crate::linear::lasso_optimizer::InteriorPointOptimizer;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): the type default (e.g. `0` for `max_iter`, `false` for `normalize`) may differ
 // from what this struct's own `Default` impl assigns; that impl is not visible here — verify.
 pub struct ElasticNetParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub alpha: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
    /// For l1_ratio = 0 the penalty is an L2 penalty.
    /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
    pub l1_ratio: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub normalize: bool,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The tolerance for the optimization
    pub tol: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum number of iterations
    pub max_iter: usize,
 }
@@ -139,16 +144,21 @@ impl<T: RealNumber> Default for ElasticNetParameters<T> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `ElasticNetParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct ElasticNetSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub alpha: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
    /// For l1_ratio = 0 the penalty is an L2 penalty.
    /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
    pub l1_ratio: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub normalize: Vec<bool>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The tolerance for the optimization
    pub tol: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum number of iterations
    pub max_iter: Vec<usize>,
 }
@@ -38,13 +38,17 @@ use crate::math::num::RealNumber;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): the type default (e.g. `0` for `max_iter`, `false` for `normalize`) may differ
 // from what this struct's own `Default` impl assigns; that impl is not visible here — verify.
 pub struct LassoParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the strength of the penalty to the loss function.
    pub alpha: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// If true the regressors X will be normalized before regression
    /// by subtracting the mean and dividing by the standard deviation.
    pub normalize: bool,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The tolerance for the optimization
    pub tol: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum number of iterations
    pub max_iter: usize,
 }
@@ -116,13 +120,17 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `LassoParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct LassoSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the strength of the penalty to the loss function.
    pub alpha: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// If true the regressors X will be normalized before regression
    /// by subtracting the mean and dividing by the standard deviation.
    pub normalize: Vec<bool>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The tolerance for the optimization
    pub tol: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum number of iterations
    pub max_iter: Vec<usize>,
 }
@@ -71,19 +71,21 @@ use crate::linalg::Matrix;
 use crate::math::num::RealNumber;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Debug, Default, Clone, Eq, PartialEq)]
 /// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable.
 ///
 /// `SVD` carries `#[default]`, so it is the value produced by the derived `Default`.
 pub enum LinearRegressionSolverName {
    /// QR decomposition, see [QR](../../linalg/qr/index.html)
    QR,
    #[default]
    /// SVD decomposition, see [SVD](../../linalg/svd/index.html)
    SVD,
 }
 /// Linear Regression parameters
 ///
 /// With the `serde` feature enabled, a missing `solver` deserializes to
 /// `LinearRegressionSolverName::default()`.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone)]
+#[derive(Debug, Default, Clone)]
 pub struct LinearRegressionParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Solver to use for estimation of regression coefficients.
    pub solver: LinearRegressionSolverName,
 }
@@ -105,18 +107,11 @@ impl LinearRegressionParameters {
    }
 }
 // NOTE(review): `LinearRegressionParameters` also gains `derive(Default)` in this change, and the
 // hunk header (18 old lines -> 11 new lines) suggests this 7-line hand-written impl is the
 // removed side of the diff. If both were kept, the two `Default` impls would conflict — confirm.
 impl Default for LinearRegressionParameters {
    /// Builds parameters with the `SVD` solver selected.
    fn default() -> Self {
        LinearRegressionParameters {
            solver: LinearRegressionSolverName::SVD,
        }
    }
 }
 /// Linear Regression grid search parameters
 ///
 /// With the `serde` feature enabled, a missing `solver` deserializes to an empty `Vec`.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct LinearRegressionSearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Solver to use for estimation of regression coefficients.
    pub solver: Vec<LinearRegressionSolverName>,
 }
@@ -353,5 +348,9 @@ mod tests {
            serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
        assert_eq!(lr, deserialized_lr);
        let default = LinearRegressionParameters::default();
        let parameters: LinearRegressionParameters = serde_json::from_str("{}").unwrap();
        assert_eq!(parameters.solver, default.solver);
    }
 }
@@ -75,12 +75,20 @@ pub enum LogisticRegressionSolverName {
    LBFGS,
 }
 impl Default for LogisticRegressionSolverName {
    fn default() -> Self {
        LogisticRegressionSolverName::LBFGS
    }
 }
 /// Logistic Regression parameters
 ///
 /// With the `serde` feature enabled, both fields are marked `serde(default)`: a missing
 /// `solver` deserializes to `LogisticRegressionSolverName::default()` (the same value the
 /// `Default` impl in this change uses), and a missing `alpha` to `T`'s `Default::default()`.
 // NOTE(review): the `Default` impl sets `alpha: T::zero()`; presumably `T::default()` is also
 // zero for the numeric types used here — verify.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct LogisticRegressionParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Solver to use for estimation of regression coefficients.
    pub solver: LogisticRegressionSolverName,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub alpha: T,
 }
@@ -89,8 +97,10 @@ pub struct LogisticRegressionParameters<T: RealNumber> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `LogisticRegressionParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct LogisticRegressionSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Solver to use for estimation of regression coefficients.
    pub solver: Vec<LogisticRegressionSolverName>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub alpha: Vec<T>,
 }
@@ -204,7 +214,7 @@ impl<T: RealNumber> LogisticRegressionParameters<T> {
 impl<T: RealNumber> Default for LogisticRegressionParameters<T> {
    fn default() -> Self {
        LogisticRegressionParameters {
-            solver: LogisticRegressionSolverName::LBFGS,
+            solver: LogisticRegressionSolverName::default(),
            alpha: T::zero(),
        }
    }
@@ -77,6 +77,12 @@ pub enum RidgeRegressionSolverName {
    SVD,
 }
 impl Default for RidgeRegressionSolverName {
    fn default() -> Self {
        RidgeRegressionSolverName::Cholesky
    }
 }
 /// Ridge Regression parameters
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
@@ -94,10 +100,13 @@ pub struct RidgeRegressionParameters<T: RealNumber> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct RidgeRegressionSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Solver to use for estimation of regression coefficients.
    pub solver: Vec<RidgeRegressionSolverName>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub alpha: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// If true the regressors X will be normalized before regression
    /// by subtracting the mean and dividing by the standard deviation.
    pub normalize: Vec<bool>,
@@ -204,7 +213,7 @@ impl<T: RealNumber> RidgeRegressionParameters<T> {
 impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
    fn default() -> Self {
        RidgeRegressionParameters {
-            solver: RidgeRegressionSolverName::Cholesky,
+            solver: RidgeRegressionSolverName::default(),
            alpha: T::one(),
            normalize: true,
        }
@@ -114,10 +114,13 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for BernoulliNBDistributi
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()` (`None` for the `Option`
 /// fields `priors` and `binarize`).
 // NOTE(review): the type default may differ from what this struct's own `Default` impl
 // assigns; that impl is not visible here — verify.
 pub struct BernoulliNBParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Option<Vec<T>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
    pub binarize: Option<T>,
 }
@@ -154,10 +157,13 @@ impl<T: RealNumber> Default for BernoulliNBParameters<T> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `BernoulliNBParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct BernoulliNBSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Vec<Option<Vec<T>>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
    pub binarize: Vec<Option<T>>,
 }
@@ -243,6 +243,7 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, a missing `alpha` deserializes to `T`'s
 /// `Default::default()`.
 // NOTE(review): that may differ from the `alpha` this struct's own `Default` impl assigns;
 // the impl is not visible here — verify.
 pub struct CategoricalNBParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: T,
 }
@@ -265,6 +266,7 @@ impl<T: RealNumber> Default for CategoricalNBParameters<T> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors `CategoricalNBParameters`, with the field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, a missing `alpha` deserializes to an empty `Vec`.
 pub struct CategoricalNBSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: Vec<T>,
 }
@@ -78,6 +78,7 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for GaussianNBDistributio
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, a missing `priors` deserializes to `None`.
 pub struct GaussianNBParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Option<Vec<T>>,
 }
@@ -100,6 +101,7 @@ impl<T: RealNumber> Default for GaussianNBParameters<T> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors `GaussianNBParameters`, with the field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, a missing `priors` deserializes to an empty `Vec`.
 pub struct GaussianNBSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Vec<Option<Vec<T>>>,
 }
@@ -86,8 +86,10 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribu
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, both fields are marked `serde(default)`: a missing
 /// `alpha` deserializes to `T`'s `Default::default()` and a missing `priors` to `None`.
 // NOTE(review): the type default for `alpha` may differ from what this struct's own `Default`
 // impl assigns; that impl is not visible here — verify.
 pub struct MultinomialNBParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Option<Vec<T>>,
 }
@@ -118,8 +120,10 @@ impl<T: RealNumber> Default for MultinomialNBParameters<T> {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `MultinomialNBParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct MultinomialNBSearchParameters<T: RealNumber> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Vec<Option<Vec<T>>>,
 }
@@ -49,16 +49,21 @@ use crate::neighbors::KNNWeightFunction;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// a function that defines a distance between each pair of points in training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    pub distance: D,
    #[cfg_attr(feature = "serde", serde(default))]
    /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
    pub algorithm: KNNAlgorithmName,
    #[cfg_attr(feature = "serde", serde(default))]
    /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
    pub weight: KNNWeightFunction,
    // NOTE(review): `serde(default)` on a `usize` yields `0` when `k` is missing from the input,
    // while the `Default` impl in this change sets `k: 3` — confirm the difference is intended.
    #[cfg_attr(feature = "serde", serde(default))]
    /// number of training samples to consider when estimating class for new point. Default value is 3.
    pub k: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// this parameter is not used
    t: PhantomData<T>,
 }
@@ -111,8 +116,8 @@ impl<T: RealNumber> Default for KNNClassifierParameters<T, Euclidian> {
    fn default() -> Self {
        KNNClassifierParameters {
            distance: Distances::euclidian(),
-            algorithm: KNNAlgorithmName::CoverTree,
+            algorithm: KNNAlgorithmName::default(),
-            weight: KNNWeightFunction::Uniform,
+            weight: KNNWeightFunction::default(),
            k: 3,
            t: PhantomData,
        }
@@ -52,16 +52,21 @@ use crate::neighbors::KNNWeightFunction;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// a function that defines a distance between each pair of points in training data.
    /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    distance: D,
    #[cfg_attr(feature = "serde", serde(default))]
    /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
    pub algorithm: KNNAlgorithmName,
    #[cfg_attr(feature = "serde", serde(default))]
    /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
    pub weight: KNNWeightFunction,
    // NOTE(review): `serde(default)` on a `usize` yields `0` when `k` is missing from the input,
    // while the `Default` impl in this change sets `k: 3` — confirm the difference is intended.
    #[cfg_attr(feature = "serde", serde(default))]
    /// number of training samples to consider when estimating class for new point. Default value is 3.
    pub k: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// this parameter is not used
    t: PhantomData<T>,
 }
@@ -113,8 +118,8 @@ impl<T: RealNumber> Default for KNNRegressorParameters<T, Euclidian> {
    fn default() -> Self {
        KNNRegressorParameters {
            distance: Distances::euclidian(),
-            algorithm: KNNAlgorithmName::CoverTree,
+            algorithm: KNNAlgorithmName::default(),
-            weight: KNNWeightFunction::Uniform,
+            weight: KNNWeightFunction::default(),
            k: 3,
            t: PhantomData,
        }
@@ -58,6 +58,12 @@ pub enum KNNWeightFunction {
    Distance,
 }
 impl Default for KNNWeightFunction {
    fn default() -> Self {
        KNNWeightFunction::Uniform
    }
 }
 impl KNNWeightFunction {
    fn calc_weights<T: RealNumber>(&self, distances: Vec<T>) -> std::vec::Vec<T> {
        match *self {
@@ -91,16 +91,22 @@ use crate::svm::{Kernel, Kernels, LinearKernel};
 #[derive(Debug, Clone)]
 /// SVC Parameters
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): the type default (e.g. `0` for `epoch`) may differ from what this struct's own
 // `Default` impl assigns; that impl is not visible here — verify.
 pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of epochs.
    pub epoch: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub c: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tolerance for stopping criterion.
    pub tol: T,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The kernel function.
    pub kernel: K,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Unused parameter.
    m: PhantomData<M>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the pseudo random number generation for shuffling the data for probability estimates
    seed: Option<u64>,
 }
@@ -109,16 +115,22 @@ pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `SVCParameters`, with each field except the `PhantomData` marker
 /// wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a `Vec` field
 /// missing from the input deserializes to an empty `Vec`.
 pub struct SVCSearchParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Number of epochs.
    pub epoch: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Regularization parameter.
    pub c: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tolerance for stopping criterion.
    pub tol: Vec<T>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The kernel function.
    pub kernel: Vec<K>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Unused parameter.
    m: PhantomData<M>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the pseudo random number generation for shuffling the data for probability estimates
    seed: Vec<Option<u64>>,
 }
@@ -83,14 +83,19 @@ use crate::rand::get_rng_impl;
 #[derive(Debug, Clone)]
 /// Parameters of Decision Tree
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): for the `usize` fields that is `0`, whereas the `Default` impl in this change
 // sets `min_samples_leaf: 1` and `min_samples_split: 2` — confirm the difference is intended.
 pub struct DecisionTreeClassifierParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Split criteria to use when building a tree.
    pub criterion: SplitCriterion,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum depth of the tree.
    pub max_depth: Option<u16>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node.
    pub min_samples_leaf: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the randomness of the estimator
    pub seed: Option<u64>,
 }
@@ -118,6 +123,12 @@ pub enum SplitCriterion {
    ClassificationError,
 }
 impl Default for SplitCriterion {
    fn default() -> Self {
        SplitCriterion::Gini
    }
 }
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug)]
 struct Node<T: RealNumber> {
@@ -196,7 +207,7 @@ impl DecisionTreeClassifierParameters {
 impl Default for DecisionTreeClassifierParameters {
    fn default() -> Self {
        DecisionTreeClassifierParameters {
-            criterion: SplitCriterion::Gini,
+            criterion: SplitCriterion::default(),
            max_depth: None,
            min_samples_leaf: 1,
            min_samples_split: 2,
@@ -78,12 +78,16 @@ use crate::rand::get_rng_impl;
 #[derive(Debug, Clone)]
 /// Parameters of Regression Tree
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input is filled with its type's `Default::default()`.
 // NOTE(review): the type default (`0` for `min_samples_leaf` / `min_samples_split`) may differ
 // from what this struct's own `Default` impl assigns; that impl is not visible here — verify.
 pub struct DecisionTreeRegressorParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// The maximum depth of the tree.
    pub max_depth: Option<u16>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node.
    pub min_samples_leaf: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the randomness of the estimator
    pub seed: Option<u64>,
 }
@@ -142,12 +146,16 @@ impl Default for DecisionTreeRegressorParameters {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 /// Mirrors the fields of `DecisionTreeRegressorParameters`, with each field wrapped in a `Vec`.
 ///
 /// With the `serde` feature enabled, every field is marked `serde(default)`: a field missing
 /// from the input deserializes to an empty `Vec`.
 pub struct DecisionTreeRegressorSearchParameters {
    #[cfg_attr(feature = "serde", serde(default))]
    /// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub max_depth: Vec<Option<u16>>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub min_samples_leaf: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub min_samples_split: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
    /// Controls the randomness of the estimator
    pub seed: Vec<Option<u64>>,
 }