From 764309e313224ba0f6f9047e55c7507da0145224 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Wed, 21 Sep 2022 19:48:31 -0700 Subject: [PATCH] make default params available to serde (#167) * add seed param to search params * make default params available to serde * lints * create defaults for enums * lint --- src/algorithm/neighbour/mod.rs | 6 ++++++ src/cluster/dbscan.rs | 11 ++++++++++- src/cluster/kmeans.rs | 7 +++++++ src/decomposition/pca.rs | 5 +++++ src/decomposition/svd.rs | 3 +++ src/ensemble/random_forest_classifier.rs | 16 ++++++++++++++++ src/ensemble/random_forest_regressor.rs | 14 ++++++++++++++ src/linear/elastic_net.rs | 10 ++++++++++ src/linear/lasso.rs | 8 ++++++++ src/linear/linear_regression.rs | 19 +++++++++---------- src/linear/logistic_regression.rs | 12 +++++++++++- src/linear/ridge_regression.rs | 11 ++++++++++- src/naive_bayes/bernoulli.rs | 6 ++++++ src/naive_bayes/categorical.rs | 2 ++ src/naive_bayes/gaussian.rs | 2 ++ src/naive_bayes/multinomial.rs | 4 ++++ src/neighbors/knn_classifier.rs | 9 +++++++-- src/neighbors/knn_regressor.rs | 9 +++++++-- src/neighbors/mod.rs | 6 ++++++ src/svm/svc.rs | 12 ++++++++++++ src/tree/decision_tree_classifier.rs | 13 ++++++++++++- src/tree/decision_tree_regressor.rs | 8 ++++++++ 22 files changed, 175 insertions(+), 18 deletions(-) diff --git a/src/algorithm/neighbour/mod.rs b/src/algorithm/neighbour/mod.rs index 42ab7bc..f59448a 100644 --- a/src/algorithm/neighbour/mod.rs +++ b/src/algorithm/neighbour/mod.rs @@ -59,6 +59,12 @@ pub enum KNNAlgorithmName { CoverTree, } +impl Default for KNNAlgorithmName { + fn default() -> Self { + KNNAlgorithmName::CoverTree + } +} + #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug)] pub(crate) enum KNNAlgorithm, T>> { diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 621d017..ba8722e 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -65,17 +65,22 @@ pub struct DBSCAN, T>> { eps: T, } +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] /// DBSCAN clustering algorithm parameters pub struct DBSCANParameters, T>> { + #[cfg_attr(feature = "serde", serde(default))] /// a function that defines a distance between each pair of point in training data. /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait. /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions. pub distance: D, + #[cfg_attr(feature = "serde", serde(default))] /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. pub min_samples: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum distance between two samples for one to be considered as in the neighborhood of the other. pub eps: T, + #[cfg_attr(feature = "serde", serde(default))] /// KNN algorithm to use. pub algorithm: KNNAlgorithmName, } @@ -113,14 +118,18 @@ impl, T>> DBSCANParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct DBSCANSearchParameters, T>> { + #[cfg_attr(feature = "serde", serde(default))] /// a function that defines a distance between each pair of point in training data. /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait. /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions. pub distance: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. pub min_samples: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum distance between two samples for one to be considered as in the neighborhood of the other. pub eps: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// KNN algorithm to use. pub algorithm: Vec, } @@ -221,7 +230,7 @@ impl Default for DBSCANParameters { distance: Distances::euclidian(), min_samples: 5, eps: T::half(), - algorithm: KNNAlgorithmName::CoverTree, + algorithm: KNNAlgorithmName::default(), } } } diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 404f7b0..6f45e6c 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -102,13 +102,17 @@ impl PartialEq for KMeans { } } +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] /// K-Means clustering algorithm parameters pub struct KMeansParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Number of clusters. pub k: usize, + #[cfg_attr(feature = "serde", serde(default))] /// Maximum number of iterations of the k-means algorithm for a single run. pub max_iter: usize, + #[cfg_attr(feature = "serde", serde(default))] /// Determines random number generation for centroid initialization. /// Use an int to make the randomness deterministic pub seed: Option, @@ -141,10 +145,13 @@ impl Default for KMeansParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct KMeansSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Number of clusters. pub k: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Maximum number of iterations of the k-means algorithm for a single run. pub max_iter: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Determines random number generation for centroid initialization. /// Use an int to make the randomness deterministic pub seed: Vec>, diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs index 296926a..7961d41 100644 --- a/src/decomposition/pca.rs +++ b/src/decomposition/pca.rs @@ -83,11 +83,14 @@ impl> PartialEq for PCA { } } +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] /// PCA parameters pub struct PCAParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Number of components to keep. pub n_components: usize, + #[cfg_attr(feature = "serde", serde(default))] /// By default, covariance matrix is used to compute principal components. /// Enable this flag if you want to use correlation matrix instead. pub use_correlation_matrix: bool, @@ -120,8 +123,10 @@ impl Default for PCAParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct PCASearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Number of components to keep. pub n_components: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// By default, covariance matrix is used to compute principal components. /// Enable this flag if you want to use correlation matrix instead. pub use_correlation_matrix: Vec, diff --git a/src/decomposition/svd.rs b/src/decomposition/svd.rs index 3001fd9..9a1e33d 100644 --- a/src/decomposition/svd.rs +++ b/src/decomposition/svd.rs @@ -69,9 +69,11 @@ impl> PartialEq for SVD { } } +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] /// SVD parameters pub struct SVDParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Number of components to keep. pub n_components: usize, } @@ -94,6 +96,7 @@ impl SVDParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct SVDSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Maximum number of iterations of the k-means algorithm for a single run. pub n_components: Vec, } diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index 331dab7..4264305 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -67,20 +67,28 @@ use crate::tree::decision_tree_classifier::{ #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct RandomForestClassifierParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub criterion: SplitCriterion, + #[cfg_attr(feature = "serde", serde(default))] /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub max_depth: Option, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub min_samples_leaf: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub min_samples_split: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The number of trees in the forest. pub n_trees: u16, + #[cfg_attr(feature = "serde", serde(default))] /// Number of random sample of predictors to use as split candidates. pub m: Option, + #[cfg_attr(feature = "serde", serde(default))] /// Whether to keep samples used for tree generation. This is required for OOB prediction. pub keep_samples: bool, + #[cfg_attr(feature = "serde", serde(default))] /// Seed used for bootstrap sampling and feature selection for each tree. pub seed: u64, } @@ -198,20 +206,28 @@ impl> Predictor for RandomForestCla #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct RandomForestClassifierSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub criterion: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub max_depth: Vec>, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub min_samples_leaf: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub min_samples_split: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The number of trees in the forest. pub n_trees: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Number of random sample of predictors to use as split candidates. pub m: Vec>, + #[cfg_attr(feature = "serde", serde(default))] /// Whether to keep samples used for tree generation. This is required for OOB prediction. pub keep_samples: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Seed used for bootstrap sampling and feature selection for each tree. pub seed: Vec, } diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index 1270685..d7e61c3 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -65,18 +65,25 @@ use crate::tree::decision_tree_regressor::{ /// Parameters of the Random Forest Regressor /// Some parameters here are passed directly into base estimator. pub struct RandomForestRegressorParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html) pub max_depth: Option, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html) pub min_samples_leaf: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html) pub min_samples_split: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The number of trees in the forest. pub n_trees: usize, + #[cfg_attr(feature = "serde", serde(default))] /// Number of random sample of predictors to use as split candidates. pub m: Option, + #[cfg_attr(feature = "serde", serde(default))] /// Whether to keep samples used for tree generation. This is required for OOB prediction. pub keep_samples: bool, + #[cfg_attr(feature = "serde", serde(default))] /// Seed used for bootstrap sampling and feature selection for each tree. pub seed: u64, } @@ -181,18 +188,25 @@ impl> Predictor for RandomForestReg #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct RandomForestRegressorSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub max_depth: Vec>, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub min_samples_leaf: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html) pub min_samples_split: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The number of trees in the forest. pub n_trees: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Number of random sample of predictors to use as split candidates. pub m: Vec>, + #[cfg_attr(feature = "serde", serde(default))] /// Whether to keep samples used for tree generation. This is required for OOB prediction. pub keep_samples: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Seed used for bootstrap sampling and feature selection for each tree. pub seed: Vec, } diff --git a/src/linear/elastic_net.rs b/src/linear/elastic_net.rs index 0e9cb57..8ba3287 100644 --- a/src/linear/elastic_net.rs +++ b/src/linear/elastic_net.rs @@ -71,16 +71,21 @@ use crate::linear::lasso_optimizer::InteriorPointOptimizer; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct ElasticNetParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub alpha: T, + #[cfg_attr(feature = "serde", serde(default))] /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1. /// For l1_ratio = 0 the penalty is an L2 penalty. /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. pub l1_ratio: T, + #[cfg_attr(feature = "serde", serde(default))] /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation. pub normalize: bool, + #[cfg_attr(feature = "serde", serde(default))] /// The tolerance for the optimization pub tol: T, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum number of iterations pub max_iter: usize, } @@ -139,16 +144,21 @@ impl Default for ElasticNetParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct ElasticNetSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub alpha: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1. /// For l1_ratio = 0 the penalty is an L2 penalty. /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. pub l1_ratio: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation. pub normalize: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The tolerance for the optimization pub tol: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum number of iterations pub max_iter: Vec, } diff --git a/src/linear/lasso.rs b/src/linear/lasso.rs index aae7e50..d1445a0 100644 --- a/src/linear/lasso.rs +++ b/src/linear/lasso.rs @@ -38,13 +38,17 @@ use crate::math::num::RealNumber; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct LassoParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Controls the strength of the penalty to the loss function. pub alpha: T, + #[cfg_attr(feature = "serde", serde(default))] /// If true the regressors X will be normalized before regression /// by subtracting the mean and dividing by the standard deviation. pub normalize: bool, + #[cfg_attr(feature = "serde", serde(default))] /// The tolerance for the optimization pub tol: T, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum number of iterations pub max_iter: usize, } @@ -116,13 +120,17 @@ impl> Predictor for Lasso { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct LassoSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Controls the strength of the penalty to the loss function. pub alpha: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// If true the regressors X will be normalized before regression /// by subtracting the mean and dividing by the standard deviation. pub normalize: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The tolerance for the optimization pub tol: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum number of iterations pub max_iter: Vec, } diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index c95e6e1..12769bb 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -71,19 +71,21 @@ use crate::linalg::Matrix; use crate::math::num::RealNumber; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Default, Clone, Eq, PartialEq)] /// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable. pub enum LinearRegressionSolverName { /// QR decomposition, see [QR](../../linalg/qr/index.html) QR, + #[default] /// SVD decomposition, see [SVD](../../linalg/svd/index.html) SVD, } /// Linear Regression parameters #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub struct LinearRegressionParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Solver to use for estimation of regression coefficients. pub solver: LinearRegressionSolverName, } @@ -105,18 +107,11 @@ impl LinearRegressionParameters { } } -impl Default for LinearRegressionParameters { - fn default() -> Self { - LinearRegressionParameters { - solver: LinearRegressionSolverName::SVD, - } - } -} - /// Linear Regression grid search parameters #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct LinearRegressionSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Solver to use for estimation of regression coefficients. pub solver: Vec, } @@ -353,5 +348,9 @@ mod tests { serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); assert_eq!(lr, deserialized_lr); + + let default = LinearRegressionParameters::default(); + let parameters: LinearRegressionParameters = serde_json::from_str("{}").unwrap(); + assert_eq!(parameters.solver, default.solver); } } diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 3a4c706..e8fd01f 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -75,12 +75,20 @@ pub enum LogisticRegressionSolverName { LBFGS, } +impl Default for LogisticRegressionSolverName { + fn default() -> Self { + LogisticRegressionSolverName::LBFGS + } +} + /// Logistic Regression parameters #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct LogisticRegressionParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Solver to use for estimation of regression coefficients. pub solver: LogisticRegressionSolverName, + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub alpha: T, } @@ -89,8 +97,10 @@ pub struct LogisticRegressionParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct LogisticRegressionSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Solver to use for estimation of regression coefficients. pub solver: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub alpha: Vec, } @@ -204,7 +214,7 @@ impl LogisticRegressionParameters { impl Default for LogisticRegressionParameters { fn default() -> Self { LogisticRegressionParameters { - solver: LogisticRegressionSolverName::LBFGS, + solver: LogisticRegressionSolverName::default(), alpha: T::zero(), } } diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index 4c3d4ff..396953d 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -77,6 +77,12 @@ pub enum RidgeRegressionSolverName { SVD, } +impl Default for RidgeRegressionSolverName { + fn default() -> Self { + RidgeRegressionSolverName::Cholesky + } +} + /// Ridge Regression parameters #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] @@ -94,10 +100,13 @@ pub struct RidgeRegressionParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct RidgeRegressionSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Solver to use for estimation of regression coefficients. pub solver: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub alpha: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// If true the regressors X will be normalized before regression /// by subtracting the mean and dividing by the standard deviation. pub normalize: Vec, @@ -204,7 +213,7 @@ impl RidgeRegressionParameters { impl Default for RidgeRegressionParameters { fn default() -> Self { RidgeRegressionParameters { - solver: RidgeRegressionSolverName::Cholesky, + solver: RidgeRegressionSolverName::default(), alpha: T::one(), normalize: true, } diff --git a/src/naive_bayes/bernoulli.rs b/src/naive_bayes/bernoulli.rs index 29c6c84..d71197e 100644 --- a/src/naive_bayes/bernoulli.rs +++ b/src/naive_bayes/bernoulli.rs @@ -114,10 +114,13 @@ impl> NBDistribution for BernoulliNBDistributi #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct BernoulliNBParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). pub alpha: T, + #[cfg_attr(feature = "serde", serde(default))] /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data pub priors: Option>, + #[cfg_attr(feature = "serde", serde(default))] /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors. pub binarize: Option, } @@ -154,10 +157,13 @@ impl Default for BernoulliNBParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct BernoulliNBSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). pub alpha: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data pub priors: Vec>>, + #[cfg_attr(feature = "serde", serde(default))] /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors. pub binarize: Vec>, } diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs index 7855688..9cda7a8 100644 --- a/src/naive_bayes/categorical.rs +++ b/src/naive_bayes/categorical.rs @@ -243,6 +243,7 @@ impl CategoricalNBDistribution { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct CategoricalNBParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). pub alpha: T, } @@ -265,6 +266,7 @@ impl Default for CategoricalNBParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct CategoricalNBSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). pub alpha: Vec, } diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index 24bbdd3..37aeb0f 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -78,6 +78,7 @@ impl> NBDistribution for GaussianNBDistributio #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct GaussianNBParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data pub priors: Option>, } @@ -100,6 +101,7 @@ impl Default for GaussianNBParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct GaussianNBSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data pub priors: Vec>>, } diff --git a/src/naive_bayes/multinomial.rs b/src/naive_bayes/multinomial.rs index 6e846c1..8119fa9 100644 --- a/src/naive_bayes/multinomial.rs +++ b/src/naive_bayes/multinomial.rs @@ -86,8 +86,10 @@ impl> NBDistribution for MultinomialNBDistribu #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct MultinomialNBParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). pub alpha: T, + #[cfg_attr(feature = "serde", serde(default))] /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data pub priors: Option>, } @@ -118,8 +120,10 @@ impl Default for MultinomialNBParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct MultinomialNBSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). pub alpha: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data pub priors: Vec>>, } diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index 8723900..5e34ce7 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -49,16 +49,21 @@ use crate::neighbors::KNNWeightFunction; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct KNNClassifierParameters, T>> { + #[cfg_attr(feature = "serde", serde(default))] /// a function that defines a distance between each pair of point in training data. /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait. /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions. pub distance: D, + #[cfg_attr(feature = "serde", serde(default))] /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default. pub algorithm: KNNAlgorithmName, + #[cfg_attr(feature = "serde", serde(default))] /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`. pub weight: KNNWeightFunction, + #[cfg_attr(feature = "serde", serde(default))] /// number of training samples to consider when estimating class for new point. Default value is 3. pub k: usize, + #[cfg_attr(feature = "serde", serde(default))] /// this parameter is not used t: PhantomData, } @@ -111,8 +116,8 @@ impl Default for KNNClassifierParameters { fn default() -> Self { KNNClassifierParameters { distance: Distances::euclidian(), - algorithm: KNNAlgorithmName::CoverTree, - weight: KNNWeightFunction::Uniform, + algorithm: KNNAlgorithmName::default(), + weight: KNNWeightFunction::default(), k: 3, t: PhantomData, } diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index 649cd1f..8fdda3d 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -52,16 +52,21 @@ use crate::neighbors::KNNWeightFunction; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct KNNRegressorParameters, T>> { + #[cfg_attr(feature = "serde", serde(default))] /// a function that defines a distance between each pair of point in training data. /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait. /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions. distance: D, + #[cfg_attr(feature = "serde", serde(default))] /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default. pub algorithm: KNNAlgorithmName, + #[cfg_attr(feature = "serde", serde(default))] /// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`. pub weight: KNNWeightFunction, + #[cfg_attr(feature = "serde", serde(default))] /// number of training samples to consider when estimating class for new point. Default value is 3. pub k: usize, + #[cfg_attr(feature = "serde", serde(default))] /// this parameter is not used t: PhantomData, } @@ -113,8 +118,8 @@ impl Default for KNNRegressorParameters { fn default() -> Self { KNNRegressorParameters { distance: Distances::euclidian(), - algorithm: KNNAlgorithmName::CoverTree, - weight: KNNWeightFunction::Uniform, + algorithm: KNNAlgorithmName::default(), + weight: KNNWeightFunction::default(), k: 3, t: PhantomData, } diff --git a/src/neighbors/mod.rs b/src/neighbors/mod.rs index 86b1e46..5a713ab 100644 --- a/src/neighbors/mod.rs +++ b/src/neighbors/mod.rs @@ -58,6 +58,12 @@ pub enum KNNWeightFunction { Distance, } +impl Default for KNNWeightFunction { + fn default() -> Self { + KNNWeightFunction::Uniform + } +} + impl KNNWeightFunction { fn calc_weights(&self, distances: Vec) -> std::vec::Vec { match *self { diff --git a/src/svm/svc.rs b/src/svm/svc.rs index d390866..97b91de 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -91,16 +91,22 @@ use crate::svm::{Kernel, Kernels, LinearKernel}; #[derive(Debug, Clone)] /// SVC Parameters pub struct SVCParameters, K: Kernel> { + #[cfg_attr(feature = "serde", serde(default))] /// Number of epochs. pub epoch: usize, + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub c: T, + #[cfg_attr(feature = "serde", serde(default))] /// Tolerance for stopping criterion. pub tol: T, + #[cfg_attr(feature = "serde", serde(default))] /// The kernel function. pub kernel: K, + #[cfg_attr(feature = "serde", serde(default))] /// Unused parameter. m: PhantomData, + #[cfg_attr(feature = "serde", serde(default))] /// Controls the pseudo random number generation for shuffling the data for probability estimates seed: Option, } @@ -109,16 +115,22 @@ pub struct SVCParameters, K: Kernel #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct SVCSearchParameters, K: Kernel> { + #[cfg_attr(feature = "serde", serde(default))] /// Number of epochs. pub epoch: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Regularization parameter. pub c: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Tolerance for stopping epoch. pub tol: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The kernel function. pub kernel: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Unused parameter. m: PhantomData, + #[cfg_attr(feature = "serde", serde(default))] /// Controls the pseudo random number generation for shuffling the data for probability estimates seed: Vec>, } diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index acc3fb0..d330fdf 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -83,14 +83,19 @@ use crate::rand::get_rng_impl; #[derive(Debug, Clone)] /// Parameters of Decision Tree pub struct DecisionTreeClassifierParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Split criteria to use when building a tree. pub criterion: SplitCriterion, + #[cfg_attr(feature = "serde", serde(default))] /// The maximum depth of the tree. pub max_depth: Option, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. pub min_samples_leaf: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. pub min_samples_split: usize, + #[cfg_attr(feature = "serde", serde(default))] /// Controls the randomness of the estimator pub seed: Option, } @@ -118,6 +123,12 @@ pub enum SplitCriterion { ClassificationError, } +impl Default for SplitCriterion { + fn default() -> Self { + SplitCriterion::Gini + } +} + #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug)] struct Node { @@ -196,7 +207,7 @@ impl DecisionTreeClassifierParameters { impl Default for DecisionTreeClassifierParameters { fn default() -> Self { DecisionTreeClassifierParameters { - criterion: SplitCriterion::Gini, + criterion: SplitCriterion::default(), max_depth: None, min_samples_leaf: 1, min_samples_split: 2, diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 12bb9c9..c745a0d 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -78,12 +78,16 @@ use crate::rand::get_rng_impl; #[derive(Debug, Clone)] /// Parameters of Regression Tree pub struct DecisionTreeRegressorParameters { + #[cfg_attr(feature = "serde", serde(default))] /// The maximum depth of the tree. pub max_depth: Option, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. pub min_samples_leaf: usize, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. pub min_samples_split: usize, + #[cfg_attr(feature = "serde", serde(default))] /// Controls the randomness of the estimator pub seed: Option, } @@ -142,12 +146,16 @@ impl Default for DecisionTreeRegressorParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct DecisionTreeRegressorSearchParameters { + #[cfg_attr(feature = "serde", serde(default))] /// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html) pub max_depth: Vec>, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html) pub min_samples_leaf: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html) pub min_samples_split: Vec, + #[cfg_attr(feature = "serde", serde(default))] /// Controls the randomness of the estimator pub seed: Vec>, }