make default params available to serde (#167)
* add seed param to search params * make default params available to serde * lints * create defaults for enums * lint
This commit is contained in:
@@ -59,6 +59,12 @@ pub enum KNNAlgorithmName {
|
||||
CoverTree,
|
||||
}
|
||||
|
||||
impl Default for KNNAlgorithmName {
|
||||
fn default() -> Self {
|
||||
KNNAlgorithmName::CoverTree
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum KNNAlgorithm<T: RealNumber, D: Distance<Vec<T>, T>> {
|
||||
|
||||
+10
-1
@@ -65,17 +65,22 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
|
||||
eps: T,
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
/// DBSCAN clustering algorithm parameters
|
||||
pub struct DBSCANParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// a function that defines a distance between each pair of point in training data.
|
||||
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
|
||||
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
|
||||
pub distance: D,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
|
||||
pub min_samples: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
|
||||
pub eps: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// KNN algorithm to use.
|
||||
pub algorithm: KNNAlgorithmName,
|
||||
}
|
||||
@@ -113,14 +118,18 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> DBSCANParameters<T, D> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DBSCANSearchParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// a function that defines a distance between each pair of point in training data.
|
||||
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
|
||||
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
|
||||
pub distance: Vec<D>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
|
||||
pub min_samples: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
|
||||
pub eps: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// KNN algorithm to use.
|
||||
pub algorithm: Vec<KNNAlgorithmName>,
|
||||
}
|
||||
@@ -221,7 +230,7 @@ impl<T: RealNumber> Default for DBSCANParameters<T, Euclidian> {
|
||||
distance: Distances::euclidian(),
|
||||
min_samples: 5,
|
||||
eps: T::half(),
|
||||
algorithm: KNNAlgorithmName::CoverTree,
|
||||
algorithm: KNNAlgorithmName::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,13 +102,17 @@ impl<T: RealNumber> PartialEq for KMeans<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
/// K-Means clustering algorithm parameters
|
||||
pub struct KMeansParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of clusters.
|
||||
pub k: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Maximum number of iterations of the k-means algorithm for a single run.
|
||||
pub max_iter: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Determines random number generation for centroid initialization.
|
||||
/// Use an int to make the randomness deterministic
|
||||
pub seed: Option<u64>,
|
||||
@@ -141,10 +145,13 @@ impl Default for KMeansParameters {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct KMeansSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of clusters.
|
||||
pub k: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Maximum number of iterations of the k-means algorithm for a single run.
|
||||
pub max_iter: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Determines random number generation for centroid initialization.
|
||||
/// Use an int to make the randomness deterministic
|
||||
pub seed: Vec<Option<u64>>,
|
||||
|
||||
@@ -83,11 +83,14 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for PCA<T, M> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
/// PCA parameters
|
||||
pub struct PCAParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of components to keep.
|
||||
pub n_components: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// By default, covariance matrix is used to compute principal components.
|
||||
/// Enable this flag if you want to use correlation matrix instead.
|
||||
pub use_correlation_matrix: bool,
|
||||
@@ -120,8 +123,10 @@ impl Default for PCAParameters {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PCASearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of components to keep.
|
||||
pub n_components: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// By default, covariance matrix is used to compute principal components.
|
||||
/// Enable this flag if you want to use correlation matrix instead.
|
||||
pub use_correlation_matrix: Vec<bool>,
|
||||
|
||||
@@ -69,9 +69,11 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for SVD<T, M> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
/// SVD parameters
|
||||
pub struct SVDParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of components to keep.
|
||||
pub n_components: usize,
|
||||
}
|
||||
@@ -94,6 +96,7 @@ impl SVDParameters {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SVDSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Maximum number of iterations of the k-means algorithm for a single run.
|
||||
pub n_components: Vec<usize>,
|
||||
}
|
||||
|
||||
@@ -67,20 +67,28 @@ use crate::tree::decision_tree_classifier::{
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RandomForestClassifierParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub criterion: SplitCriterion,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub max_depth: Option<u16>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub min_samples_leaf: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub min_samples_split: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The number of trees in the forest.
|
||||
pub n_trees: u16,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of random sample of predictors to use as split candidates.
|
||||
pub m: Option<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
|
||||
pub keep_samples: bool,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Seed used for bootstrap sampling and feature selection for each tree.
|
||||
pub seed: u64,
|
||||
}
|
||||
@@ -198,20 +206,28 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestCla
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RandomForestClassifierSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub criterion: Vec<SplitCriterion>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub max_depth: Vec<Option<u16>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub min_samples_leaf: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub min_samples_split: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The number of trees in the forest.
|
||||
pub n_trees: Vec<u16>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of random sample of predictors to use as split candidates.
|
||||
pub m: Vec<Option<usize>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
|
||||
pub keep_samples: Vec<bool>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Seed used for bootstrap sampling and feature selection for each tree.
|
||||
pub seed: Vec<u64>,
|
||||
}
|
||||
|
||||
@@ -65,18 +65,25 @@ use crate::tree::decision_tree_regressor::{
|
||||
/// Parameters of the Random Forest Regressor
|
||||
/// Some parameters here are passed directly into base estimator.
|
||||
pub struct RandomForestRegressorParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
|
||||
pub max_depth: Option<u16>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
|
||||
pub min_samples_leaf: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
|
||||
pub min_samples_split: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The number of trees in the forest.
|
||||
pub n_trees: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of random sample of predictors to use as split candidates.
|
||||
pub m: Option<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
|
||||
pub keep_samples: bool,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Seed used for bootstrap sampling and feature selection for each tree.
|
||||
pub seed: u64,
|
||||
}
|
||||
@@ -181,18 +188,25 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestReg
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RandomForestRegressorSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub max_depth: Vec<Option<u16>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub min_samples_leaf: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
|
||||
pub min_samples_split: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The number of trees in the forest.
|
||||
pub n_trees: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of random sample of predictors to use as split candidates.
|
||||
pub m: Vec<Option<usize>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
|
||||
pub keep_samples: Vec<bool>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Seed used for bootstrap sampling and feature selection for each tree.
|
||||
pub seed: Vec<u64>,
|
||||
}
|
||||
|
||||
@@ -71,16 +71,21 @@ use crate::linear::lasso_optimizer::InteriorPointOptimizer;
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ElasticNetParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
|
||||
/// For l1_ratio = 0 the penalty is an L2 penalty.
|
||||
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
|
||||
pub l1_ratio: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: bool,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: usize,
|
||||
}
|
||||
@@ -139,16 +144,21 @@ impl<T: RealNumber> Default for ElasticNetParameters<T> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ElasticNetSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
|
||||
/// For l1_ratio = 0 the penalty is an L2 penalty.
|
||||
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
|
||||
pub l1_ratio: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: Vec<bool>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: Vec<usize>,
|
||||
}
|
||||
|
||||
@@ -38,13 +38,17 @@ use crate::math::num::RealNumber;
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LassoParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the strength of the penalty to the loss function.
|
||||
pub alpha: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If true the regressors X will be normalized before regression
|
||||
/// by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: bool,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: usize,
|
||||
}
|
||||
@@ -116,13 +120,17 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LassoSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the strength of the penalty to the loss function.
|
||||
pub alpha: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If true the regressors X will be normalized before regression
|
||||
/// by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: Vec<bool>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The tolerance for the optimization
|
||||
pub tol: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum number of iterations
|
||||
pub max_iter: Vec<usize>,
|
||||
}
|
||||
|
||||
@@ -71,19 +71,21 @@ use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
#[derive(Debug, Default, Clone, Eq, PartialEq)]
|
||||
/// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable.
|
||||
pub enum LinearRegressionSolverName {
|
||||
/// QR decomposition, see [QR](../../linalg/qr/index.html)
|
||||
QR,
|
||||
#[default]
|
||||
/// SVD decomposition, see [SVD](../../linalg/svd/index.html)
|
||||
SVD,
|
||||
}
|
||||
|
||||
/// Linear Regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct LinearRegressionParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: LinearRegressionSolverName,
|
||||
}
|
||||
@@ -105,18 +107,11 @@ impl LinearRegressionParameters {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LinearRegressionParameters {
|
||||
fn default() -> Self {
|
||||
LinearRegressionParameters {
|
||||
solver: LinearRegressionSolverName::SVD,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Linear Regression grid search parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LinearRegressionSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: Vec<LinearRegressionSolverName>,
|
||||
}
|
||||
@@ -353,5 +348,9 @@ mod tests {
|
||||
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(lr, deserialized_lr);
|
||||
|
||||
let default = LinearRegressionParameters::default();
|
||||
let parameters: LinearRegressionParameters = serde_json::from_str("{}").unwrap();
|
||||
assert_eq!(parameters.solver, default.solver);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,12 +75,20 @@ pub enum LogisticRegressionSolverName {
|
||||
LBFGS,
|
||||
}
|
||||
|
||||
impl Default for LogisticRegressionSolverName {
|
||||
fn default() -> Self {
|
||||
LogisticRegressionSolverName::LBFGS
|
||||
}
|
||||
}
|
||||
|
||||
/// Logistic Regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LogisticRegressionParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: LogisticRegressionSolverName,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: T,
|
||||
}
|
||||
@@ -89,8 +97,10 @@ pub struct LogisticRegressionParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LogisticRegressionSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: Vec<LogisticRegressionSolverName>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: Vec<T>,
|
||||
}
|
||||
@@ -204,7 +214,7 @@ impl<T: RealNumber> LogisticRegressionParameters<T> {
|
||||
impl<T: RealNumber> Default for LogisticRegressionParameters<T> {
|
||||
fn default() -> Self {
|
||||
LogisticRegressionParameters {
|
||||
solver: LogisticRegressionSolverName::LBFGS,
|
||||
solver: LogisticRegressionSolverName::default(),
|
||||
alpha: T::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,6 +77,12 @@ pub enum RidgeRegressionSolverName {
|
||||
SVD,
|
||||
}
|
||||
|
||||
impl Default for RidgeRegressionSolverName {
|
||||
fn default() -> Self {
|
||||
RidgeRegressionSolverName::Cholesky
|
||||
}
|
||||
}
|
||||
|
||||
/// Ridge Regression parameters
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -94,10 +100,13 @@ pub struct RidgeRegressionParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RidgeRegressionSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Solver to use for estimation of regression coefficients.
|
||||
pub solver: Vec<RidgeRegressionSolverName>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub alpha: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// If true the regressors X will be normalized before regression
|
||||
/// by subtracting the mean and dividing by the standard deviation.
|
||||
pub normalize: Vec<bool>,
|
||||
@@ -204,7 +213,7 @@ impl<T: RealNumber> RidgeRegressionParameters<T> {
|
||||
impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
|
||||
fn default() -> Self {
|
||||
RidgeRegressionParameters {
|
||||
solver: RidgeRegressionSolverName::Cholesky,
|
||||
solver: RidgeRegressionSolverName::default(),
|
||||
alpha: T::one(),
|
||||
normalize: true,
|
||||
}
|
||||
|
||||
@@ -114,10 +114,13 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for BernoulliNBDistributi
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BernoulliNBParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
|
||||
pub alpha: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
|
||||
pub priors: Option<Vec<T>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
|
||||
pub binarize: Option<T>,
|
||||
}
|
||||
@@ -154,10 +157,13 @@ impl<T: RealNumber> Default for BernoulliNBParameters<T> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BernoulliNBSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
|
||||
pub alpha: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
|
||||
pub priors: Vec<Option<Vec<T>>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
|
||||
pub binarize: Vec<Option<T>>,
|
||||
}
|
||||
|
||||
@@ -243,6 +243,7 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CategoricalNBParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
|
||||
pub alpha: T,
|
||||
}
|
||||
@@ -265,6 +266,7 @@ impl<T: RealNumber> Default for CategoricalNBParameters<T> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CategoricalNBSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
|
||||
pub alpha: Vec<T>,
|
||||
}
|
||||
|
||||
@@ -78,6 +78,7 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for GaussianNBDistributio
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GaussianNBParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
|
||||
pub priors: Option<Vec<T>>,
|
||||
}
|
||||
@@ -100,6 +101,7 @@ impl<T: RealNumber> Default for GaussianNBParameters<T> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GaussianNBSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
|
||||
pub priors: Vec<Option<Vec<T>>>,
|
||||
}
|
||||
|
||||
@@ -86,8 +86,10 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribu
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MultinomialNBParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
|
||||
pub alpha: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
|
||||
pub priors: Option<Vec<T>>,
|
||||
}
|
||||
@@ -118,8 +120,10 @@ impl<T: RealNumber> Default for MultinomialNBParameters<T> {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MultinomialNBSearchParameters<T: RealNumber> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
|
||||
pub alpha: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
|
||||
pub priors: Vec<Option<Vec<T>>>,
|
||||
}
|
||||
|
||||
@@ -49,16 +49,21 @@ use crate::neighbors::KNNWeightFunction;
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// a function that defines a distance between each pair of point in training data.
|
||||
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
|
||||
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
|
||||
pub distance: D,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
|
||||
pub algorithm: KNNAlgorithmName,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
|
||||
pub weight: KNNWeightFunction,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// number of training samples to consider when estimating class for new point. Default value is 3.
|
||||
pub k: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// this parameter is not used
|
||||
t: PhantomData<T>,
|
||||
}
|
||||
@@ -111,8 +116,8 @@ impl<T: RealNumber> Default for KNNClassifierParameters<T, Euclidian> {
|
||||
fn default() -> Self {
|
||||
KNNClassifierParameters {
|
||||
distance: Distances::euclidian(),
|
||||
algorithm: KNNAlgorithmName::CoverTree,
|
||||
weight: KNNWeightFunction::Uniform,
|
||||
algorithm: KNNAlgorithmName::default(),
|
||||
weight: KNNWeightFunction::default(),
|
||||
k: 3,
|
||||
t: PhantomData,
|
||||
}
|
||||
|
||||
@@ -52,16 +52,21 @@ use crate::neighbors::KNNWeightFunction;
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// a function that defines a distance between each pair of point in training data.
|
||||
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
|
||||
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
|
||||
distance: D,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
|
||||
pub algorithm: KNNAlgorithmName,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
|
||||
pub weight: KNNWeightFunction,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// number of training samples to consider when estimating class for new point. Default value is 3.
|
||||
pub k: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// this parameter is not used
|
||||
t: PhantomData<T>,
|
||||
}
|
||||
@@ -113,8 +118,8 @@ impl<T: RealNumber> Default for KNNRegressorParameters<T, Euclidian> {
|
||||
fn default() -> Self {
|
||||
KNNRegressorParameters {
|
||||
distance: Distances::euclidian(),
|
||||
algorithm: KNNAlgorithmName::CoverTree,
|
||||
weight: KNNWeightFunction::Uniform,
|
||||
algorithm: KNNAlgorithmName::default(),
|
||||
weight: KNNWeightFunction::default(),
|
||||
k: 3,
|
||||
t: PhantomData,
|
||||
}
|
||||
|
||||
@@ -58,6 +58,12 @@ pub enum KNNWeightFunction {
|
||||
Distance,
|
||||
}
|
||||
|
||||
impl Default for KNNWeightFunction {
|
||||
fn default() -> Self {
|
||||
KNNWeightFunction::Uniform
|
||||
}
|
||||
}
|
||||
|
||||
impl KNNWeightFunction {
|
||||
fn calc_weights<T: RealNumber>(&self, distances: Vec<T>) -> std::vec::Vec<T> {
|
||||
match *self {
|
||||
|
||||
@@ -91,16 +91,22 @@ use crate::svm::{Kernel, Kernels, LinearKernel};
|
||||
#[derive(Debug, Clone)]
|
||||
/// SVC Parameters
|
||||
pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of epochs.
|
||||
pub epoch: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub c: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tolerance for stopping criterion.
|
||||
pub tol: T,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The kernel function.
|
||||
pub kernel: K,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Unused parameter.
|
||||
m: PhantomData<M>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the pseudo random number generation for shuffling the data for probability estimates
|
||||
seed: Option<u64>,
|
||||
}
|
||||
@@ -109,16 +115,22 @@ pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SVCSearchParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Number of epochs.
|
||||
pub epoch: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Regularization parameter.
|
||||
pub c: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tolerance for stopping epoch.
|
||||
pub tol: Vec<T>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The kernel function.
|
||||
pub kernel: Vec<K>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Unused parameter.
|
||||
m: PhantomData<M>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the pseudo random number generation for shuffling the data for probability estimates
|
||||
seed: Vec<Option<u64>>,
|
||||
}
|
||||
|
||||
@@ -83,14 +83,19 @@ use crate::rand::get_rng_impl;
|
||||
#[derive(Debug, Clone)]
|
||||
/// Parameters of Decision Tree
|
||||
pub struct DecisionTreeClassifierParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Split criteria to use when building a tree.
|
||||
pub criterion: SplitCriterion,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum depth of the tree.
|
||||
pub max_depth: Option<u16>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node.
|
||||
pub min_samples_leaf: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node.
|
||||
pub min_samples_split: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the randomness of the estimator
|
||||
pub seed: Option<u64>,
|
||||
}
|
||||
@@ -118,6 +123,12 @@ pub enum SplitCriterion {
|
||||
ClassificationError,
|
||||
}
|
||||
|
||||
impl Default for SplitCriterion {
|
||||
fn default() -> Self {
|
||||
SplitCriterion::Gini
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
struct Node<T: RealNumber> {
|
||||
@@ -196,7 +207,7 @@ impl DecisionTreeClassifierParameters {
|
||||
impl Default for DecisionTreeClassifierParameters {
|
||||
fn default() -> Self {
|
||||
DecisionTreeClassifierParameters {
|
||||
criterion: SplitCriterion::Gini,
|
||||
criterion: SplitCriterion::default(),
|
||||
max_depth: None,
|
||||
min_samples_leaf: 1,
|
||||
min_samples_split: 2,
|
||||
|
||||
@@ -78,12 +78,16 @@ use crate::rand::get_rng_impl;
|
||||
#[derive(Debug, Clone)]
|
||||
/// Parameters of Regression Tree
|
||||
pub struct DecisionTreeRegressorParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The maximum depth of the tree.
|
||||
pub max_depth: Option<u16>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node.
|
||||
pub min_samples_leaf: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node.
|
||||
pub min_samples_split: usize,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the randomness of the estimator
|
||||
pub seed: Option<u64>,
|
||||
}
|
||||
@@ -142,12 +146,16 @@ impl Default for DecisionTreeRegressorParameters {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DecisionTreeRegressorSearchParameters {
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
|
||||
pub max_depth: Vec<Option<u16>>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
|
||||
pub min_samples_leaf: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
|
||||
pub min_samples_split: Vec<usize>,
|
||||
#[cfg_attr(feature = "serde", serde(default))]
|
||||
/// Controls the randomness of the estimator
|
||||
pub seed: Vec<Option<u64>>,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user