diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs
index e595028..ac095f6 100644
--- a/src/cluster/dbscan.rs
+++ b/src/cluster/dbscan.rs
@@ -53,14 +53,32 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
 #[derive(Debug, Clone)]
 /// DBSCAN clustering algorithm parameters
 pub struct DBSCANParameters<T: RealNumber> {
-    /// Maximum number of iterations of the k-means algorithm for a single run.
+    /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
     pub min_samples: usize,
-    /// The number of samples in a neighborhood for a point to be considered as a core point.
+    /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
    pub eps: T,
     /// KNN algorithm to use.
     pub algorithm: KNNAlgorithmName,
 }
 
+impl<T: RealNumber> DBSCANParameters<T> {
+    /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
+    pub fn with_min_samples(mut self, min_samples: usize) -> Self {
+        self.min_samples = min_samples;
+        self
+    }
+    /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
+    pub fn with_eps(mut self, eps: T) -> Self {
+        self.eps = eps;
+        self
+    }
+    /// KNN algorithm to use.
+    pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
+        self.algorithm = algorithm;
+        self
+    }
+}
+
 impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for DBSCAN<T, D> {
     fn eq(&self, other: &Self) -> bool {
         self.cluster_labels.len() == other.cluster_labels.len()
diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs
index 26a4038..bc5d673 100644
--- a/src/cluster/kmeans.rs
+++ b/src/cluster/kmeans.rs
@@ -105,6 +105,14 @@ pub struct KMeansParameters {
     pub max_iter: usize,
 }
 
+impl KMeansParameters {
+    /// Maximum number of iterations of the k-means algorithm for a single run.
+    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
+        self.max_iter = max_iter;
+        self
+    }
+}
+
 impl Default for KMeansParameters {
     fn default() -> Self {
         KMeansParameters { max_iter: 100 }
diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs
index 7d80f88..68220e3 100644
--- a/src/decomposition/pca.rs
+++ b/src/decomposition/pca.rs
@@ -88,6 +88,15 @@ pub struct PCAParameters {
     pub use_correlation_matrix: bool,
 }
 
+impl PCAParameters {
+    /// By default, covariance matrix is used to compute principal components.
+    /// Enable this flag if you want to use correlation matrix instead.
+    pub fn with_use_correlation_matrix(mut self, use_correlation_matrix: bool) -> Self {
+        self.use_correlation_matrix = use_correlation_matrix;
+        self
+    }
+}
+
 impl Default for PCAParameters {
     fn default() -> Self {
         PCAParameters {
diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs
index a742d90..9f1ba72 100644
--- a/src/ensemble/random_forest_classifier.rs
+++ b/src/ensemble/random_forest_classifier.rs
@@ -85,6 +85,39 @@ pub struct RandomForestClassifier<T: RealNumber> {
     classes: Vec<T>,
 }
 
+impl RandomForestClassifierParameters {
+    /// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_criterion(mut self, criterion: SplitCriterion) -> Self {
+        self.criterion = criterion;
+        self
+    }
+    /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_max_depth(mut self, max_depth: u16) -> Self {
+        self.max_depth = Some(max_depth);
+        self
+    }
+    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
+        self.min_samples_leaf = min_samples_leaf;
+        self
+    }
+    /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
+        self.min_samples_split = min_samples_split;
+        self
+    }
+    /// The number of trees in the forest.
+    pub fn with_n_trees(mut self, n_trees: u16) -> Self {
+        self.n_trees = n_trees;
+        self
+    }
+    /// Number of random sample of predictors to use as split candidates.
+    pub fn with_m(mut self, m: usize) -> Self {
+        self.m = Some(m);
+        self
+    }
+}
+
 impl<T: RealNumber> PartialEq for RandomForestClassifier<T> {
     fn eq(&self, other: &Self) -> bool {
         if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() {
diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs
index 52b39f9..6aa89d0 100644
--- a/src/ensemble/random_forest_regressor.rs
+++ b/src/ensemble/random_forest_regressor.rs
@@ -80,6 +80,34 @@ pub struct RandomForestRegressor<T: RealNumber> {
     trees: Vec<DecisionTreeRegressor<T>>,
 }
 
+impl RandomForestRegressorParameters {
+    /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_max_depth(mut self, max_depth: u16) -> Self {
+        self.max_depth = Some(max_depth);
+        self
+    }
+    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
+        self.min_samples_leaf = min_samples_leaf;
+        self
+    }
+    /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
+    pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
+        self.min_samples_split = min_samples_split;
+        self
+    }
+    /// The number of trees in the forest.
+    pub fn with_n_trees(mut self, n_trees: usize) -> Self {
+        self.n_trees = n_trees;
+        self
+    }
+    /// Number of random sample of predictors to use as split candidates.
+    pub fn with_m(mut self, m: usize) -> Self {
+        self.m = Some(m);
+        self
+    }
+}
+
 impl Default for RandomForestRegressorParameters {
     fn default() -> Self {
         RandomForestRegressorParameters {
diff --git a/src/linear/elastic_net.rs b/src/linear/elastic_net.rs
index b386290..1ab933a 100644
--- a/src/linear/elastic_net.rs
+++ b/src/linear/elastic_net.rs
@@ -90,6 +90,36 @@ pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
     intercept: T,
 }
 
+impl<T: RealNumber> ElasticNetParameters<T> {
+    /// Regularization parameter.
+    pub fn with_alpha(mut self, alpha: T) -> Self {
+        self.alpha = alpha;
+        self
+    }
+    /// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
+    /// For l1_ratio = 0 the penalty is an L2 penalty.
+    /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
+    pub fn with_l1_ratio(mut self, l1_ratio: T) -> Self {
+        self.l1_ratio = l1_ratio;
+        self
+    }
+    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
+    pub fn with_normalize(mut self, normalize: bool) -> Self {
+        self.normalize = normalize;
+        self
+    }
+    /// The tolerance for the optimization
+    pub fn with_tol(mut self, tol: T) -> Self {
+        self.tol = tol;
+        self
+    }
+    /// The maximum number of iterations
+    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
+        self.max_iter = max_iter;
+        self
+    }
+}
+
 impl<T: RealNumber> Default for ElasticNetParameters<T> {
     fn default() -> Self {
         ElasticNetParameters {
diff --git a/src/linear/lasso.rs b/src/linear/lasso.rs
index 0dab3e5..e16a316 100644
--- a/src/linear/lasso.rs
+++ b/src/linear/lasso.rs
@@ -54,6 +54,29 @@ pub struct Lasso<T: RealNumber, M: Matrix<T>> {
     intercept: T,
 }
 
+impl<T: RealNumber> LassoParameters<T> {
+    /// Regularization parameter.
+    pub fn with_alpha(mut self, alpha: T) -> Self {
+        self.alpha = alpha;
+        self
+    }
+    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
+    pub fn with_normalize(mut self, normalize: bool) -> Self {
+        self.normalize = normalize;
+        self
+    }
+    /// The tolerance for the optimization
+    pub fn with_tol(mut self, tol: T) -> Self {
+        self.tol = tol;
+        self
+    }
+    /// The maximum number of iterations
+    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
+        self.max_iter = max_iter;
+        self
+    }
+}
+
 impl<T: RealNumber> Default for LassoParameters<T> {
     fn default() -> Self {
         LassoParameters {
diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs
index c7bd872..0ebad34 100644
--- a/src/linear/linear_regression.rs
+++ b/src/linear/linear_regression.rs
@@ -93,6 +93,14 @@ pub struct LinearRegression<T: RealNumber, M: Matrix<T>> {
     solver: LinearRegressionSolverName,
 }
 
+impl LinearRegressionParameters {
+    /// Solver to use for estimation of regression coefficients.
+    pub fn with_solver(mut self, solver: LinearRegressionSolverName) -> Self {
+        self.solver = solver;
+        self
+    }
+}
+
 impl Default for LinearRegressionParameters {
     fn default() -> Self {
         LinearRegressionParameters {
diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs
index 2b5a898..5c14313 100644
--- a/src/linear/ridge_regression.rs
+++ b/src/linear/ridge_regression.rs
@@ -98,6 +98,24 @@ pub struct RidgeRegression<T: RealNumber, M: Matrix<T>> {
     solver: RidgeRegressionSolverName,
 }
 
+impl<T: RealNumber> RidgeRegressionParameters<T> {
+    /// Regularization parameter.
+    pub fn with_alpha(mut self, alpha: T) -> Self {
+        self.alpha = alpha;
+        self
+    }
+    /// Solver to use for estimation of regression coefficients.
+    pub fn with_solver(mut self, solver: RidgeRegressionSolverName) -> Self {
+        self.solver = solver;
+        self
+    }
+    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
+    pub fn with_normalize(mut self, normalize: bool) -> Self {
+        self.normalize = normalize;
+        self
+    }
+}
+
 impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
     fn default() -> Self {
         RidgeRegressionParameters {
diff --git a/src/naive_bayes/bernoulli.rs b/src/naive_bayes/bernoulli.rs
index fe299f3..db98efc 100644
--- a/src/naive_bayes/bernoulli.rs
+++ b/src/naive_bayes/bernoulli.rs
@@ -96,6 +96,21 @@ impl<T: RealNumber> BernoulliNBParameters<T> {
             binarize,
         }
     }
+    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+    pub fn with_alpha(mut self, alpha: T) -> Self {
+        self.alpha = alpha;
+        self
+    }
+    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
+    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
+        self.priors = Some(priors);
+        self
+    }
+    /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
+    pub fn with_binarize(mut self, binarize: T) -> Self {
+        self.binarize = Some(binarize);
+        self
+    }
 }
 
 impl<T: RealNumber> Default for BernoulliNBParameters<T> {
diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs
index ce526ce..ea81eb5 100644
--- a/src/naive_bayes/categorical.rs
+++ b/src/naive_bayes/categorical.rs
@@ -234,7 +234,13 @@ impl<T: RealNumber> CategoricalNBParameters<T> {
             )))
         }
     }
+    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+    pub fn with_alpha(mut self, alpha: T) -> Self {
+        self.alpha = alpha;
+        self
+    }
 }
+
 impl<T: RealNumber> Default for CategoricalNBParameters<T> {
     fn default() -> Self {
         Self { alpha: T::one() }
diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs
index 01dacd7..f1fc812 100644
--- a/src/naive_bayes/gaussian.rs
+++ b/src/naive_bayes/gaussian.rs
@@ -86,6 +86,11 @@ impl<T: RealNumber> GaussianNBParameters<T> {
     pub fn new(priors: Option<Vec<T>>) -> Self {
         Self { priors }
     }
+    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
+    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
+        self.priors = Some(priors);
+        self
+    }
 }
 
 impl<T: RealNumber> GaussianNBDistribution<T> {
diff --git a/src/naive_bayes/multinomial.rs b/src/naive_bayes/multinomial.rs
index 849b8db..50d2ee2 100644
--- a/src/naive_bayes/multinomial.rs
+++ b/src/naive_bayes/multinomial.rs
@@ -86,6 +86,16 @@ impl<T: RealNumber> MultinomialNBParameters<T> {
     pub fn new(alpha: T, priors: Option<Vec<T>>) -> Self {
         Self { alpha, priors }
     }
+    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+    pub fn with_alpha(mut self, alpha: T) -> Self {
+        self.alpha = alpha;
+        self
+    }
+    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
+    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
+        self.priors = Some(priors);
+        self
+    }
 }
 
 impl<T: RealNumber> Default for MultinomialNBParameters<T> {
diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs
index 8b4db1b..6668539 100644
--- a/src/neighbors/knn_classifier.rs
+++ b/src/neighbors/knn_classifier.rs
@@ -80,9 +80,17 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
     /// a function that defines a distance between each pair of point in training data.
     /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
     /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
-    pub fn with_distance(mut self, distance: D) -> Self {
-        self.distance = distance;
-        self
+    pub fn with_distance<DD: Distance<Vec<T>, T>>(
+        self,
+        distance: DD,
+    ) -> KNNClassifierParameters<T, DD> {
+        KNNClassifierParameters {
+            distance,
+            algorithm: self.algorithm,
+            weight: self.weight,
+            k: self.k,
+            t: PhantomData,
+        }
     }
     /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
     pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs
index a97fdea..80971e5 100644
--- a/src/neighbors/knn_regressor.rs
+++ b/src/neighbors/knn_regressor.rs
@@ -82,9 +82,17 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
     /// a function that defines a distance between each pair of point in training data.
     /// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
     /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
-    pub fn with_distance(mut self, distance: D) -> Self {
-        self.distance = distance;
-        self
+    pub fn with_distance<DD: Distance<Vec<T>, T>>(
+        self,
+        distance: DD,
+    ) -> KNNRegressorParameters<T, DD> {
+        KNNRegressorParameters {
+            distance,
+            algorithm: self.algorithm,
+            weight: self.weight,
+            k: self.k,
+            t: PhantomData,
+        }
     }
     /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
     pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs
index 1845d5e..50a855b 100644
--- a/src/tree/decision_tree_classifier.rs
+++ b/src/tree/decision_tree_classifier.rs
@@ -161,6 +161,29 @@ impl<T: RealNumber> PartialEq for Node<T> {
     }
 }
 
+impl DecisionTreeClassifierParameters {
+    /// Split criteria to use when building a tree.
+    pub fn with_criterion(mut self, criterion: SplitCriterion) -> Self {
+        self.criterion = criterion;
+        self
+    }
+    /// The maximum depth of the tree.
+    pub fn with_max_depth(mut self, max_depth: u16) -> Self {
+        self.max_depth = Some(max_depth);
+        self
+    }
+    /// The minimum number of samples required to be at a leaf node.
+    pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
+        self.min_samples_leaf = min_samples_leaf;
+        self
+    }
+    /// The minimum number of samples required to split an internal node.
+    pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
+        self.min_samples_split = min_samples_split;
+        self
+    }
+}
+
 impl Default for DecisionTreeClassifierParameters {
     fn default() -> Self {
         DecisionTreeClassifierParameters {
diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs
index 492f0a1..806e680 100644
--- a/src/tree/decision_tree_regressor.rs
+++ b/src/tree/decision_tree_regressor.rs
@@ -101,6 +101,24 @@ struct Node<T: RealNumber> {
     false_child: Option<usize>,
 }
 
+impl DecisionTreeRegressorParameters {
+    /// The maximum depth of the tree.
+    pub fn with_max_depth(mut self, max_depth: u16) -> Self {
+        self.max_depth = Some(max_depth);
+        self
+    }
+    /// The minimum number of samples required to be at a leaf node.
+    pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
+        self.min_samples_leaf = min_samples_leaf;
+        self
+    }
+    /// The minimum number of samples required to split an internal node.
+    pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
+        self.min_samples_split = min_samples_split;
+        self
+    }
+}
+
 impl Default for DecisionTreeRegressorParameters {
     fn default() -> Self {
         DecisionTreeRegressorParameters {