feat: add builders for algorithm parameters

This commit is contained in:
Volodymyr Orlov
2020-12-23 12:29:39 -08:00
parent 74f0d9e6fb
commit dd341f4a12
17 changed files with 276 additions and 8 deletions
+20 -2
View File
@@ -53,14 +53,32 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
/// DBSCAN clustering algorithm parameters /// DBSCAN clustering algorithm parameters
pub struct DBSCANParameters<T: RealNumber> { pub struct DBSCANParameters<T: RealNumber> {
/// Maximum number of iterations of the k-means algorithm for a single run. /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
pub min_samples: usize, pub min_samples: usize,
/// The number of samples in a neighborhood for a point to be considered as a core point. /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
pub eps: T, pub eps: T,
/// KNN algorithm to use. /// KNN algorithm to use.
pub algorithm: KNNAlgorithmName, pub algorithm: KNNAlgorithmName,
} }
impl<T: RealNumber> DBSCANParameters<T> {
    /// Sets the number of samples (or total weight) a neighborhood must contain
    /// for a point to qualify as a core point.
    pub fn with_min_samples(self, min_samples: usize) -> Self {
        Self {
            min_samples,
            ..self
        }
    }
    /// Sets the maximum distance between two samples for one to be considered
    /// as in the neighborhood of the other.
    pub fn with_eps(self, eps: T) -> Self {
        Self { eps, ..self }
    }
    /// Sets the KNN algorithm to use.
    pub fn with_algorithm(self, algorithm: KNNAlgorithmName) -> Self {
        Self { algorithm, ..self }
    }
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for DBSCAN<T, D> { impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for DBSCAN<T, D> {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.cluster_labels.len() == other.cluster_labels.len() self.cluster_labels.len() == other.cluster_labels.len()
+8
View File
@@ -105,6 +105,14 @@ pub struct KMeansParameters {
pub max_iter: usize, pub max_iter: usize,
} }
impl KMeansParameters {
    /// Sets the maximum number of iterations of the k-means algorithm for a single run.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
}
impl Default for KMeansParameters { impl Default for KMeansParameters {
fn default() -> Self { fn default() -> Self {
KMeansParameters { max_iter: 100 } KMeansParameters { max_iter: 100 }
+9
View File
@@ -88,6 +88,15 @@ pub struct PCAParameters {
pub use_correlation_matrix: bool, pub use_correlation_matrix: bool,
} }
impl PCAParameters {
    /// Principal components are computed from the covariance matrix by default.
    /// Enable this flag to use the correlation matrix instead.
    pub fn with_use_correlation_matrix(self, use_correlation_matrix: bool) -> Self {
        Self {
            use_correlation_matrix,
            ..self
        }
    }
}
impl Default for PCAParameters { impl Default for PCAParameters {
fn default() -> Self { fn default() -> Self {
PCAParameters { PCAParameters {
+33
View File
@@ -85,6 +85,39 @@ pub struct RandomForestClassifier<T: RealNumber> {
classes: Vec<T>, classes: Vec<T>,
} }
impl RandomForestClassifierParameters {
    /// Sets the split criteria used when building each tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_criterion(self, criterion: SplitCriterion) -> Self {
        Self { criterion, ..self }
    }
    /// Sets the maximum tree depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// Sets the minimum number of samples required at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// Sets the minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
    /// Sets the number of trees in the forest.
    pub fn with_n_trees(self, n_trees: u16) -> Self {
        Self { n_trees, ..self }
    }
    /// Sets the number of randomly sampled predictors to use as split candidates.
    pub fn with_m(self, m: usize) -> Self {
        Self { m: Some(m), ..self }
    }
}
impl<T: RealNumber> PartialEq for RandomForestClassifier<T> { impl<T: RealNumber> PartialEq for RandomForestClassifier<T> {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() { if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() {
+28
View File
@@ -80,6 +80,34 @@ pub struct RandomForestRegressor<T: RealNumber> {
trees: Vec<DecisionTreeRegressor<T>>, trees: Vec<DecisionTreeRegressor<T>>,
} }
impl RandomForestRegressorParameters {
/// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
pub fn with_max_depth(mut self, max_depth: u16) -> Self {
self.max_depth = Some(max_depth);
self
}
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
self.min_samples_leaf = min_samples_leaf;
self
}
/// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
self.min_samples_split = min_samples_split;
self
}
/// The number of trees in the forest.
pub fn with_n_trees(mut self, n_trees: usize) -> Self {
self.n_trees = n_trees;
self
}
/// Number of random sample of predictors to use as split candidates.
pub fn with_m(mut self, m: usize) -> Self {
self.m = Some(m);
self
}
}
impl Default for RandomForestRegressorParameters { impl Default for RandomForestRegressorParameters {
fn default() -> Self { fn default() -> Self {
RandomForestRegressorParameters { RandomForestRegressorParameters {
+30
View File
@@ -90,6 +90,36 @@ pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
intercept: T, intercept: T,
} }
impl<T: RealNumber> ElasticNetParameters<T> {
    /// Sets the regularization parameter.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Sets the elastic net mixing parameter, with 0 <= l1_ratio <= 1.
    /// For l1_ratio = 0 the penalty is an L2 penalty.
    /// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
    pub fn with_l1_ratio(self, l1_ratio: T) -> Self {
        Self { l1_ratio, ..self }
    }
    /// If True, the regressors X will be normalized before regression by
    /// subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
    /// Sets the tolerance for the optimization.
    pub fn with_tol(self, tol: T) -> Self {
        Self { tol, ..self }
    }
    /// Sets the maximum number of iterations.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
}
impl<T: RealNumber> Default for ElasticNetParameters<T> { impl<T: RealNumber> Default for ElasticNetParameters<T> {
fn default() -> Self { fn default() -> Self {
ElasticNetParameters { ElasticNetParameters {
+23
View File
@@ -54,6 +54,29 @@ pub struct Lasso<T: RealNumber, M: Matrix<T>> {
intercept: T, intercept: T,
} }
impl<T: RealNumber> LassoParameters<T> {
    /// Sets the regularization parameter.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// If True, the regressors X will be normalized before regression by
    /// subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
    /// Sets the tolerance for the optimization.
    pub fn with_tol(self, tol: T) -> Self {
        Self { tol, ..self }
    }
    /// Sets the maximum number of iterations.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
}
impl<T: RealNumber> Default for LassoParameters<T> { impl<T: RealNumber> Default for LassoParameters<T> {
fn default() -> Self { fn default() -> Self {
LassoParameters { LassoParameters {
+8
View File
@@ -93,6 +93,14 @@ pub struct LinearRegression<T: RealNumber, M: Matrix<T>> {
solver: LinearRegressionSolverName, solver: LinearRegressionSolverName,
} }
impl LinearRegressionParameters {
    /// Sets the solver to use for estimation of regression coefficients.
    pub fn with_solver(self, solver: LinearRegressionSolverName) -> Self {
        Self { solver, ..self }
    }
}
impl Default for LinearRegressionParameters { impl Default for LinearRegressionParameters {
fn default() -> Self { fn default() -> Self {
LinearRegressionParameters { LinearRegressionParameters {
+18
View File
@@ -98,6 +98,24 @@ pub struct RidgeRegression<T: RealNumber, M: Matrix<T>> {
solver: RidgeRegressionSolverName, solver: RidgeRegressionSolverName,
} }
impl<T: RealNumber> RidgeRegressionParameters<T> {
    /// Sets the regularization parameter.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Sets the solver to use for estimation of regression coefficients.
    pub fn with_solver(self, solver: RidgeRegressionSolverName) -> Self {
        Self { solver, ..self }
    }
    /// If True, the regressors X will be normalized before regression by
    /// subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
}
impl<T: RealNumber> Default for RidgeRegressionParameters<T> { impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
fn default() -> Self { fn default() -> Self {
RidgeRegressionParameters { RidgeRegressionParameters {
+15
View File
@@ -96,6 +96,21 @@ impl<T: RealNumber> BernoulliNBParameters<T> {
binarize, binarize,
} }
} }
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub fn with_alpha(mut self, alpha: T) -> Self {
self.alpha = alpha;
self
}
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
pub fn with_priors(mut self, priors: Vec<T>) -> Self {
self.priors = Some(priors);
self
}
/// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.
pub fn with_binarize(mut self, binarize: T) -> Self {
self.binarize = Some(binarize);
self
}
} }
impl<T: RealNumber> Default for BernoulliNBParameters<T> { impl<T: RealNumber> Default for BernoulliNBParameters<T> {
+6
View File
@@ -234,7 +234,13 @@ impl<T: RealNumber> CategoricalNBParameters<T> {
))) )))
} }
} }
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub fn with_alpha(mut self, alpha: T) -> Self {
self.alpha = alpha;
self
} }
}
impl<T: RealNumber> Default for CategoricalNBParameters<T> { impl<T: RealNumber> Default for CategoricalNBParameters<T> {
fn default() -> Self { fn default() -> Self {
Self { alpha: T::one() } Self { alpha: T::one() }
+5
View File
@@ -86,6 +86,11 @@ impl<T: RealNumber> GaussianNBParameters<T> {
pub fn new(priors: Option<Vec<T>>) -> Self { pub fn new(priors: Option<Vec<T>>) -> Self {
Self { priors } Self { priors }
} }
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
pub fn with_priors(mut self, priors: Vec<T>) -> Self {
self.priors = Some(priors);
self
}
} }
impl<T: RealNumber> GaussianNBDistribution<T> { impl<T: RealNumber> GaussianNBDistribution<T> {
+10
View File
@@ -86,6 +86,16 @@ impl<T: RealNumber> MultinomialNBParameters<T> {
pub fn new(alpha: T, priors: Option<Vec<T>>) -> Self { pub fn new(alpha: T, priors: Option<Vec<T>>) -> Self {
Self { alpha, priors } Self { alpha, priors }
} }
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub fn with_alpha(mut self, alpha: T) -> Self {
self.alpha = alpha;
self
}
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
pub fn with_priors(mut self, priors: Vec<T>) -> Self {
self.priors = Some(priors);
self
}
} }
impl<T: RealNumber> Default for MultinomialNBParameters<T> { impl<T: RealNumber> Default for MultinomialNBParameters<T> {
+11 -3
View File
@@ -80,9 +80,17 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub fn with_distance<DD: Distance<Vec<T>, T>>(self, distance: DD) -> KNNClassifierParameters<T, DD> {
    // Changing the distance function changes the type parameter `D`, so a new
    // parameter struct is built rather than mutating `self` in place.
    KNNClassifierParameters {
        distance,
        algorithm: self.algorithm,
        weight: self.weight,
        k: self.k,
        t: PhantomData,
    }
}
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default. /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
+11 -3
View File
@@ -82,9 +82,17 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub fn with_distance<DD: Distance<Vec<T>, T>>(self, distance: DD) -> KNNRegressorParameters<T, DD> {
    // Changing the distance function changes the type parameter `D`, so a new
    // parameter struct is built rather than mutating `self` in place.
    KNNRegressorParameters {
        distance,
        algorithm: self.algorithm,
        weight: self.weight,
        k: self.k,
        t: PhantomData,
    }
}
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default. /// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
+23
View File
@@ -161,6 +161,29 @@ impl<T: RealNumber> PartialEq for Node<T> {
} }
} }
impl DecisionTreeClassifierParameters {
    /// Sets the split criteria used when building the tree.
    pub fn with_criterion(self, criterion: SplitCriterion) -> Self {
        Self { criterion, ..self }
    }
    /// Sets the maximum depth of the tree.
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// Sets the minimum number of samples required at a leaf node.
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// Sets the minimum number of samples required to split an internal node.
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
}
impl Default for DecisionTreeClassifierParameters { impl Default for DecisionTreeClassifierParameters {
fn default() -> Self { fn default() -> Self {
DecisionTreeClassifierParameters { DecisionTreeClassifierParameters {
+18
View File
@@ -101,6 +101,24 @@ struct Node<T: RealNumber> {
false_child: Option<usize>, false_child: Option<usize>,
} }
impl DecisionTreeRegressorParameters {
    /// Sets the maximum depth of the tree.
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// Sets the minimum number of samples required at a leaf node.
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// Sets the minimum number of samples required to split an internal node.
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
}
impl Default for DecisionTreeRegressorParameters { impl Default for DecisionTreeRegressorParameters {
fn default() -> Self { fn default() -> Self {
DecisionTreeRegressorParameters { DecisionTreeRegressorParameters {