Complete grid search params (#166)

* grid search draft

* hyperparam search for linear estimators

* grid search for ensembles

* support grid search for more algos

* grid search for unsupervised algos

* minor cleanup
Montana Low
2022-09-21 12:34:21 -07:00
committed by GitHub
parent 69d8be35de
commit 48514d1b15
18 changed files with 1713 additions and 25 deletions
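Context for the file diffs below (not part of the commit itself): each `*SearchParameters` struct added here is a plain grid, one `Vec` of candidate values per hyperparameter, and its `IntoIterator` impl enumerates the full cartesian product, yielding one concrete `*Parameters` per combination. A minimal sketch of the intended call pattern, assuming smartcore's 0.2-era module paths and `fit`/`predict` signatures:

use smartcore::linalg::naive::dense_matrix::DenseMatrix;
use smartcore::naive_bayes::bernoulli::{BernoulliNB, BernoulliNBSearchParameters};

fn main() {
    // 2 alphas x 1 prior (the default None) x 2 binarize thresholds = 4 candidates.
    let grid = BernoulliNBSearchParameters {
        alpha: vec![0.5, 1.0],
        binarize: vec![None, Some(0.0)],
        ..Default::default()
    };
    let x = DenseMatrix::from_2d_array(&[&[1., 0.], &[0., 1.], &[1., 1.], &[0., 0.]]);
    let y = vec![0., 1., 1., 0.];
    for params in grid {
        // One fitted model per parameter combination.
        if let Ok(model) = BernoulliNB::fit(&x, &y, params) {
            let _y_hat = model.predict(&x);
        }
    }
}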
+96
@@ -150,6 +150,88 @@ impl<T: RealNumber> Default for BernoulliNBParameters<T> {
}
}
/// BernoulliNB grid search parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct BernoulliNBSearchParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: Vec<T>,
/// Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.
pub priors: Vec<Option<Vec<T>>>,
/// Threshold for binarizing (mapping to booleans) sample features. If None, input is presumed to already consist of binary vectors.
pub binarize: Vec<Option<T>>,
}
/// BernoulliNB grid search iterator
pub struct BernoulliNBSearchParametersIterator<T: RealNumber> {
bernoulli_nb_search_parameters: BernoulliNBSearchParameters<T>,
current_alpha: usize,
current_priors: usize,
current_binarize: usize,
}
impl<T: RealNumber> IntoIterator for BernoulliNBSearchParameters<T> {
type Item = BernoulliNBParameters<T>;
type IntoIter = BernoulliNBSearchParametersIterator<T>;
fn into_iter(self) -> Self::IntoIter {
BernoulliNBSearchParametersIterator {
bernoulli_nb_search_parameters: self,
current_alpha: 0,
current_priors: 0,
current_binarize: 0,
}
}
}
impl<T: RealNumber> Iterator for BernoulliNBSearchParametersIterator<T> {
type Item = BernoulliNBParameters<T>;
fn next(&mut self) -> Option<Self::Item> {
if self.current_alpha == self.bernoulli_nb_search_parameters.alpha.len()
&& self.current_priors == self.bernoulli_nb_search_parameters.priors.len()
&& self.current_binarize == self.bernoulli_nb_search_parameters.binarize.len()
{
return None;
}
let next = BernoulliNBParameters {
alpha: self.bernoulli_nb_search_parameters.alpha[self.current_alpha],
priors: self.bernoulli_nb_search_parameters.priors[self.current_priors].clone(),
binarize: self.bernoulli_nb_search_parameters.binarize[self.current_binarize],
};
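// Advance like an odometer: alpha varies fastest, then priors, then binarize.
// When every axis is exhausted, all counters are pushed past the end so the
// next call hits the guard above and returns None.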
if self.current_alpha + 1 < self.bernoulli_nb_search_parameters.alpha.len() {
self.current_alpha += 1;
} else if self.current_priors + 1 < self.bernoulli_nb_search_parameters.priors.len() {
self.current_alpha = 0;
self.current_priors += 1;
} else if self.current_binarize + 1 < self.bernoulli_nb_search_parameters.binarize.len() {
self.current_alpha = 0;
self.current_priors = 0;
self.current_binarize += 1;
} else {
self.current_alpha += 1;
self.current_priors += 1;
self.current_binarize += 1;
}
Some(next)
}
}
impl<T: RealNumber> Default for BernoulliNBSearchParameters<T> {
fn default() -> Self {
let default_params = BernoulliNBParameters::default();
BernoulliNBSearchParameters {
alpha: vec![default_params.alpha],
priors: vec![default_params.priors],
binarize: vec![default_params.binarize],
}
}
}
impl<T: RealNumber> BernoulliNBDistribution<T> {
/// Fits the distribution to an NxM matrix, where N is the number of samples and M is the number of features.
/// * `x` - training data.
@@ -347,6 +429,20 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[test]
fn search_parameters() {
let parameters = BernoulliNBSearchParameters {
alpha: vec![1., 2.],
..Default::default()
};
let mut iter = parameters.into_iter();
let next = iter.next().unwrap();
assert_eq!(next.alpha, 1.);
let next = iter.next().unwrap();
assert_eq!(next.alpha, 2.);
assert!(iter.next().is_none());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_bernoulli_naive_bayes() {
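A note on the iteration order implemented above (illustration, not in the diff; same module-path assumption as the sketch up top): the counters advance like an odometer with the first field fastest, so alpha cycles within each binarize value:

use smartcore::naive_bayes::bernoulli::BernoulliNBSearchParameters;

fn main() {
    let grid = BernoulliNBSearchParameters {
        alpha: vec![1.0, 2.0],
        binarize: vec![None, Some(0.5)],
        ..Default::default() // priors stays at its single default entry, None
    };
    let combos: Vec<_> = grid.into_iter().map(|p| (p.alpha, p.binarize)).collect();
    // alpha cycles fastest, binarize slowest: 2 x 1 x 2 = 4 combinations.
    assert_eq!(
        combos,
        vec![(1.0, None), (2.0, None), (1.0, Some(0.5)), (2.0, Some(0.5))]
    );
}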
+68
@@ -261,6 +261,60 @@ impl<T: RealNumber> Default for CategoricalNBParameters<T> {
}
}
/// CategoricalNB grid search parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct CategoricalNBSearchParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: Vec<T>,
}
/// CategoricalNB grid search iterator
pub struct CategoricalNBSearchParametersIterator<T: RealNumber> {
categorical_nb_search_parameters: CategoricalNBSearchParameters<T>,
current_alpha: usize,
}
impl<T: RealNumber> IntoIterator for CategoricalNBSearchParameters<T> {
type Item = CategoricalNBParameters<T>;
type IntoIter = CategoricalNBSearchParametersIterator<T>;
fn into_iter(self) -> Self::IntoIter {
CategoricalNBSearchParametersIterator {
categorical_nb_search_parameters: self,
current_alpha: 0,
}
}
}
impl<T: RealNumber> Iterator for CategoricalNBSearchParametersIterator<T> {
type Item = CategoricalNBParameters<T>;
fn next(&mut self) -> Option<Self::Item> {
if self.current_alpha == self.categorical_nb_search_parameters.alpha.len() {
return None;
}
let next = CategoricalNBParameters {
alpha: self.categorical_nb_search_parameters.alpha[self.current_alpha],
};
self.current_alpha += 1;
Some(next)
}
}
impl<T: RealNumber> Default for CategoricalNBSearchParameters<T> {
fn default() -> Self {
let default_params = CategoricalNBParameters::default();
CategoricalNBSearchParameters {
alpha: vec![default_params.alpha],
}
}
}
/// CategoricalNB implements the categorical naive Bayes algorithm for categorically distributed data.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
@@ -351,6 +405,20 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[test]
fn search_parameters() {
let parameters = CategoricalNBSearchParameters {
alpha: vec![1., 2.],
..Default::default()
};
let mut iter = parameters.into_iter();
let next = iter.next().unwrap();
assert_eq!(next.alpha, 1.);
let next = iter.next().unwrap();
assert_eq!(next.alpha, 2.);
assert!(iter.next().is_none());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_categorical_naive_bayes() {
+75 -1
@@ -76,7 +76,7 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for GaussianNBDistribution
/// `GaussianNB` parameters. Use `Default::default()` for default values.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct GaussianNBParameters<T: RealNumber> {
/// Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.
pub priors: Option<Vec<T>>,
@@ -90,6 +90,66 @@ impl<T: RealNumber> GaussianNBParameters<T> {
}
}
impl<T: RealNumber> Default for GaussianNBParameters<T> {
fn default() -> Self {
Self { priors: None }
}
}
/// GaussianNB grid search parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct GaussianNBSearchParameters<T: RealNumber> {
/// Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.
pub priors: Vec<Option<Vec<T>>>,
}
/// GaussianNB grid search iterator
pub struct GaussianNBSearchParametersIterator<T: RealNumber> {
gaussian_nb_search_parameters: GaussianNBSearchParameters<T>,
current_priors: usize,
}
impl<T: RealNumber> IntoIterator for GaussianNBSearchParameters<T> {
type Item = GaussianNBParameters<T>;
type IntoIter = GaussianNBSearchParametersIterator<T>;
fn into_iter(self) -> Self::IntoIter {
GaussianNBSearchParametersIterator {
gaussian_nb_search_parameters: self,
current_priors: 0,
}
}
}
impl<T: RealNumber> Iterator for GaussianNBSearchParametersIterator<T> {
type Item = GaussianNBParameters<T>;
fn next(&mut self) -> Option<Self::Item> {
if self.current_priors == self.gaussian_nb_search_parameters.priors.len() {
return None;
}
let next = GaussianNBParameters {
priors: self.gaussian_nb_search_parameters.priors[self.current_priors].clone(),
};
self.current_priors += 1;
Some(next)
}
}
impl<T: RealNumber> Default for GaussianNBSearchParameters<T> {
fn default() -> Self {
let default_params = GaussianNBParameters::default();
GaussianNBSearchParameters {
priors: vec![default_params.priors],
}
}
}
impl<T: RealNumber> GaussianNBDistribution<T> {
/// Fits the distribution to an NxM matrix, where N is the number of samples and M is the number of features.
/// * `x` - training data.
@@ -260,6 +320,20 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[test]
fn search_parameters() {
let parameters = GaussianNBSearchParameters {
priors: vec![Some(vec![1.]), Some(vec![2.])],
..Default::default()
};
let mut iter = parameters.into_iter();
let next = iter.next().unwrap();
assert_eq!(next.priors, Some(vec![1.]));
let next = iter.next().unwrap();
assert_eq!(next.priors, Some(vec![2.]));
assert!(iter.next().is_none());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_gaussian_naive_bayes() {
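One detail that is easy to misread in these grids: `priors` is a `Vec<Option<Vec<T>>>`, so each grid entry is a complete per-class prior vector, with `None` meaning "estimate the priors from the data". For illustration (same path assumption as above):

use smartcore::naive_bayes::gaussian::GaussianNBSearchParameters;

fn main() {
    // Compare data-derived priors against two fixed two-class prior vectors.
    let grid = GaussianNBSearchParameters {
        priors: vec![None, Some(vec![0.3, 0.7]), Some(vec![0.5, 0.5])],
    };
    assert_eq!(grid.into_iter().count(), 3);
}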
+84
@@ -114,6 +114,76 @@ impl<T: RealNumber> Default for MultinomialNBParameters<T> {
}
}
/// MultinomialNB grid search parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct MultinomialNBSearchParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: Vec<T>,
/// Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.
pub priors: Vec<Option<Vec<T>>>,
}
/// MultinomialNB grid search iterator
pub struct MultinomialNBSearchParametersIterator<T: RealNumber> {
multinomial_nb_search_parameters: MultinomialNBSearchParameters<T>,
current_alpha: usize,
current_priors: usize,
}
impl<T: RealNumber> IntoIterator for MultinomialNBSearchParameters<T> {
type Item = MultinomialNBParameters<T>;
type IntoIter = MultinomialNBSearchParametersIterator<T>;
fn into_iter(self) -> Self::IntoIter {
MultinomialNBSearchParametersIterator {
multinomial_nb_search_parameters: self,
current_alpha: 0,
current_priors: 0,
}
}
}
impl<T: RealNumber> Iterator for MultinomialNBSearchParametersIterator<T> {
type Item = MultinomialNBParameters<T>;
fn next(&mut self) -> Option<Self::Item> {
if self.current_alpha == self.multinomial_nb_search_parameters.alpha.len()
&& self.current_priors == self.multinomial_nb_search_parameters.priors.len()
{
return None;
}
let next = MultinomialNBParameters {
alpha: self.multinomial_nb_search_parameters.alpha[self.current_alpha],
priors: self.multinomial_nb_search_parameters.priors[self.current_priors].clone(),
};
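// Odometer-style advance: alpha varies fastest, then priors; once both axes
// are exhausted, both counters are pushed past the end so the next call
// returns None.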
if self.current_alpha + 1 < self.multinomial_nb_search_parameters.alpha.len() {
self.current_alpha += 1;
} else if self.current_priors + 1 < self.multinomial_nb_search_parameters.priors.len() {
self.current_alpha = 0;
self.current_priors += 1;
} else {
self.current_alpha += 1;
self.current_priors += 1;
}
Some(next)
}
}
impl<T: RealNumber> Default for MultinomialNBSearchParameters<T> {
fn default() -> Self {
let default_params = MultinomialNBParameters::default();
MultinomialNBSearchParameters {
alpha: vec![default_params.alpha],
priors: vec![default_params.priors],
}
}
}
impl<T: RealNumber> MultinomialNBDistribution<T> {
/// Fits the distribution to an NxM matrix, where N is the number of samples and M is the number of features.
/// * `x` - training data.
@@ -297,6 +367,20 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[test]
fn search_parameters() {
let parameters = MultinomialNBSearchParameters {
alpha: vec![1., 2.],
..Default::default()
};
let mut iter = parameters.into_iter();
let next = iter.next().unwrap();
assert_eq!(next.alpha, 1.);
let next = iter.next().unwrap();
assert_eq!(next.alpha, 2.);
assert!(iter.next().is_none());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_multinomial_naive_bayes() {
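Taken together, the pattern this commit enables is a plain grid-search loop: build a `*SearchParameters` grid, iterate the concrete parameter sets, fit one model per set, and keep the best score. A hedged sketch (scoring on the training set only for brevity; real use would score a held-out fold; paths and signatures are the same 0.2-era assumptions as above):

use smartcore::linalg::naive::dense_matrix::DenseMatrix;
use smartcore::naive_bayes::multinomial::{MultinomialNB, MultinomialNBSearchParameters};

fn main() {
    let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[0., 3.], &[4., 0.], &[2., 2.]]);
    let y = vec![0., 0., 1., 1.];
    let grid = MultinomialNBSearchParameters {
        alpha: vec![0.1, 1.0, 10.0],
        ..Default::default()
    };
    let mut best: Option<(f64, f64)> = None; // (accuracy, alpha)
    for params in grid {
        let alpha = params.alpha;
        if let Ok(model) = MultinomialNB::fit(&x, &y, params) {
            let y_hat = model.predict(&x).unwrap();
            let correct = y.iter().zip(y_hat.iter()).filter(|(a, b)| a == b).count();
            let acc = correct as f64 / y.len() as f64;
            if best.map_or(true, |(b, _)| acc > b) {
                best = Some((acc, alpha));
            }
        }
    }
    println!("best (accuracy, alpha): {:?}", best);
}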