Complete grid search params (#166)

* grid search draft

* hyperparam search for linear estimators

* grid search for ensembles

* support grid search for more algos

* grid search for unsupervised algos

* minor cleanup
This commit is contained in:
Montana Low
2022-09-21 12:34:21 -07:00
committed by GitHub
parent 69d8be35de
commit 48514d1b15
18 changed files with 1713 additions and 25 deletions
+98
View File
@@ -116,6 +116,81 @@ impl Default for PCAParameters {
}
}
/// PCA grid search parameters.
///
/// Each field holds the candidate values to try for the `PCAParameters` field
/// of the same name. Converting this struct into an iterator produces
/// `PCAParameters` values built from those candidates.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct PCASearchParameters {
/// Candidate values for the number of components to keep.
pub n_components: Vec<usize>,
/// Candidate values for the correlation-matrix flag.
/// By default, covariance matrix is used to compute principal components.
/// Enable this flag if you want to use correlation matrix instead.
pub use_correlation_matrix: Vec<bool>,
}
/// PCA grid search iterator.
///
/// Created by calling `into_iter()` on a `PCASearchParameters`; yields one
/// `PCAParameters` per step.
pub struct PCASearchParametersIterator {
// Candidate lists being walked.
pca_search_parameters: PCASearchParameters,
// Index into `n_components` of the next candidate to yield.
current_k: usize,
// Index into `use_correlation_matrix` of the next candidate to yield.
current_use_correlation_matrix: usize,
}
/// Lets a PCA search grid be consumed as a sequence of concrete `PCAParameters`.
impl IntoIterator for PCASearchParameters {
    type Item = PCAParameters;
    type IntoIter = PCASearchParametersIterator;

    /// Consumes the grid and returns an iterator whose cursors both start at
    /// index 0, i.e. at the first candidate of every list.
    fn into_iter(self) -> Self::IntoIter {
        let pca_search_parameters = self;
        PCASearchParametersIterator {
            current_use_correlation_matrix: 0,
            current_k: 0,
            pca_search_parameters,
        }
    }
}
/// Enumerates every combination of the candidate lists held by the search grid.
///
/// `n_components` is the fastest-varying dimension: all of its candidates are
/// yielded for the first `use_correlation_matrix` candidate, then for the
/// second, and so on.
impl Iterator for PCASearchParametersIterator {
    type Item = PCAParameters;

    /// Returns the next parameter combination, or `None` once the grid is
    /// exhausted.
    ///
    /// If either candidate list is empty the grid has no combinations, so
    /// `None` is returned immediately instead of indexing out of bounds.
    fn next(&mut self) -> Option<Self::Item> {
        let grid = &self.pca_search_parameters;

        // Checked lookups: a cursor at (or past) the end of its list, or an
        // empty list, ends iteration rather than panicking.
        let next = PCAParameters {
            n_components: *grid.n_components.get(self.current_k)?,
            use_correlation_matrix: *grid
                .use_correlation_matrix
                .get(self.current_use_correlation_matrix)?,
        };

        let n_components_len = grid.n_components.len();
        let use_correlation_matrix_len = grid.use_correlation_matrix.len();

        if self.current_k + 1 < n_components_len {
            // Advance the fastest-varying dimension.
            self.current_k += 1;
        } else if self.current_use_correlation_matrix + 1 < use_correlation_matrix_len {
            // Inner dimension wrapped around: reset it and step the outer one.
            self.current_k = 0;
            self.current_use_correlation_matrix += 1;
        } else {
            // Last combination was just produced: park both cursors one past
            // the end so every later call returns `None`.
            self.current_k = n_components_len;
            self.current_use_correlation_matrix = use_correlation_matrix_len;
        }

        Some(next)
    }
}
impl Default for PCASearchParameters {
fn default() -> Self {
let default_params = PCAParameters::default();
PCASearchParameters {
n_components: vec![default_params.n_components],
use_correlation_matrix: vec![default_params.use_correlation_matrix],
}
}
}
impl<T: RealNumber, M: Matrix<T>> UnsupervisedEstimator<M, PCAParameters> for PCA<T, M> {
fn fit(x: &M, parameters: PCAParameters) -> Result<Self, Failed> {
PCA::fit(x, parameters)
@@ -271,6 +346,29 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[test]
fn search_parameters() {
    // Grid points in the order the iterator is expected to produce them:
    // `n_components` varies fastest, `use_correlation_matrix` slowest.
    let expected: [(usize, bool); 4] = [(2, true), (4, true), (2, false), (4, false)];

    let grid = PCASearchParameters {
        n_components: vec![2, 4],
        use_correlation_matrix: vec![true, false],
        ..Default::default()
    };
    let mut candidates = grid.into_iter();

    for &(n_components, use_correlation_matrix) in expected.iter() {
        let candidate = candidates.next().unwrap();
        assert_eq!(candidate.n_components, n_components);
        assert_eq!(candidate.use_correlation_matrix, use_correlation_matrix);
    }

    // Nothing may follow the last combination.
    assert!(candidates.next().is_none());
}
fn us_arrests_data() -> DenseMatrix<f64> {
DenseMatrix::from_2d_array(&[
&[13.2, 236.0, 58.0, 21.2],
+68
View File
@@ -90,6 +90,60 @@ impl SVDParameters {
}
}
/// SVD grid search parameters.
///
/// Holds the candidate values to try for `SVDParameters::n_components`.
/// Converting this struct into an iterator yields one `SVDParameters` per
/// candidate, in list order.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct SVDSearchParameters {
/// Candidate values for `SVDParameters::n_components`.
pub n_components: Vec<usize>,
}
/// SVD grid search iterator.
///
/// Created by calling `into_iter()` on an `SVDSearchParameters`; yields one
/// `SVDParameters` per `n_components` candidate.
pub struct SVDSearchParametersIterator {
// Candidate list being walked.
svd_search_parameters: SVDSearchParameters,
// Index into `n_components` of the next candidate to yield.
current_n_components: usize,
}
/// Lets an SVD search grid be consumed as a sequence of concrete `SVDParameters`.
impl IntoIterator for SVDSearchParameters {
    type Item = SVDParameters;
    type IntoIter = SVDSearchParametersIterator;

    /// Consumes the grid and returns an iterator positioned at the first
    /// `n_components` candidate.
    fn into_iter(self) -> Self::IntoIter {
        let svd_search_parameters = self;
        SVDSearchParametersIterator {
            current_n_components: 0,
            svd_search_parameters,
        }
    }
}
/// Yields one `SVDParameters` per `n_components` candidate, in list order.
impl Iterator for SVDSearchParametersIterator {
    type Item = SVDParameters;

    /// Returns the parameters for the next candidate, or `None` once every
    /// candidate has been produced.
    fn next(&mut self) -> Option<Self::Item> {
        let position = self.current_n_components;
        // The cursor only ever moves forward one step at a time, so it is at
        // most one past the end; a failed lookup therefore means "exhausted".
        let n_components = self
            .svd_search_parameters
            .n_components
            .get(position)
            .copied()?;
        self.current_n_components = position + 1;
        Some(SVDParameters { n_components })
    }
}
impl Default for SVDSearchParameters {
fn default() -> Self {
let default_params = SVDParameters::default();
SVDSearchParameters {
n_components: vec![default_params.n_components],
}
}
}
impl<T: RealNumber, M: Matrix<T>> UnsupervisedEstimator<M, SVDParameters> for SVD<T, M> {
fn fit(x: &M, parameters: SVDParameters) -> Result<Self, Failed> {
SVD::fit(x, parameters)
@@ -153,6 +207,20 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[test]
fn search_parameters() {
    // Candidates in the order the iterator is expected to produce them.
    let expected: [usize; 2] = [10, 100];

    let grid = SVDSearchParameters {
        n_components: vec![10, 100],
        ..Default::default()
    };
    let mut candidates = grid.into_iter();

    for &n_components in expected.iter() {
        let candidate = candidates.next().unwrap();
        assert_eq!(candidate.n_components, n_components);
    }

    // Nothing may follow the last candidate.
    assert!(candidates.next().is_none());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn svd_decompose() {