Merge potential next release v0.4 (#187) Breaking Changes
* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters in dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
@@ -1,8 +1,11 @@
|
||||
// TODO: missing documentation
|
||||
|
||||
use crate::{
|
||||
api::{Predictor, SupervisedEstimator},
|
||||
error::{Failed, FailedError},
|
||||
linalg::Matrix,
|
||||
math::num::RealNumber,
|
||||
linalg::basic::arrays::{Array2, Array1},
|
||||
numbers::realnum::RealNumber,
|
||||
numbers::basenum::Number,
|
||||
};
|
||||
|
||||
use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
|
||||
@@ -10,8 +13,8 @@ use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
|
||||
/// Parameters for GridSearchCV
|
||||
#[derive(Debug)]
|
||||
pub struct GridSearchCVParameters<
|
||||
T: RealNumber,
|
||||
M: Matrix<T>,
|
||||
T: Number,
|
||||
M: Array2<T>,
|
||||
C: Clone,
|
||||
I: Iterator<Item = C>,
|
||||
E: Predictor<M, M::RowVector>,
|
||||
@@ -29,7 +32,7 @@ pub struct GridSearchCVParameters<
|
||||
|
||||
impl<
|
||||
T: RealNumber,
|
||||
M: Matrix<T>,
|
||||
M: Array2<T>,
|
||||
C: Clone,
|
||||
I: Iterator<Item = C>,
|
||||
E: Predictor<M, M::RowVector>,
|
||||
@@ -51,7 +54,7 @@ impl<
|
||||
}
|
||||
/// Exhaustive search over specified parameter values for an estimator.
|
||||
#[derive(Debug)]
|
||||
pub struct GridSearchCV<T: RealNumber, M: Matrix<T>, C: Clone, E: Predictor<M, M::RowVector>> {
|
||||
pub struct GridSearchCV<T: RealNumber, M: Array2<T>, C: Clone, E: Predictor<M, M::RowVector>> {
|
||||
_phantom: std::marker::PhantomData<(T, M)>,
|
||||
predictor: E,
|
||||
/// Cross validation results.
|
||||
@@ -60,7 +63,7 @@ pub struct GridSearchCV<T: RealNumber, M: Matrix<T>, C: Clone, E: Predictor<M, M
|
||||
pub best_parameter: C,
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>, E: Predictor<M, M::RowVector>, C: Clone>
|
||||
impl<T: RealNumber, M: Array2<T>, E: Predictor<M, M::RowVector>, C: Clone>
|
||||
GridSearchCV<T, M, C, E>
|
||||
{
|
||||
/// Search for the best estimator by testing all possible combinations with cross-validation using given metric.
|
||||
@@ -130,7 +133,7 @@ impl<T: RealNumber, M: Matrix<T>, E: Predictor<M, M::RowVector>, C: Clone>
|
||||
|
||||
impl<
|
||||
T: RealNumber,
|
||||
M: Matrix<T>,
|
||||
M: Array2<T>,
|
||||
C: Clone,
|
||||
I: Iterator<Item = C>,
|
||||
E: Predictor<M, M::RowVector>,
|
||||
@@ -149,7 +152,7 @@ impl<
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealNumber, M: Matrix<T>, C: Clone, E: Predictor<M, M::RowVector>>
|
||||
impl<T: RealNumber, M: Array2<T>, C: Clone, E: Predictor<M, M::RowVector>>
|
||||
Predictor<M, M::RowVector> for GridSearchCV<T, M, C, E>
|
||||
{
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
//! # KFold
|
||||
//!
|
||||
//! Defines k-fold cross validator.
|
||||
use std::fmt::{Debug, Display};
|
||||
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::Array2;
|
||||
use crate::model_selection::BaseKFold;
|
||||
use crate::rand::get_rng_impl;
|
||||
use crate::rand_custom::get_rng_impl;
|
||||
use rand::seq::SliceRandom;
|
||||
|
||||
/// K-Folds cross-validator
|
||||
@@ -20,7 +20,10 @@ pub struct KFold {
|
||||
}
|
||||
|
||||
impl KFold {
|
||||
fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>> {
|
||||
fn test_indices<T: Debug + Display + Copy + Sized, M: Array2<T>>(
|
||||
&self,
|
||||
x: &M,
|
||||
) -> Vec<Vec<usize>> {
|
||||
// number of samples (rows) in the matrix
|
||||
let n_samples: usize = x.shape().0;
|
||||
|
||||
@@ -51,7 +54,7 @@ impl KFold {
|
||||
return_values
|
||||
}
|
||||
|
||||
fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>> {
|
||||
fn test_masks<T: Debug + Display + Copy + Sized, M: Array2<T>>(&self, x: &M) -> Vec<Vec<bool>> {
|
||||
let mut return_values: Vec<Vec<bool>> = Vec::with_capacity(self.n_splits);
|
||||
for test_index in self.test_indices(x).drain(..) {
|
||||
// init mask
|
||||
@@ -71,7 +74,7 @@ impl Default for KFold {
|
||||
KFold {
|
||||
n_splits: 3,
|
||||
shuffle: true,
|
||||
seed: None,
|
||||
seed: Option::None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -134,7 +137,7 @@ impl BaseKFold for KFold {
|
||||
self.n_splits
|
||||
}
|
||||
|
||||
fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output {
|
||||
fn split<T: Debug + Display + Copy + Sized, M: Array2<T>>(&self, x: &M) -> Self::Output {
|
||||
if self.n_splits < 2 {
|
||||
panic!("Number of splits is too small: {}", self.n_splits);
|
||||
}
|
||||
@@ -154,7 +157,7 @@ impl BaseKFold for KFold {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
@@ -162,7 +165,7 @@ mod tests {
|
||||
let k = KFold {
|
||||
n_splits: 3,
|
||||
shuffle: false,
|
||||
seed: None,
|
||||
seed: Option::None,
|
||||
};
|
||||
let x: DenseMatrix<f64> = DenseMatrix::rand(33, 100);
|
||||
let test_indices = k.test_indices(&x);
|
||||
@@ -178,7 +181,7 @@ mod tests {
|
||||
let k = KFold {
|
||||
n_splits: 3,
|
||||
shuffle: false,
|
||||
seed: None,
|
||||
seed: Option::None,
|
||||
};
|
||||
let x: DenseMatrix<f64> = DenseMatrix::rand(34, 100);
|
||||
let test_indices = k.test_indices(&x);
|
||||
@@ -194,7 +197,7 @@ mod tests {
|
||||
let k = KFold {
|
||||
n_splits: 2,
|
||||
shuffle: false,
|
||||
seed: None,
|
||||
seed: Option::None,
|
||||
};
|
||||
let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
|
||||
let test_masks = k.test_masks(&x);
|
||||
@@ -221,7 +224,7 @@ mod tests {
|
||||
let k = KFold {
|
||||
n_splits: 2,
|
||||
shuffle: false,
|
||||
seed: None,
|
||||
seed: Option::None,
|
||||
};
|
||||
let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
|
||||
let train_test_splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
|
||||
@@ -254,7 +257,7 @@ mod tests {
|
||||
let k = KFold {
|
||||
n_splits: 3,
|
||||
shuffle: false,
|
||||
seed: None,
|
||||
seed: Option::None,
|
||||
};
|
||||
let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
|
||||
let expected: Vec<(Vec<usize>, Vec<usize>)> = vec helper function.
|
||||
//!
|
||||
//! ```
|
||||
//! use crate::smartcore::linalg::BaseMatrix;
|
||||
//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::model_selection::train_test_split;
|
||||
//! use smartcore::linalg::basic::arrays::Array;
|
||||
//!
|
||||
//! //Iris data
|
||||
//! let x = DenseMatrix::from_2d_array(&[
|
||||
@@ -55,10 +55,12 @@
|
||||
//! The simplest way to run cross-validation is to use the [cross_validate](./fn.cross_validate.html) helper function on your estimator and the dataset.
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::model_selection::{KFold, cross_validate};
|
||||
//! use smartcore::metrics::accuracy;
|
||||
//! use smartcore::linear::logistic_regression::LogisticRegression;
|
||||
//! use smartcore::api::SupervisedEstimator;
|
||||
//! use smartcore::linalg::basic::arrays::Array;
|
||||
//!
|
||||
//! //Iris data
|
||||
//! let x = DenseMatrix::from_2d_array(&[
|
||||
@@ -83,17 +85,18 @@
|
||||
//! &[6.6, 2.9, 4.6, 1.3],
|
||||
//! &[5.2, 2.7, 3.9, 1.4],
|
||||
//! ]);
|
||||
//! let y: Vec<f64> = vec![
|
||||
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
||||
//! let y: Vec<i32> = vec![
|
||||
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
//! ];
|
||||
//!
|
||||
//! let cv = KFold::default().with_n_splits(3);
|
||||
//!
|
||||
//! let results = cross_validate(LogisticRegression::fit, //estimator
|
||||
//! &x, &y, //data
|
||||
//! &Default::default(), //hyperparameters
|
||||
//! &cv, //cross validation split
|
||||
//! &accuracy).unwrap(); //metric
|
||||
//! let results = cross_validate(
|
||||
//! LogisticRegression::new(), //estimator
|
||||
//! &x, &y, //data
|
||||
//! Default::default(), //hyperparameters
|
||||
//! &cv, //cross validation split
|
||||
//! &accuracy).unwrap(); //metric
|
||||
//!
|
||||
//! println!("Training accuracy: {}, test accuracy: {}",
|
||||
//! results.mean_train_score(), results.mean_test_score());
|
||||
@@ -102,18 +105,22 @@
|
||||
//! The function [cross_val_predict](./fn.cross_val_predict.html) has a similar interface to `cross_validate`,
|
||||
//! but instead of test error it calculates predictions for all samples in the test set.
|
||||
|
||||
use crate::api::Predictor;
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::linalg::Matrix;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::rand::get_rng_impl;
|
||||
use rand::seq::SliceRandom;
|
||||
use std::fmt::{Debug, Display};
|
||||
|
||||
pub(crate) mod hyper_tuning;
|
||||
#[allow(unused_imports)]
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2};
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
use crate::rand_custom::get_rng_impl;
|
||||
|
||||
// TODO: fix this module
|
||||
// pub(crate) mod hyper_tuning;
|
||||
pub(crate) mod kfold;
|
||||
|
||||
pub use hyper_tuning::{GridSearchCV, GridSearchCVParameters};
|
||||
// pub use hyper_tuning::{GridSearchCV, GridSearchCVParameters};
|
||||
pub use kfold::{KFold, KFoldIter};
|
||||
|
||||
/// An interface for the K-Folds cross-validator
|
||||
@@ -122,7 +129,7 @@ pub trait BaseKFold {
|
||||
type Output: Iterator<Item = (Vec<usize>, Vec<usize>)>;
|
||||
/// Return a tuple containing the training set indices for that split and
|
||||
/// the testing set indices for that split.
|
||||
fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output;
|
||||
fn split<T: Number, X: Array2<T>>(&self, x: &X) -> Self::Output;
|
||||
/// Returns the number of splits
|
||||
fn n_splits(&self) -> usize;
|
||||
}
|
||||
@@ -132,19 +139,23 @@ pub trait BaseKFold {
|
||||
/// * `y` - target values, should be of size _N_
|
||||
/// * `test_size`, (0, 1] - the proportion of the dataset to include in the test split.
|
||||
/// * `shuffle` - whether or not to shuffle the data before splitting
|
||||
/// * `seed` - Controls the shuffling applied to the data before applying the split. Pass an int for reproducible output across multiple function calls
|
||||
pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
pub fn train_test_split<
|
||||
TX: Debug + Display + Copy + Sized,
|
||||
TY: Debug + Display + Copy + Sized,
|
||||
X: Array2<TX>,
|
||||
Y: Array1<TY>,
|
||||
>(
|
||||
x: &X,
|
||||
y: &Y,
|
||||
test_size: f32,
|
||||
shuffle: bool,
|
||||
seed: Option<u64>,
|
||||
) -> (M, M, M::RowVector, M::RowVector) {
|
||||
if x.shape().0 != y.len() {
|
||||
) -> (X, X, Y, Y) {
|
||||
if x.shape().0 != y.shape() {
|
||||
panic!(
|
||||
"x and y should have the same number of samples. |x|: {}, |y|: {}",
|
||||
x.shape().0,
|
||||
y.len()
|
||||
y.shape()
|
||||
);
|
||||
}
|
||||
let mut rng = get_rng_impl(seed);
|
||||
@@ -153,7 +164,7 @@ pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
|
||||
panic!("test_size should be between 0 and 1");
|
||||
}
|
||||
|
||||
let n = y.len();
|
||||
let n = y.shape();
|
||||
|
||||
let n_test = ((n as f32) * test_size) as usize;
|
||||
|
||||
@@ -177,21 +188,29 @@ pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
|
||||
|
||||
/// Cross validation results.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CrossValidationResult<T: RealNumber> {
|
||||
pub struct CrossValidationResult {
|
||||
/// Vector with test scores on each cv split
|
||||
pub test_score: Vec<T>,
|
||||
pub test_score: Vec<f64>,
|
||||
/// Vector with training scores on each cv split
|
||||
pub train_score: Vec<T>,
|
||||
pub train_score: Vec<f64>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> CrossValidationResult<T> {
|
||||
impl CrossValidationResult {
|
||||
/// Average test score
|
||||
pub fn mean_test_score(&self) -> T {
|
||||
self.test_score.sum() / T::from_usize(self.test_score.len()).unwrap()
|
||||
pub fn mean_test_score(&self) -> f64 {
|
||||
let mut sum = 0f64;
|
||||
for s in self.test_score.iter() {
|
||||
sum += *s;
|
||||
}
|
||||
sum / self.test_score.len() as f64
|
||||
}
|
||||
/// Average training score
|
||||
pub fn mean_train_score(&self) -> T {
|
||||
self.train_score.sum() / T::from_usize(self.train_score.len()).unwrap()
|
||||
pub fn mean_train_score(&self) -> f64 {
|
||||
let mut sum = 0f64;
|
||||
for s in self.train_score.iter() {
|
||||
sum += *s;
|
||||
}
|
||||
sum / self.train_score.len() as f64
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,26 +221,27 @@ impl<T: RealNumber> CrossValidationResult<T> {
|
||||
/// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
|
||||
/// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
|
||||
/// * `score` - a metric to use for evaluation, see [metrics](../metrics/index.html)
|
||||
pub fn cross_validate<T, M, H, E, K, F, S>(
|
||||
fit_estimator: F,
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
parameters: &H,
|
||||
pub fn cross_validate<TX, TY, X, Y, H, E, K, S>(
|
||||
_estimator: E, // just an empty placeholder to allow passing `fit()`
|
||||
x: &X,
|
||||
y: &Y,
|
||||
parameters: H,
|
||||
cv: &K,
|
||||
score: S,
|
||||
) -> Result<CrossValidationResult<T>, Failed>
|
||||
score: &S,
|
||||
) -> Result<CrossValidationResult, Failed>
|
||||
where
|
||||
T: RealNumber,
|
||||
M: Matrix<T>,
|
||||
TX: Number + RealNumber,
|
||||
TY: Number,
|
||||
X: Array2<TX>,
|
||||
Y: Array1<TY>,
|
||||
H: Clone,
|
||||
E: Predictor<M, M::RowVector>,
|
||||
K: BaseKFold,
|
||||
F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
|
||||
S: Fn(&M::RowVector, &M::RowVector) -> T,
|
||||
E: SupervisedEstimator<X, Y, H>,
|
||||
S: Fn(&Y, &Y) -> f64,
|
||||
{
|
||||
let k = cv.n_splits();
|
||||
let mut test_score = Vec::with_capacity(k);
|
||||
let mut train_score = Vec::with_capacity(k);
|
||||
let mut test_score: Vec<f64> = Vec::with_capacity(k);
|
||||
let mut train_score: Vec<f64> = Vec::with_capacity(k);
|
||||
|
||||
for (train_idx, test_idx) in cv.split(x) {
|
||||
let train_x = x.take(&train_idx, 0);
|
||||
@@ -229,10 +249,12 @@ where
|
||||
let test_x = x.take(&test_idx, 0);
|
||||
let test_y = y.take(&test_idx);
|
||||
|
||||
let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
|
||||
// NOTE: only the estimator "class" is used here; the actual struct gets dropped
|
||||
let computed =
|
||||
<E as SupervisedEstimator<X, Y, H>>::fit(&train_x, &train_y, parameters.clone())?;
|
||||
|
||||
train_score.push(score(&train_y, &estimator.predict(&train_x)?));
|
||||
test_score.push(score(&test_y, &estimator.predict(&test_x)?));
|
||||
train_score.push(score(&train_y, &computed.predict(&train_x)?));
|
||||
test_score.push(score(&test_y, &computed.predict(&test_x)?));
|
||||
}
|
||||
|
||||
Ok(CrossValidationResult {
|
||||
@@ -248,33 +270,35 @@ where
|
||||
/// * `y` - target values, should be of size _N_
|
||||
/// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
|
||||
/// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
|
||||
pub fn cross_val_predict<T, M, H, E, K, F>(
|
||||
fit_estimator: F,
|
||||
x: &M,
|
||||
y: &M::RowVector,
|
||||
pub fn cross_val_predict<TX, TY, X, Y, H, E, K>(
|
||||
_estimator: E, // just an empty placeholder to allow passing `fit()`
|
||||
x: &X,
|
||||
y: &Y,
|
||||
parameters: H,
|
||||
cv: K,
|
||||
) -> Result<M::RowVector, Failed>
|
||||
cv: &K,
|
||||
) -> Result<Y, Failed>
|
||||
where
|
||||
T: RealNumber,
|
||||
M: Matrix<T>,
|
||||
TX: Number,
|
||||
TY: Number,
|
||||
X: Array2<TX>,
|
||||
Y: Array1<TY>,
|
||||
H: Clone,
|
||||
E: Predictor<M, M::RowVector>,
|
||||
K: BaseKFold,
|
||||
F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
|
||||
E: SupervisedEstimator<X, Y, H>,
|
||||
{
|
||||
let mut y_hat = M::RowVector::zeros(y.len());
|
||||
let mut y_hat = Y::zeros(y.shape());
|
||||
|
||||
for (train_idx, test_idx) in cv.split(x) {
|
||||
let train_x = x.take(&train_idx, 0);
|
||||
let train_y = y.take(&train_idx);
|
||||
let test_x = x.take(&test_idx, 0);
|
||||
|
||||
let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
|
||||
let computed =
|
||||
<E as SupervisedEstimator<X, Y, H>>::fit(&train_x, &train_y, parameters.clone())?;
|
||||
|
||||
let y_test_hat = estimator.predict(&test_x)?;
|
||||
let y_test_hat = computed.predict(&test_x)?;
|
||||
for (i, &idx) in test_idx.iter().enumerate() {
|
||||
y_hat.set(idx, y_test_hat.get(i));
|
||||
y_hat.set(idx, *y_test_hat.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,10 +309,17 @@ where
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::linalg::naive::dense_matrix::*;
|
||||
use crate::algorithm::neighbour::KNNAlgorithmName;
|
||||
use crate::api::NoParameters;
|
||||
use crate::linalg::basic::arrays::Array;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::linear::logistic_regression::LogisticRegression;
|
||||
use crate::metrics::distance::Distances;
|
||||
use crate::metrics::{accuracy, mean_absolute_error};
|
||||
use crate::model_selection::cross_validate;
|
||||
use crate::model_selection::kfold::KFold;
|
||||
use crate::neighbors::knn_regressor::KNNRegressor;
|
||||
use crate::neighbors::knn_regressor::{KNNRegressor, KNNRegressorParameters};
|
||||
use crate::neighbors::KNNWeightFunction;
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
@@ -312,31 +343,33 @@ mod tests {
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct NoParameters {}
|
||||
struct BiasedParameters {}
|
||||
impl NoParameters for BiasedParameters {}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn test_cross_validate_biased() {
|
||||
struct BiasedEstimator {}
|
||||
|
||||
impl BiasedEstimator {
|
||||
fn fit<M: Matrix<f32>>(
|
||||
_: &M,
|
||||
_: &M::RowVector,
|
||||
_: NoParameters,
|
||||
) -> Result<BiasedEstimator, Failed> {
|
||||
impl<X: Array2<f32>, Y: Array1<u32>, P: NoParameters> SupervisedEstimator<X, Y, P>
|
||||
for BiasedEstimator
|
||||
{
|
||||
fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
fn fit(_: &X, _: &Y, _: P) -> Result<BiasedEstimator, Failed> {
|
||||
Ok(BiasedEstimator {})
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: Matrix<f32>> Predictor<M, M::RowVector> for BiasedEstimator {
|
||||
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
|
||||
impl<X: Array2<f32>, Y: Array1<u32>> Predictor<X, Y> for BiasedEstimator {
|
||||
fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let (n, _) = x.shape();
|
||||
Ok(M::RowVector::zeros(n))
|
||||
Ok(Y::zeros(n))
|
||||
}
|
||||
}
|
||||
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
let x: DenseMatrix<f32> = DenseMatrix::from_2d_array(&[
|
||||
&[5.1, 3.5, 1.4, 0.2],
|
||||
&[4.9, 3.0, 1.4, 0.2],
|
||||
&[4.7, 3.2, 1.3, 0.2],
|
||||
@@ -358,9 +391,7 @@ mod tests {
|
||||
&[6.6, 2.9, 4.6, 1.3],
|
||||
&[5.2, 2.7, 3.9, 1.4],
|
||||
]);
|
||||
let y = vec![
|
||||
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
||||
];
|
||||
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||
|
||||
let cv = KFold {
|
||||
n_splits: 5,
|
||||
@@ -368,10 +399,10 @@ mod tests {
|
||||
};
|
||||
|
||||
let results = cross_validate(
|
||||
BiasedEstimator::fit,
|
||||
BiasedEstimator {},
|
||||
&x,
|
||||
&y,
|
||||
&NoParameters {},
|
||||
BiasedParameters {},
|
||||
&cv,
|
||||
&accuracy,
|
||||
)
|
||||
@@ -413,10 +444,10 @@ mod tests {
|
||||
};
|
||||
|
||||
let results = cross_validate(
|
||||
KNNRegressor::fit,
|
||||
KNNRegressor::new(),
|
||||
&x,
|
||||
&y,
|
||||
&Default::default(),
|
||||
Default::default(),
|
||||
&cv,
|
||||
&mean_absolute_error,
|
||||
)
|
||||
@@ -429,7 +460,7 @@ mod tests {
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn test_cross_val_predict_knn() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
||||
&[234.289, 235.6, 159., 107.608, 1947., 60.323],
|
||||
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
|
||||
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
|
||||
@@ -447,18 +478,69 @@ mod tests {
|
||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||
]);
|
||||
let y = vec![
|
||||
let y: Vec<f64> = vec![
|
||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||
114.2, 115.7, 116.9,
|
||||
];
|
||||
|
||||
let cv = KFold {
|
||||
let cv: KFold = KFold {
|
||||
n_splits: 2,
|
||||
..KFold::default()
|
||||
};
|
||||
|
||||
let y_hat = cross_val_predict(KNNRegressor::fit, &x, &y, Default::default(), cv).unwrap();
|
||||
let y_hat: Vec<f64> = cross_val_predict(
|
||||
KNNRegressor::new(),
|
||||
&x,
|
||||
&y,
|
||||
KNNRegressorParameters::default()
|
||||
.with_k(3)
|
||||
.with_distance(Distances::euclidian())
|
||||
.with_algorithm(KNNAlgorithmName::LinearSearch)
|
||||
.with_weight(KNNWeightFunction::Distance),
|
||||
&cv,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(mean_absolute_error(&y, &y_hat) < 10.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cross_validation_accuracy() {
|
||||
let x = DenseMatrix::from_2d_array(&[
|
||||
&[5.1, 3.5, 1.4, 0.2],
|
||||
&[4.9, 3.0, 1.4, 0.2],
|
||||
&[4.7, 3.2, 1.3, 0.2],
|
||||
&[4.6, 3.1, 1.5, 0.2],
|
||||
&[5.0, 3.6, 1.4, 0.2],
|
||||
&[5.4, 3.9, 1.7, 0.4],
|
||||
&[4.6, 3.4, 1.4, 0.3],
|
||||
&[5.0, 3.4, 1.5, 0.2],
|
||||
&[4.4, 2.9, 1.4, 0.2],
|
||||
&[4.9, 3.1, 1.5, 0.1],
|
||||
&[7.0, 3.2, 4.7, 1.4],
|
||||
&[6.4, 3.2, 4.5, 1.5],
|
||||
&[6.9, 3.1, 4.9, 1.5],
|
||||
&[5.5, 2.3, 4.0, 1.3],
|
||||
&[6.5, 2.8, 4.6, 1.5],
|
||||
&[5.7, 2.8, 4.5, 1.3],
|
||||
&[6.3, 3.3, 4.7, 1.6],
|
||||
&[4.9, 2.4, 3.3, 1.0],
|
||||
&[6.6, 2.9, 4.6, 1.3],
|
||||
&[5.2, 2.7, 3.9, 1.4],
|
||||
]);
|
||||
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||
|
||||
let cv = KFold::default().with_n_splits(3);
|
||||
|
||||
let results = cross_validate(
|
||||
LogisticRegression::new(),
|
||||
&x,
|
||||
&y,
|
||||
Default::default(),
|
||||
&cv,
|
||||
&accuracy,
|
||||
)
|
||||
.unwrap();
|
||||
println!("{:?}", results);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user