Merge potential next release v0.4 (#187) — Breaking Changes

* First draft of the new n-dimensional arrays + NB use case
* Improves default implementation of multiple Array methods
* Refactors tree methods
* Adds matrix decomposition routines
* Adds matrix decomposition methods to ndarray and nalgebra bindings
* Refactoring + linear regression now uses array2
* Ridge & Linear regression
* LBFGS optimizer & logistic regression
* LBFGS optimizer & logistic regression
* Changes linear methods, metrics and model selection methods to new n-dimensional arrays
* Switches KNN and clustering algorithms to new n-d array layer
* Refactors distance metrics
* Optimizes knn and clustering methods
* Refactors metrics module
* Switches decomposition methods to n-dimensional arrays
* Linalg refactoring - cleanup rng merge (#172)
* Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure.
* Exclude AUC metrics. Needs reimplementation
* Improve developers walkthrough

New traits system in place at `src/numbers` and `src/linalg`
Co-authored-by: Lorenzo <tunedconsulting@gmail.com>

* Provide SupervisedEstimator with a constructor to avoid explicit dynamic box allocation in 'cross_validate' and 'cross_validate_predict', as required by the use of 'dyn' as per Rust 2021
* Implement getters to use as_ref() in src/neighbors
* Implement getters to use as_ref() in src/naive_bayes
* Implement getters to use as_ref() in src/linear
* Add Clone to src/naive_bayes
* Change signature for cross_validate and other model_selection functions to abide by the use of dyn in Rust 2021
* Implement ndarray-bindings. Remove FloatNumber from implementations
* Drop nalgebra-bindings support (as decided in conf-call to go for ndarray)
* Remove benches. Benches will have their own repo at smartcore-benches
* Implement SVC
* Implement SVC serialization. Move search parameters in dedicated module
* Implement SVR. Definitely too slow
* Fix compilation issues for wasm (#202)

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
* Fix tests (#203)

* Port linalg/traits/stats.rs
* Improve method naming
* Improve Display for DenseMatrix

Co-authored-by: Montana Low <montanalow@users.noreply.github.com>
Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
Lorenzo
2022-10-31 10:44:57 +00:00
committed by GitHub
parent bb71656137
commit 52eb6ce023
110 changed files with 10327 additions and 9107 deletions
+75 -30
View File
@@ -1,41 +1,85 @@
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::metrics::cluster_helpers::*;
use crate::numbers::basenum::Number;
use crate::metrics::Metrics;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Homogeneity, completeness and V-Measure scores.
pub struct HCVScore {}
pub struct HCVScore<T> {
_phantom: PhantomData<T>,
homogeneity: Option<f64>,
completeness: Option<f64>,
v_measure: Option<f64>,
}
impl HCVScore {
/// Computes Homogeneity, completeness and V-Measure scores at once.
/// * `labels_true` - ground truth class labels to be used as a reference.
/// * `labels_pred` - cluster labels to evaluate.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(
&self,
labels_true: &V,
labels_pred: &V,
) -> (T, T, T) {
let labels_true = labels_true.to_vec();
let labels_pred = labels_pred.to_vec();
let entropy_c = entropy(&labels_true);
let entropy_k = entropy(&labels_pred);
let contingency = contingency_matrix(&labels_true, &labels_pred);
let mi: T = mutual_info_score(&contingency);
impl<T: Number + Ord> HCVScore<T> {
/// return homogeneity score
pub fn homogeneity(&self) -> Option<f64> {
self.homogeneity
}
/// return completeness score
pub fn completeness(&self) -> Option<f64> {
self.completeness
}
/// return v_measure score
pub fn v_measure(&self) -> Option<f64> {
self.v_measure
}
/// run computation for measures
pub fn compute(&mut self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) {
let entropy_c: Option<f64> = entropy(y_true);
let entropy_k: Option<f64> = entropy(y_pred);
let contingency = contingency_matrix(y_true, y_pred);
let mi = mutual_info_score(&contingency);
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or_else(T::one);
let completeness = entropy_k.map(|e| mi / e).unwrap_or_else(T::one);
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or(0f64);
let completeness = entropy_k.map(|e| mi / e).unwrap_or(0f64);
let v_measure_score = if homogeneity + completeness == T::zero() {
T::zero()
let v_measure_score = if homogeneity + completeness == 0f64 {
0f64
} else {
T::two() * homogeneity * completeness / (T::one() * homogeneity + completeness)
2.0f64 * homogeneity * completeness / (1.0f64 * homogeneity + completeness)
};
(homogeneity, completeness, v_measure_score)
self.homogeneity = Some(homogeneity);
self.completeness = Some(completeness);
self.v_measure = Some(v_measure_score);
}
}
impl<T: Number + Ord> Metrics<T> for HCVScore<T> {
/// create a typed object to call HCVScore functions
fn new() -> Self {
Self {
_phantom: PhantomData,
homogeneity: Option::None,
completeness: Option::None,
v_measure: Option::None,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
homogeneity: Option::None,
completeness: Option::None,
v_measure: Option::None,
}
}
/// Computes Homogeneity, completeness and V-Measure scores at once.
/// * `y_true` - ground truth class labels to be used as a reference.
/// * `y_pred` - cluster labels to evaluate.
fn get_score(&self, _y_true: &dyn ArrayView1<T>, _y_pred: &dyn ArrayView1<T>) -> f64 {
// this function should not be used for this struct
// use homogeneity(), completeness(), v_measure()
// TODO: implement Metrics -> Result<T, Failed>
0f64
}
}
@@ -46,12 +90,13 @@ mod tests {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn homogeneity_score() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
let scores = HCVScore {}.get_score(&v1, &v2);
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
let mut scores = HCVScore::new();
scores.compute(&v1, &v2);
assert!((0.2548f32 - scores.0).abs() < 1e-4);
assert!((0.5440f32 - scores.1).abs() < 1e-4);
assert!((0.3471f32 - scores.2).abs() < 1e-4);
assert!((0.2548 - scores.homogeneity.unwrap() as f64).abs() < 1e-4);
assert!((0.5440 - scores.completeness.unwrap() as f64).abs() < 1e-4);
assert!((0.3471 - scores.v_measure.unwrap() as f64).abs() < 1e-4);
}
}