Merge potential next release v0.4 (#187) Breaking Changes
* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters in dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
+55
-16
@@ -8,10 +8,20 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::accuracy::Accuracy;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![0., 2., 1., 3.];
|
||||
//! let y_true: Vec<f64> = vec![0., 1., 2., 3.];
|
||||
//!
|
||||
//! let score: f64 = Accuracy {}.get_score(&y_pred, &y_true);
|
||||
//! let score: f64 = Accuracy::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//! With integers:
|
||||
//! ```
|
||||
//! use smartcore::metrics::accuracy::Accuracy;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<i64> = vec![0, 2, 1, 3];
|
||||
//! let y_true: Vec<i64> = vec![0, 1, 2, 3];
|
||||
//!
|
||||
//! let score: f64 = Accuracy::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
@@ -19,37 +29,53 @@
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
/// Accuracy metric.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct Accuracy {}
|
||||
pub struct Accuracy<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl Accuracy {
|
||||
impl<T: Number> Metrics<T> for Accuracy<T> {
|
||||
/// create a typed object to call Accuracy functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Function that calculated accuracy score.
|
||||
/// * `y_true` - cround truth (correct) labels
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
|
||||
let n = y_true.len();
|
||||
let n = y_true.shape();
|
||||
|
||||
let mut positive = 0;
|
||||
let mut positive: i32 = 0;
|
||||
for i in 0..n {
|
||||
if y_true.get(i) == y_pred.get(i) {
|
||||
if *y_true.get(i) == *y_pred.get(i) {
|
||||
positive += 1;
|
||||
}
|
||||
}
|
||||
|
||||
T::from_i64(positive).unwrap() / T::from_usize(n).unwrap()
|
||||
positive as f64 / n as f64
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,14 +85,27 @@ mod tests {
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn accuracy() {
|
||||
fn accuracy_float() {
|
||||
let y_pred: Vec<f64> = vec![0., 2., 1., 3.];
|
||||
let y_true: Vec<f64> = vec![0., 1., 2., 3.];
|
||||
|
||||
let score1: f64 = Accuracy {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Accuracy {}.get_score(&y_true, &y_true);
|
||||
let score1: f64 = Accuracy::<f64>::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Accuracy::<f64>::new().get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.5).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn accuracy_int() {
|
||||
let y_pred: Vec<i32> = vec![0, 2, 1, 3];
|
||||
let y_true: Vec<i32> = vec![0, 1, 2, 3];
|
||||
|
||||
let score1: f64 = Accuracy::<i32>::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Accuracy::<i32>::new().get_score(&y_true, &y_true);
|
||||
|
||||
assert_eq!(score1, 0.5);
|
||||
assert_eq!(score2, 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
+47
-24
@@ -7,11 +7,12 @@
|
||||
//! Example:
|
||||
//! ```
|
||||
//! use smartcore::metrics::auc::AUC;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//!
|
||||
//! let y_true: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
//! let y_pred: Vec<f64> = vec![0.1, 0.4, 0.35, 0.8];
|
||||
//!
|
||||
//! let score1: f64 = AUC {}.get_score(&y_true, &y_pred);
|
||||
//! let score1: f64 = AUC::new().get_score(&y_true, &y_pred);
|
||||
//! ```
|
||||
//!
|
||||
//! ## References:
|
||||
@@ -20,32 +21,52 @@
|
||||
//! * ["The ROC-AUC and the Mann-Whitney U-test", Haupt, J.](https://johaupt.github.io/roc-auc/model%20evaluation/Area_under_ROC_curve.html)
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::algorithm::sort::quick_sort::QuickArgSort;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array1, ArrayView1, MutArrayView1};
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
/// Area Under the Receiver Operating Characteristic Curve (ROC AUC)
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct AUC {}
|
||||
pub struct AUC<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl AUC {
|
||||
impl<T: Number + Ord> Metrics<T> for AUC<T> {
|
||||
/// create a typed object to call AUC functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: T) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// AUC score.
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_pred_probabilities` - probability estimates, as returned by a classifier.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred_prob: &V) -> T {
|
||||
/// * `y_true` - ground truth (correct) labels.
|
||||
/// * `y_pred_prob` - probability estimates, as returned by a classifier.
|
||||
fn get_score(
|
||||
&self,
|
||||
y_true: &dyn ArrayView1<T>,
|
||||
y_pred_prob: &dyn ArrayView1<T>,
|
||||
) -> f64 {
|
||||
let mut pos = T::zero();
|
||||
let mut neg = T::zero();
|
||||
|
||||
let n = y_true.len();
|
||||
let n = y_true.shape();
|
||||
|
||||
for i in 0..n {
|
||||
if y_true.get(i) == T::zero() {
|
||||
if y_true.get(i) == &T::zero() {
|
||||
neg += T::one();
|
||||
} else if y_true.get(i) == T::one() {
|
||||
} else if y_true.get(i) == &T::one() {
|
||||
pos += T::one();
|
||||
} else {
|
||||
panic!(
|
||||
@@ -55,21 +76,21 @@ impl AUC {
|
||||
}
|
||||
}
|
||||
|
||||
let mut y_pred = y_pred_prob.to_vec();
|
||||
let y_pred = y_pred_prob.clone();
|
||||
|
||||
let label_idx = y_pred.quick_argsort_mut();
|
||||
let label_idx = y_pred.argsort();
|
||||
|
||||
let mut rank = vec![T::zero(); n];
|
||||
let mut rank = vec![0f64; n];
|
||||
let mut i = 0;
|
||||
while i < n {
|
||||
if i == n - 1 || y_pred[i] != y_pred[i + 1] {
|
||||
rank[i] = T::from_usize(i + 1).unwrap();
|
||||
if i == n - 1 || y_pred.get(i) != y_pred.get(i + 1) {
|
||||
rank[i] = (i + 1) as f64;
|
||||
} else {
|
||||
let mut j = i + 1;
|
||||
while j < n && y_pred[j] == y_pred[i] {
|
||||
while j < n && y_pred.get(j) == y_pred.get(i) {
|
||||
j += 1;
|
||||
}
|
||||
let r = T::from_usize(i + 1 + j).unwrap() / T::two();
|
||||
let r = (i + 1 + j) as f64 / 2f64;
|
||||
for rank_k in rank.iter_mut().take(j).skip(i) {
|
||||
*rank_k = r;
|
||||
}
|
||||
@@ -78,14 +99,16 @@ impl AUC {
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let mut auc = T::zero();
|
||||
let mut auc = 0f64;
|
||||
for i in 0..n {
|
||||
if y_true.get(label_idx[i]) == T::one() {
|
||||
if y_true.get(label_idx[i]) == &T::one() {
|
||||
auc += rank[i];
|
||||
}
|
||||
}
|
||||
let pos = pos.to_f64().unwrap();
|
||||
let neg = neg.to_f64().unwrap();
|
||||
|
||||
(auc - (pos * (pos + T::one()) / T::two())) / (pos * neg)
|
||||
T::from(auc - (pos * (pos + 1f64) / 2.0)).unwrap() / T::from(pos * neg).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,8 +122,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
let y_pred: Vec<f64> = vec![0.1, 0.4, 0.35, 0.8];
|
||||
|
||||
let score1: f64 = AUC {}.get_score(&y_true, &y_pred);
|
||||
let score2: f64 = AUC {}.get_score(&y_true, &y_true);
|
||||
let score1: f64 = AUC::new().get_score(&y_true, &y_pred);
|
||||
let score2: f64 = AUC::new().get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.75).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
|
||||
+75
-30
@@ -1,41 +1,85 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::metrics::cluster_helpers::*;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
/// Homogeneity, completeness and V-Measure scores.
|
||||
pub struct HCVScore {}
|
||||
pub struct HCVScore<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
homogeneity: Option<f64>,
|
||||
completeness: Option<f64>,
|
||||
v_measure: Option<f64>,
|
||||
}
|
||||
|
||||
impl HCVScore {
|
||||
/// Computes Homogeneity, completeness and V-Measure scores at once.
|
||||
/// * `labels_true` - ground truth class labels to be used as a reference.
|
||||
/// * `labels_pred` - cluster labels to evaluate.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(
|
||||
&self,
|
||||
labels_true: &V,
|
||||
labels_pred: &V,
|
||||
) -> (T, T, T) {
|
||||
let labels_true = labels_true.to_vec();
|
||||
let labels_pred = labels_pred.to_vec();
|
||||
let entropy_c = entropy(&labels_true);
|
||||
let entropy_k = entropy(&labels_pred);
|
||||
let contingency = contingency_matrix(&labels_true, &labels_pred);
|
||||
let mi: T = mutual_info_score(&contingency);
|
||||
impl<T: Number + Ord> HCVScore<T> {
|
||||
/// return homogenity score
|
||||
pub fn homogeneity(&self) -> Option<f64> {
|
||||
self.homogeneity
|
||||
}
|
||||
/// return completeness score
|
||||
pub fn completeness(&self) -> Option<f64> {
|
||||
self.completeness
|
||||
}
|
||||
/// return v_measure score
|
||||
pub fn v_measure(&self) -> Option<f64> {
|
||||
self.v_measure
|
||||
}
|
||||
/// run computation for measures
|
||||
pub fn compute(&mut self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) {
|
||||
let entropy_c: Option<f64> = entropy(y_true);
|
||||
let entropy_k: Option<f64> = entropy(y_pred);
|
||||
let contingency = contingency_matrix(y_true, y_pred);
|
||||
let mi = mutual_info_score(&contingency);
|
||||
|
||||
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or_else(T::one);
|
||||
let completeness = entropy_k.map(|e| mi / e).unwrap_or_else(T::one);
|
||||
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or(0f64);
|
||||
let completeness = entropy_k.map(|e| mi / e).unwrap_or(0f64);
|
||||
|
||||
let v_measure_score = if homogeneity + completeness == T::zero() {
|
||||
T::zero()
|
||||
let v_measure_score = if homogeneity + completeness == 0f64 {
|
||||
0f64
|
||||
} else {
|
||||
T::two() * homogeneity * completeness / (T::one() * homogeneity + completeness)
|
||||
2.0f64 * homogeneity * completeness / (1.0f64 * homogeneity + completeness)
|
||||
};
|
||||
|
||||
(homogeneity, completeness, v_measure_score)
|
||||
self.homogeneity = Some(homogeneity);
|
||||
self.completeness = Some(completeness);
|
||||
self.v_measure = Some(v_measure_score);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number + Ord> Metrics<T> for HCVScore<T> {
|
||||
/// create a typed object to call HCVScore functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
homogeneity: Option::None,
|
||||
completeness: Option::None,
|
||||
v_measure: Option::None,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
homogeneity: Option::None,
|
||||
completeness: Option::None,
|
||||
v_measure: Option::None,
|
||||
}
|
||||
}
|
||||
/// Computes Homogeneity, completeness and V-Measure scores at once.
|
||||
/// * `y_true` - ground truth class labels to be used as a reference.
|
||||
/// * `y_pred` - cluster labels to evaluate.
|
||||
fn get_score(&self, _y_true: &dyn ArrayView1<T>, _y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
// this functions should not be used for this struct
|
||||
// use homogeneity(), completeness(), v_measure()
|
||||
// TODO: implement Metrics -> Result<T, Failed>
|
||||
0f64
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,12 +90,13 @@ mod tests {
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn homogeneity_score() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let scores = HCVScore {}.get_score(&v1, &v2);
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
|
||||
let mut scores = HCVScore::new();
|
||||
scores.compute(&v1, &v2);
|
||||
|
||||
assert!((0.2548f32 - scores.0).abs() < 1e-4);
|
||||
assert!((0.5440f32 - scores.1).abs() < 1e-4);
|
||||
assert!((0.3471f32 - scores.2).abs() < 1e-4);
|
||||
assert!((0.2548 - scores.homogeneity.unwrap() as f64).abs() < 1e-4);
|
||||
assert!((0.5440 - scores.completeness.unwrap() as f64).abs() < 1e-4);
|
||||
assert!((0.3471 - scores.v_measure.unwrap() as f64).abs() < 1e-4);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#![allow(clippy::ptr_arg)]
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::math::vector::RealNumberVector;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
pub fn contingency_matrix<T: RealNumber>(
|
||||
labels_true: &Vec<T>,
|
||||
labels_pred: &Vec<T>,
|
||||
pub fn contingency_matrix<T: Number + Ord, V: ArrayView1<T> + ?Sized>(
|
||||
labels_true: &V,
|
||||
labels_pred: &V,
|
||||
) -> Vec<Vec<usize>> {
|
||||
let (classes, class_idx) = labels_true.unique_with_indices();
|
||||
let (clusters, cluster_idx) = labels_pred.unique_with_indices();
|
||||
@@ -24,28 +24,30 @@ pub fn contingency_matrix<T: RealNumber>(
|
||||
contingency_matrix
|
||||
}
|
||||
|
||||
pub fn entropy<T: RealNumber>(data: &[T]) -> Option<T> {
|
||||
let mut bincounts = HashMap::with_capacity(data.len());
|
||||
pub fn entropy<T: Number + Ord, V: ArrayView1<T> + ?Sized>(data: &V) -> Option<f64> {
|
||||
let mut bincounts = HashMap::with_capacity(data.shape());
|
||||
|
||||
for e in data.iter() {
|
||||
for e in data.iterator(0) {
|
||||
let k = e.to_i64().unwrap();
|
||||
bincounts.insert(k, bincounts.get(&k).unwrap_or(&0) + 1);
|
||||
}
|
||||
|
||||
let mut entropy = T::zero();
|
||||
let sum = T::from_usize(bincounts.values().sum()).unwrap();
|
||||
let mut entropy = 0f64;
|
||||
let sum: i64 = bincounts.values().sum();
|
||||
|
||||
for &c in bincounts.values() {
|
||||
if c > 0 {
|
||||
let pi = T::from_usize(c).unwrap();
|
||||
entropy -= (pi / sum) * (pi.ln() - sum.ln());
|
||||
let pi = c as f64;
|
||||
let pi_ln = pi.ln();
|
||||
let sum_ln = (sum as f64).ln();
|
||||
entropy -= (pi / sum as f64) * (pi_ln - sum_ln);
|
||||
}
|
||||
}
|
||||
|
||||
Some(entropy)
|
||||
}
|
||||
|
||||
pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
|
||||
pub fn mutual_info_score(contingency: &[Vec<usize>]) -> f64 {
|
||||
let mut contingency_sum = 0;
|
||||
let mut pi = vec![0; contingency.len()];
|
||||
let mut pj = vec![0; contingency[0].len()];
|
||||
@@ -64,37 +66,36 @@ pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
|
||||
}
|
||||
}
|
||||
|
||||
let contingency_sum = T::from_usize(contingency_sum).unwrap();
|
||||
let contingency_sum = contingency_sum as f64;
|
||||
let contingency_sum_ln = contingency_sum.ln();
|
||||
let pi_sum_l = T::from_usize(pi.iter().sum()).unwrap().ln();
|
||||
let pj_sum_l = T::from_usize(pj.iter().sum()).unwrap().ln();
|
||||
let pi_sum: usize = pi.iter().sum();
|
||||
let pj_sum: usize = pj.iter().sum();
|
||||
let pi_sum_l = (pi_sum as f64).ln();
|
||||
let pj_sum_l = (pj_sum as f64).ln();
|
||||
|
||||
let log_contingency_nm: Vec<T> = nz_val
|
||||
let log_contingency_nm: Vec<f64> = nz_val.iter().map(|v| (*v as f64).ln()).collect();
|
||||
let contingency_nm: Vec<f64> = nz_val
|
||||
.iter()
|
||||
.map(|v| T::from_usize(*v).unwrap().ln())
|
||||
.collect();
|
||||
let contingency_nm: Vec<T> = nz_val
|
||||
.iter()
|
||||
.map(|v| T::from_usize(*v).unwrap() / contingency_sum)
|
||||
.map(|v| (*v as f64) / contingency_sum)
|
||||
.collect();
|
||||
let outer: Vec<usize> = nzx
|
||||
.iter()
|
||||
.zip(nzy.iter())
|
||||
.map(|(&x, &y)| pi[x] * pj[y])
|
||||
.collect();
|
||||
let log_outer: Vec<T> = outer
|
||||
let log_outer: Vec<f64> = outer
|
||||
.iter()
|
||||
.map(|&o| -T::from_usize(o).unwrap().ln() + pi_sum_l + pj_sum_l)
|
||||
.map(|&o| -(o as f64).ln() + pi_sum_l + pj_sum_l)
|
||||
.collect();
|
||||
|
||||
let mut result = T::zero();
|
||||
let mut result = 0f64;
|
||||
|
||||
for i in 0..log_outer.len() {
|
||||
result += (contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln))
|
||||
+ contingency_nm[i] * log_outer[i]
|
||||
}
|
||||
|
||||
result.max(T::zero())
|
||||
result.max(0f64)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -104,8 +105,8 @@ mod tests {
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn contingency_matrix_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
|
||||
|
||||
assert_eq!(
|
||||
vec!(vec!(1, 2), vec!(2, 0), vec!(1, 0), vec!(1, 0)),
|
||||
@@ -116,17 +117,17 @@ mod tests {
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn entropy_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
|
||||
assert!((1.2770f32 - entropy(&v1).unwrap()).abs() < 1e-4);
|
||||
assert!((1.2770 - entropy(&v1).unwrap() as f64).abs() < 1e-4);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn mutual_info_score_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let s: f32 = mutual_info_score(&contingency_matrix(&v1, &v2));
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
|
||||
let s = mutual_info_score(&contingency_matrix(&v1, &v2));
|
||||
|
||||
assert!((0.3254 - s).abs() < 1e-4);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
//! # Euclidian Metric Distance
|
||||
//!
|
||||
//! The Euclidean distance (L2) between two points \\( x \\) and \\( y \\) in n-space is defined as
|
||||
//!
|
||||
//! \\[ d(x, y) = \sqrt{\sum_{i=1}^n (x-y)^2} \\]
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::euclidian::Euclidian;
|
||||
//!
|
||||
//! let x = vec![1., 1.];
|
||||
//! let y = vec![2., 2.];
|
||||
//!
|
||||
//! let l2: f64 = Euclidian::new().distance(&x, &y);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use super::Distance;
|
||||
|
||||
/// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Euclidian<T> {
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Default for Euclidian<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number> Euclidian<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new() -> Euclidian<T> {
|
||||
Euclidian { _t: PhantomData }
|
||||
}
|
||||
|
||||
/// return sum of squared distances
|
||||
#[inline]
|
||||
pub(crate) fn squared_distance<A: ArrayView1<T>>(x: &A, y: &A) -> f64 {
|
||||
if x.shape() != y.shape() {
|
||||
panic!("Input vector sizes are different.");
|
||||
}
|
||||
|
||||
let sum: f64 = x
|
||||
.iterator(0)
|
||||
.zip(y.iterator(0))
|
||||
.map(|(&a, &b)| {
|
||||
let r = a - b;
|
||||
(r * r).to_f64().unwrap()
|
||||
})
|
||||
.sum();
|
||||
|
||||
sum
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Euclidian<T> {
|
||||
fn distance(&self, x: &A, y: &A) -> f64 {
|
||||
Euclidian::squared_distance(x, y).sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn squared_distance() {
|
||||
let a = vec![1, 2, 3];
|
||||
let b = vec![4, 5, 6];
|
||||
|
||||
let l2: f64 = Euclidian::new().distance(&a, &b);
|
||||
|
||||
assert!((l2 - 5.19615242).abs() < 1e-8);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
//! # Hamming Distance
|
||||
//!
|
||||
//! Hamming Distance measures the similarity between two integer-valued vectors of the same length.
|
||||
//! Given two vectors \\( x \in ℝ^n \\), \\( y \in ℝ^n \\) the hamming distance between \\( x \\) and \\( y \\), \\( d(x, y) \\), is the number of places where \\( x \\) and \\( y \\) differ.
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::hamming::Hamming;
|
||||
//!
|
||||
//! let a = vec![1, 0, 0, 1, 0, 0, 1];
|
||||
//! let b = vec![1, 1, 0, 0, 1, 0, 1];
|
||||
//!
|
||||
//! let h: f64 = Hamming::new().distance(&a, &b);
|
||||
//!
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use super::Distance;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
/// While comparing two integer-valued vectors of equal length, Hamming distance is the number of bit positions in which the two bits are different
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Hamming<T: Number> {
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Hamming<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new() -> Hamming<T> {
|
||||
Hamming { _t: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number> Default for Hamming<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Hamming<T> {
|
||||
fn distance(&self, x: &A, y: &A) -> f64 {
|
||||
if x.shape() != y.shape() {
|
||||
panic!("Input vector sizes are different");
|
||||
}
|
||||
|
||||
let dist: usize = x
|
||||
.iterator(0)
|
||||
.zip(y.iterator(0))
|
||||
.map(|(a, b)| match a != b {
|
||||
true => 1,
|
||||
false => 0,
|
||||
})
|
||||
.sum();
|
||||
|
||||
dist as f64 / x.shape() as f64
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn hamming_distance() {
|
||||
let a = vec![1, 0, 0, 1, 0, 0, 1];
|
||||
let b = vec![1, 1, 0, 0, 1, 0, 1];
|
||||
|
||||
let h: f64 = Hamming::new().distance(&a, &b);
|
||||
|
||||
assert!((h - 0.42857142).abs() < 1e-8);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
//! # Mahalanobis Distance
|
||||
//!
|
||||
//! The Mahalanobis distance (MD) is the distance between two points in multivariate space.
|
||||
//! In a regular Euclidean space the distance between any two points can be measured with [Euclidean distance](../euclidian/index.html).
|
||||
//! For uncorrelated variables, the Euclidean distance equals the MD. However, if two or more variables are correlated the measurements become impossible
|
||||
//! with Euclidean distance because the axes are no longer at right angles to each other. MD on the other hand, is scale-invariant,
|
||||
//! it takes into account the covariance matrix of the dataset when calculating distance between 2 points that belong to the same space as the dataset.
|
||||
//!
|
||||
//! MD between two vectors \\( x \in ℝ^n \\) and \\( y \in ℝ^n \\) is defined as
|
||||
//! \\[ d(x, y) = \sqrt{(x - y)^TS^{-1}(x - y)}\\]
|
||||
//!
|
||||
//! where \\( S \\) is the covariance matrix of the dataset.
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linalg::basic::arrays::ArrayView2;
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::mahalanobis::Mahalanobis;
|
||||
//!
|
||||
//! let data = DenseMatrix::from_2d_array(&[
|
||||
//! &[64., 580., 29.],
|
||||
//! &[66., 570., 33.],
|
||||
//! &[68., 590., 37.],
|
||||
//! &[69., 660., 46.],
|
||||
//! &[73., 600., 55.],
|
||||
//! ]);
|
||||
//!
|
||||
//! let a = data.mean_by(0);
|
||||
//! let b = vec![66., 640., 44.];
|
||||
//!
|
||||
//! let mahalanobis = Mahalanobis::new(&data);
|
||||
//!
|
||||
//! mahalanobis.distance(&a, &b);
|
||||
//! ```
|
||||
//!
|
||||
//! ## References
|
||||
//! * ["Introduction to Multivariate Statistical Analysis in Chemometrics", Varmuza, K., Filzmoser, P., 2016, p.46](https://www.taylorfrancis.com/books/9780429145049)
|
||||
//! * ["Example of Calculating the Mahalanobis Distance", McCaffrey, J.D.](https://jamesmccaffrey.wordpress.com/2017/11/09/example-of-calculating-the-mahalanobis-distance/)
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use super::Distance;
|
||||
use crate::linalg::basic::arrays::{Array, Array2, ArrayView1};
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::linalg::traits::lu::LUDecomposable;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
/// Mahalanobis distance.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Mahalanobis<T: Number, M: Array2<f64>> {
|
||||
/// covariance matrix of the dataset
|
||||
pub sigma: M,
|
||||
/// inverse of the covariance matrix
|
||||
pub sigmaInv: M,
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number, M: Array2<f64> + LUDecomposable<f64>> Mahalanobis<T, M> {
|
||||
/// Constructs new instance of `Mahalanobis` from given dataset
|
||||
/// * `data` - a matrix of _NxM_ where _N_ is number of observations and _M_ is number of attributes
|
||||
pub fn new<X: Array2<T>>(data: &X) -> Mahalanobis<T, M> {
|
||||
let (_, m) = data.shape();
|
||||
let mut sigma = M::zeros(m, m);
|
||||
data.cov(&mut sigma);
|
||||
let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap();
|
||||
Mahalanobis {
|
||||
sigma,
|
||||
sigmaInv,
|
||||
_t: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs new instance of `Mahalanobis` from given covariance matrix
|
||||
/// * `cov` - a covariance matrix
|
||||
pub fn new_from_covariance<X: Array2<f64> + LUDecomposable<f64>>(cov: &X) -> Mahalanobis<T, X> {
|
||||
let sigma = cov.clone();
|
||||
let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap();
|
||||
Mahalanobis {
|
||||
sigma,
|
||||
sigmaInv,
|
||||
_t: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Mahalanobis<T, DenseMatrix<f64>> {
    /// Mahalanobis distance between `x` and `y`: sqrt((x-y)^T * Sigma^-1 * (x-y)).
    /// Panics when either vector's length differs from the covariance matrix dimension.
    fn distance(&self, x: &A, y: &A) -> f64 {
        let (nrows, ncols) = self.sigma.shape();
        if x.shape() != nrows {
            panic!(
                "Array x[{}] has different dimension with Sigma[{}][{}].",
                x.shape(),
                nrows,
                ncols
            );
        }

        if y.shape() != nrows {
            panic!(
                "Array y[{}] has different dimension with Sigma[{}][{}].",
                y.shape(),
                nrows,
                ncols
            );
        }

        let n = x.shape();

        // z = x - y, promoted to f64 element-wise
        let z: Vec<f64> = x
            .iterator(0)
            .zip(y.iterator(0))
            .map(|(&a, &b)| (a - b).to_f64().unwrap())
            .collect();

        // np.dot(np.dot((a-b),VI),(a-b).T)
        let mut s = 0f64;
        for j in 0..n {
            for i in 0..n {
                s += *self.sigmaInv.get((i, j)) * z[i] * z[j];
            }
        }

        s.sqrt()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::basic::arrays::ArrayView2;
    use crate::linalg::basic::matrix::DenseMatrix;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn mahalanobis_distance() {
        let data = DenseMatrix::from_2d_array(&[
            &[64., 580., 29.],
            &[66., 570., 33.],
            &[68., 590., 37.],
            &[69., 660., 46.],
            &[73., 600., 55.],
        ]);

        // distance of a point from the dataset's column means
        let a = data.mean_by(0);
        let b = vec![66., 640., 44.];

        let mahalanobis = Mahalanobis::new(&data);

        let md: f64 = mahalanobis.distance(&a, &b);

        assert!((md - 5.33).abs() < 1e-2);
    }
}
|
||||
@@ -0,0 +1,79 @@
|
||||
//! # Manhattan Distance
|
||||
//!
|
||||
//! The Manhattan distance between two points \\(x \in ℝ^n \\) and \\( y \in ℝ^n \\) in n-dimensional space is the sum of the distances in each dimension.
|
||||
//!
|
||||
//! \\[ d(x, y) = \sum_{i=0}^n \lvert x_i - y_i \rvert \\]
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::manhattan::Manhattan;
|
||||
//!
|
||||
//! let x = vec![1., 1.];
|
||||
//! let y = vec![2., 2.];
|
||||
//!
|
||||
//! let l1: f64 = Manhattan::new().distance(&x, &y);
|
||||
//! ```
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use super::Distance;
|
||||
|
||||
/// Manhattan distance
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Manhattan<T: Number> {
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Manhattan<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new() -> Manhattan<T> {
|
||||
Manhattan { _t: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number> Default for Manhattan<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Manhattan<T> {
    /// L1 distance: sum of absolute element-wise differences.
    /// Panics when the two vectors have different lengths.
    fn distance(&self, x: &A, y: &A) -> f64 {
        if x.shape() != y.shape() {
            panic!("Input vector sizes are different");
        }

        let dist: f64 = x
            .iterator(0)
            .zip(y.iterator(0))
            .map(|(&a, &b)| (a - b).to_f64().unwrap().abs())
            .sum();

        dist
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn manhattan_distance() {
        let a = vec![1., 2., 3.];
        let b = vec![4., 5., 6.];

        let l1: f64 = Manhattan::new().distance(&a, &b);

        // |1-4| + |2-5| + |3-6| = 9
        assert!((l1 - 9.0).abs() < 1e-8);
    }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
//! # Minkowski Distance
|
||||
//!
|
||||
//! The Minkowski distance of order _p_ (where _p_ is an integer) is a metric in a normed vector space which can be considered as a generalization of both the Euclidean distance and the Manhattan distance.
|
||||
//! The Manhattan distance between two points \\(x \in ℝ^n \\) and \\( y \in ℝ^n \\) in n-dimensional space is defined as:
|
||||
//!
|
||||
//! \\[ d(x, y) = \left(\sum_{i=0}^n \lvert x_i - y_i \rvert^p\right)^{1/p} \\]
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::minkowski::Minkowski;
|
||||
//!
|
||||
//! let x = vec![1., 1.];
|
||||
//! let y = vec![2., 2.];
|
||||
//!
|
||||
//! let l1: f64 = Minkowski::new(1).distance(&x, &y);
|
||||
//! let l2: f64 = Minkowski::new(2).distance(&x, &y);
|
||||
//!
|
||||
//! ```
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use super::Distance;
|
||||
|
||||
/// Defines the Minkowski distance of order `p`
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Minkowski<T: Number> {
|
||||
/// order, integer
|
||||
pub p: u16,
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Minkowski<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new(p: u16) -> Minkowski<T> {
|
||||
Minkowski { p, _t: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Minkowski<T> {
    /// Lp distance: (sum |x_i - y_i|^p)^(1/p).
    /// Panics when the two vectors have different lengths or when `p < 1`.
    fn distance(&self, x: &A, y: &A) -> f64 {
        if x.shape() != y.shape() {
            panic!("Input vector sizes are different");
        }
        if self.p < 1 {
            panic!("p must be at least 1");
        }

        let p_t = self.p as f64;

        let dist: f64 = x
            .iterator(0)
            .zip(y.iterator(0))
            .map(|(&a, &b)| (a - b).to_f64().unwrap().abs().powf(p_t))
            .sum();

        dist.powf(1f64 / p_t)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn minkowski_distance() {
        let a = vec![1., 2., 3.];
        let b = vec![4., 5., 6.];

        let l1: f64 = Minkowski::new(1).distance(&a, &b);
        let l2: f64 = Minkowski::new(2).distance(&a, &b);
        let l3: f64 = Minkowski::new(3).distance(&a, &b);

        assert!((l1 - 9.0).abs() < 1e-8);
        assert!((l2 - 5.19615242).abs() < 1e-8);
        assert!((l3 - 4.32674871).abs() < 1e-8);
    }

    // p is u16, so "negative" is unrepresentable; 0 is the invalid order tested here
    #[test]
    #[should_panic(expected = "p must be at least 1")]
    fn minkowski_distance_negative_p() {
        let a = vec![1., 2., 3.];
        let b = vec![4., 5., 6.];

        let _: f64 = Minkowski::new(0).distance(&a, &b);
    }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
//! # Collection of Distance Functions
|
||||
//!
|
||||
//! Many algorithms in machine learning require a measure of distance between data points. Distance metric (or metric) is a function that defines a distance between a pair of point elements of a set.
|
||||
//! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions:
|
||||
//! 1. \\( d(x, y) = 0 \\) if and only if \\( x = y \\), positive definiteness
|
||||
//! 1. \\( d(x, y) = d(y, x) \\), symmetry
|
||||
//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality
|
||||
//!
|
||||
//! for all \\(x, y, z \in Z \\)
|
||||
//!
|
||||
//! A good distance metric helps to improve the performance of classification, clustering and information retrieval algorithms significantly.
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
/// Euclidean Distance is the straight-line distance between two points in Euclidean space; it represents the shortest distance between these points.
|
||||
pub mod euclidian;
|
||||
/// Hamming Distance between two strings is the number of positions at which the corresponding symbols are different.
|
||||
pub mod hamming;
|
||||
/// The Mahalanobis distance is the distance between two points in multivariate space.
|
||||
pub mod mahalanobis;
|
||||
/// Also known as rectilinear distance, city block distance, taxicab metric.
|
||||
pub mod manhattan;
|
||||
/// A generalization of both the Euclidean distance and the Manhattan distance.
|
||||
pub mod minkowski;
|
||||
|
||||
use crate::linalg::basic::arrays::Array2;
|
||||
use crate::linalg::traits::lu::LUDecomposable;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
/// Distance metric, a function that calculates distance between two points
pub trait Distance<T>: Clone {
    /// Calculates distance between _a_ and _b_
    fn distance(&self, a: &T, b: &T) -> f64;
}

/// Multitude of distance metric functions
pub struct Distances {}
|
||||
|
||||
impl Distances {
|
||||
/// Euclidian distance, see [`Euclidian`](euclidian/index.html)
|
||||
pub fn euclidian<T: Number>() -> euclidian::Euclidian<T> {
|
||||
euclidian::Euclidian::new()
|
||||
}
|
||||
|
||||
/// Minkowski distance, see [`Minkowski`](minkowski/index.html)
|
||||
/// * `p` - function order. Should be >= 1
|
||||
pub fn minkowski<T: Number>(p: u16) -> minkowski::Minkowski<T> {
|
||||
minkowski::Minkowski::new(p)
|
||||
}
|
||||
|
||||
/// Manhattan distance, see [`Manhattan`](manhattan/index.html)
|
||||
pub fn manhattan<T: Number>() -> manhattan::Manhattan<T> {
|
||||
manhattan::Manhattan::new()
|
||||
}
|
||||
|
||||
/// Hamming distance, see [`Hamming`](hamming/index.html)
|
||||
pub fn hamming<T: Number>() -> hamming::Hamming<T> {
|
||||
hamming::Hamming::new()
|
||||
}
|
||||
|
||||
/// Mahalanobis distance, see [`Mahalanobis`](mahalanobis/index.html)
|
||||
pub fn mahalanobis<T: Number, M: Array2<T>, C: Array2<f64> + LUDecomposable<f64>>(
|
||||
data: &M,
|
||||
) -> mahalanobis::Mahalanobis<T, C> {
|
||||
mahalanobis::Mahalanobis::new(data)
|
||||
}
|
||||
}
|
||||
+42
-15
@@ -10,48 +10,71 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::f1::F1;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||
//! let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
|
||||
//!
|
||||
//! let score: f64 = F1 {beta: 1.0}.get_score(&y_pred, &y_true);
|
||||
//! let beta = 1.0; // beta default is equal 1.0 anyway
|
||||
//! let score: f64 = F1::new_with(beta).get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::metrics::precision::Precision;
|
||||
use crate::metrics::recall::Recall;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
/// F-measure
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct F1<T: RealNumber> {
|
||||
pub struct F1<T> {
|
||||
/// a positive real factor
|
||||
pub beta: T,
|
||||
pub beta: f64,
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: RealNumber> F1<T> {
|
||||
impl<T: Number + RealNumber + FloatNumber> Metrics<T> for F1<T> {
|
||||
fn new() -> Self {
|
||||
let beta: f64 = 1f64;
|
||||
Self {
|
||||
beta,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// create a typed object to call Recall functions
|
||||
fn new_with(beta: f64) -> Self {
|
||||
Self {
|
||||
beta,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Computes F1 score
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn get_score<V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
let beta2 = self.beta * self.beta;
|
||||
|
||||
let p = Precision {}.get_score(y_true, y_pred);
|
||||
let r = Recall {}.get_score(y_true, y_pred);
|
||||
let p = Precision::new().get_score(y_true, y_pred);
|
||||
let r = Recall::new().get_score(y_true, y_pred);
|
||||
|
||||
(T::one() + beta2) * (p * r) / (beta2 * p + r)
|
||||
(1f64 + beta2) * (p * r) / ((beta2 * p) + r)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +88,12 @@ mod tests {
|
||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
|
||||
|
||||
let score1: f64 = F1 { beta: 1.0 }.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = F1 { beta: 1.0 }.get_score(&y_true, &y_true);
|
||||
let beta = 1.0;
|
||||
let score1: f64 = F1::new_with(beta).get_score(&y_pred, &y_true);
|
||||
let score2: f64 = F1::new_with(beta).get_score(&y_true, &y_true);
|
||||
|
||||
println!("{:?}", score1);
|
||||
println!("{:?}", score2);
|
||||
|
||||
assert!((score1 - 0.57142857).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
|
||||
@@ -10,45 +10,65 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::mean_absolute_error::MeanAbsoluteError;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![3., -0.5, 2., 7.];
|
||||
//! let y_true: Vec<f64> = vec![2.5, 0.0, 2., 8.];
|
||||
//!
|
||||
//! let mse: f64 = MeanAbsoluteError {}.get_score(&y_pred, &y_true);
|
||||
//! let mse: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
/// Mean Absolute Error
|
||||
pub struct MeanAbsoluteError {}
|
||||
pub struct MeanAbsoluteError<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl MeanAbsoluteError {
|
||||
impl<T: Number + FloatNumber> Metrics<T> for MeanAbsoluteError<T> {
|
||||
/// create a typed object to call MeanAbsoluteError functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Computes mean absolute error
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
|
||||
let n = y_true.len();
|
||||
let mut ras = T::zero();
|
||||
let n = y_true.shape();
|
||||
let mut ras: T = T::zero();
|
||||
for i in 0..n {
|
||||
ras += (y_true.get(i) - y_pred.get(i)).abs();
|
||||
let res: T = *y_true.get(i) - *y_pred.get(i);
|
||||
ras += res.abs();
|
||||
}
|
||||
|
||||
ras / T::from_usize(n).unwrap()
|
||||
ras.to_f64().unwrap() / n as f64
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,8 +82,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
|
||||
let y_pred: Vec<f64> = vec![2.5, 0.0, 2., 8.];
|
||||
|
||||
let score1: f64 = MeanAbsoluteError {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = MeanAbsoluteError {}.get_score(&y_true, &y_true);
|
||||
let score1: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = MeanAbsoluteError::new().get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.5).abs() < 1e-8);
|
||||
assert!((score2 - 0.0).abs() < 1e-8);
|
||||
|
||||
@@ -10,45 +10,65 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::mean_squared_error::MeanSquareError;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![3., -0.5, 2., 7.];
|
||||
//! let y_true: Vec<f64> = vec![2.5, 0.0, 2., 8.];
|
||||
//!
|
||||
//! let mse: f64 = MeanSquareError {}.get_score(&y_pred, &y_true);
|
||||
//! let mse: f64 = MeanSquareError::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
/// Mean Squared Error
|
||||
pub struct MeanSquareError {}
|
||||
pub struct MeanSquareError<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl MeanSquareError {
|
||||
impl<T: Number + FloatNumber> Metrics<T> for MeanSquareError<T> {
|
||||
/// create a typed object to call MeanSquareError functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Computes mean squared error
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
|
||||
let n = y_true.len();
|
||||
let n = y_true.shape();
|
||||
let mut rss = T::zero();
|
||||
for i in 0..n {
|
||||
rss += (y_true.get(i) - y_pred.get(i)).square();
|
||||
let res = *y_true.get(i) - *y_pred.get(i);
|
||||
rss += res * res;
|
||||
}
|
||||
|
||||
rss / T::from_usize(n).unwrap()
|
||||
rss.to_f64().unwrap() / n as f64
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,8 +82,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
|
||||
let y_pred: Vec<f64> = vec![2.5, 0.0, 2., 8.];
|
||||
|
||||
let score1: f64 = MeanSquareError {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = MeanSquareError {}.get_score(&y_true, &y_true);
|
||||
let score1: f64 = MeanSquareError::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = MeanSquareError::new().get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.375).abs() < 1e-8);
|
||||
assert!((score2 - 0.0).abs() < 1e-8);
|
||||
|
||||
+143
-68
@@ -12,7 +12,7 @@
|
||||
//!
|
||||
//! Example:
|
||||
//! ```
|
||||
//! use smartcore::linalg::naive::dense_matrix::*;
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linear::logistic_regression::LogisticRegression;
|
||||
//! use smartcore::metrics::*;
|
||||
//!
|
||||
@@ -38,26 +38,29 @@
|
||||
//! &[6.6, 2.9, 4.6, 1.3],
|
||||
//! &[5.2, 2.7, 3.9, 1.4],
|
||||
//! ]);
|
||||
//! let y: Vec<f64> = vec![
|
||||
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
||||
//! let y: Vec<i8> = vec![
|
||||
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
//! ];
|
||||
//!
|
||||
//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||
//!
|
||||
//! let y_hat = lr.predict(&x).unwrap();
|
||||
//!
|
||||
//! let acc = ClassificationMetrics::accuracy().get_score(&y, &y_hat);
|
||||
//! let acc = ClassificationMetricsOrd::accuracy().get_score(&y, &y_hat);
|
||||
//! // or
|
||||
//! let acc = accuracy(&y, &y_hat);
|
||||
//! ```
|
||||
|
||||
/// Accuracy score.
|
||||
pub mod accuracy;
|
||||
/// Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.
|
||||
pub mod auc;
|
||||
// TODO: reimplement AUC
|
||||
// /// Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.
|
||||
// pub mod auc;
|
||||
/// Compute the homogeneity, completeness and V-Measure scores.
|
||||
pub mod cluster_hcv;
|
||||
pub(crate) mod cluster_helpers;
|
||||
/// Multitude of distance metrics are defined here
|
||||
pub mod distance;
|
||||
/// F1 score, also known as balanced F-score or F-measure.
|
||||
pub mod f1;
|
||||
/// Mean absolute error regression loss.
|
||||
@@ -71,150 +74,222 @@ pub mod r2;
|
||||
/// Computes the recall.
|
||||
pub mod recall;
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::{Array1, ArrayView1};
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::numbers::floatnum::FloatNumber;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// A trait to be implemented by all metrics
|
||||
pub trait Metrics<T> {
|
||||
/// instantiate a new Metrics trait-object
|
||||
/// https://doc.rust-lang.org/error-index.html#E0038
|
||||
fn new() -> Self
|
||||
where
|
||||
Self: Sized;
|
||||
/// used to instantiate metric with a paramenter
|
||||
fn new_with(_parameter: f64) -> Self
|
||||
where
|
||||
Self: Sized;
|
||||
/// compute score realated to this metric
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64;
|
||||
}
|
||||
|
||||
/// Use these metrics to compare classification models.
pub struct ClassificationMetrics<T> {
    // marker for the label type; no data is stored
    phantom: PhantomData<T>,
}

/// Use these metrics to compare classification models for
/// numbers that require `Ord`.
pub struct ClassificationMetricsOrd<T> {
    // marker for the label type; no data is stored
    phantom: PhantomData<T>,
}

/// Metrics for regression models.
pub struct RegressionMetrics<T> {
    // marker for the target type; no data is stored
    phantom: PhantomData<T>,
}

/// Cluster metrics.
pub struct ClusterMetrics<T> {
    // marker for the label type; no data is stored
    phantom: PhantomData<T>,
}
|
||||
|
||||
impl<T: Number + RealNumber + FloatNumber> ClassificationMetrics<T> {
|
||||
/// Recall, see [recall](recall/index.html).
|
||||
pub fn recall() -> recall::Recall {
|
||||
recall::Recall {}
|
||||
pub fn recall() -> recall::Recall<T> {
|
||||
recall::Recall::new()
|
||||
}
|
||||
|
||||
/// Precision, see [precision](precision/index.html).
|
||||
pub fn precision() -> precision::Precision {
|
||||
precision::Precision {}
|
||||
pub fn precision() -> precision::Precision<T> {
|
||||
precision::Precision::new()
|
||||
}
|
||||
|
||||
/// F1 score, also known as balanced F-score or F-measure, see [F1](f1/index.html).
|
||||
pub fn f1<T: RealNumber>(beta: T) -> f1::F1<T> {
|
||||
f1::F1 { beta }
|
||||
pub fn f1(beta: f64) -> f1::F1<T> {
|
||||
f1::F1::new_with(beta)
|
||||
}
|
||||
|
||||
/// Area Under the Receiver Operating Characteristic Curve (ROC AUC), see [AUC](auc/index.html).
|
||||
pub fn roc_auc_score() -> auc::AUC {
|
||||
auc::AUC {}
|
||||
// /// Area Under the Receiver Operating Characteristic Curve (ROC AUC), see [AUC](auc/index.html).
|
||||
// pub fn roc_auc_score() -> auc::AUC<T> {
|
||||
// auc::AUC::<T>::new()
|
||||
// }
|
||||
}
|
||||
|
||||
impl<T: Number + Ord> ClassificationMetricsOrd<T> {
|
||||
/// Accuracy score, see [accuracy](accuracy/index.html).
|
||||
pub fn accuracy() -> accuracy::Accuracy<T> {
|
||||
accuracy::Accuracy::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl RegressionMetrics {
|
||||
impl<T: Number + FloatNumber> RegressionMetrics<T> {
|
||||
/// Mean squared error, see [mean squared error](mean_squared_error/index.html).
|
||||
pub fn mean_squared_error() -> mean_squared_error::MeanSquareError {
|
||||
mean_squared_error::MeanSquareError {}
|
||||
pub fn mean_squared_error() -> mean_squared_error::MeanSquareError<T> {
|
||||
mean_squared_error::MeanSquareError::new()
|
||||
}
|
||||
|
||||
/// Mean absolute error, see [mean absolute error](mean_absolute_error/index.html).
|
||||
pub fn mean_absolute_error() -> mean_absolute_error::MeanAbsoluteError {
|
||||
mean_absolute_error::MeanAbsoluteError {}
|
||||
pub fn mean_absolute_error() -> mean_absolute_error::MeanAbsoluteError<T> {
|
||||
mean_absolute_error::MeanAbsoluteError::new()
|
||||
}
|
||||
|
||||
/// Coefficient of determination (R2), see [R2](r2/index.html).
|
||||
pub fn r2() -> r2::R2 {
|
||||
r2::R2 {}
|
||||
pub fn r2() -> r2::R2<T> {
|
||||
r2::R2::<T>::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl ClusterMetrics {
|
||||
impl<T: Number + Ord> ClusterMetrics<T> {
|
||||
/// Homogeneity and completeness and V-Measure scores at once.
|
||||
pub fn hcv_score() -> cluster_hcv::HCVScore {
|
||||
cluster_hcv::HCVScore {}
|
||||
pub fn hcv_score() -> cluster_hcv::HCVScore<T> {
|
||||
cluster_hcv::HCVScore::<T>::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Function that calculated accuracy score, see [accuracy](accuracy/index.html).
|
||||
/// * `y_true` - cround truth (correct) labels
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn accuracy<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
ClassificationMetrics::accuracy().get_score(y_true, y_pred)
|
||||
pub fn accuracy<T: Number + Ord, V: ArrayView1<T>>(y_true: &V, y_pred: &V) -> f64 {
|
||||
let obj = ClassificationMetricsOrd::<T>::accuracy();
|
||||
obj.get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Calculated recall score, see [recall](recall/index.html)
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn recall<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
ClassificationMetrics::recall().get_score(y_true, y_pred)
|
||||
pub fn recall<T: Number + RealNumber + FloatNumber, V: ArrayView1<T>>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
let obj = ClassificationMetrics::<T>::recall();
|
||||
obj.get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Calculated precision score, see [precision](precision/index.html).
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn precision<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
ClassificationMetrics::precision().get_score(y_true, y_pred)
|
||||
pub fn precision<T: Number + RealNumber + FloatNumber, V: ArrayView1<T>>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
let obj = ClassificationMetrics::<T>::precision();
|
||||
obj.get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Computes F1 score, see [F1](f1/index.html).
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn f1<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V, beta: T) -> T {
|
||||
ClassificationMetrics::f1(beta).get_score(y_true, y_pred)
|
||||
pub fn f1<T: Number + RealNumber + FloatNumber, V: ArrayView1<T>>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
beta: f64,
|
||||
) -> f64 {
|
||||
let obj = ClassificationMetrics::<T>::f1(beta);
|
||||
obj.get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
// TODO: reimplement AUC — the auc module is excluded in this release
// /// AUC score, see [AUC](auc/index.html).
// /// * `y_true` - ground truth (correct) labels.
// /// * `y_pred_probabilities` - probability estimates, as returned by a classifier.
// pub fn roc_auc_score<T: Number + PartialOrd, V: ArrayView1<T> + Array1<T>>(
//     y_true: &V,
//     y_pred_probabilities: &V,
// ) -> T {
//     let obj = ClassificationMetrics::<T>::roc_auc_score();
//     obj.get_score(y_true, y_pred_probabilities)
// }
|
||||
|
||||
/// Computes mean squared error, see [mean squared error](mean_squared_error/index.html).
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn mean_squared_error<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
RegressionMetrics::mean_squared_error().get_score(y_true, y_pred)
|
||||
pub fn mean_squared_error<T: Number + FloatNumber, V: ArrayView1<T>>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
RegressionMetrics::<T>::mean_squared_error().get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Computes mean absolute error, see [mean absolute error](mean_absolute_error/index.html).
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn mean_absolute_error<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
RegressionMetrics::mean_absolute_error().get_score(y_true, y_pred)
|
||||
pub fn mean_absolute_error<T: Number + FloatNumber, V: ArrayView1<T>>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
RegressionMetrics::<T>::mean_absolute_error().get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Computes R2 score, see [R2](r2/index.html).
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn r2<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
RegressionMetrics::r2().get_score(y_true, y_pred)
|
||||
pub fn r2<T: Number + FloatNumber, V: ArrayView1<T>>(y_true: &V, y_pred: &V) -> f64 {
|
||||
RegressionMetrics::<T>::r2().get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Homogeneity metric of a cluster labeling given a ground truth (range is between 0.0 and 1.0).
|
||||
/// A cluster result satisfies homogeneity if all of its clusters contain only data points which are members of a single class.
|
||||
/// * `labels_true` - ground truth class labels to be used as a reference.
|
||||
/// * `labels_pred` - cluster labels to evaluate.
|
||||
pub fn homogeneity_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
|
||||
ClusterMetrics::hcv_score()
|
||||
.get_score(labels_true, labels_pred)
|
||||
.0
|
||||
pub fn homogeneity_score<
|
||||
T: Number + FloatNumber + RealNumber + Ord,
|
||||
V: ArrayView1<T> + Array1<T>,
|
||||
>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
let mut obj = ClusterMetrics::<T>::hcv_score();
|
||||
obj.compute(y_true, y_pred);
|
||||
obj.homogeneity().unwrap()
|
||||
}
|
||||
|
||||
///
|
||||
/// Completeness metric of a cluster labeling given a ground truth (range is between 0.0 and 1.0).
|
||||
/// * `labels_true` - ground truth class labels to be used as a reference.
|
||||
/// * `labels_pred` - cluster labels to evaluate.
|
||||
pub fn completeness_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
|
||||
ClusterMetrics::hcv_score()
|
||||
.get_score(labels_true, labels_pred)
|
||||
.1
|
||||
pub fn completeness_score<
|
||||
T: Number + FloatNumber + RealNumber + Ord,
|
||||
V: ArrayView1<T> + Array1<T>,
|
||||
>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
let mut obj = ClusterMetrics::<T>::hcv_score();
|
||||
obj.compute(y_true, y_pred);
|
||||
obj.completeness().unwrap()
|
||||
}
|
||||
|
||||
/// The harmonic mean between homogeneity and completeness.
|
||||
/// * `labels_true` - ground truth class labels to be used as a reference.
|
||||
/// * `labels_pred` - cluster labels to evaluate.
|
||||
pub fn v_measure_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
|
||||
ClusterMetrics::hcv_score()
|
||||
.get_score(labels_true, labels_pred)
|
||||
.2
|
||||
pub fn v_measure_score<T: Number + FloatNumber + RealNumber + Ord, V: ArrayView1<T> + Array1<T>>(
|
||||
y_true: &V,
|
||||
y_pred: &V,
|
||||
) -> f64 {
|
||||
let mut obj = ClusterMetrics::<T>::hcv_score();
|
||||
obj.compute(y_true, y_pred);
|
||||
obj.v_measure().unwrap()
|
||||
}
|
||||
|
||||
+37
-20
@@ -10,59 +10,76 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::precision::Precision;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![0., 1., 1., 0.];
|
||||
//! let y_true: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
//!
|
||||
//! let score: f64 = Precision {}.get_score(&y_pred, &y_true);
|
||||
//! let score: f64 = Precision::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::collections::HashSet;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
/// Precision metric.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct Precision {}
|
||||
pub struct Precision<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl Precision {
|
||||
impl<T: RealNumber> Metrics<T> for Precision<T> {
|
||||
/// create a typed object to call Precision functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Calculated precision score
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_true` - ground truth (correct) labels.
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
|
||||
let mut classes = HashSet::new();
|
||||
for i in 0..y_true.len() {
|
||||
for i in 0..y_true.shape() {
|
||||
classes.insert(y_true.get(i).to_f64_bits());
|
||||
}
|
||||
let classes = classes.len();
|
||||
|
||||
let mut tp = 0;
|
||||
let mut fp = 0;
|
||||
for i in 0..y_true.len() {
|
||||
for i in 0..y_true.shape() {
|
||||
if y_pred.get(i) == y_true.get(i) {
|
||||
if classes == 2 {
|
||||
if y_true.get(i) == T::one() {
|
||||
if *y_true.get(i) == T::one() {
|
||||
tp += 1;
|
||||
}
|
||||
} else {
|
||||
tp += 1;
|
||||
}
|
||||
} else if classes == 2 {
|
||||
if y_true.get(i) == T::one() {
|
||||
if *y_true.get(i) == T::one() {
|
||||
fp += 1;
|
||||
}
|
||||
} else {
|
||||
@@ -70,7 +87,7 @@ impl Precision {
|
||||
}
|
||||
}
|
||||
|
||||
T::from_i64(tp).unwrap() / (T::from_i64(tp).unwrap() + T::from_i64(fp).unwrap())
|
||||
tp as f64 / (tp as f64 + fp as f64)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,8 +101,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![0., 1., 1., 0.];
|
||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
|
||||
let score1: f64 = Precision {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Precision {}.get_score(&y_pred, &y_pred);
|
||||
let score1: f64 = Precision::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Precision::new().get_score(&y_pred, &y_pred);
|
||||
|
||||
assert!((score1 - 0.5).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
@@ -93,7 +110,7 @@ mod tests {
|
||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
|
||||
|
||||
let score3: f64 = Precision {}.get_score(&y_pred, &y_true);
|
||||
let score3: f64 = Precision::new().get_score(&y_pred, &y_true);
|
||||
assert!((score3 - 0.5).abs() < 1e-8);
|
||||
}
|
||||
|
||||
@@ -103,8 +120,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.];
|
||||
let y_pred: Vec<f64> = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.];
|
||||
|
||||
let score1: f64 = Precision {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Precision {}.get_score(&y_pred, &y_pred);
|
||||
let score1: f64 = Precision::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Precision::new().get_score(&y_pred, &y_pred);
|
||||
|
||||
assert!((score1 - 0.333333333).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
|
||||
+36
-25
@@ -10,59 +10,70 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::mean_absolute_error::MeanAbsoluteError;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![3., -0.5, 2., 7.];
|
||||
//! let y_true: Vec<f64> = vec![2.5, 0.0, 2., 8.];
|
||||
//!
|
||||
//! let mse: f64 = MeanAbsoluteError {}.get_score(&y_pred, &y_true);
|
||||
//! let mse: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
/// Coefficient of Determination (R2)
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct R2 {}
|
||||
pub struct R2<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl R2 {
|
||||
impl<T: Number> Metrics<T> for R2<T> {
|
||||
/// create a typed object to call R2 functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Computes R2 score
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
|
||||
let n = y_true.len();
|
||||
|
||||
let mut mean = T::zero();
|
||||
|
||||
for i in 0..n {
|
||||
mean += y_true.get(i);
|
||||
}
|
||||
|
||||
mean /= T::from_usize(n).unwrap();
|
||||
let n = y_true.shape();
|
||||
|
||||
let mean: f64 = y_true.mean_by();
|
||||
let mut ss_tot = T::zero();
|
||||
let mut ss_res = T::zero();
|
||||
|
||||
for i in 0..n {
|
||||
let y_i = y_true.get(i);
|
||||
let f_i = y_pred.get(i);
|
||||
ss_tot += (y_i - mean).square();
|
||||
ss_res += (y_i - f_i).square();
|
||||
let y_i = *y_true.get(i);
|
||||
let f_i = *y_pred.get(i);
|
||||
ss_tot += (y_i - T::from(mean).unwrap()) * (y_i - T::from(mean).unwrap());
|
||||
ss_res += (y_i - f_i) * (y_i - f_i);
|
||||
}
|
||||
|
||||
T::one() - (ss_res / ss_tot)
|
||||
(T::one() - ss_res / ss_tot).to_f64().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,8 +87,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
|
||||
let y_pred: Vec<f64> = vec![2.5, 0.0, 2., 8.];
|
||||
|
||||
let score1: f64 = R2 {}.get_score(&y_true, &y_pred);
|
||||
let score2: f64 = R2 {}.get_score(&y_true, &y_true);
|
||||
let score1: f64 = R2::new().get_score(&y_true, &y_pred);
|
||||
let score2: f64 = R2::new().get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.948608137).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
|
||||
+38
-20
@@ -10,67 +10,85 @@
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::recall::Recall;
|
||||
//! use smartcore::metrics::Metrics;
|
||||
//! let y_pred: Vec<f64> = vec![0., 1., 1., 0.];
|
||||
//! let y_true: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
//!
|
||||
//! let score: f64 = Recall {}.get_score(&y_pred, &y_true);
|
||||
//! let score: f64 = Recall::new().get_score(&y_pred, &y_true);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::realnum::RealNumber;
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
||||
/// Recall metric.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct Recall {}
|
||||
pub struct Recall<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl Recall {
|
||||
impl<T: RealNumber> Metrics<T> for Recall<T> {
|
||||
/// create a typed object to call Recall functions
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
fn new_with(_parameter: f64) -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
/// Calculated recall score
|
||||
/// * `y_true` - cround truth (correct) labels.
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
|
||||
if y_true.len() != y_pred.len() {
|
||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||
if y_true.shape() != y_pred.shape() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_pred.len()
|
||||
y_true.shape(),
|
||||
y_pred.shape()
|
||||
);
|
||||
}
|
||||
|
||||
let mut classes = HashSet::new();
|
||||
for i in 0..y_true.len() {
|
||||
for i in 0..y_true.shape() {
|
||||
classes.insert(y_true.get(i).to_f64_bits());
|
||||
}
|
||||
let classes: i64 = classes.len().try_into().unwrap();
|
||||
|
||||
let mut tp = 0;
|
||||
let mut fne = 0;
|
||||
for i in 0..y_true.len() {
|
||||
for i in 0..y_true.shape() {
|
||||
if y_pred.get(i) == y_true.get(i) {
|
||||
if classes == 2 {
|
||||
if y_true.get(i) == T::one() {
|
||||
if *y_true.get(i) == T::one() {
|
||||
tp += 1;
|
||||
}
|
||||
} else {
|
||||
tp += 1;
|
||||
}
|
||||
} else if classes == 2 {
|
||||
if y_true.get(i) != T::one() {
|
||||
if *y_true.get(i) != T::one() {
|
||||
fne += 1;
|
||||
}
|
||||
} else {
|
||||
fne += 1;
|
||||
}
|
||||
}
|
||||
T::from_i64(tp).unwrap() / (T::from_i64(tp).unwrap() + T::from_i64(fne).unwrap())
|
||||
tp as f64 / (tp as f64 + fne as f64)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,8 +102,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![0., 1., 1., 0.];
|
||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
|
||||
let score1: f64 = Recall {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Recall {}.get_score(&y_pred, &y_pred);
|
||||
let score1: f64 = Recall::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Recall::new().get_score(&y_pred, &y_pred);
|
||||
|
||||
assert!((score1 - 0.5).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
@@ -93,8 +111,8 @@ mod tests {
|
||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
|
||||
|
||||
let score3: f64 = Recall {}.get_score(&y_pred, &y_true);
|
||||
assert!((score3 - 0.66666666).abs() < 1e-8);
|
||||
let score3: f64 = Recall::new().get_score(&y_pred, &y_true);
|
||||
assert!((score3 - 0.6666666666666666).abs() < 1e-8);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
@@ -103,8 +121,8 @@ mod tests {
|
||||
let y_true: Vec<f64> = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.];
|
||||
let y_pred: Vec<f64> = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.];
|
||||
|
||||
let score1: f64 = Recall {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Recall {}.get_score(&y_pred, &y_pred);
|
||||
let score1: f64 = Recall::new().get_score(&y_pred, &y_true);
|
||||
let score2: f64 = Recall::new().get_score(&y_pred, &y_pred);
|
||||
|
||||
assert!((score1 - 0.333333333).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
|
||||
Reference in New Issue
Block a user