Merge potential next release v0.4 (#187) Breaking Changes

* First draft of the new n-dimensional arrays + NB use case
* Improves default implementation of multiple Array methods
* Refactors tree methods
* Adds matrix decomposition routines
* Adds matrix decomposition methods to ndarray and nalgebra bindings
* Refactoring + linear regression now uses array2
* Ridge & Linear regression
* LBFGS optimizer & logistic regression
* LBFGS optimizer & logistic regression
* Changes linear methods, metrics and model selection methods to new n-dimensional arrays
* Switches KNN and clustering algorithms to new n-d array layer
* Refactors distance metrics
* Optimizes knn and clustering methods
* Refactors metrics module
* Switches decomposition methods to n-dimensional arrays
* Linalg refactoring - cleanup rng merge (#172)
* Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure.
* Exclude AUC metrics. Needs reimplementation
* Improve developers walkthrough

New traits system in place at `src/numbers` and `src/linalg`
Co-authored-by: Lorenzo <tunedconsulting@gmail.com>

* Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021
* Implement getters to use as_ref() in src/neighbors
* Implement getters to use as_ref() in src/naive_bayes
* Implement getters to use as_ref() in src/linear
* Add Clone to src/naive_bayes
* Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021
* Implement ndarray-bindings. Remove FloatNumber from implementations
* Drop nalgebra-bindings support (as decided in conf-call to go for ndarray)
* Remove benches. Benches will have their own repo at smartcore-benches
* Implement SVC
* Implement SVC serialization. Move search parameters in dedicated module
* Implement SVR. Definitely too slow
* Fix compilation issues for wasm (#202)

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
* Fix tests (#203)

* Port linalg/traits/stats.rs
* Improve methods naming
* Improve Display for DenseMatrix

Co-authored-by: Montana Low <montanalow@users.noreply.github.com>
Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
Lorenzo
2022-10-31 10:44:57 +00:00
committed by GitHub
parent bb71656137
commit 52eb6ce023
110 changed files with 10327 additions and 9107 deletions
+55 -16
View File
@@ -8,10 +8,20 @@
//!
//! ```
//! use smartcore::metrics::accuracy::Accuracy;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![0., 2., 1., 3.];
//! let y_true: Vec<f64> = vec![0., 1., 2., 3.];
//!
//! let score: f64 = Accuracy {}.get_score(&y_pred, &y_true);
//! let score: f64 = Accuracy::new().get_score(&y_pred, &y_true);
//! ```
//! With integers:
//! ```
//! use smartcore::metrics::accuracy::Accuracy;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<i64> = vec![0, 2, 1, 3];
//! let y_true: Vec<i64> = vec![0, 1, 2, 3];
//!
//! let score: f64 = Accuracy::new().get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
@@ -19,37 +29,53 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use std::marker::PhantomData;
use crate::metrics::Metrics;
/// Accuracy metric.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Accuracy {}
pub struct Accuracy<T> {
_phantom: PhantomData<T>,
}
impl Accuracy {
impl<T: Number> Metrics<T> for Accuracy<T> {
/// create a typed object to call Accuracy functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
}
}
/// Function that calculated accuracy score.
    /// * `y_true` - ground truth (correct) labels
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let n = y_true.len();
let n = y_true.shape();
let mut positive = 0;
let mut positive: i32 = 0;
for i in 0..n {
if y_true.get(i) == y_pred.get(i) {
if *y_true.get(i) == *y_pred.get(i) {
positive += 1;
}
}
T::from_i64(positive).unwrap() / T::from_usize(n).unwrap()
positive as f64 / n as f64
}
}
@@ -59,14 +85,27 @@ mod tests {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn accuracy() {
fn accuracy_float() {
let y_pred: Vec<f64> = vec![0., 2., 1., 3.];
let y_true: Vec<f64> = vec![0., 1., 2., 3.];
let score1: f64 = Accuracy {}.get_score(&y_pred, &y_true);
let score2: f64 = Accuracy {}.get_score(&y_true, &y_true);
let score1: f64 = Accuracy::<f64>::new().get_score(&y_pred, &y_true);
let score2: f64 = Accuracy::<f64>::new().get_score(&y_true, &y_true);
assert!((score1 - 0.5).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn accuracy_int() {
let y_pred: Vec<i32> = vec![0, 2, 1, 3];
let y_true: Vec<i32> = vec![0, 1, 2, 3];
let score1: f64 = Accuracy::<i32>::new().get_score(&y_pred, &y_true);
let score2: f64 = Accuracy::<i32>::new().get_score(&y_true, &y_true);
assert_eq!(score1, 0.5);
assert_eq!(score2, 1.0);
}
}
+47 -24
View File
@@ -7,11 +7,12 @@
//! Example:
//! ```
//! use smartcore::metrics::auc::AUC;
//! use smartcore::metrics::Metrics;
//!
//! let y_true: Vec<f64> = vec![0., 0., 1., 1.];
//! let y_pred: Vec<f64> = vec![0.1, 0.4, 0.35, 0.8];
//!
//! let score1: f64 = AUC {}.get_score(&y_true, &y_pred);
//! let score1: f64 = AUC::new().get_score(&y_true, &y_pred);
//! ```
//!
//! ## References:
@@ -20,32 +21,52 @@
//! * ["The ROC-AUC and the Mann-Whitney U-test", Haupt, J.](https://johaupt.github.io/roc-auc/model%20evaluation/Area_under_ROC_curve.html)
#![allow(non_snake_case)]
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::{Array1, ArrayView1, MutArrayView1};
use crate::numbers::basenum::Number;
use crate::metrics::Metrics;
/// Area Under the Receiver Operating Characteristic Curve (ROC AUC)
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct AUC {}
pub struct AUC<T> {
_phantom: PhantomData<T>,
}
impl AUC {
impl<T: Number + Ord> Metrics<T> for AUC<T> {
/// create a typed object to call AUC functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: T) -> Self {
Self {
_phantom: PhantomData,
}
}
/// AUC score.
/// * `y_true` - cround truth (correct) labels.
/// * `y_pred_probabilities` - probability estimates, as returned by a classifier.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred_prob: &V) -> T {
/// * `y_true` - ground truth (correct) labels.
/// * `y_pred_prob` - probability estimates, as returned by a classifier.
fn get_score(
&self,
y_true: &dyn ArrayView1<T>,
y_pred_prob: &dyn ArrayView1<T>,
) -> f64 {
let mut pos = T::zero();
let mut neg = T::zero();
let n = y_true.len();
let n = y_true.shape();
for i in 0..n {
if y_true.get(i) == T::zero() {
if y_true.get(i) == &T::zero() {
neg += T::one();
} else if y_true.get(i) == T::one() {
} else if y_true.get(i) == &T::one() {
pos += T::one();
} else {
panic!(
@@ -55,21 +76,21 @@ impl AUC {
}
}
let mut y_pred = y_pred_prob.to_vec();
let y_pred = y_pred_prob.clone();
let label_idx = y_pred.quick_argsort_mut();
let label_idx = y_pred.argsort();
let mut rank = vec![T::zero(); n];
let mut rank = vec![0f64; n];
let mut i = 0;
while i < n {
if i == n - 1 || y_pred[i] != y_pred[i + 1] {
rank[i] = T::from_usize(i + 1).unwrap();
if i == n - 1 || y_pred.get(i) != y_pred.get(i + 1) {
rank[i] = (i + 1) as f64;
} else {
let mut j = i + 1;
while j < n && y_pred[j] == y_pred[i] {
while j < n && y_pred.get(j) == y_pred.get(i) {
j += 1;
}
let r = T::from_usize(i + 1 + j).unwrap() / T::two();
let r = (i + 1 + j) as f64 / 2f64;
for rank_k in rank.iter_mut().take(j).skip(i) {
*rank_k = r;
}
@@ -78,14 +99,16 @@ impl AUC {
i += 1;
}
let mut auc = T::zero();
let mut auc = 0f64;
for i in 0..n {
if y_true.get(label_idx[i]) == T::one() {
if y_true.get(label_idx[i]) == &T::one() {
auc += rank[i];
}
}
let pos = pos.to_f64().unwrap();
let neg = neg.to_f64().unwrap();
(auc - (pos * (pos + T::one()) / T::two())) / (pos * neg)
T::from(auc - (pos * (pos + 1f64) / 2.0)).unwrap() / T::from(pos * neg).unwrap()
}
}
@@ -99,8 +122,8 @@ mod tests {
let y_true: Vec<f64> = vec![0., 0., 1., 1.];
let y_pred: Vec<f64> = vec![0.1, 0.4, 0.35, 0.8];
let score1: f64 = AUC {}.get_score(&y_true, &y_pred);
let score2: f64 = AUC {}.get_score(&y_true, &y_true);
let score1: f64 = AUC::new().get_score(&y_true, &y_pred);
let score2: f64 = AUC::new().get_score(&y_true, &y_true);
assert!((score1 - 0.75).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
+75 -30
View File
@@ -1,41 +1,85 @@
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::metrics::cluster_helpers::*;
use crate::numbers::basenum::Number;
use crate::metrics::Metrics;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Homogeneity, completeness and V-Measure scores.
pub struct HCVScore {}
pub struct HCVScore<T> {
_phantom: PhantomData<T>,
homogeneity: Option<f64>,
completeness: Option<f64>,
v_measure: Option<f64>,
}
impl HCVScore {
/// Computes Homogeneity, completeness and V-Measure scores at once.
/// * `labels_true` - ground truth class labels to be used as a reference.
/// * `labels_pred` - cluster labels to evaluate.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(
&self,
labels_true: &V,
labels_pred: &V,
) -> (T, T, T) {
let labels_true = labels_true.to_vec();
let labels_pred = labels_pred.to_vec();
let entropy_c = entropy(&labels_true);
let entropy_k = entropy(&labels_pred);
let contingency = contingency_matrix(&labels_true, &labels_pred);
let mi: T = mutual_info_score(&contingency);
impl<T: Number + Ord> HCVScore<T> {
    /// return homogeneity score
pub fn homogeneity(&self) -> Option<f64> {
self.homogeneity
}
/// return completeness score
pub fn completeness(&self) -> Option<f64> {
self.completeness
}
/// return v_measure score
pub fn v_measure(&self) -> Option<f64> {
self.v_measure
}
/// run computation for measures
pub fn compute(&mut self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) {
let entropy_c: Option<f64> = entropy(y_true);
let entropy_k: Option<f64> = entropy(y_pred);
let contingency = contingency_matrix(y_true, y_pred);
let mi = mutual_info_score(&contingency);
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or_else(T::one);
let completeness = entropy_k.map(|e| mi / e).unwrap_or_else(T::one);
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or(0f64);
let completeness = entropy_k.map(|e| mi / e).unwrap_or(0f64);
let v_measure_score = if homogeneity + completeness == T::zero() {
T::zero()
let v_measure_score = if homogeneity + completeness == 0f64 {
0f64
} else {
T::two() * homogeneity * completeness / (T::one() * homogeneity + completeness)
2.0f64 * homogeneity * completeness / (1.0f64 * homogeneity + completeness)
};
(homogeneity, completeness, v_measure_score)
self.homogeneity = Some(homogeneity);
self.completeness = Some(completeness);
self.v_measure = Some(v_measure_score);
}
}
impl<T: Number + Ord> Metrics<T> for HCVScore<T> {
/// create a typed object to call HCVScore functions
fn new() -> Self {
Self {
_phantom: PhantomData,
homogeneity: Option::None,
completeness: Option::None,
v_measure: Option::None,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
homogeneity: Option::None,
completeness: Option::None,
v_measure: Option::None,
}
}
/// Computes Homogeneity, completeness and V-Measure scores at once.
/// * `y_true` - ground truth class labels to be used as a reference.
/// * `y_pred` - cluster labels to evaluate.
fn get_score(&self, _y_true: &dyn ArrayView1<T>, _y_pred: &dyn ArrayView1<T>) -> f64 {
        // this function should not be used for this struct
// use homogeneity(), completeness(), v_measure()
// TODO: implement Metrics -> Result<T, Failed>
0f64
}
}
@@ -46,12 +90,13 @@ mod tests {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn homogeneity_score() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
let scores = HCVScore {}.get_score(&v1, &v2);
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
let mut scores = HCVScore::new();
scores.compute(&v1, &v2);
assert!((0.2548f32 - scores.0).abs() < 1e-4);
assert!((0.5440f32 - scores.1).abs() < 1e-4);
assert!((0.3471f32 - scores.2).abs() < 1e-4);
assert!((0.2548 - scores.homogeneity.unwrap() as f64).abs() < 1e-4);
assert!((0.5440 - scores.completeness.unwrap() as f64).abs() < 1e-4);
assert!((0.3471 - scores.v_measure.unwrap() as f64).abs() < 1e-4);
}
}
+34 -33
View File
@@ -1,12 +1,12 @@
#![allow(clippy::ptr_arg)]
use std::collections::HashMap;
use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
pub fn contingency_matrix<T: RealNumber>(
labels_true: &Vec<T>,
labels_pred: &Vec<T>,
pub fn contingency_matrix<T: Number + Ord, V: ArrayView1<T> + ?Sized>(
labels_true: &V,
labels_pred: &V,
) -> Vec<Vec<usize>> {
let (classes, class_idx) = labels_true.unique_with_indices();
let (clusters, cluster_idx) = labels_pred.unique_with_indices();
@@ -24,28 +24,30 @@ pub fn contingency_matrix<T: RealNumber>(
contingency_matrix
}
pub fn entropy<T: RealNumber>(data: &[T]) -> Option<T> {
let mut bincounts = HashMap::with_capacity(data.len());
pub fn entropy<T: Number + Ord, V: ArrayView1<T> + ?Sized>(data: &V) -> Option<f64> {
let mut bincounts = HashMap::with_capacity(data.shape());
for e in data.iter() {
for e in data.iterator(0) {
let k = e.to_i64().unwrap();
bincounts.insert(k, bincounts.get(&k).unwrap_or(&0) + 1);
}
let mut entropy = T::zero();
let sum = T::from_usize(bincounts.values().sum()).unwrap();
let mut entropy = 0f64;
let sum: i64 = bincounts.values().sum();
for &c in bincounts.values() {
if c > 0 {
let pi = T::from_usize(c).unwrap();
entropy -= (pi / sum) * (pi.ln() - sum.ln());
let pi = c as f64;
let pi_ln = pi.ln();
let sum_ln = (sum as f64).ln();
entropy -= (pi / sum as f64) * (pi_ln - sum_ln);
}
}
Some(entropy)
}
pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
pub fn mutual_info_score(contingency: &[Vec<usize>]) -> f64 {
let mut contingency_sum = 0;
let mut pi = vec![0; contingency.len()];
let mut pj = vec![0; contingency[0].len()];
@@ -64,37 +66,36 @@ pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
}
}
let contingency_sum = T::from_usize(contingency_sum).unwrap();
let contingency_sum = contingency_sum as f64;
let contingency_sum_ln = contingency_sum.ln();
let pi_sum_l = T::from_usize(pi.iter().sum()).unwrap().ln();
let pj_sum_l = T::from_usize(pj.iter().sum()).unwrap().ln();
let pi_sum: usize = pi.iter().sum();
let pj_sum: usize = pj.iter().sum();
let pi_sum_l = (pi_sum as f64).ln();
let pj_sum_l = (pj_sum as f64).ln();
let log_contingency_nm: Vec<T> = nz_val
let log_contingency_nm: Vec<f64> = nz_val.iter().map(|v| (*v as f64).ln()).collect();
let contingency_nm: Vec<f64> = nz_val
.iter()
.map(|v| T::from_usize(*v).unwrap().ln())
.collect();
let contingency_nm: Vec<T> = nz_val
.iter()
.map(|v| T::from_usize(*v).unwrap() / contingency_sum)
.map(|v| (*v as f64) / contingency_sum)
.collect();
let outer: Vec<usize> = nzx
.iter()
.zip(nzy.iter())
.map(|(&x, &y)| pi[x] * pj[y])
.collect();
let log_outer: Vec<T> = outer
let log_outer: Vec<f64> = outer
.iter()
.map(|&o| -T::from_usize(o).unwrap().ln() + pi_sum_l + pj_sum_l)
.map(|&o| -(o as f64).ln() + pi_sum_l + pj_sum_l)
.collect();
let mut result = T::zero();
let mut result = 0f64;
for i in 0..log_outer.len() {
result += (contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln))
+ contingency_nm[i] * log_outer[i]
}
result.max(T::zero())
result.max(0f64)
}
#[cfg(test)]
@@ -104,8 +105,8 @@ mod tests {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn contingency_matrix_test() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
assert_eq!(
vec!(vec!(1, 2), vec!(2, 0), vec!(1, 0), vec!(1, 0)),
@@ -116,17 +117,17 @@ mod tests {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn entropy_test() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
assert!((1.2770f32 - entropy(&v1).unwrap()).abs() < 1e-4);
assert!((1.2770 - entropy(&v1).unwrap() as f64).abs() < 1e-4);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mutual_info_score_test() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
let s: f32 = mutual_info_score(&contingency_matrix(&v1, &v2));
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
let s = mutual_info_score(&contingency_matrix(&v1, &v2));
assert!((0.3254 - s).abs() < 1e-4);
}
+89
View File
@@ -0,0 +1,89 @@
//! # Euclidian Metric Distance
//!
//! The Euclidean distance (L2) between two points \\( x \\) and \\( y \\) in n-space is defined as
//!
//! \\[ d(x, y) = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \\]
//!
//! Example:
//!
//! ```
//! use smartcore::metrics::distance::Distance;
//! use smartcore::metrics::distance::euclidian::Euclidian;
//!
//! let x = vec![1., 1.];
//! let y = vec![2., 2.];
//!
//! let l2: f64 = Euclidian::new().distance(&x, &y);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use super::Distance;
/// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Euclidian<T> {
_t: PhantomData<T>,
}
impl<T: Number> Default for Euclidian<T> {
fn default() -> Self {
Self::new()
}
}
impl<T: Number> Euclidian<T> {
    /// instantiate the initial structure
pub fn new() -> Euclidian<T> {
Euclidian { _t: PhantomData }
}
/// return sum of squared distances
#[inline]
pub(crate) fn squared_distance<A: ArrayView1<T>>(x: &A, y: &A) -> f64 {
if x.shape() != y.shape() {
panic!("Input vector sizes are different.");
}
let sum: f64 = x
.iterator(0)
.zip(y.iterator(0))
.map(|(&a, &b)| {
let r = a - b;
(r * r).to_f64().unwrap()
})
.sum();
sum
}
}
impl<T: Number, A: ArrayView1<T>> Distance<A> for Euclidian<T> {
fn distance(&self, x: &A, y: &A) -> f64 {
Euclidian::squared_distance(x, y).sqrt()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn squared_distance() {
let a = vec![1, 2, 3];
let b = vec![4, 5, 6];
let l2: f64 = Euclidian::new().distance(&a, &b);
assert!((l2 - 5.19615242).abs() < 1e-8);
}
}
+83
View File
@@ -0,0 +1,83 @@
//! # Hamming Distance
//!
//! Hamming Distance measures the similarity between two integer-valued vectors of the same length.
//! Given two vectors \\( x \in \mathbb{R}^n \\), \\( y \in \mathbb{R}^n \\) the hamming distance between \\( x \\) and \\( y \\), \\( d(x, y) \\), is the number of places where \\( x \\) and \\( y \\) differ.
//!
//! Example:
//!
//! ```
//! use smartcore::metrics::distance::Distance;
//! use smartcore::metrics::distance::hamming::Hamming;
//!
//! let a = vec![1, 0, 0, 1, 0, 0, 1];
//! let b = vec![1, 1, 0, 0, 1, 0, 1];
//!
//! let h: f64 = Hamming::new().distance(&a, &b);
//!
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
use super::Distance;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
/// While comparing two integer-valued vectors of equal length, Hamming distance is the number of bit positions in which the two bits are different
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Hamming<T: Number> {
_t: PhantomData<T>,
}
impl<T: Number> Hamming<T> {
/// instatiate the initial structure
pub fn new() -> Hamming<T> {
Hamming { _t: PhantomData }
}
}
impl<T: Number> Default for Hamming<T> {
fn default() -> Self {
Self::new()
}
}
impl<T: Number, A: ArrayView1<T>> Distance<A> for Hamming<T> {
fn distance(&self, x: &A, y: &A) -> f64 {
if x.shape() != y.shape() {
panic!("Input vector sizes are different");
}
let dist: usize = x
.iterator(0)
.zip(y.iterator(0))
.map(|(a, b)| match a != b {
true => 1,
false => 0,
})
.sum();
dist as f64 / x.shape() as f64
}
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn hamming_distance() {
let a = vec![1, 0, 0, 1, 0, 0, 1];
let b = vec![1, 1, 0, 0, 1, 0, 1];
let h: f64 = Hamming::new().distance(&a, &b);
assert!((h - 0.42857142).abs() < 1e-8);
}
}
+162
View File
@@ -0,0 +1,162 @@
//! # Mahalanobis Distance
//!
//! The Mahalanobis distance (MD) is the distance between two points in multivariate space.
//! In a regular Euclidean space the distance between any two points can be measured with [Euclidean distance](../euclidian/index.html).
//! For uncorrelated variables, the Euclidean distance equals the MD. However, if two or more variables are correlated the measurements become impossible
//! with Euclidean distance because the axes are no longer at right angles to each other. MD on the other hand, is scale-invariant,
//! it takes into account the covariance matrix of the dataset when calculating distance between 2 points that belong to the same space as the dataset.
//!
//! MD between two vectors \\( x \in \mathbb{R}^n \\) and \\( y \in \mathbb{R}^n \\) is defined as
//! \\[ d(x, y) = \sqrt{(x - y)^TS^{-1}(x - y)}\\]
//!
//! where \\( S \\) is the covariance matrix of the dataset.
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::basic::matrix::DenseMatrix;
//! use smartcore::linalg::basic::arrays::ArrayView2;
//! use smartcore::metrics::distance::Distance;
//! use smartcore::metrics::distance::mahalanobis::Mahalanobis;
//!
//! let data = DenseMatrix::from_2d_array(&[
//! &[64., 580., 29.],
//! &[66., 570., 33.],
//! &[68., 590., 37.],
//! &[69., 660., 46.],
//! &[73., 600., 55.],
//! ]);
//!
//! let a = data.mean_by(0);
//! let b = vec![66., 640., 44.];
//!
//! let mahalanobis = Mahalanobis::new(&data);
//!
//! mahalanobis.distance(&a, &b);
//! ```
//!
//! ## References
//! * ["Introduction to Multivariate Statistical Analysis in Chemometrics", Varmuza, K., Filzmoser, P., 2016, p.46](https://www.taylorfrancis.com/books/9780429145049)
//! * ["Example of Calculating the Mahalanobis Distance", McCaffrey, J.D.](https://jamesmccaffrey.wordpress.com/2017/11/09/example-of-calculating-the-mahalanobis-distance/)
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#![allow(non_snake_case)]
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
use super::Distance;
use crate::linalg::basic::arrays::{Array, Array2, ArrayView1};
use crate::linalg::basic::matrix::DenseMatrix;
use crate::linalg::traits::lu::LUDecomposable;
use crate::numbers::basenum::Number;
/// Mahalanobis distance.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Mahalanobis<T: Number, M: Array2<f64>> {
/// covariance matrix of the dataset
pub sigma: M,
/// inverse of the covariance matrix
pub sigmaInv: M,
_t: PhantomData<T>,
}
impl<T: Number, M: Array2<f64> + LUDecomposable<f64>> Mahalanobis<T, M> {
/// Constructs new instance of `Mahalanobis` from given dataset
/// * `data` - a matrix of _NxM_ where _N_ is number of observations and _M_ is number of attributes
pub fn new<X: Array2<T>>(data: &X) -> Mahalanobis<T, M> {
let (_, m) = data.shape();
let mut sigma = M::zeros(m, m);
data.cov(&mut sigma);
let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap();
Mahalanobis {
sigma,
sigmaInv,
_t: PhantomData,
}
}
/// Constructs new instance of `Mahalanobis` from given covariance matrix
/// * `cov` - a covariance matrix
pub fn new_from_covariance<X: Array2<f64> + LUDecomposable<f64>>(cov: &X) -> Mahalanobis<T, X> {
let sigma = cov.clone();
let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap();
Mahalanobis {
sigma,
sigmaInv,
_t: PhantomData,
}
}
}
impl<T: Number, A: ArrayView1<T>> Distance<A> for Mahalanobis<T, DenseMatrix<f64>> {
fn distance(&self, x: &A, y: &A) -> f64 {
let (nrows, ncols) = self.sigma.shape();
if x.shape() != nrows {
panic!(
"Array x[{}] has different dimension with Sigma[{}][{}].",
x.shape(),
nrows,
ncols
);
}
if y.shape() != nrows {
panic!(
"Array y[{}] has different dimension with Sigma[{}][{}].",
y.shape(),
nrows,
ncols
);
}
let n = x.shape();
let z: Vec<f64> = x
.iterator(0)
.zip(y.iterator(0))
.map(|(&a, &b)| (a - b).to_f64().unwrap())
.collect();
// np.dot(np.dot((a-b),VI),(a-b).T)
let mut s = 0f64;
for j in 0..n {
for i in 0..n {
s += *self.sigmaInv.get((i, j)) * z[i] * z[j];
}
}
s.sqrt()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::basic::arrays::ArrayView2;
use crate::linalg::basic::matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mahalanobis_distance() {
let data = DenseMatrix::from_2d_array(&[
&[64., 580., 29.],
&[66., 570., 33.],
&[68., 590., 37.],
&[69., 660., 46.],
&[73., 600., 55.],
]);
let a = data.mean_by(0);
let b = vec![66., 640., 44.];
let mahalanobis = Mahalanobis::new(&data);
let md: f64 = mahalanobis.distance(&a, &b);
assert!((md - 5.33).abs() < 1e-2);
}
}
+79
View File
@@ -0,0 +1,79 @@
//! # Manhattan Distance
//!
//! The Manhattan distance between two points \\( x \in \mathbb{R}^n \\) and \\( y \in \mathbb{R}^n \\) in n-dimensional space is the sum of the distances in each dimension.
//!
//! \\[ d(x, y) = \sum_{i=0}^n \lvert x_i - y_i \rvert \\]
//!
//! Example:
//!
//! ```
//! use smartcore::metrics::distance::Distance;
//! use smartcore::metrics::distance::manhattan::Manhattan;
//!
//! let x = vec![1., 1.];
//! let y = vec![2., 2.];
//!
//! let l1: f64 = Manhattan::new().distance(&x, &y);
//! ```
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use super::Distance;
/// Manhattan distance
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Manhattan<T: Number> {
_t: PhantomData<T>,
}
impl<T: Number> Manhattan<T> {
/// instatiate the initial structure
pub fn new() -> Manhattan<T> {
Manhattan { _t: PhantomData }
}
}
impl<T: Number> Default for Manhattan<T> {
fn default() -> Self {
Self::new()
}
}
impl<T: Number, A: ArrayView1<T>> Distance<A> for Manhattan<T> {
fn distance(&self, x: &A, y: &A) -> f64 {
if x.shape() != y.shape() {
panic!("Input vector sizes are different");
}
let dist: f64 = x
.iterator(0)
.zip(y.iterator(0))
.map(|(&a, &b)| (a - b).to_f64().unwrap().abs())
.sum();
dist
}
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn manhattan_distance() {
let a = vec![1., 2., 3.];
let b = vec![4., 5., 6.];
let l1: f64 = Manhattan::new().distance(&a, &b);
assert!((l1 - 9.0).abs() < 1e-8);
}
}
+97
View File
@@ -0,0 +1,97 @@
//! # Minkowski Distance
//!
//! The Minkowski distance of order _p_ (where _p_ is an integer) is a metric in a normed vector space which can be considered as a generalization of both the Euclidean distance and the Manhattan distance.
//! The Minkowski distance between two points \\( x \in \mathbb{R}^n \\) and \\( y \in \mathbb{R}^n \\) in n-dimensional space is defined as:
//!
//! \\[ d(x, y) = \left(\sum_{i=0}^n \lvert x_i - y_i \rvert^p\right)^{1/p} \\]
//!
//! Example:
//!
//! ```
//! use smartcore::metrics::distance::Distance;
//! use smartcore::metrics::distance::minkowski::Minkowski;
//!
//! let x = vec![1., 1.];
//! let y = vec![2., 2.];
//!
//! let l1: f64 = Minkowski::new(1).distance(&x, &y);
//! let l2: f64 = Minkowski::new(2).distance(&x, &y);
//!
//! ```
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use super::Distance;
/// Defines the Minkowski distance of order `p`
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Minkowski<T: Number> {
/// order, integer
pub p: u16,
_t: PhantomData<T>,
}
impl<T: Number> Minkowski<T> {
    /// instantiate the initial structure
pub fn new(p: u16) -> Minkowski<T> {
Minkowski { p, _t: PhantomData }
}
}
impl<T: Number, A: ArrayView1<T>> Distance<A> for Minkowski<T> {
fn distance(&self, x: &A, y: &A) -> f64 {
if x.shape() != y.shape() {
panic!("Input vector sizes are different");
}
if self.p < 1 {
panic!("p must be at least 1");
}
let p_t = self.p as f64;
let dist: f64 = x
.iterator(0)
.zip(y.iterator(0))
.map(|(&a, &b)| (a - b).to_f64().unwrap().abs().powf(p_t))
.sum();
dist.powf(1f64 / p_t)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn minkowski_distance() {
let a = vec![1., 2., 3.];
let b = vec![4., 5., 6.];
let l1: f64 = Minkowski::new(1).distance(&a, &b);
let l2: f64 = Minkowski::new(2).distance(&a, &b);
let l3: f64 = Minkowski::new(3).distance(&a, &b);
assert!((l1 - 9.0).abs() < 1e-8);
assert!((l2 - 5.19615242).abs() < 1e-8);
assert!((l3 - 4.32674871).abs() < 1e-8);
}
#[test]
#[should_panic(expected = "p must be at least 1")]
fn minkowski_distance_negative_p() {
let a = vec![1., 2., 3.];
let b = vec![4., 5., 6.];
let _: f64 = Minkowski::new(0).distance(&a, &b);
}
}
+68
View File
@@ -0,0 +1,68 @@
//! # Collection of Distance Functions
//!
//! Many algorithms in machine learning require a measure of distance between data points. Distance metric (or metric) is a function that defines a distance between a pair of point elements of a set.
//! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions:
//! 1. \\( d(x, y) = 0 \\) if and only \\( x = y \\), positive definiteness
//! 1. \\( d(x, y) = d(y, x) \\), symmetry
//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality
//!
//! for all \\(x, y, z \in Z \\)
//!
//! A good distance metric helps to improve the performance of classification, clustering and information retrieval algorithms significantly.
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
/// Euclidean Distance is the straight-line distance between two points in Euclidean space that represents the shortest distance between these points.
pub mod euclidian;
/// Hamming Distance between two strings is the number of positions at which the corresponding symbols are different.
pub mod hamming;
/// The Mahalanobis distance is the distance between two points in multivariate space.
pub mod mahalanobis;
/// Also known as rectilinear distance, city block distance, taxicab metric.
pub mod manhattan;
/// A generalization of both the Euclidean distance and the Manhattan distance.
pub mod minkowski;
use crate::linalg::basic::arrays::Array2;
use crate::linalg::traits::lu::LUDecomposable;
use crate::numbers::basenum::Number;
/// Distance metric, a function that calculates distance between two points
pub trait Distance<T>: Clone {
/// Calculates distance between _a_ and _b_
fn distance(&self, a: &T, b: &T) -> f64;
}
/// Multitude of distance metric functions
pub struct Distances {}
impl Distances {
/// Euclidian distance, see [`Euclidian`](euclidian/index.html)
pub fn euclidian<T: Number>() -> euclidian::Euclidian<T> {
euclidian::Euclidian::new()
}
/// Minkowski distance, see [`Minkowski`](minkowski/index.html)
/// * `p` - function order. Should be >= 1
pub fn minkowski<T: Number>(p: u16) -> minkowski::Minkowski<T> {
minkowski::Minkowski::new(p)
}
/// Manhattan distance, see [`Manhattan`](manhattan/index.html)
pub fn manhattan<T: Number>() -> manhattan::Manhattan<T> {
manhattan::Manhattan::new()
}
/// Hamming distance, see [`Hamming`](hamming/index.html)
pub fn hamming<T: Number>() -> hamming::Hamming<T> {
hamming::Hamming::new()
}
/// Mahalanobis distance, see [`Mahalanobis`](mahalanobis/index.html)
pub fn mahalanobis<T: Number, M: Array2<T>, C: Array2<f64> + LUDecomposable<f64>>(
data: &M,
) -> mahalanobis::Mahalanobis<T, C> {
mahalanobis::Mahalanobis::new(data)
}
}
+42 -15
View File
@@ -10,48 +10,71 @@
//!
//! ```
//! use smartcore::metrics::f1::F1;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
//! let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
//!
//! let score: f64 = F1 {beta: 1.0}.get_score(&y_pred, &y_true);
//! let beta = 1.0; // beta default is equal 1.0 anyway
//! let score: f64 = F1::new_with(beta).get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::metrics::precision::Precision;
use crate::metrics::recall::Recall;
use crate::numbers::basenum::Number;
use crate::numbers::floatnum::FloatNumber;
use crate::numbers::realnum::RealNumber;
use crate::metrics::Metrics;
/// F-measure
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct F1<T: RealNumber> {
pub struct F1<T> {
/// a positive real factor
pub beta: T,
pub beta: f64,
_phantom: PhantomData<T>,
}
impl<T: RealNumber> F1<T> {
impl<T: Number + RealNumber + FloatNumber> Metrics<T> for F1<T> {
fn new() -> Self {
let beta: f64 = 1f64;
Self {
beta,
_phantom: PhantomData,
}
}
/// create a typed object to call Recall functions
fn new_with(beta: f64) -> Self {
Self {
beta,
_phantom: PhantomData,
}
}
/// Computes F1 score
/// * `y_true` - ground truth (correct) labels.
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn get_score<V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let beta2 = self.beta * self.beta;
let p = Precision {}.get_score(y_true, y_pred);
let r = Recall {}.get_score(y_true, y_pred);
let p = Precision::new().get_score(y_true, y_pred);
let r = Recall::new().get_score(y_true, y_pred);
(T::one() + beta2) * (p * r) / (beta2 * p + r)
(1f64 + beta2) * (p * r) / ((beta2 * p) + r)
}
}
@@ -65,8 +88,12 @@ mod tests {
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
let score1: f64 = F1 { beta: 1.0 }.get_score(&y_pred, &y_true);
let score2: f64 = F1 { beta: 1.0 }.get_score(&y_true, &y_true);
let beta = 1.0;
let score1: f64 = F1::new_with(beta).get_score(&y_pred, &y_true);
let score2: f64 = F1::new_with(beta).get_score(&y_true, &y_true);
println!("{:?}", score1);
println!("{:?}", score2);
assert!((score1 - 0.57142857).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
+35 -15
View File
@@ -10,45 +10,65 @@
//!
//! ```
//! use smartcore::metrics::mean_absolute_error::MeanAbsoluteError;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![3., -0.5, 2., 7.];
//! let y_true: Vec<f64> = vec![2.5, 0.0, 2., 8.];
//!
//! let mse: f64 = MeanAbsoluteError {}.get_score(&y_pred, &y_true);
//! let mse: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use crate::numbers::floatnum::FloatNumber;
use crate::metrics::Metrics;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Mean Absolute Error
pub struct MeanAbsoluteError {}
pub struct MeanAbsoluteError<T> {
_phantom: PhantomData<T>,
}
impl MeanAbsoluteError {
impl<T: Number + FloatNumber> Metrics<T> for MeanAbsoluteError<T> {
/// create a typed object to call MeanAbsoluteError functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
}
}
/// Computes mean absolute error
/// * `y_true` - Ground truth (correct) target values.
/// * `y_pred` - Estimated target values.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let n = y_true.len();
let mut ras = T::zero();
let n = y_true.shape();
let mut ras: T = T::zero();
for i in 0..n {
ras += (y_true.get(i) - y_pred.get(i)).abs();
let res: T = *y_true.get(i) - *y_pred.get(i);
ras += res.abs();
}
ras / T::from_usize(n).unwrap()
ras.to_f64().unwrap() / n as f64
}
}
@@ -62,8 +82,8 @@ mod tests {
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
let y_pred: Vec<f64> = vec![2.5, 0.0, 2., 8.];
let score1: f64 = MeanAbsoluteError {}.get_score(&y_pred, &y_true);
let score2: f64 = MeanAbsoluteError {}.get_score(&y_true, &y_true);
let score1: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true);
let score2: f64 = MeanAbsoluteError::new().get_score(&y_true, &y_true);
assert!((score1 - 0.5).abs() < 1e-8);
assert!((score2 - 0.0).abs() < 1e-8);
+34 -14
View File
@@ -10,45 +10,65 @@
//!
//! ```
//! use smartcore::metrics::mean_squared_error::MeanSquareError;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![3., -0.5, 2., 7.];
//! let y_true: Vec<f64> = vec![2.5, 0.0, 2., 8.];
//!
//! let mse: f64 = MeanSquareError {}.get_score(&y_pred, &y_true);
//! let mse: f64 = MeanSquareError::new().get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use crate::numbers::floatnum::FloatNumber;
use crate::metrics::Metrics;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Mean Squared Error
pub struct MeanSquareError {}
pub struct MeanSquareError<T> {
_phantom: PhantomData<T>,
}
impl MeanSquareError {
impl<T: Number + FloatNumber> Metrics<T> for MeanSquareError<T> {
/// create a typed object to call MeanSquareError functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
}
}
/// Computes mean squared error
/// * `y_true` - Ground truth (correct) target values.
/// * `y_pred` - Estimated target values.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let n = y_true.len();
let n = y_true.shape();
let mut rss = T::zero();
for i in 0..n {
rss += (y_true.get(i) - y_pred.get(i)).square();
let res = *y_true.get(i) - *y_pred.get(i);
rss += res * res;
}
rss / T::from_usize(n).unwrap()
rss.to_f64().unwrap() / n as f64
}
}
@@ -62,8 +82,8 @@ mod tests {
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
let y_pred: Vec<f64> = vec![2.5, 0.0, 2., 8.];
let score1: f64 = MeanSquareError {}.get_score(&y_pred, &y_true);
let score2: f64 = MeanSquareError {}.get_score(&y_true, &y_true);
let score1: f64 = MeanSquareError::new().get_score(&y_pred, &y_true);
let score2: f64 = MeanSquareError::new().get_score(&y_true, &y_true);
assert!((score1 - 0.375).abs() < 1e-8);
assert!((score2 - 0.0).abs() < 1e-8);
+143 -68
View File
@@ -12,7 +12,7 @@
//!
//! Example:
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::linalg::basic::matrix::DenseMatrix;
//! use smartcore::linear::logistic_regression::LogisticRegression;
//! use smartcore::metrics::*;
//!
@@ -38,26 +38,29 @@
//! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//! let y: Vec<f64> = vec![
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
//! let y: Vec<i8> = vec![
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//! ];
//!
//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
//!
//! let y_hat = lr.predict(&x).unwrap();
//!
//! let acc = ClassificationMetrics::accuracy().get_score(&y, &y_hat);
//! let acc = ClassificationMetricsOrd::accuracy().get_score(&y, &y_hat);
//! // or
//! let acc = accuracy(&y, &y_hat);
//! ```
/// Accuracy score.
pub mod accuracy;
/// Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.
pub mod auc;
// TODO: reimplement AUC
// /// Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.
// pub mod auc;
/// Compute the homogeneity, completeness and V-Measure scores.
pub mod cluster_hcv;
pub(crate) mod cluster_helpers;
/// Multitude of distance metrics are defined here
pub mod distance;
/// F1 score, also known as balanced F-score or F-measure.
pub mod f1;
/// Mean absolute error regression loss.
@@ -71,150 +74,222 @@ pub mod r2;
/// Computes the recall.
pub mod recall;
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::{Array1, ArrayView1};
use crate::numbers::basenum::Number;
use crate::numbers::floatnum::FloatNumber;
use crate::numbers::realnum::RealNumber;
use std::marker::PhantomData;
/// A trait to be implemented by all metrics
pub trait Metrics<T> {
/// instantiate a new Metrics trait-object
/// https://doc.rust-lang.org/error-index.html#E0038
fn new() -> Self
where
Self: Sized;
/// used to instantiate metric with a parameter
fn new_with(_parameter: f64) -> Self
where
Self: Sized;
/// compute score related to this metric
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64;
}
/// Use these metrics to compare classification models.
pub struct ClassificationMetrics {}
pub struct ClassificationMetrics<T> {
phantom: PhantomData<T>,
}
/// Use these metrics to compare classification models for
/// numbers that require `Ord`.
pub struct ClassificationMetricsOrd<T> {
phantom: PhantomData<T>,
}
/// Metrics for regression models.
pub struct RegressionMetrics {}
pub struct RegressionMetrics<T> {
phantom: PhantomData<T>,
}
/// Cluster metrics.
pub struct ClusterMetrics {}
impl ClassificationMetrics {
/// Accuracy score, see [accuracy](accuracy/index.html).
pub fn accuracy() -> accuracy::Accuracy {
accuracy::Accuracy {}
}
pub struct ClusterMetrics<T> {
phantom: PhantomData<T>,
}
impl<T: Number + RealNumber + FloatNumber> ClassificationMetrics<T> {
/// Recall, see [recall](recall/index.html).
pub fn recall() -> recall::Recall {
recall::Recall {}
pub fn recall() -> recall::Recall<T> {
recall::Recall::new()
}
/// Precision, see [precision](precision/index.html).
pub fn precision() -> precision::Precision {
precision::Precision {}
pub fn precision() -> precision::Precision<T> {
precision::Precision::new()
}
/// F1 score, also known as balanced F-score or F-measure, see [F1](f1/index.html).
pub fn f1<T: RealNumber>(beta: T) -> f1::F1<T> {
f1::F1 { beta }
pub fn f1(beta: f64) -> f1::F1<T> {
f1::F1::new_with(beta)
}
/// Area Under the Receiver Operating Characteristic Curve (ROC AUC), see [AUC](auc/index.html).
pub fn roc_auc_score() -> auc::AUC {
auc::AUC {}
// /// Area Under the Receiver Operating Characteristic Curve (ROC AUC), see [AUC](auc/index.html).
// pub fn roc_auc_score() -> auc::AUC<T> {
// auc::AUC::<T>::new()
// }
}
impl<T: Number + Ord> ClassificationMetricsOrd<T> {
/// Accuracy score, see [accuracy](accuracy/index.html).
pub fn accuracy() -> accuracy::Accuracy<T> {
accuracy::Accuracy::new()
}
}
impl RegressionMetrics {
impl<T: Number + FloatNumber> RegressionMetrics<T> {
/// Mean squared error, see [mean squared error](mean_squared_error/index.html).
pub fn mean_squared_error() -> mean_squared_error::MeanSquareError {
mean_squared_error::MeanSquareError {}
pub fn mean_squared_error() -> mean_squared_error::MeanSquareError<T> {
mean_squared_error::MeanSquareError::new()
}
/// Mean absolute error, see [mean absolute error](mean_absolute_error/index.html).
pub fn mean_absolute_error() -> mean_absolute_error::MeanAbsoluteError {
mean_absolute_error::MeanAbsoluteError {}
pub fn mean_absolute_error() -> mean_absolute_error::MeanAbsoluteError<T> {
mean_absolute_error::MeanAbsoluteError::new()
}
/// Coefficient of determination (R2), see [R2](r2/index.html).
pub fn r2() -> r2::R2 {
r2::R2 {}
pub fn r2() -> r2::R2<T> {
r2::R2::<T>::new()
}
}
impl ClusterMetrics {
impl<T: Number + Ord> ClusterMetrics<T> {
/// Homogeneity and completeness and V-Measure scores at once.
pub fn hcv_score() -> cluster_hcv::HCVScore {
cluster_hcv::HCVScore {}
pub fn hcv_score() -> cluster_hcv::HCVScore<T> {
cluster_hcv::HCVScore::<T>::new()
}
}
/// Function that calculated accuracy score, see [accuracy](accuracy/index.html).
/// * `y_true` - cround truth (correct) labels
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn accuracy<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
ClassificationMetrics::accuracy().get_score(y_true, y_pred)
pub fn accuracy<T: Number + Ord, V: ArrayView1<T>>(y_true: &V, y_pred: &V) -> f64 {
let obj = ClassificationMetricsOrd::<T>::accuracy();
obj.get_score(y_true, y_pred)
}
/// Calculated recall score, see [recall](recall/index.html)
/// * `y_true` - cround truth (correct) labels.
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn recall<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
ClassificationMetrics::recall().get_score(y_true, y_pred)
pub fn recall<T: Number + RealNumber + FloatNumber, V: ArrayView1<T>>(
y_true: &V,
y_pred: &V,
) -> f64 {
let obj = ClassificationMetrics::<T>::recall();
obj.get_score(y_true, y_pred)
}
/// Calculated precision score, see [precision](precision/index.html).
/// * `y_true` - cround truth (correct) labels.
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn precision<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
ClassificationMetrics::precision().get_score(y_true, y_pred)
pub fn precision<T: Number + RealNumber + FloatNumber, V: ArrayView1<T>>(
y_true: &V,
y_pred: &V,
) -> f64 {
let obj = ClassificationMetrics::<T>::precision();
obj.get_score(y_true, y_pred)
}
/// Computes F1 score, see [F1](f1/index.html).
/// * `y_true` - cround truth (correct) labels.
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn f1<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V, beta: T) -> T {
ClassificationMetrics::f1(beta).get_score(y_true, y_pred)
pub fn f1<T: Number + RealNumber + FloatNumber, V: ArrayView1<T>>(
y_true: &V,
y_pred: &V,
beta: f64,
) -> f64 {
let obj = ClassificationMetrics::<T>::f1(beta);
obj.get_score(y_true, y_pred)
}
/// AUC score, see [AUC](auc/index.html).
/// * `y_true` - cround truth (correct) labels.
/// * `y_pred_probabilities` - probability estimates, as returned by a classifier.
pub fn roc_auc_score<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred_probabilities: &V) -> T {
ClassificationMetrics::roc_auc_score().get_score(y_true, y_pred_probabilities)
}
// /// AUC score, see [AUC](auc/index.html).
// /// * `y_true` - cround truth (correct) labels.
// /// * `y_pred_probabilities` - probability estimates, as returned by a classifier.
// pub fn roc_auc_score<T: Number + PartialOrd, V: ArrayView1<T> + Array1<T> + Array1<T>>(
// y_true: &V,
// y_pred_probabilities: &V,
// ) -> T {
// let obj = ClassificationMetrics::<T>::roc_auc_score();
// obj.get_score(y_true, y_pred_probabilities)
// }
/// Computes mean squared error, see [mean squared error](mean_squared_error/index.html).
/// * `y_true` - Ground truth (correct) target values.
/// * `y_pred` - Estimated target values.
pub fn mean_squared_error<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
RegressionMetrics::mean_squared_error().get_score(y_true, y_pred)
pub fn mean_squared_error<T: Number + FloatNumber, V: ArrayView1<T>>(
y_true: &V,
y_pred: &V,
) -> f64 {
RegressionMetrics::<T>::mean_squared_error().get_score(y_true, y_pred)
}
/// Computes mean absolute error, see [mean absolute error](mean_absolute_error/index.html).
/// * `y_true` - Ground truth (correct) target values.
/// * `y_pred` - Estimated target values.
pub fn mean_absolute_error<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
RegressionMetrics::mean_absolute_error().get_score(y_true, y_pred)
pub fn mean_absolute_error<T: Number + FloatNumber, V: ArrayView1<T>>(
y_true: &V,
y_pred: &V,
) -> f64 {
RegressionMetrics::<T>::mean_absolute_error().get_score(y_true, y_pred)
}
/// Computes R2 score, see [R2](r2/index.html).
/// * `y_true` - Ground truth (correct) target values.
/// * `y_pred` - Estimated target values.
pub fn r2<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
RegressionMetrics::r2().get_score(y_true, y_pred)
pub fn r2<T: Number + FloatNumber, V: ArrayView1<T>>(y_true: &V, y_pred: &V) -> f64 {
RegressionMetrics::<T>::r2().get_score(y_true, y_pred)
}
/// Homogeneity metric of a cluster labeling given a ground truth (range is between 0.0 and 1.0).
/// A cluster result satisfies homogeneity if all of its clusters contain only data points which are members of a single class.
/// * `labels_true` - ground truth class labels to be used as a reference.
/// * `labels_pred` - cluster labels to evaluate.
pub fn homogeneity_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
ClusterMetrics::hcv_score()
.get_score(labels_true, labels_pred)
.0
pub fn homogeneity_score<
T: Number + FloatNumber + RealNumber + Ord,
V: ArrayView1<T> + Array1<T>,
>(
y_true: &V,
y_pred: &V,
) -> f64 {
let mut obj = ClusterMetrics::<T>::hcv_score();
obj.compute(y_true, y_pred);
obj.homogeneity().unwrap()
}
///
/// Completeness metric of a cluster labeling given a ground truth (range is between 0.0 and 1.0).
/// * `labels_true` - ground truth class labels to be used as a reference.
/// * `labels_pred` - cluster labels to evaluate.
pub fn completeness_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
ClusterMetrics::hcv_score()
.get_score(labels_true, labels_pred)
.1
pub fn completeness_score<
T: Number + FloatNumber + RealNumber + Ord,
V: ArrayView1<T> + Array1<T>,
>(
y_true: &V,
y_pred: &V,
) -> f64 {
let mut obj = ClusterMetrics::<T>::hcv_score();
obj.compute(y_true, y_pred);
obj.completeness().unwrap()
}
/// The harmonic mean between homogeneity and completeness.
/// * `labels_true` - ground truth class labels to be used as a reference.
/// * `labels_pred` - cluster labels to evaluate.
pub fn v_measure_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
ClusterMetrics::hcv_score()
.get_score(labels_true, labels_pred)
.2
pub fn v_measure_score<T: Number + FloatNumber + RealNumber + Ord, V: ArrayView1<T> + Array1<T>>(
y_true: &V,
y_pred: &V,
) -> f64 {
let mut obj = ClusterMetrics::<T>::hcv_score();
obj.compute(y_true, y_pred);
obj.v_measure().unwrap()
}
+37 -20
View File
@@ -10,59 +10,76 @@
//!
//! ```
//! use smartcore::metrics::precision::Precision;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![0., 1., 1., 0.];
//! let y_true: Vec<f64> = vec![0., 0., 1., 1.];
//!
//! let score: f64 = Precision {}.get_score(&y_pred, &y_true);
//! let score: f64 = Precision::new().get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::collections::HashSet;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::realnum::RealNumber;
use crate::metrics::Metrics;
/// Precision metric.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Precision {}
pub struct Precision<T> {
_phantom: PhantomData<T>,
}
impl Precision {
impl<T: RealNumber> Metrics<T> for Precision<T> {
/// create a typed object to call Precision functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
}
}
/// Calculated precision score
/// * `y_true` - cround truth (correct) labels.
/// * `y_true` - ground truth (correct) labels.
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let mut classes = HashSet::new();
for i in 0..y_true.len() {
for i in 0..y_true.shape() {
classes.insert(y_true.get(i).to_f64_bits());
}
let classes = classes.len();
let mut tp = 0;
let mut fp = 0;
for i in 0..y_true.len() {
for i in 0..y_true.shape() {
if y_pred.get(i) == y_true.get(i) {
if classes == 2 {
if y_true.get(i) == T::one() {
if *y_true.get(i) == T::one() {
tp += 1;
}
} else {
tp += 1;
}
} else if classes == 2 {
if y_true.get(i) == T::one() {
if *y_true.get(i) == T::one() {
fp += 1;
}
} else {
@@ -70,7 +87,7 @@ impl Precision {
}
}
T::from_i64(tp).unwrap() / (T::from_i64(tp).unwrap() + T::from_i64(fp).unwrap())
tp as f64 / (tp as f64 + fp as f64)
}
}
@@ -84,8 +101,8 @@ mod tests {
let y_true: Vec<f64> = vec![0., 1., 1., 0.];
let y_pred: Vec<f64> = vec![0., 0., 1., 1.];
let score1: f64 = Precision {}.get_score(&y_pred, &y_true);
let score2: f64 = Precision {}.get_score(&y_pred, &y_pred);
let score1: f64 = Precision::new().get_score(&y_pred, &y_true);
let score2: f64 = Precision::new().get_score(&y_pred, &y_pred);
assert!((score1 - 0.5).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
@@ -93,7 +110,7 @@ mod tests {
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
let score3: f64 = Precision {}.get_score(&y_pred, &y_true);
let score3: f64 = Precision::new().get_score(&y_pred, &y_true);
assert!((score3 - 0.5).abs() < 1e-8);
}
@@ -103,8 +120,8 @@ mod tests {
let y_true: Vec<f64> = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.];
let y_pred: Vec<f64> = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.];
let score1: f64 = Precision {}.get_score(&y_pred, &y_true);
let score2: f64 = Precision {}.get_score(&y_pred, &y_pred);
let score1: f64 = Precision::new().get_score(&y_pred, &y_true);
let score2: f64 = Precision::new().get_score(&y_pred, &y_pred);
assert!((score1 - 0.333333333).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
+36 -25
View File
@@ -10,59 +10,70 @@
//!
//! ```
//! use smartcore::metrics::mean_absolute_error::MeanAbsoluteError;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![3., -0.5, 2., 7.];
//! let y_true: Vec<f64> = vec![2.5, 0.0, 2., 8.];
//!
//! let mse: f64 = MeanAbsoluteError {}.get_score(&y_pred, &y_true);
//! let mse: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::basenum::Number;
use crate::metrics::Metrics;
/// Coefficient of Determination (R2)
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct R2 {}
pub struct R2<T> {
_phantom: PhantomData<T>,
}
impl R2 {
impl<T: Number> Metrics<T> for R2<T> {
/// create a typed object to call R2 functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
}
}
/// Computes R2 score
/// * `y_true` - Ground truth (correct) target values.
/// * `y_pred` - Estimated target values.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let n = y_true.len();
let mut mean = T::zero();
for i in 0..n {
mean += y_true.get(i);
}
mean /= T::from_usize(n).unwrap();
let n = y_true.shape();
let mean: f64 = y_true.mean_by();
let mut ss_tot = T::zero();
let mut ss_res = T::zero();
for i in 0..n {
let y_i = y_true.get(i);
let f_i = y_pred.get(i);
ss_tot += (y_i - mean).square();
ss_res += (y_i - f_i).square();
let y_i = *y_true.get(i);
let f_i = *y_pred.get(i);
ss_tot += (y_i - T::from(mean).unwrap()) * (y_i - T::from(mean).unwrap());
ss_res += (y_i - f_i) * (y_i - f_i);
}
T::one() - (ss_res / ss_tot)
(T::one() - ss_res / ss_tot).to_f64().unwrap()
}
}
@@ -76,8 +87,8 @@ mod tests {
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
let y_pred: Vec<f64> = vec![2.5, 0.0, 2., 8.];
let score1: f64 = R2 {}.get_score(&y_true, &y_pred);
let score2: f64 = R2 {}.get_score(&y_true, &y_true);
let score1: f64 = R2::new().get_score(&y_true, &y_pred);
let score2: f64 = R2::new().get_score(&y_true, &y_true);
assert!((score1 - 0.948608137).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
+38 -20
View File
@@ -10,67 +10,85 @@
//!
//! ```
//! use smartcore::metrics::recall::Recall;
//! use smartcore::metrics::Metrics;
//! let y_pred: Vec<f64> = vec![0., 1., 1., 0.];
//! let y_true: Vec<f64> = vec![0., 0., 1., 1.];
//!
//! let score: f64 = Recall {}.get_score(&y_pred, &y_true);
//! let score: f64 = Recall::new().get_score(&y_pred, &y_true);
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::collections::HashSet;
use std::convert::TryInto;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::ArrayView1;
use crate::numbers::realnum::RealNumber;
use crate::metrics::Metrics;
/// Recall metric.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Recall {}
pub struct Recall<T> {
_phantom: PhantomData<T>,
}
impl Recall {
impl<T: RealNumber> Metrics<T> for Recall<T> {
/// create a typed object to call Recall functions
fn new() -> Self {
Self {
_phantom: PhantomData,
}
}
fn new_with(_parameter: f64) -> Self {
Self {
_phantom: PhantomData,
}
}
/// Calculated recall score
/// * `y_true` - cround truth (correct) labels.
/// * `y_pred` - predicted labels, as returned by a classifier.
pub fn get_score<T: RealNumber, V: BaseVector<T>>(&self, y_true: &V, y_pred: &V) -> T {
if y_true.len() != y_pred.len() {
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
if y_true.shape() != y_pred.shape() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_pred.len()
y_true.shape(),
y_pred.shape()
);
}
let mut classes = HashSet::new();
for i in 0..y_true.len() {
for i in 0..y_true.shape() {
classes.insert(y_true.get(i).to_f64_bits());
}
let classes: i64 = classes.len().try_into().unwrap();
let mut tp = 0;
let mut fne = 0;
for i in 0..y_true.len() {
for i in 0..y_true.shape() {
if y_pred.get(i) == y_true.get(i) {
if classes == 2 {
if y_true.get(i) == T::one() {
if *y_true.get(i) == T::one() {
tp += 1;
}
} else {
tp += 1;
}
} else if classes == 2 {
if y_true.get(i) != T::one() {
if *y_true.get(i) != T::one() {
fne += 1;
}
} else {
fne += 1;
}
}
T::from_i64(tp).unwrap() / (T::from_i64(tp).unwrap() + T::from_i64(fne).unwrap())
tp as f64 / (tp as f64 + fne as f64)
}
}
@@ -84,8 +102,8 @@ mod tests {
let y_true: Vec<f64> = vec![0., 1., 1., 0.];
let y_pred: Vec<f64> = vec![0., 0., 1., 1.];
let score1: f64 = Recall {}.get_score(&y_pred, &y_true);
let score2: f64 = Recall {}.get_score(&y_pred, &y_pred);
let score1: f64 = Recall::new().get_score(&y_pred, &y_true);
let score2: f64 = Recall::new().get_score(&y_pred, &y_pred);
assert!((score1 - 0.5).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
@@ -93,8 +111,8 @@ mod tests {
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
let score3: f64 = Recall {}.get_score(&y_pred, &y_true);
assert!((score3 - 0.66666666).abs() < 1e-8);
let score3: f64 = Recall::new().get_score(&y_pred, &y_true);
assert!((score3 - 0.6666666666666666).abs() < 1e-8);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
@@ -103,8 +121,8 @@ mod tests {
let y_true: Vec<f64> = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.];
let y_pred: Vec<f64> = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.];
let score1: f64 = Recall {}.get_score(&y_pred, &y_true);
let score2: f64 = Recall {}.get_score(&y_pred, &y_pred);
let score1: f64 = Recall::new().get_score(&y_pred, &y_true);
let score2: f64 = Recall::new().get_score(&y_pred, &y_pred);
assert!((score1 - 0.333333333).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);