fix precision and recall calculations (#338)
* fix precision and recall calculations
This commit is contained in:
+88
-23
@@ -4,7 +4,9 @@
|
|||||||
//!
|
//!
|
||||||
//! \\[precision = \frac{tp}{tp + fp}\\]
|
//! \\[precision = \frac{tp}{tp + fp}\\]
|
||||||
//!
|
//!
|
||||||
//! where tp (true positive) - correct result, fp (false positive) - unexpected result
|
//! where tp (true positive) - correct result, fp (false positive) - unexpected result.
|
||||||
|
//! For binary classification, this is precision for the positive class (assumed to be 1.0).
|
||||||
|
//! For multiclass, this is macro-averaged precision (average of per-class precisions).
|
||||||
//!
|
//!
|
||||||
//! Example:
|
//! Example:
|
||||||
//!
|
//!
|
||||||
@@ -19,7 +21,8 @@
|
|||||||
//!
|
//!
|
||||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||||
use std::collections::HashSet;
|
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
@@ -61,33 +64,63 @@ impl<T: RealNumber> Metrics<T> for Precision<T> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut classes = HashSet::new();
|
let n = y_true.shape();
|
||||||
for i in 0..y_true.shape() {
|
|
||||||
classes.insert(y_true.get(i).to_f64_bits());
|
|
||||||
}
|
|
||||||
let classes = classes.len();
|
|
||||||
|
|
||||||
let mut tp = 0;
|
let mut classes_set: HashSet<u64> = HashSet::new();
|
||||||
let mut fp = 0;
|
for i in 0..n {
|
||||||
for i in 0..y_true.shape() {
|
classes_set.insert(y_true.get(i).to_f64_bits());
|
||||||
if y_pred.get(i) == y_true.get(i) {
|
}
|
||||||
if classes == 2 {
|
let classes: usize = classes_set.len();
|
||||||
if *y_true.get(i) == T::one() {
|
|
||||||
|
if classes == 2 {
|
||||||
|
// Binary case: precision for positive class (assumed T::one())
|
||||||
|
let positive = T::one();
|
||||||
|
let mut tp: usize = 0;
|
||||||
|
let mut fp_count: usize = 0;
|
||||||
|
for i in 0..n {
|
||||||
|
let t = *y_true.get(i);
|
||||||
|
let p = *y_pred.get(i);
|
||||||
|
if p == t {
|
||||||
|
if t == positive {
|
||||||
tp += 1;
|
tp += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else if t != positive {
|
||||||
tp += 1;
|
fp_count += 1;
|
||||||
}
|
|
||||||
} else if classes == 2 {
|
|
||||||
if *y_true.get(i) == T::one() {
|
|
||||||
fp += 1;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if tp + fp_count == 0 {
|
||||||
|
0.0
|
||||||
} else {
|
} else {
|
||||||
fp += 1;
|
tp as f64 / (tp + fp_count) as f64
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Multiclass case: macro-averaged precision
|
||||||
|
let mut predicted: HashMap<u64, usize> = HashMap::new();
|
||||||
|
let mut tp_map: HashMap<u64, usize> = HashMap::new();
|
||||||
|
for i in 0..n {
|
||||||
|
let p_bits = y_pred.get(i).to_f64_bits();
|
||||||
|
*predicted.entry(p_bits).or_insert(0) += 1;
|
||||||
|
if *y_true.get(i) == *y_pred.get(i) {
|
||||||
|
*tp_map.entry(p_bits).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut precision_sum = 0.0;
|
||||||
|
for &bits in &classes_set {
|
||||||
|
let pred_count = *predicted.get(&bits).unwrap_or(&0);
|
||||||
|
let tp = *tp_map.get(&bits).unwrap_or(&0);
|
||||||
|
let prec = if pred_count > 0 {
|
||||||
|
tp as f64 / pred_count as f64
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
precision_sum += prec;
|
||||||
|
}
|
||||||
|
if classes == 0 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
precision_sum / classes as f64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tp as f64 / (tp as f64 + fp as f64)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -114,7 +147,7 @@ mod tests {
|
|||||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||||
|
|
||||||
let score3: f64 = Precision::new().get_score(&y_true, &y_pred);
|
let score3: f64 = Precision::new().get_score(&y_true, &y_pred);
|
||||||
assert!((score3 - 0.6666666666).abs() < 1e-8);
|
assert!((score3 - 0.5).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -132,4 +165,36 @@ mod tests {
|
|||||||
assert!((score1 - 0.333333333).abs() < 1e-8);
|
assert!((score1 - 0.333333333).abs() < 1e-8);
|
||||||
assert!((score2 - 1.0).abs() < 1e-8);
|
assert!((score2 - 1.0).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks macro-averaged precision on a three-class input with unequal class counts:
// each class's precision is (correct predictions of the class) / (all predictions of the class),
// and the score is the plain mean of those per-class values.
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn precision_multiclass_imbalanced() {
|
||||||
|
let y_true: Vec<f64> = vec![0., 0., 1., 2., 2., 2.];
|
||||||
|
let y_pred: Vec<f64> = vec![0., 1., 1., 2., 0., 2.];
|
||||||
|
|
||||||
|
let score: f64 = Precision::new().get_score(&y_true, &y_pred);
|
||||||
|
let expected = (0.5 + 0.5 + 1.0) / 3.0; // class 0: 1 of 2 predictions correct, class 1: 1 of 2, class 2: 2 of 2
|
||||||
|
assert!((score - expected).abs() < 1e-8);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the macro average when a label present in y_true (class 3) is never predicted:
// that class contributes a precision of 0.0 and still counts in the denominator (4 classes).
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn precision_multiclass_unpredicted_class() {
|
||||||
|
let y_true: Vec<f64> = vec![0., 0., 1., 2., 2., 2., 3.];
|
||||||
|
let y_pred: Vec<f64> = vec![0., 1., 1., 2., 0., 2., 0.];
|
||||||
|
|
||||||
|
let score: f64 = Precision::new().get_score(&y_true, &y_pred);
|
||||||
|
// Class 0: pred=3, tp=1 -> 1/3 ≈ 0.333
|
||||||
|
// Class 1: pred=2, tp=1 -> 0.5
|
||||||
|
// Class 2: pred=2, tp=2 -> 1.0
|
||||||
|
// Class 3: pred=0, tp=0 -> 0.0 (never predicted)
|
||||||
|
let expected = (1.0 / 3.0 + 0.5 + 1.0 + 0.0) / 4.0;
|
||||||
|
assert!((score - expected).abs() < 1e-8);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+64
-24
@@ -4,7 +4,9 @@
|
|||||||
//!
|
//!
|
||||||
//! \\[recall = \frac{tp}{tp + fn}\\]
|
//! \\[recall = \frac{tp}{tp + fn}\\]
|
||||||
//!
|
//!
|
||||||
//! where tp (true positive) - correct result, fn (false negative) - missing result
|
//! where tp (true positive) - correct result, fn (false negative) - missing result.
|
||||||
|
//! For binary classification, this is recall for the positive class (assumed to be 1.0).
|
||||||
|
//! For multiclass, this is macro-averaged recall (average of per-class recalls).
|
||||||
//!
|
//!
|
||||||
//! Example:
|
//! Example:
|
||||||
//!
|
//!
|
||||||
@@ -20,8 +22,7 @@
|
|||||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
@@ -52,7 +53,7 @@ impl<T: RealNumber> Metrics<T> for Recall<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Calculates the recall score.
|
/// Calculates the recall score.
|
||||||
/// * `y_true` - cround truth (correct) labels.
|
/// * `y_true` - ground truth (correct) labels.
|
||||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||||
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
fn get_score(&self, y_true: &dyn ArrayView1<T>, y_pred: &dyn ArrayView1<T>) -> f64 {
|
||||||
if y_true.shape() != y_pred.shape() {
|
if y_true.shape() != y_pred.shape() {
|
||||||
@@ -63,32 +64,57 @@ impl<T: RealNumber> Metrics<T> for Recall<T> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut classes = HashSet::new();
|
let n = y_true.shape();
|
||||||
for i in 0..y_true.shape() {
|
|
||||||
classes.insert(y_true.get(i).to_f64_bits());
|
|
||||||
}
|
|
||||||
let classes: i64 = classes.len().try_into().unwrap();
|
|
||||||
|
|
||||||
let mut tp = 0;
|
let mut classes_set = HashSet::new();
|
||||||
let mut fne = 0;
|
for i in 0..n {
|
||||||
for i in 0..y_true.shape() {
|
classes_set.insert(y_true.get(i).to_f64_bits());
|
||||||
if y_pred.get(i) == y_true.get(i) {
|
}
|
||||||
if classes == 2 {
|
let classes: usize = classes_set.len();
|
||||||
if *y_true.get(i) == T::one() {
|
|
||||||
|
if classes == 2 {
|
||||||
|
// Binary case: recall for positive class (assumed T::one())
|
||||||
|
let positive = T::one();
|
||||||
|
let mut tp: usize = 0;
|
||||||
|
let mut fn_count: usize = 0;
|
||||||
|
for i in 0..n {
|
||||||
|
let t = *y_true.get(i);
|
||||||
|
let p = *y_pred.get(i);
|
||||||
|
if p == t {
|
||||||
|
if t == positive {
|
||||||
tp += 1;
|
tp += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else if t == positive {
|
||||||
tp += 1;
|
fn_count += 1;
|
||||||
}
|
|
||||||
} else if classes == 2 {
|
|
||||||
if *y_true.get(i) != T::one() {
|
|
||||||
fne += 1;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if tp + fn_count == 0 {
|
||||||
|
0.0
|
||||||
} else {
|
} else {
|
||||||
fne += 1;
|
tp as f64 / (tp + fn_count) as f64
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Multiclass case: macro-averaged recall
|
||||||
|
let mut support: HashMap<u64, usize> = HashMap::new();
|
||||||
|
let mut tp_map: HashMap<u64, usize> = HashMap::new();
|
||||||
|
for i in 0..n {
|
||||||
|
let t_bits = y_true.get(i).to_f64_bits();
|
||||||
|
*support.entry(t_bits).or_insert(0) += 1;
|
||||||
|
if *y_true.get(i) == *y_pred.get(i) {
|
||||||
|
*tp_map.entry(t_bits).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut recall_sum = 0.0;
|
||||||
|
for (&bits, &sup) in &support {
|
||||||
|
let tp = *tp_map.get(&bits).unwrap_or(&0);
|
||||||
|
recall_sum += tp as f64 / sup as f64;
|
||||||
|
}
|
||||||
|
if support.is_empty() {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
recall_sum / support.len() as f64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tp as f64 / (tp as f64 + fne as f64)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,7 +141,7 @@ mod tests {
|
|||||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||||
|
|
||||||
let score3: f64 = Recall::new().get_score(&y_true, &y_pred);
|
let score3: f64 = Recall::new().get_score(&y_true, &y_pred);
|
||||||
assert!((score3 - 0.5).abs() < 1e-8);
|
assert!((score3 - (2.0 / 3.0)).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -133,4 +159,18 @@ mod tests {
|
|||||||
assert!((score1 - 0.333333333).abs() < 1e-8);
|
assert!((score1 - 0.333333333).abs() < 1e-8);
|
||||||
assert!((score2 - 1.0).abs() < 1e-8);
|
assert!((score2 - 1.0).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks macro-averaged recall on a three-class input with unequal class counts:
// each class's recall is (correct predictions of the class) / (occurrences of the class in y_true),
// and the score is the plain mean of those per-class values.
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn recall_multiclass_imbalanced() {
|
||||||
|
let y_true: Vec<f64> = vec![0., 0., 1., 2., 2., 2.];
|
||||||
|
let y_pred: Vec<f64> = vec![0., 1., 1., 2., 0., 2.];
|
||||||
|
|
||||||
|
let score: f64 = Recall::new().get_score(&y_true, &y_pred);
|
||||||
|
let expected = (0.5 + 1.0 + (2.0 / 3.0)) / 3.0; // class 0: 1 of 2 recovered, class 1: 1 of 1, class 2: 2 of 3
|
||||||
|
assert!((score - expected).abs() < 1e-8);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user