feat: adds F1 and roc_auc_score

This commit is contained in:
Volodymyr Orlov
2020-06-08 14:47:59 -07:00
parent ee581abf70
commit 14113b4152
10 changed files with 167 additions and 6 deletions
+81
View File
@@ -0,0 +1,81 @@
#![allow(non_snake_case)]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
use crate::linalg::BaseVector;
use crate::math::num::FloatExt;
#[derive(Serialize, Deserialize, Debug)]
pub struct AUC {}
impl AUC {
pub fn get_score<T: FloatExt, V: BaseVector<T>>(&self, y_true: &V, y_pred_prob: &V) -> T {
let mut pos = T::zero();
let mut neg = T::zero();
let n = y_true.len();
for i in 0..n {
if y_true.get(i) == T::zero() {
neg = neg + T::one();
} else if y_true.get(i) == T::one() {
pos = pos + T::one();
} else {
panic!(
"AUC is only for binary classification. Invalid label: {}",
y_true.get(i)
);
}
}
let mut y_pred = y_pred_prob.to_vec();
let label_idx = y_pred.quick_argsort_mut();
let mut rank = vec![T::zero(); n];
let mut i = 0;
while i < n {
if i == n - 1 || y_pred[i] != y_pred[i + 1] {
rank[i] = T::from_usize(i + 1).unwrap();
} else {
let mut j = i + 1;
while j < n && y_pred[j] == y_pred[i] {
j += 1;
}
let r = T::from_usize(i + 1 + j).unwrap() / T::two();
for k in i..j {
rank[k] = r;
}
i = j - 1;
}
i += 1;
}
let mut auc = T::zero();
for i in 0..n {
if y_true.get(label_idx[i]) == T::one() {
auc = auc + rank[i];
}
}
(auc - (pos * (pos + T::one()) / T::two())) / (pos * neg)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn auc() {
let y_true: Vec<f64> = vec![0., 0., 1., 1.];
let y_pred: Vec<f64> = vec![0.1, 0.4, 0.35, 0.8];
let score1: f64 = AUC {}.get_score(&y_true, &y_pred);
let score2: f64 = AUC {}.get_score(&y_true, &y_true);
assert!((score1 - 0.75).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
}
}
+44
View File
@@ -0,0 +1,44 @@
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::FloatExt;
use crate::metrics::precision::Precision;
use crate::metrics::recall::Recall;
#[derive(Serialize, Deserialize, Debug)]
pub struct F1 {}
impl F1 {
pub fn get_score<T: FloatExt, V: BaseVector<T>>(&self, y_true: &V, y_prod: &V) -> T {
if y_true.len() != y_prod.len() {
panic!(
"The vector sizes don't match: {} != {}",
y_true.len(),
y_prod.len()
);
}
let beta2 = T::one();
let p = Precision {}.get_score(y_true, y_prod);
let r = Recall {}.get_score(y_true, y_prod);
(T::one() + beta2) * (p * r) / (beta2 * p + r)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn f1() {
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
let score1: f64 = F1 {}.get_score(&y_pred, &y_true);
let score2: f64 = F1 {}.get_score(&y_true, &y_true);
assert!((score1 - 0.57142857).abs() < 1e-8);
assert!((score2 - 1.0).abs() < 1e-8);
}
}
+2
View File
@@ -1,4 +1,6 @@
pub mod accuracy;
pub mod auc;
pub mod f1;
pub mod precision;
pub mod recall;