feat: adds F1 and roc_auc_score
This commit is contained in:
@@ -1,11 +1,18 @@
|
||||
use num_traits::Float;
|
||||
|
||||
pub trait QuickArgSort {
|
||||
fn quick_argsort(&mut self) -> Vec<usize>;
|
||||
fn quick_argsort_mut(&mut self) -> Vec<usize>;
|
||||
|
||||
fn quick_argsort(&self) -> Vec<usize>;
|
||||
}
|
||||
|
||||
impl<T: Float> QuickArgSort for Vec<T> {
|
||||
fn quick_argsort(&mut self) -> Vec<usize> {
|
||||
fn quick_argsort(&self) -> Vec<usize> {
|
||||
let mut v = self.clone();
|
||||
v.quick_argsort_mut()
|
||||
}
|
||||
|
||||
fn quick_argsort_mut(&mut self) -> Vec<usize> {
|
||||
let stack_size = 64;
|
||||
let mut jstack = -1;
|
||||
let mut l = 0;
|
||||
@@ -108,10 +115,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn with_capacity() {
|
||||
let mut arr1 = vec![0.3, 0.1, 0.2, 0.4, 0.9, 0.5, 0.7, 0.6, 0.8];
|
||||
let arr1 = vec![0.3, 0.1, 0.2, 0.4, 0.9, 0.5, 0.7, 0.6, 0.8];
|
||||
assert_eq!(vec![1, 2, 0, 3, 5, 7, 6, 8, 4], arr1.quick_argsort());
|
||||
|
||||
let mut arr2 = vec![
|
||||
let arr2 = vec![
|
||||
0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6,
|
||||
1.0, 1.3, 1.4,
|
||||
];
|
||||
|
||||
@@ -22,6 +22,8 @@ pub trait BaseVector<T: FloatExt>: Clone + Debug {
|
||||
fn set(&mut self, i: usize, x: T);
|
||||
|
||||
fn len(&self) -> usize;
|
||||
|
||||
fn to_vec(&self) -> Vec<T>;
|
||||
}
|
||||
|
||||
pub trait BaseMatrix<T: FloatExt>: Clone + Debug {
|
||||
|
||||
@@ -27,6 +27,11 @@ impl<T: FloatExt> BaseVector<T> for Vec<T> {
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
/// Returns an owned copy of this vector's elements.
fn to_vec(&self) -> Vec<T> {
    self.clone()
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@@ -22,6 +22,10 @@ impl<T: FloatExt + 'static> BaseVector<T> for MatrixMN<T, U1, Dynamic> {
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
/// Collects the elements of the first (and only) row into a `Vec`.
fn to_vec(&self) -> Vec<T> {
    let first_row = self.row(0);
    first_row.iter().copied().collect()
}
|
||||
}
|
||||
|
||||
impl<T: FloatExt + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
|
||||
@@ -384,6 +388,12 @@ mod tests {
|
||||
assert_eq!(5., BaseVector::get(&v, 1));
|
||||
}
|
||||
|
||||
/// `to_vec` on a row vector yields its elements in order.
#[test]
fn vec_to_vec() {
    let expected = vec![1., 2., 3.];
    let row = RowDVector::from_vec(vec![1., 2., 3.]);

    assert_eq!(expected, row.to_vec());
}
|
||||
|
||||
#[test]
|
||||
fn get_set_dynamic() {
|
||||
let mut m = DMatrix::from_row_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
|
||||
|
||||
@@ -27,6 +27,10 @@ impl<T: FloatExt> BaseVector<T> for ArrayBase<OwnedRepr<T>, Ix1> {
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn to_vec(&self) -> Vec<T> {
|
||||
self.to_owned().to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: FloatExt + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
|
||||
@@ -351,6 +355,12 @@ mod tests {
|
||||
assert_eq!(3, v.len());
|
||||
}
|
||||
|
||||
/// `BaseVector::to_vec` on a 1-D ndarray yields its elements in order.
#[test]
fn vec_to_vec() {
    let v = arr1(&[1f64, 2., 3.]);

    // Call through the trait explicitly: plain `v.to_vec()` resolves to
    // ndarray's inherent method and would bypass the `BaseVector` impl.
    assert_eq!(vec![1., 2., 3.], crate::linalg::BaseVector::to_vec(&v));
}
|
||||
|
||||
#[test]
|
||||
fn from_to_row_vec() {
|
||||
let vec = arr1(&[1., 2., 3.]);
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::algorithm::sort::quick_sort::QuickArgSort;
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::FloatExt;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct AUC {}
|
||||
|
||||
impl AUC {
|
||||
pub fn get_score<T: FloatExt, V: BaseVector<T>>(&self, y_true: &V, y_pred_prob: &V) -> T {
|
||||
let mut pos = T::zero();
|
||||
let mut neg = T::zero();
|
||||
|
||||
let n = y_true.len();
|
||||
|
||||
for i in 0..n {
|
||||
if y_true.get(i) == T::zero() {
|
||||
neg = neg + T::one();
|
||||
} else if y_true.get(i) == T::one() {
|
||||
pos = pos + T::one();
|
||||
} else {
|
||||
panic!(
|
||||
"AUC is only for binary classification. Invalid label: {}",
|
||||
y_true.get(i)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mut y_pred = y_pred_prob.to_vec();
|
||||
|
||||
let label_idx = y_pred.quick_argsort_mut();
|
||||
|
||||
let mut rank = vec![T::zero(); n];
|
||||
let mut i = 0;
|
||||
while i < n {
|
||||
if i == n - 1 || y_pred[i] != y_pred[i + 1] {
|
||||
rank[i] = T::from_usize(i + 1).unwrap();
|
||||
} else {
|
||||
let mut j = i + 1;
|
||||
while j < n && y_pred[j] == y_pred[i] {
|
||||
j += 1;
|
||||
}
|
||||
let r = T::from_usize(i + 1 + j).unwrap() / T::two();
|
||||
for k in i..j {
|
||||
rank[k] = r;
|
||||
}
|
||||
i = j - 1;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let mut auc = T::zero();
|
||||
for i in 0..n {
|
||||
if y_true.get(label_idx[i]) == T::one() {
|
||||
auc = auc + rank[i];
|
||||
}
|
||||
}
|
||||
|
||||
(auc - (pos * (pos + T::one()) / T::two())) / (pos * neg)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn auc() {
|
||||
let y_true: Vec<f64> = vec![0., 0., 1., 1.];
|
||||
let y_pred: Vec<f64> = vec![0.1, 0.4, 0.35, 0.8];
|
||||
|
||||
let score1: f64 = AUC {}.get_score(&y_true, &y_pred);
|
||||
let score2: f64 = AUC {}.get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.75).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::FloatExt;
|
||||
use crate::metrics::precision::Precision;
|
||||
use crate::metrics::recall::Recall;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct F1 {}
|
||||
|
||||
impl F1 {
|
||||
pub fn get_score<T: FloatExt, V: BaseVector<T>>(&self, y_true: &V, y_prod: &V) -> T {
|
||||
if y_true.len() != y_prod.len() {
|
||||
panic!(
|
||||
"The vector sizes don't match: {} != {}",
|
||||
y_true.len(),
|
||||
y_prod.len()
|
||||
);
|
||||
}
|
||||
let beta2 = T::one();
|
||||
|
||||
let p = Precision {}.get_score(y_true, y_prod);
|
||||
let r = Recall {}.get_score(y_true, y_prod);
|
||||
|
||||
(T::one() + beta2) * (p * r) / (beta2 * p + r)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn f1() {
|
||||
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
|
||||
let y_true: Vec<f64> = vec![0., 1., 1., 0., 1., 0.];
|
||||
|
||||
let score1: f64 = F1 {}.get_score(&y_pred, &y_true);
|
||||
let score2: f64 = F1 {}.get_score(&y_true, &y_true);
|
||||
|
||||
assert!((score1 - 0.57142857).abs() < 1e-8);
|
||||
assert!((score2 - 1.0).abs() < 1e-8);
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,6 @@
|
||||
pub mod accuracy;
|
||||
pub mod auc;
|
||||
pub mod f1;
|
||||
pub mod precision;
|
||||
pub mod recall;
|
||||
|
||||
|
||||
@@ -239,7 +239,7 @@ impl<T: FloatExt> DecisionTreeClassifier<T> {
|
||||
let mut order: Vec<Vec<usize>> = Vec::new();
|
||||
|
||||
for i in 0..num_attributes {
|
||||
order.push(x.get_col_as_vec(i).quick_argsort());
|
||||
order.push(x.get_col_as_vec(i).quick_argsort_mut());
|
||||
}
|
||||
|
||||
let mut tree = DecisionTreeClassifier {
|
||||
|
||||
@@ -164,7 +164,7 @@ impl<T: FloatExt> DecisionTreeRegressor<T> {
|
||||
let mut order: Vec<Vec<usize>> = Vec::new();
|
||||
|
||||
for i in 0..num_attributes {
|
||||
order.push(x.get_col_as_vec(i).quick_argsort());
|
||||
order.push(x.get_col_as_vec(i).quick_argsort_mut());
|
||||
}
|
||||
|
||||
let mut tree = DecisionTreeRegressor {
|
||||
|
||||
Reference in New Issue
Block a user