feat: + cluster metrics
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::linalg::BaseVector;
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::metrics::cluster_helpers::*;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
/// Mean Absolute Error
|
||||
pub struct HCVScore {}
|
||||
|
||||
impl HCVScore {
|
||||
/// Computes mean absolute error
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn get_score<T: RealNumber, V: BaseVector<T>>(
|
||||
&self,
|
||||
labels_true: &V,
|
||||
labels_pred: &V,
|
||||
) -> (T, T, T) {
|
||||
let labels_true = labels_true.to_vec();
|
||||
let labels_pred = labels_pred.to_vec();
|
||||
let entropy_c = entropy(&labels_true);
|
||||
let entropy_k = entropy(&labels_pred);
|
||||
let contingency = contingency_matrix(&labels_true, &labels_pred);
|
||||
let mi: T = mutual_info_score(&contingency);
|
||||
|
||||
let homogeneity = entropy_c.map(|e| mi / e).unwrap_or(T::one());
|
||||
let completeness = entropy_k.map(|e| mi / e).unwrap_or(T::one());
|
||||
|
||||
let v_measure_score = if homogeneity + completeness == T::zero() {
|
||||
T::zero()
|
||||
} else {
|
||||
T::two() * homogeneity * completeness / (T::one() * homogeneity + completeness)
|
||||
};
|
||||
|
||||
(homogeneity, completeness, v_measure_score)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn homogeneity_score() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let scores = HCVScore {}.get_score(&v1, &v2);
|
||||
|
||||
assert!((0.2548f32 - scores.0).abs() < 1e-4);
|
||||
assert!((0.5440f32 - scores.1).abs() < 1e-4);
|
||||
assert!((0.3471f32 - scores.2).abs() < 1e-4);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::math::vector::RealNumberVector;
|
||||
|
||||
pub fn contingency_matrix<T: RealNumber>(
|
||||
labels_true: &Vec<T>,
|
||||
labels_pred: &Vec<T>,
|
||||
) -> Vec<Vec<usize>> {
|
||||
let (classes, class_idx) = labels_true.unique();
|
||||
let (clusters, cluster_idx) = labels_pred.unique();
|
||||
|
||||
let mut contingency_matrix = Vec::with_capacity(classes.len());
|
||||
|
||||
for _ in 0..classes.len() {
|
||||
contingency_matrix.push(vec![0; clusters.len()]);
|
||||
}
|
||||
|
||||
for i in 0..class_idx.len() {
|
||||
contingency_matrix[class_idx[i]][cluster_idx[i]] += 1;
|
||||
}
|
||||
|
||||
contingency_matrix
|
||||
}
|
||||
|
||||
pub fn entropy<T: RealNumber>(data: &Vec<T>) -> Option<T> {
|
||||
let mut bincounts = HashMap::with_capacity(data.len());
|
||||
|
||||
for e in data.iter() {
|
||||
let k = e.to_i64().unwrap();
|
||||
bincounts.insert(k, bincounts.get(&k).unwrap_or(&0) + 1);
|
||||
}
|
||||
|
||||
let mut entropy = T::zero();
|
||||
let sum = T::from_usize(bincounts.values().sum()).unwrap();
|
||||
|
||||
for &c in bincounts.values() {
|
||||
if c > 0 {
|
||||
let pi = T::from_usize(c).unwrap();
|
||||
entropy = entropy - (pi / sum) * (pi.ln() - sum.ln());
|
||||
}
|
||||
}
|
||||
|
||||
Some(entropy)
|
||||
}
|
||||
|
||||
pub fn mutual_info_score<T: RealNumber>(contingency: &Vec<Vec<usize>>) -> T {
|
||||
let mut contingency_sum = 0;
|
||||
let mut pi = vec![0; contingency.len()];
|
||||
let mut pj = vec![0; contingency[0].len()];
|
||||
let (mut nzx, mut nzy, mut nz_val) = (Vec::new(), Vec::new(), Vec::new());
|
||||
|
||||
for r in 0..contingency.len() {
|
||||
for c in 0..contingency[0].len() {
|
||||
contingency_sum += contingency[r][c];
|
||||
pi[r] += contingency[r][c];
|
||||
pj[c] += contingency[r][c];
|
||||
if contingency[r][c] > 0 {
|
||||
nzx.push(r);
|
||||
nzy.push(c);
|
||||
nz_val.push(contingency[r][c]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let contingency_sum = T::from_usize(contingency_sum).unwrap();
|
||||
let contingency_sum_ln = contingency_sum.ln();
|
||||
let pi_sum_l = T::from_usize(pi.iter().sum()).unwrap().ln();
|
||||
let pj_sum_l = T::from_usize(pj.iter().sum()).unwrap().ln();
|
||||
|
||||
let log_contingency_nm: Vec<T> = nz_val
|
||||
.iter()
|
||||
.map(|v| T::from_usize(*v).unwrap().ln())
|
||||
.collect();
|
||||
let contingency_nm: Vec<T> = nz_val
|
||||
.iter()
|
||||
.map(|v| T::from_usize(*v).unwrap() / contingency_sum)
|
||||
.collect();
|
||||
let outer: Vec<usize> = nzx
|
||||
.iter()
|
||||
.zip(nzy.iter())
|
||||
.map(|(&x, &y)| pi[x] * pj[y])
|
||||
.collect();
|
||||
let log_outer: Vec<T> = outer
|
||||
.iter()
|
||||
.map(|&o| -T::from_usize(o).unwrap().ln() + pi_sum_l + pj_sum_l)
|
||||
.collect();
|
||||
|
||||
let mut result = T::zero();
|
||||
|
||||
for i in 0..log_outer.len() {
|
||||
result = result
|
||||
+ ((contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln))
|
||||
+ contingency_nm[i] * log_outer[i])
|
||||
}
|
||||
|
||||
result.max(T::zero())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn contingency_matrix_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
|
||||
println!("{:?}", contingency_matrix(&v1, &v2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn entropy_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
|
||||
println!("{:?}", entropy(&v1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mutual_info_score_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let s: f32 = mutual_info_score(&contingency_matrix(&v1, &v2));
|
||||
|
||||
println!("{}", s);
|
||||
}
|
||||
}
|
||||
@@ -54,6 +54,8 @@
|
||||
pub mod accuracy;
|
||||
/// Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.
|
||||
pub mod auc;
|
||||
pub mod cluster_hcv;
|
||||
pub(crate) mod cluster_helpers;
|
||||
/// F1 score, also known as balanced F-score or F-measure.
|
||||
pub mod f1;
|
||||
/// Mean absolute error regression loss.
|
||||
@@ -76,6 +78,9 @@ pub struct ClassificationMetrics {}
|
||||
/// Metrics for regression models.
|
||||
pub struct RegressionMetrics {}
|
||||
|
||||
/// Cluster metrics.
|
||||
pub struct ClusterMetrics {}
|
||||
|
||||
impl ClassificationMetrics {
|
||||
/// Accuracy score, see [accuracy](accuracy/index.html).
|
||||
pub fn accuracy() -> accuracy::Accuracy {
|
||||
@@ -120,6 +125,13 @@ impl RegressionMetrics {
|
||||
}
|
||||
}
|
||||
|
||||
impl ClusterMetrics {
|
||||
/// Mean squared error, see [mean squared error](mean_squared_error/index.html).
|
||||
pub fn hcv_score() -> cluster_hcv::HCVScore {
|
||||
cluster_hcv::HCVScore {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Function that calculated accuracy score, see [accuracy](accuracy/index.html).
|
||||
/// * `y_true` - cround truth (correct) labels
|
||||
/// * `y_pred` - predicted labels, as returned by a classifier.
|
||||
@@ -175,3 +187,30 @@ pub fn mean_absolute_error<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred:
|
||||
pub fn r2<T: RealNumber, V: BaseVector<T>>(y_true: &V, y_pred: &V) -> T {
|
||||
RegressionMetrics::r2().get_score(y_true, y_pred)
|
||||
}
|
||||
|
||||
/// Computes R2 score, see [R2](r2/index.html).
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn homogeneity_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
|
||||
ClusterMetrics::hcv_score()
|
||||
.get_score(labels_true, labels_pred)
|
||||
.0
|
||||
}
|
||||
|
||||
/// Computes R2 score, see [R2](r2/index.html).
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn completeness_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
|
||||
ClusterMetrics::hcv_score()
|
||||
.get_score(labels_true, labels_pred)
|
||||
.1
|
||||
}
|
||||
|
||||
/// Computes R2 score, see [R2](r2/index.html).
|
||||
/// * `y_true` - Ground truth (correct) target values.
|
||||
/// * `y_pred` - Estimated target values.
|
||||
pub fn v_measure_score<T: RealNumber, V: BaseVector<T>>(labels_true: &V, labels_pred: &V) -> T {
|
||||
ClusterMetrics::hcv_score()
|
||||
.get_score(labels_true, labels_pred)
|
||||
.2
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user