Merge potential next release v0.4 (#187) Breaking Changes
* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamic box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide by the use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters into a dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
#![allow(clippy::ptr_arg)]
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::math::vector::RealNumberVector;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
pub fn contingency_matrix<T: RealNumber>(
|
||||
labels_true: &Vec<T>,
|
||||
labels_pred: &Vec<T>,
|
||||
pub fn contingency_matrix<T: Number + Ord, V: ArrayView1<T> + ?Sized>(
|
||||
labels_true: &V,
|
||||
labels_pred: &V,
|
||||
) -> Vec<Vec<usize>> {
|
||||
let (classes, class_idx) = labels_true.unique_with_indices();
|
||||
let (clusters, cluster_idx) = labels_pred.unique_with_indices();
|
||||
@@ -24,28 +24,30 @@ pub fn contingency_matrix<T: RealNumber>(
|
||||
contingency_matrix
|
||||
}
|
||||
|
||||
pub fn entropy<T: RealNumber>(data: &[T]) -> Option<T> {
|
||||
let mut bincounts = HashMap::with_capacity(data.len());
|
||||
pub fn entropy<T: Number + Ord, V: ArrayView1<T> + ?Sized>(data: &V) -> Option<f64> {
|
||||
let mut bincounts = HashMap::with_capacity(data.shape());
|
||||
|
||||
for e in data.iter() {
|
||||
for e in data.iterator(0) {
|
||||
let k = e.to_i64().unwrap();
|
||||
bincounts.insert(k, bincounts.get(&k).unwrap_or(&0) + 1);
|
||||
}
|
||||
|
||||
let mut entropy = T::zero();
|
||||
let sum = T::from_usize(bincounts.values().sum()).unwrap();
|
||||
let mut entropy = 0f64;
|
||||
let sum: i64 = bincounts.values().sum();
|
||||
|
||||
for &c in bincounts.values() {
|
||||
if c > 0 {
|
||||
let pi = T::from_usize(c).unwrap();
|
||||
entropy -= (pi / sum) * (pi.ln() - sum.ln());
|
||||
let pi = c as f64;
|
||||
let pi_ln = pi.ln();
|
||||
let sum_ln = (sum as f64).ln();
|
||||
entropy -= (pi / sum as f64) * (pi_ln - sum_ln);
|
||||
}
|
||||
}
|
||||
|
||||
Some(entropy)
|
||||
}
|
||||
|
||||
pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
|
||||
pub fn mutual_info_score(contingency: &[Vec<usize>]) -> f64 {
|
||||
let mut contingency_sum = 0;
|
||||
let mut pi = vec![0; contingency.len()];
|
||||
let mut pj = vec![0; contingency[0].len()];
|
||||
@@ -64,37 +66,36 @@ pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
|
||||
}
|
||||
}
|
||||
|
||||
let contingency_sum = T::from_usize(contingency_sum).unwrap();
|
||||
let contingency_sum = contingency_sum as f64;
|
||||
let contingency_sum_ln = contingency_sum.ln();
|
||||
let pi_sum_l = T::from_usize(pi.iter().sum()).unwrap().ln();
|
||||
let pj_sum_l = T::from_usize(pj.iter().sum()).unwrap().ln();
|
||||
let pi_sum: usize = pi.iter().sum();
|
||||
let pj_sum: usize = pj.iter().sum();
|
||||
let pi_sum_l = (pi_sum as f64).ln();
|
||||
let pj_sum_l = (pj_sum as f64).ln();
|
||||
|
||||
let log_contingency_nm: Vec<T> = nz_val
|
||||
let log_contingency_nm: Vec<f64> = nz_val.iter().map(|v| (*v as f64).ln()).collect();
|
||||
let contingency_nm: Vec<f64> = nz_val
|
||||
.iter()
|
||||
.map(|v| T::from_usize(*v).unwrap().ln())
|
||||
.collect();
|
||||
let contingency_nm: Vec<T> = nz_val
|
||||
.iter()
|
||||
.map(|v| T::from_usize(*v).unwrap() / contingency_sum)
|
||||
.map(|v| (*v as f64) / contingency_sum)
|
||||
.collect();
|
||||
let outer: Vec<usize> = nzx
|
||||
.iter()
|
||||
.zip(nzy.iter())
|
||||
.map(|(&x, &y)| pi[x] * pj[y])
|
||||
.collect();
|
||||
let log_outer: Vec<T> = outer
|
||||
let log_outer: Vec<f64> = outer
|
||||
.iter()
|
||||
.map(|&o| -T::from_usize(o).unwrap().ln() + pi_sum_l + pj_sum_l)
|
||||
.map(|&o| -(o as f64).ln() + pi_sum_l + pj_sum_l)
|
||||
.collect();
|
||||
|
||||
let mut result = T::zero();
|
||||
let mut result = 0f64;
|
||||
|
||||
for i in 0..log_outer.len() {
|
||||
result += (contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln))
|
||||
+ contingency_nm[i] * log_outer[i]
|
||||
}
|
||||
|
||||
result.max(T::zero())
|
||||
result.max(0f64)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -104,8 +105,8 @@ mod tests {
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn contingency_matrix_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
|
||||
|
||||
assert_eq!(
|
||||
vec!(vec!(1, 2), vec!(2, 0), vec!(1, 0), vec!(1, 0)),
|
||||
@@ -116,17 +117,17 @@ mod tests {
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn entropy_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
|
||||
assert!((1.2770f32 - entropy(&v1).unwrap()).abs() < 1e-4);
|
||||
assert!((1.2770 - entropy(&v1).unwrap() as f64).abs() < 1e-4);
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn mutual_info_score_test() {
|
||||
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
|
||||
let v2 = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0];
|
||||
let s: f32 = mutual_info_score(&contingency_matrix(&v1, &v2));
|
||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||
let v2 = vec![1, 0, 0, 0, 0, 1, 0];
|
||||
let s = mutual_info_score(&contingency_matrix(&v1, &v2));
|
||||
|
||||
assert!((0.3254 - s).abs() < 1e-4);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user