Merge branch 'development' into march-2023-improvements

This commit is contained in:
Lorenzo
2025-01-27 23:28:58 +00:00
committed by GitHub
65 changed files with 1844 additions and 864 deletions
+2
View File
@@ -37,6 +37,8 @@ $ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213
``` ```
* Find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file. * Find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file.
* Please take a look at the output of a profiler to spot the most evident performance problems; see [this guide about using a profiler](http://www.codeofview.com/fix-rs/2017/01/24/how-to-optimize-rust-programs-on-linux/).
## Issue Report Process ## Issue Report Process
1. Go to the project's issues. 1. Go to the project's issues.
+1 -1
View File
@@ -36,7 +36,7 @@ jobs:
- name: Install Rust toolchain - name: Install Rust toolchain
uses: actions-rs/toolchain@v1 uses: actions-rs/toolchain@v1
with: with:
toolchain: stable toolchain: 1.81 # 1.82 seems to break wasm32 tests https://github.com/rustwasm/wasm-bindgen/issues/4274
target: ${{ matrix.platform.target }} target: ${{ matrix.platform.target }}
profile: minimal profile: minimal
default: true default: true
+6
View File
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.4.0] - 2023-04-05
## Added
- WARNING: Breaking changes!
- `DenseMatrix` constructors now return `Result` to prevent users from instantiating a matrix with an inconsistent row/column count. Their return values need to be unwrapped with `unwrap()`; see the tests for examples.
## [0.3.0] - 2022-11-09 ## [0.3.0] - 2022-11-09
## Added ## Added
+2 -2
View File
@@ -2,7 +2,7 @@
name = "smartcore" name = "smartcore"
description = "Machine Learning in Rust." description = "Machine Learning in Rust."
homepage = "https://smartcorelib.org" homepage = "https://smartcorelib.org"
version = "0.3.1" version = "0.4.0"
authors = ["smartcore Developers"] authors = ["smartcore Developers"]
edition = "2021" edition = "2021"
license = "Apache-2.0" license = "Apache-2.0"
@@ -48,7 +48,7 @@ getrandom = { version = "0.2.8", optional = true }
wasm-bindgen-test = "0.3" wasm-bindgen-test = "0.3"
[dev-dependencies] [dev-dependencies]
itertools = "0.10.5" itertools = "0.13.0"
serde_json = "1.0" serde_json = "1.0"
bincode = "1.3.1" bincode = "1.3.1"
+4 -3
View File
@@ -40,11 +40,11 @@ impl BBDTreeNode {
impl BBDTree { impl BBDTree {
pub fn new<T: Number, M: Array2<T>>(data: &M) -> BBDTree { pub fn new<T: Number, M: Array2<T>>(data: &M) -> BBDTree {
let nodes = Vec::new(); let nodes: Vec<BBDTreeNode> = Vec::new();
let (n, _) = data.shape(); let (n, _) = data.shape();
let index = (0..n).collect::<Vec<_>>(); let index = (0..n).collect::<Vec<usize>>();
let mut tree = BBDTree { let mut tree = BBDTree {
nodes, nodes,
@@ -343,7 +343,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let tree = BBDTree::new(&data); let tree = BBDTree::new(&data);
+4 -4
View File
@@ -124,7 +124,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
current_cover_set.push((d, &self.root)); current_cover_set.push((d, &self.root));
let mut heap = HeapSelection::with_capacity(k); let mut heap = HeapSelection::with_capacity(k);
heap.add(std::f64::MAX); heap.add(f64::MAX);
let mut empty_heap = true; let mut empty_heap = true;
if !self.identical_excluded || self.get_data_value(self.root.idx) != p { if !self.identical_excluded || self.get_data_value(self.root.idx) != p {
@@ -145,7 +145,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
} }
let upper_bound = if empty_heap { let upper_bound = if empty_heap {
std::f64::INFINITY f64::INFINITY
} else { } else {
*heap.peek() *heap.peek()
}; };
@@ -291,7 +291,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
} else { } else {
let max_dist = self.max(point_set); let max_dist = self.max(point_set);
let next_scale = (max_scale - 1).min(self.get_scale(max_dist)); let next_scale = (max_scale - 1).min(self.get_scale(max_dist));
if next_scale == std::i64::MIN { if next_scale == i64::MIN {
let mut children: Vec<Node> = Vec::new(); let mut children: Vec<Node> = Vec::new();
let mut leaf = self.new_leaf(p); let mut leaf = self.new_leaf(p);
children.push(leaf); children.push(leaf);
@@ -435,7 +435,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
fn get_scale(&self, d: f64) -> i64 { fn get_scale(&self, d: f64) -> i64 {
if d == 0f64 { if d == 0f64 {
std::i64::MIN i64::MIN
} else { } else {
(self.inv_log_base * d.ln()).ceil() as i64 (self.inv_log_base * d.ln()).ceil() as i64
} }
+15 -17
View File
@@ -17,7 +17,7 @@
/// &[4.6, 3.1, 1.5, 0.2], /// &[4.6, 3.1, 1.5, 0.2],
/// &[5.0, 3.6, 1.4, 0.2], /// &[5.0, 3.6, 1.4, 0.2],
/// &[5.4, 3.9, 1.7, 0.4], /// &[5.4, 3.9, 1.7, 0.4],
/// ]); /// ]).unwrap();
/// let fastpair = FastPair::new(&x); /// let fastpair = FastPair::new(&x);
/// let closest_pair: PairwiseDistance<f64> = fastpair.unwrap().closest_pair(); /// let closest_pair: PairwiseDistance<f64> = fastpair.unwrap().closest_pair();
/// ``` /// ```
@@ -52,10 +52,8 @@ pub struct FastPair<'a, T: RealNumber + FloatNumber, M: Array2<T>> {
} }
impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> { impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
///
/// Constructor /// Constructor
/// Instantiate and inizialise the algorithm /// Instantiate and initialize the algorithm
///
pub fn new(m: &'a M) -> Result<Self, Failed> { pub fn new(m: &'a M) -> Result<Self, Failed> {
if m.shape().0 < 3 { if m.shape().0 < 3 {
return Err(Failed::because( return Err(Failed::because(
@@ -74,10 +72,8 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
Ok(init) Ok(init)
} }
///
/// Initialise `FastPair` by passing a `Array2`. /// Initialise `FastPair` by passing a `Array2`.
/// Build a FastPairs data-structure from a set of (new) points. /// Build a FastPairs data-structure from a set of (new) points.
///
fn init(&mut self) { fn init(&mut self) {
// basic measures // basic measures
let len = self.samples.shape().0; let len = self.samples.shape().0;
@@ -158,9 +154,7 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
self.neighbours = neighbours; self.neighbours = neighbours;
} }
///
/// Find closest pair by scanning list of nearest neighbors. /// Find closest pair by scanning list of nearest neighbors.
///
#[allow(dead_code)] #[allow(dead_code)]
pub fn closest_pair(&self) -> PairwiseDistance<T> { pub fn closest_pair(&self) -> PairwiseDistance<T> {
let mut a = self.neighbours[0]; // Start with first point let mut a = self.neighbours[0]; // Start with first point
@@ -232,10 +226,10 @@ mod tests_fastpair {
use super::*; use super::*;
use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix}; use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix};
///
/// Brute force algorithm, used only for comparison and testing /// Brute force algorithm, used only for comparison and testing
/// pub fn closest_pair_brute(
pub fn closest_pair_brute(fastpair: &FastPair<f64, DenseMatrix<f64>>) -> PairwiseDistance<f64> { fastpair: &FastPair<'_, f64, DenseMatrix<f64>>,
) -> PairwiseDistance<f64> {
use itertools::Itertools; use itertools::Itertools;
let m = fastpair.samples.shape().0; let m = fastpair.samples.shape().0;
@@ -286,7 +280,7 @@ mod tests_fastpair {
fn dataset_has_at_least_three_points() { fn dataset_has_at_least_three_points() {
// Create a dataset which consists of only two points: // Create a dataset which consists of only two points:
// A(0.0, 0.0) and B(1.0, 1.0). // A(0.0, 0.0) and B(1.0, 1.0).
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]); let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]).unwrap();
// We expect an error when we run `FastPair` on this dataset, // We expect an error when we run `FastPair` on this dataset,
// because `FastPair` currently only works on a minimum of 3 // because `FastPair` currently only works on a minimum of 3
@@ -303,7 +297,7 @@ mod tests_fastpair {
#[test] #[test]
fn one_dimensional_dataset_minimal() { fn one_dimensional_dataset_minimal() {
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0], &[2.0], &[9.0]]); let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0], &[2.0], &[9.0]]).unwrap();
let result = FastPair::new(&dataset); let result = FastPair::new(&dataset);
assert!(result.is_ok()); assert!(result.is_ok());
@@ -323,7 +317,8 @@ mod tests_fastpair {
#[test] #[test]
fn one_dimensional_dataset_2() { fn one_dimensional_dataset_2() {
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]); let dataset =
DenseMatrix::<f64>::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]).unwrap();
let result = FastPair::new(&dataset); let result = FastPair::new(&dataset);
assert!(result.is_ok()); assert!(result.is_ok());
@@ -358,7 +353,8 @@ mod tests_fastpair {
&[6.9, 3.1, 4.9, 1.5], &[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3], &[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5], &[6.5, 2.8, 4.6, 1.5],
]); ])
.unwrap();
let fastpair = FastPair::new(&x); let fastpair = FastPair::new(&x);
assert!(fastpair.is_ok()); assert!(fastpair.is_ok());
@@ -531,7 +527,8 @@ mod tests_fastpair {
&[6.9, 3.1, 4.9, 1.5], &[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3], &[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5], &[6.5, 2.8, 4.6, 1.5],
]); ])
.unwrap();
// compute // compute
let fastpair = FastPair::new(&x); let fastpair = FastPair::new(&x);
assert!(fastpair.is_ok()); assert!(fastpair.is_ok());
@@ -579,7 +576,8 @@ mod tests_fastpair {
&[6.9, 3.1, 4.9, 1.5], &[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3], &[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5], &[6.5, 2.8, 4.6, 1.5],
]); ])
.unwrap();
// compute // compute
let fastpair = FastPair::new(&x); let fastpair = FastPair::new(&x);
assert!(fastpair.is_ok()); assert!(fastpair.is_ok());
+2 -2
View File
@@ -61,7 +61,7 @@ impl<T, D: Distance<T>> LinearKNNSearch<T, D> {
for _ in 0..k { for _ in 0..k {
heap.add(KNNPoint { heap.add(KNNPoint {
distance: std::f64::INFINITY, distance: f64::INFINITY,
index: None, index: None,
}); });
} }
@@ -215,7 +215,7 @@ mod tests {
}; };
let point_inf = KNNPoint { let point_inf = KNNPoint {
distance: std::f64::INFINITY, distance: f64::INFINITY,
index: Some(3), index: Some(3),
}; };
+2 -2
View File
@@ -133,7 +133,7 @@ mod tests {
#[test] #[test]
fn test_add1() { fn test_add1() {
let mut heap = HeapSelection::with_capacity(3); let mut heap = HeapSelection::with_capacity(3);
heap.add(std::f64::INFINITY); heap.add(f64::INFINITY);
heap.add(-5f64); heap.add(-5f64);
heap.add(4f64); heap.add(4f64);
heap.add(-1f64); heap.add(-1f64);
@@ -151,7 +151,7 @@ mod tests {
#[test] #[test]
fn test_add2() { fn test_add2() {
let mut heap = HeapSelection::with_capacity(3); let mut heap = HeapSelection::with_capacity(3);
heap.add(std::f64::INFINITY); heap.add(f64::INFINITY);
heap.add(0.0); heap.add(0.0);
heap.add(8.4852); heap.add(8.4852);
heap.add(5.6568); heap.add(5.6568);
+1
View File
@@ -3,6 +3,7 @@ use num_traits::Num;
pub trait QuickArgSort { pub trait QuickArgSort {
fn quick_argsort_mut(&mut self) -> Vec<usize>; fn quick_argsort_mut(&mut self) -> Vec<usize>;
#[allow(dead_code)]
fn quick_argsort(&self) -> Vec<usize>; fn quick_argsort(&self) -> Vec<usize>;
} }
+5 -4
View File
@@ -315,8 +315,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
} }
} }
while !neighbors.is_empty() { while let Some(neighbor) = neighbors.pop() {
let neighbor = neighbors.pop().unwrap();
let index = neighbor.0; let index = neighbor.0;
if y[index] == outlier { if y[index] == outlier {
@@ -443,7 +442,8 @@ mod tests {
&[2.2, 1.2], &[2.2, 1.2],
&[1.8, 0.8], &[1.8, 0.8],
&[3.0, 5.0], &[3.0, 5.0],
]); ])
.unwrap();
let expected_labels = vec![1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0]; let expected_labels = vec![1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0];
@@ -488,7 +488,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let dbscan = DBSCAN::fit(&x, Default::default()).unwrap(); let dbscan = DBSCAN::fit(&x, Default::default()).unwrap();
+10 -8
View File
@@ -41,7 +41,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! //!
//! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters //! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters
//! let y_hat: Vec<u8> = kmeans.predict(&x).unwrap(); // use the same points for prediction //! let y_hat: Vec<u8> = kmeans.predict(&x).unwrap(); // use the same points for prediction
@@ -96,7 +96,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> PartialEq for KMeans<
return false; return false;
} }
for j in 0..self.centroids[i].len() { for j in 0..self.centroids[i].len() {
if (self.centroids[i][j] - other.centroids[i][j]).abs() > std::f64::EPSILON { if (self.centroids[i][j] - other.centroids[i][j]).abs() > f64::EPSILON {
return false; return false;
} }
} }
@@ -270,7 +270,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
let (n, d) = data.shape(); let (n, d) = data.shape();
let mut distortion = std::f64::MAX; let mut distortion = f64::MAX;
let mut y = KMeans::<TX, TY, X, Y>::kmeans_plus_plus(data, parameters.k, parameters.seed); let mut y = KMeans::<TX, TY, X, Y>::kmeans_plus_plus(data, parameters.k, parameters.seed);
let mut size = vec![0; parameters.k]; let mut size = vec![0; parameters.k];
let mut centroids = vec![vec![0f64; d]; parameters.k]; let mut centroids = vec![vec![0f64; d]; parameters.k];
@@ -331,7 +331,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
let mut row = vec![0f64; x.shape().1]; let mut row = vec![0f64; x.shape().1];
for i in 0..n { for i in 0..n {
let mut min_dist = std::f64::MAX; let mut min_dist = f64::MAX;
let mut best_cluster = 0; let mut best_cluster = 0;
for j in 0..self.k { for j in 0..self.k {
@@ -361,7 +361,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
.cloned() .cloned()
.collect(); .collect();
let mut d = vec![std::f64::MAX; n]; let mut d = vec![f64::MAX; n];
let mut row = vec![TX::zero(); data.shape().1]; let mut row = vec![TX::zero(); data.shape().1];
for j in 1..k { for j in 1..k {
@@ -424,7 +424,7 @@ mod tests {
)] )]
#[test] #[test]
fn invalid_k() { fn invalid_k() {
let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap();
assert!(KMeans::<i32, i32, DenseMatrix<i32>, Vec<i32>>::fit( assert!(KMeans::<i32, i32, DenseMatrix<i32>, Vec<i32>>::fit(
&x, &x,
@@ -492,7 +492,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let kmeans = KMeans::fit(&x, Default::default()).unwrap(); let kmeans = KMeans::fit(&x, Default::default()).unwrap();
@@ -531,7 +532,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let kmeans: KMeans<f32, f32, DenseMatrix<f32>, Vec<f32>> = let kmeans: KMeans<f32, f32, DenseMatrix<f32>, Vec<f32>> =
KMeans::fit(&x, Default::default()).unwrap(); KMeans::fit(&x, Default::default()).unwrap();
+1 -1
View File
@@ -40,7 +40,7 @@ pub fn load_dataset() -> Dataset<f32, u32> {
target: y, target: y,
num_samples, num_samples,
num_features, num_features,
feature_names: vec![ feature_names: [
"Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6", "Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6",
] ]
.iter() .iter()
+3 -5
View File
@@ -25,16 +25,14 @@ pub fn load_dataset() -> Dataset<f32, f32> {
target: y, target: y,
num_samples, num_samples,
num_features, num_features,
feature_names: vec![ feature_names: ["sepal length (cm)",
"sepal length (cm)",
"sepal width (cm)", "sepal width (cm)",
"petal length (cm)", "petal length (cm)",
"petal width (cm)", "petal width (cm)"]
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(), .collect(),
target_names: vec!["setosa", "versicolor", "virginica"] target_names: ["setosa", "versicolor", "virginica"]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(), .collect(),
+2 -2
View File
@@ -36,7 +36,7 @@ pub fn load_dataset() -> Dataset<f32, u32> {
target: y, target: y,
num_samples, num_samples,
num_features, num_features,
feature_names: vec![ feature_names: [
"sepal length (cm)", "sepal length (cm)",
"sepal width (cm)", "sepal width (cm)",
"petal length (cm)", "petal length (cm)",
@@ -45,7 +45,7 @@ pub fn load_dataset() -> Dataset<f32, u32> {
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(), .collect(),
target_names: vec!["setosa", "versicolor", "virginica"] target_names: ["setosa", "versicolor", "virginica"]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(), .collect(),
+13 -7
View File
@@ -35,7 +35,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! //!
//! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2 //! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2
//! //!
@@ -443,6 +443,7 @@ mod tests {
&[2.6, 53.0, 66.0, 10.8], &[2.6, 53.0, 66.0, 10.8],
&[6.8, 161.0, 60.0, 15.6], &[6.8, 161.0, 60.0, 15.6],
]) ])
.unwrap()
} }
#[cfg_attr( #[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")), all(target_arch = "wasm32", not(target_os = "wasi")),
@@ -457,7 +458,8 @@ mod tests {
&[0.9952, 0.0588], &[0.9952, 0.0588],
&[0.0463, 0.9769], &[0.0463, 0.9769],
&[0.0752, 0.2007], &[0.0752, 0.2007],
]); ])
.unwrap();
let pca = PCA::fit(&us_arrests, Default::default()).unwrap(); let pca = PCA::fit(&us_arrests, Default::default()).unwrap();
@@ -500,7 +502,8 @@ mod tests {
-0.974080592182491, -0.974080592182491,
0.0723250196376097, 0.0723250196376097,
], ],
]); ])
.unwrap();
let expected_projection = DenseMatrix::from_2d_array(&[ let expected_projection = DenseMatrix::from_2d_array(&[
&[-64.8022, -11.448, 2.4949, -2.4079], &[-64.8022, -11.448, 2.4949, -2.4079],
@@ -553,7 +556,8 @@ mod tests {
&[91.5446, -22.9529, 0.402, -0.7369], &[91.5446, -22.9529, 0.402, -0.7369],
&[118.1763, 5.5076, 2.7113, -0.205], &[118.1763, 5.5076, 2.7113, -0.205],
&[10.4345, -5.9245, 3.7944, 0.5179], &[10.4345, -5.9245, 3.7944, 0.5179],
]); ])
.unwrap();
let expected_eigenvalues: Vec<f64> = vec![ let expected_eigenvalues: Vec<f64> = vec![
343544.6277001563, 343544.6277001563,
@@ -616,7 +620,8 @@ mod tests {
-0.0881962972508558, -0.0881962972508558,
-0.0096011588898465, -0.0096011588898465,
], ],
]); ])
.unwrap();
let expected_projection = DenseMatrix::from_2d_array(&[ let expected_projection = DenseMatrix::from_2d_array(&[
&[0.9856, -1.1334, 0.4443, -0.1563], &[0.9856, -1.1334, 0.4443, -0.1563],
@@ -669,7 +674,8 @@ mod tests {
&[-2.1086, -1.4248, -0.1048, -0.1319], &[-2.1086, -1.4248, -0.1048, -0.1319],
&[-2.0797, 0.6113, 0.1389, -0.1841], &[-2.0797, 0.6113, 0.1389, -0.1841],
&[-0.6294, -0.321, 0.2407, 0.1667], &[-0.6294, -0.321, 0.2407, 0.1667],
]); ])
.unwrap();
let expected_eigenvalues: Vec<f64> = vec![ let expected_eigenvalues: Vec<f64> = vec![
2.480241579149493, 2.480241579149493,
@@ -732,7 +738,7 @@ mod tests {
// &[4.9, 2.4, 3.3, 1.0], // &[4.9, 2.4, 3.3, 1.0],
// &[6.6, 2.9, 4.6, 1.3], // &[6.6, 2.9, 4.6, 1.3],
// &[5.2, 2.7, 3.9, 1.4], // &[5.2, 2.7, 3.9, 1.4],
// ]); // ]).unwrap();
// let pca = PCA::fit(&iris, Default::default()).unwrap(); // let pca = PCA::fit(&iris, Default::default()).unwrap();
+6 -4
View File
@@ -32,7 +32,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! //!
//! let svd = SVD::fit(&iris, SVDParameters::default(). //! let svd = SVD::fit(&iris, SVDParameters::default().
//! with_n_components(2)).unwrap(); // Reduce number of features to 2 //! with_n_components(2)).unwrap(); // Reduce number of features to 2
@@ -292,7 +292,8 @@ mod tests {
&[5.7, 81.0, 39.0, 9.3], &[5.7, 81.0, 39.0, 9.3],
&[2.6, 53.0, 66.0, 10.8], &[2.6, 53.0, 66.0, 10.8],
&[6.8, 161.0, 60.0, 15.6], &[6.8, 161.0, 60.0, 15.6],
]); ])
.unwrap();
let expected = DenseMatrix::from_2d_array(&[ let expected = DenseMatrix::from_2d_array(&[
&[243.54655757, -18.76673788], &[243.54655757, -18.76673788],
@@ -300,7 +301,8 @@ mod tests {
&[305.93972467, -15.39087376], &[305.93972467, -15.39087376],
&[197.28420365, -11.66808306], &[197.28420365, -11.66808306],
&[293.43187394, 1.91163633], &[293.43187394, 1.91163633],
]); ])
.unwrap();
let svd = SVD::fit(&x, Default::default()).unwrap(); let svd = SVD::fit(&x, Default::default()).unwrap();
let x_transformed = svd.transform(&x).unwrap(); let x_transformed = svd.transform(&x).unwrap();
@@ -341,7 +343,7 @@ mod tests {
// &[4.9, 2.4, 3.3, 1.0], // &[4.9, 2.4, 3.3, 1.0],
// &[6.6, 2.9, 4.6, 1.3], // &[6.6, 2.9, 4.6, 1.3],
// &[5.2, 2.7, 3.9, 1.4], // &[5.2, 2.7, 3.9, 1.4],
// ]); // ]).unwrap();
// let svd = SVD::fit(&iris, Default::default()).unwrap(); // let svd = SVD::fit(&iris, Default::default()).unwrap();
+7 -4
View File
@@ -33,7 +33,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y = vec![ //! let y = vec![
//! 0, 0, 0, 0, 0, 0, 0, 0, //! 0, 0, 0, 0, 0, 0, 0, 0,
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -660,7 +660,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
let classifier = RandomForestClassifier::fit( let classifier = RandomForestClassifier::fit(
@@ -733,7 +734,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
let classifier = RandomForestClassifier::fit( let classifier = RandomForestClassifier::fit(
@@ -786,7 +788,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap(); let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap();
+7 -4
View File
@@ -29,7 +29,7 @@
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]); //! ]).unwrap();
//! let y = vec![ //! let y = vec![
//! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, //! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2,
//! 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9 //! 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9
@@ -574,7 +574,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y = vec![ let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,
@@ -648,7 +649,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y = vec![ let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,
@@ -702,7 +704,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y = vec![ let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,
+19
View File
@@ -32,6 +32,8 @@ pub enum FailedError {
SolutionFailed, SolutionFailed,
/// Error in input parameters /// Error in input parameters
ParametersError, ParametersError,
/// Invalid state error (should never happen)
InvalidStateError,
} }
impl Failed { impl Failed {
@@ -64,6 +66,22 @@ impl Failed {
} }
} }
/// new instance of `FailedError::ParametersError`
pub fn input(msg: &str) -> Self {
Failed {
err: FailedError::ParametersError,
msg: msg.to_string(),
}
}
/// new instance of `FailedError::InvalidStateError`
pub fn invalid_state(msg: &str) -> Self {
Failed {
err: FailedError::InvalidStateError,
msg: msg.to_string(),
}
}
/// new instance of `err` /// new instance of `err`
pub fn because(err: FailedError, msg: &str) -> Self { pub fn because(err: FailedError, msg: &str) -> Self {
Failed { Failed {
@@ -97,6 +115,7 @@ impl fmt::Display for FailedError {
FailedError::DecompositionFailed => "Decomposition failed", FailedError::DecompositionFailed => "Decomposition failed",
FailedError::SolutionFailed => "Can't find solution", FailedError::SolutionFailed => "Can't find solution",
FailedError::ParametersError => "Error in input, check parameters", FailedError::ParametersError => "Error in input, check parameters",
FailedError::InvalidStateError => "Invalid state, this should never happen", // useful in development phase of lib
}; };
write!(f, "{failed_err_str}") write!(f, "{failed_err_str}")
} }
+1 -2
View File
@@ -7,7 +7,6 @@
clippy::approx_constant clippy::approx_constant
)] )]
#![warn(missing_docs)] #![warn(missing_docs)]
#![warn(rustdoc::missing_doc_code_examples)]
//! # smartcore //! # smartcore
//! //!
@@ -64,7 +63,7 @@
//! &[3., 4.], //! &[3., 4.],
//! &[5., 6.], //! &[5., 6.],
//! &[7., 8.], //! &[7., 8.],
//! &[9., 10.]]); //! &[9., 10.]]).unwrap();
//! // Our classes are defined as a vector //! // Our classes are defined as a vector
//! let y = vec![2, 2, 2, 3, 3]; //! let y = vec![2, 2, 2, 3, 3];
//! //!
+200 -164
View File
File diff suppressed because it is too large Load Diff
+226 -98
View File
@@ -19,6 +19,8 @@ use crate::linalg::traits::svd::SVDDecomposable;
use crate::numbers::basenum::Number; use crate::numbers::basenum::Number;
use crate::numbers::realnum::RealNumber; use crate::numbers::realnum::RealNumber;
use crate::error::Failed;
/// Dense matrix /// Dense matrix
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -50,26 +52,26 @@ pub struct DenseMatrixMutView<'a, T: Debug + Display + Copy + Sized> {
} }
impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> { impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
fn new(m: &'a DenseMatrix<T>, rows: Range<usize>, cols: Range<usize>) -> Self { fn new(
let (start, end, stride) = if m.column_major { m: &'a DenseMatrix<T>,
( vrows: Range<usize>,
rows.start + cols.start * m.nrows, vcols: Range<usize>,
rows.end + (cols.end - 1) * m.nrows, ) -> Result<Self, Failed> {
m.nrows, if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) {
) Err(Failed::input(
"The specified view is outside of the matrix range",
))
} else { } else {
( let (start, end, stride) =
rows.start * m.ncols + cols.start, m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major);
(rows.end - 1) * m.ncols + cols.end,
m.ncols, Ok(DenseMatrixView {
) values: &m.values[start..end],
}; stride,
DenseMatrixView { nrows: vrows.end - vrows.start,
values: &m.values[start..end], ncols: vcols.end - vcols.start,
stride, column_major: m.column_major,
nrows: rows.end - rows.start, })
ncols: cols.end - cols.start,
column_major: m.column_major,
} }
} }
@@ -89,7 +91,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, T> { impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!( writeln!(
f, f,
@@ -102,26 +104,26 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a,
} }
impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
fn new(m: &'a mut DenseMatrix<T>, rows: Range<usize>, cols: Range<usize>) -> Self { fn new(
let (start, end, stride) = if m.column_major { m: &'a mut DenseMatrix<T>,
( vrows: Range<usize>,
rows.start + cols.start * m.nrows, vcols: Range<usize>,
rows.end + (cols.end - 1) * m.nrows, ) -> Result<Self, Failed> {
m.nrows, if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) {
) Err(Failed::input(
"The specified view is outside of the matrix range",
))
} else { } else {
( let (start, end, stride) =
rows.start * m.ncols + cols.start, m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major);
(rows.end - 1) * m.ncols + cols.end,
m.ncols, Ok(DenseMatrixMutView {
) values: &mut m.values[start..end],
}; stride,
DenseMatrixMutView { nrows: vrows.end - vrows.start,
values: &mut m.values[start..end], ncols: vcols.end - vcols.start,
stride, column_major: m.column_major,
nrows: rows.end - rows.start, })
ncols: cols.end - cols.start,
column_major: m.column_major,
} }
} }
@@ -140,7 +142,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
} }
} }
fn iter_mut<'b>(&'b mut self, axis: u8) -> Box<dyn Iterator<Item = &mut T> + 'b> { fn iter_mut<'b>(&'b mut self, axis: u8) -> Box<dyn Iterator<Item = &'b mut T> + 'b> {
let column_major = self.column_major; let column_major = self.column_major;
let stride = self.stride; let stride = self.stride;
let ptr = self.values.as_mut_ptr(); let ptr = self.values.as_mut_ptr();
@@ -167,7 +169,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'a, T> { impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!( writeln!(
f, f,
@@ -182,42 +184,102 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<
impl<T: Debug + Display + Copy + Sized> DenseMatrix<T> { impl<T: Debug + Display + Copy + Sized> DenseMatrix<T> {
/// Create new instance of `DenseMatrix` without copying data. /// Create new instance of `DenseMatrix` without copying data.
/// `values` should be in column-major order. /// `values` should be in column-major order.
pub fn new(nrows: usize, ncols: usize, values: Vec<T>, column_major: bool) -> Self { pub fn new(
DenseMatrix { nrows: usize,
ncols, ncols: usize,
nrows, values: Vec<T>,
values, column_major: bool,
column_major, ) -> Result<Self, Failed> {
let data_len = values.len();
if nrows * ncols != values.len() {
Err(Failed::input(&format!(
"The specified shape: (cols: {ncols}, rows: {nrows}) does not align with data len: {data_len}"
)))
} else {
Ok(DenseMatrix {
ncols,
nrows,
values,
column_major,
})
} }
} }
/// New instance of `DenseMatrix` from 2d array. /// New instance of `DenseMatrix` from 2d array.
pub fn from_2d_array(values: &[&[T]]) -> Self { pub fn from_2d_array(values: &[&[T]]) -> Result<Self, Failed> {
DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect()) DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect())
} }
/// New instance of `DenseMatrix` from 2d vector. /// New instance of `DenseMatrix` from 2d vector.
pub fn from_2d_vec(values: &Vec<Vec<T>>) -> Self { #[allow(clippy::ptr_arg)]
let nrows = values.len(); pub fn from_2d_vec(values: &Vec<Vec<T>>) -> Result<Self, Failed> {
let ncols = values if values.is_empty() || values[0].is_empty() {
.first() Err(Failed::input(
.unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector")) "The 2d vec provided is empty; cannot instantiate the matrix",
.len(); ))
let mut m_values = Vec::with_capacity(nrows * ncols); } else {
let nrows = values.len();
let ncols = values
.first()
.unwrap_or_else(|| {
panic!("Invalid state: Cannot create 2d matrix from an empty vector")
})
.len();
let mut m_values = Vec::with_capacity(nrows * ncols);
for c in 0..ncols { for c in 0..ncols {
for r in values.iter().take(nrows) { for r in values.iter().take(nrows) {
m_values.push(r[c]) m_values.push(r[c])
}
} }
}
DenseMatrix::new(nrows, ncols, m_values, true) DenseMatrix::new(nrows, ncols, m_values, true)
}
} }
/// Iterate over values of matrix /// Iterate over values of matrix
pub fn iter(&self) -> Iter<'_, T> { pub fn iter(&self) -> Iter<'_, T> {
self.values.iter() self.values.iter()
} }
/// Check whether the requested view falls outside of the matrix bounds.
///
/// NOTE: despite its name, this returns `true` when the view is **invalid**
/// and `false` when it is usable; callers turn a `true` result into an error.
///
/// A view is accepted when, for both axes, `start <= end` and `end` does not
/// exceed the matrix dimension (`n_rows` / `n_cols`). Empty ranges such as
/// `3..3` on a 3-row matrix are accepted.
///
/// Reversed ranges (e.g. `2..1`) are rejected even when both bounds are inside
/// the matrix: the view dimensions are later computed as `end - start`, which
/// would underflow for such a range.
fn is_valid_view(
    &self,
    n_rows: usize,
    n_cols: usize,
    vrows: &Range<usize>,
    vcols: &Range<usize>,
) -> bool {
    // `start <= end && end <= n` also implies `start <= n`, so the original
    // bounds checks on `start` are still covered.
    let rows_in_bounds = vrows.start <= vrows.end && vrows.end <= n_rows;
    let cols_in_bounds = vcols.start <= vcols.end && vcols.end <= n_cols;
    !(rows_in_bounds && cols_in_bounds)
}
/// Translate a (rows, cols) view request into offsets of the flat value buffer.
///
/// Returns `(start, end, stride)`: `start..end` is the slice of the backing
/// storage that the view covers, and `stride` is the distance between
/// consecutive columns (column-major layout) or consecutive rows (row-major
/// layout).
///
/// The computation uses `end - 1` on the outer axis, so it underflows when
/// that range ends at 0 (`vcols.end == 0` for column-major, `vrows.end == 0`
/// for row-major).
fn stride_range(
    &self,
    n_rows: usize,
    n_cols: usize,
    vrows: &Range<usize>,
    vcols: &Range<usize>,
    column_major: bool,
) -> (usize, usize, usize) {
    match column_major {
        // Columns are contiguous: a column step moves `n_rows` elements.
        true => {
            let first = vrows.start + vcols.start * n_rows;
            let past_last = vrows.end + (vcols.end - 1) * n_rows;
            (first, past_last, n_rows)
        }
        // Rows are contiguous: a row step moves `n_cols` elements.
        false => {
            let first = vrows.start * n_cols + vcols.start;
            let past_last = (vrows.end - 1) * n_cols + vcols.end;
            (first, past_last, n_cols)
        }
    }
}
} }
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrix<T> { impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrix<T> {
@@ -304,6 +366,7 @@ where
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrix<T> { impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrix<T> {
fn get(&self, pos: (usize, usize)) -> &T { fn get(&self, pos: (usize, usize)) -> &T {
let (row, col) = pos; let (row, col) = pos;
if row >= self.nrows || col >= self.ncols { if row >= self.nrows || col >= self.ncols {
panic!( panic!(
"Invalid index ({},{}) for {}x{} matrix", "Invalid index ({},{}) for {}x{} matrix",
@@ -383,15 +446,15 @@ impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrix<T> {}
impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> { impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
fn get_row<'a>(&'a self, row: usize) -> Box<dyn ArrayView1<T> + 'a> { fn get_row<'a>(&'a self, row: usize) -> Box<dyn ArrayView1<T> + 'a> {
Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols)) Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols).unwrap())
} }
fn get_col<'a>(&'a self, col: usize) -> Box<dyn ArrayView1<T> + 'a> { fn get_col<'a>(&'a self, col: usize) -> Box<dyn ArrayView1<T> + 'a> {
Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1)) Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1).unwrap())
} }
fn slice<'a>(&'a self, rows: Range<usize>, cols: Range<usize>) -> Box<dyn ArrayView2<T> + 'a> { fn slice<'a>(&'a self, rows: Range<usize>, cols: Range<usize>) -> Box<dyn ArrayView2<T> + 'a> {
Box::new(DenseMatrixView::new(self, rows, cols)) Box::new(DenseMatrixView::new(self, rows, cols).unwrap())
} }
fn slice_mut<'a>( fn slice_mut<'a>(
@@ -402,15 +465,17 @@ impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
where where
Self: Sized, Self: Sized,
{ {
Box::new(DenseMatrixMutView::new(self, rows, cols)) Box::new(DenseMatrixMutView::new(self, rows, cols).unwrap())
} }
// private function so for now assume infallible
fn fill(nrows: usize, ncols: usize, value: T) -> Self { fn fill(nrows: usize, ncols: usize, value: T) -> Self {
DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true) DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true).unwrap()
} }
// private function so for now assume infallible
fn from_iterator<I: Iterator<Item = T>>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self { fn from_iterator<I: Iterator<Item = T>>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self {
DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0) DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0).unwrap()
} }
fn transpose(&self) -> Self { fn transpose(&self) -> Self {
@@ -428,12 +493,12 @@ impl<T: Number + RealNumber> EVDDecomposable<T> for DenseMatrix<T> {}
impl<T: Number + RealNumber> LUDecomposable<T> for DenseMatrix<T> {} impl<T: Number + RealNumber> LUDecomposable<T> for DenseMatrix<T> {}
impl<T: Number + RealNumber> SVDDecomposable<T> for DenseMatrix<T> {} impl<T: Number + RealNumber> SVDDecomposable<T> for DenseMatrix<T> {}
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'a, T> { impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'_, T> {
fn get(&self, pos: (usize, usize)) -> &T { fn get(&self, pos: (usize, usize)) -> &T {
if self.column_major { if self.column_major {
&self.values[(pos.0 + pos.1 * self.stride)] &self.values[pos.0 + pos.1 * self.stride]
} else { } else {
&self.values[(pos.0 * self.stride + pos.1)] &self.values[pos.0 * self.stride + pos.1]
} }
} }
@@ -450,7 +515,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMa
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<'a, T> { impl<T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<'_, T> {
fn get(&self, i: usize) -> &T { fn get(&self, i: usize) -> &T {
if self.nrows == 1 { if self.nrows == 1 {
if self.column_major { if self.column_major {
@@ -488,16 +553,16 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixView<'a, T> {} impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixView<'_, T> {}
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'a, T> {} impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'_, T> {}
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'a, T> { impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'_, T> {
fn get(&self, pos: (usize, usize)) -> &T { fn get(&self, pos: (usize, usize)) -> &T {
if self.column_major { if self.column_major {
&self.values[(pos.0 + pos.1 * self.stride)] &self.values[pos.0 + pos.1 * self.stride]
} else { } else {
&self.values[(pos.0 * self.stride + pos.1)] &self.values[pos.0 * self.stride + pos.1]
} }
} }
@@ -514,14 +579,12 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMa
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> impl<T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> for DenseMatrixMutView<'_, T> {
for DenseMatrixMutView<'a, T>
{
fn set(&mut self, pos: (usize, usize), x: T) { fn set(&mut self, pos: (usize, usize), x: T) {
if self.column_major { if self.column_major {
self.values[(pos.0 + pos.1 * self.stride)] = x; self.values[pos.0 + pos.1 * self.stride] = x;
} else { } else {
self.values[(pos.0 * self.stride + pos.1)] = x; self.values[pos.0 * self.stride + pos.1] = x;
} }
} }
@@ -530,29 +593,89 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrixMutView<'a, T> {} impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrixMutView<'_, T> {}
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixMutView<'a, T> {} impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixMutView<'_, T> {}
impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {} impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {}
impl<T: RealNumber> MatrixPreprocessing<T> for DenseMatrix<T> {} impl<T: RealNumber> MatrixPreprocessing<T> for DenseMatrix<T> {}
#[cfg(test)] #[cfg(test)]
#[warn(clippy::reversed_empty_ranges)]
mod tests { mod tests {
use super::*; use super::*;
use approx::relative_eq; use approx::relative_eq;
#[test] #[test]
fn test_display() { fn test_instantiate_from_2d() {
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
assert!(x.is_ok());
}
#[test]
fn test_instantiate_from_2d_empty() {
    // A single row that holds no columns must be refused.
    let no_columns: &[&[f64]] = &[&[]];
    assert!(DenseMatrix::from_2d_array(no_columns).is_err());
}
#[test]
fn test_instantiate_from_2d_empty2() {
    // Two rows, both without columns, must be refused as well.
    let no_columns: &[&[f64]] = &[&[], &[]];
    assert!(DenseMatrix::from_2d_array(no_columns).is_err());
}
#[test]
fn test_instantiate_ok_view1() {
    // A 2x2 window anchored at the top-left corner of a 3x3 matrix.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 0..2, 0..2).is_ok());
}
#[test]
fn test_instantiate_ok_view2() {
    // A view spanning the whole 3x3 matrix.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 0..3, 0..3).is_ok());
}
#[test]
fn test_instantiate_ok_view3() {
    // Only the last row, across every column.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 2..3, 0..3).is_ok());
}
#[test]
fn test_instantiate_ok_view4() {
    // An empty row range that starts exactly at the row count is still accepted.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 3..3, 0..3).is_ok());
}
#[test]
fn test_instantiate_err_view1() {
    // The row range ends one past the last row of the 3x3 matrix.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 3..4, 0..3).is_err());
}
#[test]
fn test_instantiate_err_view2() {
    // The column range ends one past the last column of the 3x3 matrix.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 0..3, 3..4).is_err());
}
#[test]
fn test_instantiate_err_view3() {
    // A reversed column range whose start lies beyond the last column.
    let matrix =
        DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
    assert!(DenseMatrixView::new(&matrix, 0..3, 4..3).is_err());
}
#[test]
fn test_display() {
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
println!("{}", &x); println!("{}", &x);
} }
#[test] #[test]
fn test_get_row_col() { fn test_get_row_col() {
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
assert_eq!(15.0, x.get_col(1).sum()); assert_eq!(15.0, x.get_col(1).sum());
assert_eq!(15.0, x.get_row(1).sum()); assert_eq!(15.0, x.get_row(1).sum());
@@ -561,7 +684,7 @@ mod tests {
#[test] #[test]
fn test_row_major() { fn test_row_major() {
let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false); let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false).unwrap();
assert_eq!(5, *x.get_col(1).get(1)); assert_eq!(5, *x.get_col(1).get(1));
assert_eq!(7, x.get_col(1).sum()); assert_eq!(7, x.get_col(1).sum());
@@ -575,7 +698,8 @@ mod tests {
#[test] #[test]
fn test_get_slice() { fn test_get_slice() {
let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]); let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]])
.unwrap();
assert_eq!( assert_eq!(
vec![4, 5, 6], vec![4, 5, 6],
@@ -589,7 +713,7 @@ mod tests {
#[test] #[test]
fn test_iter_mut() { fn test_iter_mut() {
let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap();
assert_eq!(vec![1, 4, 7, 2, 5, 8, 3, 6, 9], x.values); assert_eq!(vec![1, 4, 7, 2, 5, 8, 3, 6, 9], x.values);
// add +2 to some elements // add +2 to some elements
@@ -625,7 +749,8 @@ mod tests {
#[test] #[test]
fn test_str_array() { fn test_str_array() {
let mut x = let mut x =
DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]]); DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]])
.unwrap();
assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values); assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values);
x.iterator_mut(0).for_each(|v| *v = "str"); x.iterator_mut(0).for_each(|v| *v = "str");
@@ -637,7 +762,7 @@ mod tests {
#[test] #[test]
fn test_transpose() { fn test_transpose() {
let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]); let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]).unwrap();
assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values); assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values);
assert!(x.column_major); assert!(x.column_major);
@@ -650,7 +775,7 @@ mod tests {
#[test] #[test]
fn test_from_iterator() { fn test_from_iterator() {
let data = vec![1, 2, 3, 4, 5, 6]; let data = [1, 2, 3, 4, 5, 6];
let m = DenseMatrix::from_iterator(data.iter(), 2, 3, 0); let m = DenseMatrix::from_iterator(data.iter(), 2, 3, 0);
@@ -664,8 +789,8 @@ mod tests {
#[test] #[test]
fn test_take() { fn test_take() {
let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap();
let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap();
println!("{a}"); println!("{a}");
// take column 0 and 2 // take column 0 and 2
@@ -677,7 +802,7 @@ mod tests {
#[test] #[test]
fn test_mut() { fn test_mut() {
let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]); let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]).unwrap();
let a = a.abs(); let a = a.abs();
assert_eq!(vec![1.3, 4.0, 2.1, 5.3, 3.4, 6.1], a.values); assert_eq!(vec![1.3, 4.0, 2.1, 5.3, 3.4, 6.1], a.values);
@@ -688,7 +813,8 @@ mod tests {
#[test] #[test]
fn test_reshape() { fn test_reshape() {
let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]); let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]])
.unwrap();
let a = a.reshape(2, 6, 0); let a = a.reshape(2, 6, 0);
assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values); assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values);
@@ -701,13 +827,15 @@ mod tests {
#[test] #[test]
fn test_eq() { fn test_eq() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).unwrap();
let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
let c = DenseMatrix::from_2d_array(&[ let c = DenseMatrix::from_2d_array(&[
&[1. + f32::EPSILON, 2., 3.], &[1. + f32::EPSILON, 2., 3.],
&[4., 5., 6. + f32::EPSILON], &[4., 5., 6. + f32::EPSILON],
]); ])
let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]); .unwrap();
let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]])
.unwrap();
assert!(!relative_eq!(a, b)); assert!(!relative_eq!(a, b));
assert!(!relative_eq!(a, d)); assert!(!relative_eq!(a, d));
+28 -7
View File
@@ -15,6 +15,25 @@ pub struct VecView<'a, T: Debug + Display + Copy + Sized> {
ptr: &'a [T], ptr: &'a [T],
} }
/// One-dimensional `Array` access for borrowed slices.
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for &[T] {
    /// Returns a reference to the element at index `i`.
    ///
    /// Panics when `i` is out of bounds (plain slice indexing).
    fn get(&self, i: usize) -> &T {
        &self[i]
    }

    /// Number of elements in the slice.
    fn shape(&self) -> usize {
        self.len()
    }

    /// Returns `true` when the slice holds no elements.
    fn is_empty(&self) -> bool {
        // Fixed: this used to return `self.len() > 0`, i.e. `true` for
        // NON-empty slices. The inherent slice method is named explicitly so
        // the call cannot resolve back to this trait method and recurse.
        <[T]>::is_empty(self)
    }

    /// Iterates over the elements in order.
    ///
    /// Panics unless `axis == 0`, the only axis of a one-dimensional array.
    fn iterator<'b>(&'b self, axis: u8) -> Box<dyn Iterator<Item = &'b T> + 'b> {
        assert!(axis == 0, "For one dimensional array `axis` should == 0");
        Box::new(self.iter())
    }
}
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> { impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> {
fn get(&self, i: usize) -> &T { fn get(&self, i: usize) -> &T {
&self[i] &self[i]
@@ -36,6 +55,7 @@ impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> {
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> { impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> {
fn set(&mut self, i: usize, x: T) { fn set(&mut self, i: usize, x: T) {
// NOTE: this panics in case of out of bounds index
self[i] = x self[i] = x
} }
@@ -46,6 +66,7 @@ impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> {
} }
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for Vec<T> {} impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for Vec<T> {}
// Marker impl: opts `&[T]` into `ArrayView1`. The body is empty, so every
// method comes from the trait's provided implementations (no overrides here).
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for &[T] {}
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for Vec<T> {} impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for Vec<T> {}
@@ -98,7 +119,7 @@ impl<T: Debug + Display + Copy + Sized> Array1<T> for Vec<T> {
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'a, T> { impl<T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'_, T> {
fn get(&self, i: usize) -> &T { fn get(&self, i: usize) -> &T {
&self.ptr[i] &self.ptr[i]
} }
@@ -117,7 +138,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'a, T
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'a, T> { impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'_, T> {
fn set(&mut self, i: usize, x: T) { fn set(&mut self, i: usize, x: T) {
self.ptr[i] = x; self.ptr[i] = x;
} }
@@ -128,10 +149,10 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'a
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for VecMutView<'a, T> {} impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for VecMutView<'_, T> {}
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1<T> for VecMutView<'a, T> {} impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for VecMutView<'_, T> {}
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'a, T> { impl<T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'_, T> {
fn get(&self, i: usize) -> &T { fn get(&self, i: usize) -> &T {
&self.ptr[i] &self.ptr[i]
} }
@@ -150,7 +171,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'a, T> {
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for VecView<'a, T> {} impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for VecView<'_, T> {}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
@@ -191,7 +212,7 @@ mod tests {
#[test] #[test]
fn test_len() { fn test_len() {
let x = vec![1, 2, 3]; let x = [1, 2, 3];
assert_eq!(3, x.len()); assert_eq!(3, x.len());
} }
+6 -10
View File
@@ -68,7 +68,7 @@ impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayBase<OwnedRepr<T>
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayBase<OwnedRepr<T>, Ix2> {} impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayView<'a, T, Ix2> { impl<T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayView<'_, T, Ix2> {
fn get(&self, pos: (usize, usize)) -> &T { fn get(&self, pos: (usize, usize)) -> &T {
&self[[pos.0, pos.1]] &self[[pos.0, pos.1]]
} }
@@ -144,11 +144,9 @@ impl<T: Number + RealNumber> EVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2>
impl<T: Number + RealNumber> LUDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {} impl<T: Number + RealNumber> LUDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
impl<T: Number + RealNumber> SVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {} impl<T: Number + RealNumber> SVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayView<'a, T, Ix2> {} impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayView<'_, T, Ix2> {}
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> impl<T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayViewMut<'_, T, Ix2> {
for ArrayViewMut<'a, T, Ix2>
{
fn get(&self, pos: (usize, usize)) -> &T { fn get(&self, pos: (usize, usize)) -> &T {
&self[[pos.0, pos.1]] &self[[pos.0, pos.1]]
} }
@@ -175,9 +173,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)>
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> impl<T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> for ArrayViewMut<'_, T, Ix2> {
for ArrayViewMut<'a, T, Ix2>
{
fn set(&mut self, pos: (usize, usize), x: T) { fn set(&mut self, pos: (usize, usize), x: T) {
self[[pos.0, pos.1]] = x self[[pos.0, pos.1]] = x
} }
@@ -195,9 +191,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayViewMut<'a, T, Ix2> {} impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayViewMut<'_, T, Ix2> {}
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayViewMut<'a, T, Ix2> {} impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayViewMut<'_, T, Ix2> {}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
+6 -6
View File
@@ -41,7 +41,7 @@ impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayBase<OwnedRepr<T>
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayBase<OwnedRepr<T>, Ix1> {} impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayBase<OwnedRepr<T>, Ix1> {}
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'a, T, Ix1> { impl<T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'_, T, Ix1> {
fn get(&self, i: usize) -> &T { fn get(&self, i: usize) -> &T {
&self[i] &self[i]
} }
@@ -60,9 +60,9 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'a
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayView<'a, T, Ix1> {} impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayView<'_, T, Ix1> {}
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut<'a, T, Ix1> { impl<T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut<'_, T, Ix1> {
fn get(&self, i: usize) -> &T { fn get(&self, i: usize) -> &T {
&self[i] &self[i]
} }
@@ -81,7 +81,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<'a, T, Ix1> { impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<'_, T, Ix1> {
fn set(&mut self, i: usize, x: T) { fn set(&mut self, i: usize, x: T) {
self[i] = x; self[i] = x;
} }
@@ -92,8 +92,8 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<
} }
} }
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayViewMut<'a, T, Ix1> {} impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayViewMut<'_, T, Ix1> {}
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayViewMut<'a, T, Ix1> {} impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayViewMut<'_, T, Ix1> {}
impl<T: Debug + Display + Copy + Sized> Array1<T> for ArrayBase<OwnedRepr<T>, Ix1> { impl<T: Debug + Display + Copy + Sized> Array1<T> for ArrayBase<OwnedRepr<T>, Ix1> {
fn slice<'a>(&'a self, range: Range<usize>) -> Box<dyn ArrayView1<T> + 'a> { fn slice<'a>(&'a self, range: Range<usize>) -> Box<dyn ArrayView1<T> + 'a> {
+11 -7
View File
@@ -15,7 +15,7 @@
//! &[25., 15., -5.], //! &[25., 15., -5.],
//! &[15., 18., 0.], //! &[15., 18., 0.],
//! &[-5., 0., 11.] //! &[-5., 0., 11.]
//! ]); //! ]).unwrap();
//! //!
//! let cholesky = A.cholesky().unwrap(); //! let cholesky = A.cholesky().unwrap();
//! let lower_triangular: DenseMatrix<f64> = cholesky.L(); //! let lower_triangular: DenseMatrix<f64> = cholesky.L();
@@ -175,11 +175,14 @@ mod tests {
)] )]
#[test] #[test]
fn cholesky_decompose() { fn cholesky_decompose() {
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]])
.unwrap();
let l = let l =
DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]]); DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]])
.unwrap();
let u = let u =
DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]]); DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]])
.unwrap();
let cholesky = a.cholesky().unwrap(); let cholesky = a.cholesky().unwrap();
assert!(relative_eq!(cholesky.L().abs(), l.abs(), epsilon = 1e-4)); assert!(relative_eq!(cholesky.L().abs(), l.abs(), epsilon = 1e-4));
@@ -197,9 +200,10 @@ mod tests {
)] )]
#[test] #[test]
fn cholesky_solve_mut() { fn cholesky_solve_mut() {
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]])
let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]); .unwrap();
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]); let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]).unwrap();
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap();
let cholesky = a.cholesky().unwrap(); let cholesky = a.cholesky().unwrap();
+15 -9
View File
@@ -19,7 +19,7 @@
//! &[0.9000, 0.4000, 0.7000], //! &[0.9000, 0.4000, 0.7000],
//! &[0.4000, 0.5000, 0.3000], //! &[0.4000, 0.5000, 0.3000],
//! &[0.7000, 0.3000, 0.8000], //! &[0.7000, 0.3000, 0.8000],
//! ]); //! ]).unwrap();
//! //!
//! let evd = A.evd(true).unwrap(); //! let evd = A.evd(true).unwrap();
//! let eigenvectors: DenseMatrix<f64> = evd.V; //! let eigenvectors: DenseMatrix<f64> = evd.V;
@@ -820,7 +820,8 @@ mod tests {
&[0.9000, 0.4000, 0.7000], &[0.9000, 0.4000, 0.7000],
&[0.4000, 0.5000, 0.3000], &[0.4000, 0.5000, 0.3000],
&[0.7000, 0.3000, 0.8000], &[0.7000, 0.3000, 0.8000],
]); ])
.unwrap();
let eigen_values: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834]; let eigen_values: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834];
@@ -828,7 +829,8 @@ mod tests {
&[0.6881997, -0.07121225, 0.7220180], &[0.6881997, -0.07121225, 0.7220180],
&[0.3700456, 0.89044952, -0.2648886], &[0.3700456, 0.89044952, -0.2648886],
&[0.6240573, -0.44947578, -0.6391588], &[0.6240573, -0.44947578, -0.6391588],
]); ])
.unwrap();
let evd = A.evd(true).unwrap(); let evd = A.evd(true).unwrap();
@@ -839,7 +841,7 @@ mod tests {
)); ));
for (i, eigen_values_i) in eigen_values.iter().enumerate() { for (i, eigen_values_i) in eigen_values.iter().enumerate() {
assert!((eigen_values_i - evd.d[i]).abs() < 1e-4); assert!((eigen_values_i - evd.d[i]).abs() < 1e-4);
assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON); assert!((0f64 - evd.e[i]).abs() < f64::EPSILON);
} }
} }
#[cfg_attr( #[cfg_attr(
@@ -852,7 +854,8 @@ mod tests {
&[0.9000, 0.4000, 0.7000], &[0.9000, 0.4000, 0.7000],
&[0.4000, 0.5000, 0.3000], &[0.4000, 0.5000, 0.3000],
&[0.8000, 0.3000, 0.8000], &[0.8000, 0.3000, 0.8000],
]); ])
.unwrap();
let eigen_values: Vec<f64> = vec![1.79171122, 0.31908143, 0.08920735]; let eigen_values: Vec<f64> = vec![1.79171122, 0.31908143, 0.08920735];
@@ -860,7 +863,8 @@ mod tests {
&[0.7178958, 0.05322098, 0.6812010], &[0.7178958, 0.05322098, 0.6812010],
&[0.3837711, -0.84702111, -0.1494582], &[0.3837711, -0.84702111, -0.1494582],
&[0.6952105, 0.43984484, -0.7036135], &[0.6952105, 0.43984484, -0.7036135],
]); ])
.unwrap();
let evd = A.evd(false).unwrap(); let evd = A.evd(false).unwrap();
@@ -871,7 +875,7 @@ mod tests {
)); ));
for (i, eigen_values_i) in eigen_values.iter().enumerate() { for (i, eigen_values_i) in eigen_values.iter().enumerate() {
assert!((eigen_values_i - evd.d[i]).abs() < 1e-4); assert!((eigen_values_i - evd.d[i]).abs() < 1e-4);
assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON); assert!((0f64 - evd.e[i]).abs() < f64::EPSILON);
} }
} }
#[cfg_attr( #[cfg_attr(
@@ -885,7 +889,8 @@ mod tests {
&[4.0, -1.0, 1.0, 1.0], &[4.0, -1.0, 1.0, 1.0],
&[1.0, 1.0, 3.0, -2.0], &[1.0, 1.0, 3.0, -2.0],
&[1.0, 1.0, 4.0, -1.0], &[1.0, 1.0, 4.0, -1.0],
]); ])
.unwrap();
let eigen_values_d: Vec<f64> = vec![0.0, 2.0, 2.0, 0.0]; let eigen_values_d: Vec<f64> = vec![0.0, 2.0, 2.0, 0.0];
let eigen_values_e: Vec<f64> = vec![2.2361, 0.9999, -0.9999, -2.2361]; let eigen_values_e: Vec<f64> = vec![2.2361, 0.9999, -0.9999, -2.2361];
@@ -895,7 +900,8 @@ mod tests {
&[-0.6707, 0.1059, 0.901, 0.6289], &[-0.6707, 0.1059, 0.901, 0.6289],
&[0.9159, -0.1378, 0.3816, 0.0806], &[0.9159, -0.1378, 0.3816, 0.0806],
&[0.6707, 0.1059, 0.901, -0.6289], &[0.6707, 0.1059, 0.901, -0.6289],
]); ])
.unwrap();
let evd = A.evd(false).unwrap(); let evd = A.evd(false).unwrap();
+3 -3
View File
@@ -12,9 +12,9 @@ pub trait HighOrderOperations<T: Number>: Array2<T> {
/// use smartcore::linalg::traits::high_order::HighOrderOperations; /// use smartcore::linalg::traits::high_order::HighOrderOperations;
/// use smartcore::linalg::basic::arrays::Array2; /// use smartcore::linalg::basic::arrays::Array2;
/// ///
/// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]); /// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]).unwrap();
/// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]); /// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]).unwrap();
/// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]); /// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]).unwrap();
/// ///
/// assert_eq!(a.ab(true, &b, false), expected); /// assert_eq!(a.ab(true, &b, false), expected);
/// ``` /// ```
+8 -7
View File
@@ -18,7 +18,7 @@
//! &[1., 2., 3.], //! &[1., 2., 3.],
//! &[0., 1., 5.], //! &[0., 1., 5.],
//! &[5., 6., 0.] //! &[5., 6., 0.]
//! ]); //! ]).unwrap();
//! //!
//! let lu = A.lu().unwrap(); //! let lu = A.lu().unwrap();
//! let lower: DenseMatrix<f64> = lu.L(); //! let lower: DenseMatrix<f64> = lu.L();
@@ -263,13 +263,13 @@ mod tests {
)] )]
#[test] #[test]
fn decompose() { fn decompose() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]); let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap();
let expected_L = let expected_L =
DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]); DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]).unwrap();
let expected_U = let expected_U =
DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]); DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]).unwrap();
let expected_pivot = let expected_pivot =
DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]); DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]).unwrap();
let lu = a.lu().unwrap(); let lu = a.lu().unwrap();
assert!(relative_eq!(lu.L(), expected_L, epsilon = 1e-4)); assert!(relative_eq!(lu.L(), expected_L, epsilon = 1e-4));
assert!(relative_eq!(lu.U(), expected_U, epsilon = 1e-4)); assert!(relative_eq!(lu.U(), expected_U, epsilon = 1e-4));
@@ -281,9 +281,10 @@ mod tests {
)] )]
#[test] #[test]
fn inverse() { fn inverse() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]); let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap();
let expected = let expected =
DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]]); DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]])
.unwrap();
let a_inv = a.lu().and_then(|lu| lu.inverse()).unwrap(); let a_inv = a.lu().and_then(|lu| lu.inverse()).unwrap();
assert!(relative_eq!(a_inv, expected, epsilon = 1e-4)); assert!(relative_eq!(a_inv, expected, epsilon = 1e-4));
} }
+12 -7
View File
@@ -13,7 +13,7 @@
//! &[0.9, 0.4, 0.7], //! &[0.9, 0.4, 0.7],
//! &[0.4, 0.5, 0.3], //! &[0.4, 0.5, 0.3],
//! &[0.7, 0.3, 0.8] //! &[0.7, 0.3, 0.8]
//! ]); //! ]).unwrap();
//! //!
//! let qr = A.qr().unwrap(); //! let qr = A.qr().unwrap();
//! let orthogonal: DenseMatrix<f64> = qr.Q(); //! let orthogonal: DenseMatrix<f64> = qr.Q();
@@ -201,17 +201,20 @@ mod tests {
)] )]
#[test] #[test]
fn decompose() { fn decompose() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]])
.unwrap();
let q = DenseMatrix::from_2d_array(&[ let q = DenseMatrix::from_2d_array(&[
&[-0.7448, 0.2436, 0.6212], &[-0.7448, 0.2436, 0.6212],
&[-0.331, -0.9432, -0.027], &[-0.331, -0.9432, -0.027],
&[-0.5793, 0.2257, -0.7832], &[-0.5793, 0.2257, -0.7832],
]); ])
.unwrap();
let r = DenseMatrix::from_2d_array(&[ let r = DenseMatrix::from_2d_array(&[
&[-1.2083, -0.6373, -1.0842], &[-1.2083, -0.6373, -1.0842],
&[0.0, -0.3064, 0.0682], &[0.0, -0.3064, 0.0682],
&[0.0, 0.0, -0.1999], &[0.0, 0.0, -0.1999],
]); ])
.unwrap();
let qr = a.qr().unwrap(); let qr = a.qr().unwrap();
assert!(relative_eq!(qr.Q().abs(), q.abs(), epsilon = 1e-4)); assert!(relative_eq!(qr.Q().abs(), q.abs(), epsilon = 1e-4));
assert!(relative_eq!(qr.R().abs(), r.abs(), epsilon = 1e-4)); assert!(relative_eq!(qr.R().abs(), r.abs(), epsilon = 1e-4));
@@ -223,13 +226,15 @@ mod tests {
)] )]
#[test] #[test]
fn qr_solve_mut() { fn qr_solve_mut() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]])
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]); .unwrap();
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap();
let expected_w = DenseMatrix::from_2d_array(&[ let expected_w = DenseMatrix::from_2d_array(&[
&[-0.2027027, -1.2837838], &[-0.2027027, -1.2837838],
&[0.8783784, 2.2297297], &[0.8783784, 2.2297297],
&[0.4729730, 0.6621622], &[0.4729730, 0.6621622],
]); ])
.unwrap();
let w = a.qr_solve_mut(b).unwrap(); let w = a.qr_solve_mut(b).unwrap();
assert!(relative_eq!(w, expected_w, epsilon = 1e-2)); assert!(relative_eq!(w, expected_w, epsilon = 1e-2));
} }
+17 -14
View File
@@ -136,13 +136,12 @@ pub trait MatrixPreprocessing<T: RealNumber>: MutArrayView2<T> + Clone {
/// ```rust /// ```rust
/// use smartcore::linalg::basic::matrix::DenseMatrix; /// use smartcore::linalg::basic::matrix::DenseMatrix;
/// use smartcore::linalg::traits::stats::MatrixPreprocessing; /// use smartcore::linalg::traits::stats::MatrixPreprocessing;
/// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]); /// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap();
/// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]); /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap();
/// a.binarize_mut(0.); /// a.binarize_mut(0.);
/// ///
/// assert_eq!(a, expected); /// assert_eq!(a, expected);
/// ``` /// ```
fn binarize_mut(&mut self, threshold: T) { fn binarize_mut(&mut self, threshold: T) {
let (nrows, ncols) = self.shape(); let (nrows, ncols) = self.shape();
for row in 0..nrows { for row in 0..nrows {
@@ -159,8 +158,8 @@ pub trait MatrixPreprocessing<T: RealNumber>: MutArrayView2<T> + Clone {
/// ```rust /// ```rust
/// use smartcore::linalg::basic::matrix::DenseMatrix; /// use smartcore::linalg::basic::matrix::DenseMatrix;
/// use smartcore::linalg::traits::stats::MatrixPreprocessing; /// use smartcore::linalg::traits::stats::MatrixPreprocessing;
/// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]); /// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap();
/// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]); /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap();
/// ///
/// assert_eq!(a.binarize(0.), expected); /// assert_eq!(a.binarize(0.), expected);
/// ``` /// ```
@@ -186,7 +185,8 @@ mod tests {
&[1., 2., 3., 1., 2.], &[1., 2., 3., 1., 2.],
&[4., 5., 6., 3., 4.], &[4., 5., 6., 3., 4.],
&[7., 8., 9., 5., 6.], &[7., 8., 9., 5., 6.],
]); ])
.unwrap();
let expected_0 = vec![4., 5., 6., 3., 4.]; let expected_0 = vec![4., 5., 6., 3., 4.];
let expected_1 = vec![1.8, 4.4, 7.]; let expected_1 = vec![1.8, 4.4, 7.];
@@ -196,7 +196,7 @@ mod tests {
#[test] #[test]
fn test_var() { fn test_var() {
let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap();
let expected_0 = vec![4., 4., 4., 4.]; let expected_0 = vec![4., 4., 4., 4.];
let expected_1 = vec![1.25, 1.25]; let expected_1 = vec![1.25, 1.25];
@@ -211,12 +211,13 @@ mod tests {
let m = DenseMatrix::from_2d_array(&[ let m = DenseMatrix::from_2d_array(&[
&[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25], &[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25],
&[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25], &[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25],
]); ])
.unwrap();
let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
let expected_1 = vec![1.25, 1.25]; let expected_1 = vec![1.25, 1.25];
assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON)); assert!(m.var(0).approximate_eq(&expected_0, f64::EPSILON));
assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON)); assert!(m.var(1).approximate_eq(&expected_1, f64::EPSILON));
assert_eq!( assert_eq!(
m.mean(0), m.mean(0),
vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
@@ -230,7 +231,8 @@ mod tests {
&[1., 2., 3., 1., 2.], &[1., 2., 3., 1., 2.],
&[4., 5., 6., 3., 4.], &[4., 5., 6., 3., 4.],
&[7., 8., 9., 5., 6.], &[7., 8., 9., 5., 6.],
]); ])
.unwrap();
let expected_0 = vec![ let expected_0 = vec![
2.449489742783178, 2.449489742783178,
2.449489742783178, 2.449489742783178,
@@ -251,10 +253,10 @@ mod tests {
#[test] #[test]
fn test_scale() { fn test_scale() {
let m: DenseMatrix<f64> = let m: DenseMatrix<f64> =
DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap();
let expected_0: DenseMatrix<f64> = let expected_0: DenseMatrix<f64> =
DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]); DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]).unwrap();
let expected_1: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[ let expected_1: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
&[ &[
-1.3416407864998738, -1.3416407864998738,
@@ -268,7 +270,8 @@ mod tests {
0.4472135954999579, 0.4472135954999579,
1.3416407864998738, 1.3416407864998738,
], ],
]); ])
.unwrap();
assert_eq!(m.mean(0), vec![3.0, 4.0, 5.0, 6.0]); assert_eq!(m.mean(0), vec![3.0, 4.0, 5.0, 6.0]);
assert_eq!(m.mean(1), vec![2.5, 6.5]); assert_eq!(m.mean(1), vec![2.5, 6.5]);
+20 -14
View File
@@ -17,7 +17,7 @@
//! &[0.9, 0.4, 0.7], //! &[0.9, 0.4, 0.7],
//! &[0.4, 0.5, 0.3], //! &[0.4, 0.5, 0.3],
//! &[0.7, 0.3, 0.8] //! &[0.7, 0.3, 0.8]
//! ]); //! ]).unwrap();
//! //!
//! let svd = A.svd().unwrap(); //! let svd = A.svd().unwrap();
//! let u: DenseMatrix<f64> = svd.U; //! let u: DenseMatrix<f64> = svd.U;
@@ -48,11 +48,9 @@ pub struct SVD<T: Number + RealNumber, M: SVDDecomposable<T>> {
pub V: M, pub V: M,
/// Singular values of the original matrix /// Singular values of the original matrix
pub s: Vec<T>, pub s: Vec<T>,
///
m: usize, m: usize,
///
n: usize, n: usize,
/// /// Tolerance
tol: T, tol: T,
} }
@@ -489,7 +487,8 @@ mod tests {
&[0.9000, 0.4000, 0.7000], &[0.9000, 0.4000, 0.7000],
&[0.4000, 0.5000, 0.3000], &[0.4000, 0.5000, 0.3000],
&[0.7000, 0.3000, 0.8000], &[0.7000, 0.3000, 0.8000],
]); ])
.unwrap();
let s: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834]; let s: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834];
@@ -497,13 +496,15 @@ mod tests {
&[0.6881997, -0.07121225, 0.7220180], &[0.6881997, -0.07121225, 0.7220180],
&[0.3700456, 0.89044952, -0.2648886], &[0.3700456, 0.89044952, -0.2648886],
&[0.6240573, -0.44947578, -0.639158], &[0.6240573, -0.44947578, -0.639158],
]); ])
.unwrap();
let V = DenseMatrix::from_2d_array(&[ let V = DenseMatrix::from_2d_array(&[
&[0.6881997, -0.07121225, 0.7220180], &[0.6881997, -0.07121225, 0.7220180],
&[0.3700456, 0.89044952, -0.2648886], &[0.3700456, 0.89044952, -0.2648886],
&[0.6240573, -0.44947578, -0.6391588], &[0.6240573, -0.44947578, -0.6391588],
]); ])
.unwrap();
let svd = A.svd().unwrap(); let svd = A.svd().unwrap();
@@ -577,7 +578,8 @@ mod tests {
-0.2158704, -0.2158704,
-0.27529472, -0.27529472,
], ],
]); ])
.unwrap();
let s: Vec<f64> = vec![ let s: Vec<f64> = vec![
3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515, 3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515,
@@ -647,7 +649,8 @@ mod tests {
0.73034065, 0.73034065,
-0.43965505, -0.43965505,
], ],
]); ])
.unwrap();
let V = DenseMatrix::from_2d_array(&[ let V = DenseMatrix::from_2d_array(&[
&[ &[
@@ -707,7 +710,8 @@ mod tests {
0.1654796, 0.1654796,
-0.32346758, -0.32346758,
], ],
]); ])
.unwrap();
let svd = A.svd().unwrap(); let svd = A.svd().unwrap();
@@ -723,10 +727,11 @@ mod tests {
)] )]
#[test] #[test]
fn solve() { fn solve() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]])
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]); .unwrap();
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap();
let expected_w = let expected_w =
DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]); DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]).unwrap();
let w = a.svd_solve_mut(b).unwrap(); let w = a.svd_solve_mut(b).unwrap();
assert!(relative_eq!(w, expected_w, epsilon = 1e-2)); assert!(relative_eq!(w, expected_w, epsilon = 1e-2));
} }
@@ -737,7 +742,8 @@ mod tests {
)] )]
#[test] #[test]
fn decompose_restore() { fn decompose_restore() {
let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]); let a =
DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]).unwrap();
let svd = a.svd().unwrap(); let svd = a.svd().unwrap();
let u: &DenseMatrix<f32> = &svd.U; //U let u: &DenseMatrix<f32> = &svd.U; //U
let v: &DenseMatrix<f32> = &svd.V; // V let v: &DenseMatrix<f32> = &svd.V; // V
+9 -7
View File
@@ -12,7 +12,8 @@
//! pub struct BGSolver {} //! pub struct BGSolver {}
//! impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X> for BGSolver {} //! impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X> for BGSolver {}
//! //!
//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); //! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0.,
//! 11.]]).unwrap();
//! let b = vec![40., 51., 28.]; //! let b = vec![40., 51., 28.];
//! let expected = vec![1.0, 2.0, 3.0]; //! let expected = vec![1.0, 2.0, 3.0];
//! let mut x = Vec::zeros(3); //! let mut x = Vec::zeros(3);
@@ -26,9 +27,9 @@ use crate::error::Failed;
use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1}; use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1};
use crate::numbers::floatnum::FloatNumber; use crate::numbers::floatnum::FloatNumber;
/// /// Trait for Biconjugate Gradient Solver
pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> { pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
/// /// Solve Ax = b
fn solve_mut( fn solve_mut(
&self, &self,
a: &'a X, a: &'a X,
@@ -108,7 +109,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
Ok(err) Ok(err)
} }
/// /// solve preconditioner
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) { fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
let diag = Self::diag(a); let diag = Self::diag(a);
let n = diag.len(); let n = diag.len();
@@ -132,7 +133,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
y.copy_from(&x.xa(true, a)); y.copy_from(&x.xa(true, a));
} }
/// /// Extract the diagonal from a matrix
fn diag(a: &X) -> Vec<T> { fn diag(a: &X) -> Vec<T> {
let (nrows, ncols) = a.shape(); let (nrows, ncols) = a.shape();
let n = nrows.min(ncols); let n = nrows.min(ncols);
@@ -158,9 +159,10 @@ mod tests {
#[test] #[test]
fn bg_solver() { fn bg_solver() {
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]])
.unwrap();
let b = vec![40., 51., 28.]; let b = vec![40., 51., 28.];
let expected = vec![1.0, 2.0, 3.0]; let expected = [1.0, 2.0, 3.0];
let mut x = Vec::zeros(3); let mut x = Vec::zeros(3);
+6 -4
View File
@@ -38,7 +38,7 @@
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]); //! ]).unwrap();
//! //!
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
@@ -511,7 +511,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
@@ -562,7 +563,8 @@ mod tests {
&[17.0, 1918.0, 1.4054969025700674], &[17.0, 1918.0, 1.4054969025700674],
&[18.0, 1929.0, 1.3271699396384906], &[18.0, 1929.0, 1.3271699396384906],
&[19.0, 1915.0, 1.1373332337674806], &[19.0, 1915.0, 1.1373332337674806],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42, 1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42,
@@ -627,7 +629,7 @@ mod tests {
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
// ]); // ]).unwrap();
// let y = vec![ // let y = vec![
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
+2 -1
View File
@@ -418,7 +418,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
+4 -10
View File
@@ -16,7 +16,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1, MutArray, MutArra
use crate::linear::bg_solver::BiconjugateGradientSolver; use crate::linear::bg_solver::BiconjugateGradientSolver;
use crate::numbers::floatnum::FloatNumber; use crate::numbers::floatnum::FloatNumber;
/// /// Interior Point Optimizer
pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> { pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> {
ata: X, ata: X,
d1: Vec<T>, d1: Vec<T>,
@@ -25,9 +25,8 @@ pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> {
prs: Vec<T>, prs: Vec<T>,
} }
///
impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> { impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
/// /// Initialize a new Interior Point Optimizer
pub fn new(a: &X, n: usize) -> InteriorPointOptimizer<T, X> { pub fn new(a: &X, n: usize) -> InteriorPointOptimizer<T, X> {
InteriorPointOptimizer { InteriorPointOptimizer {
ata: a.ab(true, a, false), ata: a.ab(true, a, false),
@@ -38,7 +37,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
} }
} }
/// /// Run the optimization
pub fn optimize( pub fn optimize(
&mut self, &mut self,
x: &X, x: &X,
@@ -101,7 +100,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
// CALCULATE DUALITY GAP // CALCULATE DUALITY GAP
let xnu = nu.xa(false, x); let xnu = nu.xa(false, x);
let max_xnu = xnu.norm(std::f64::INFINITY); let max_xnu = xnu.norm(f64::INFINITY);
if max_xnu > lambda_f64 { if max_xnu > lambda_f64 {
let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap(); let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap();
nu.mul_scalar_mut(lnu); nu.mul_scalar_mut(lnu);
@@ -208,7 +207,6 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
Ok(w) Ok(w)
} }
///
fn sumlogneg(f: &X) -> T { fn sumlogneg(f: &X) -> T {
let (n, _) = f.shape(); let (n, _) = f.shape();
let mut sum = T::zero(); let mut sum = T::zero();
@@ -220,11 +218,9 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
} }
} }
///
impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X> impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
for InteriorPointOptimizer<T, X> for InteriorPointOptimizer<T, X>
{ {
///
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) { fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
let (_, p) = a.shape(); let (_, p) = a.shape();
@@ -234,7 +230,6 @@ impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
} }
} }
///
fn mat_vec_mul(&self, _: &X, x: &Vec<T>, y: &mut Vec<T>) { fn mat_vec_mul(&self, _: &X, x: &Vec<T>, y: &mut Vec<T>) {
let (_, p) = self.ata.shape(); let (_, p) = self.ata.shape();
let x_slice = Vec::from_slice(x.slice(0..p).as_ref()); let x_slice = Vec::from_slice(x.slice(0..p).as_ref());
@@ -246,7 +241,6 @@ impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
} }
} }
///
fn mat_t_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) { fn mat_t_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) {
self.mat_vec_mul(a, x, y); self.mat_vec_mul(a, x, y);
} }
+4 -3
View File
@@ -40,7 +40,7 @@
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]); //! ]).unwrap();
//! //!
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
@@ -341,7 +341,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8,
@@ -393,7 +394,7 @@ mod tests {
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
// ]); // ]).unwrap();
// let y = vec![ // let y = vec![
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
+94 -51
View File
@@ -35,7 +35,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y: Vec<i32> = vec![ //! let y: Vec<i32> = vec![
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//! ]; //! ];
@@ -183,14 +183,11 @@ pub struct LogisticRegression<
} }
trait ObjectiveFunction<T: Number + FloatNumber, X: Array2<T>> { trait ObjectiveFunction<T: Number + FloatNumber, X: Array2<T>> {
///
fn f(&self, w_bias: &[T]) -> T; fn f(&self, w_bias: &[T]) -> T;
///
#[allow(clippy::ptr_arg)] #[allow(clippy::ptr_arg)]
fn df(&self, g: &mut Vec<T>, w_bias: &Vec<T>); fn df(&self, g: &mut Vec<T>, w_bias: &Vec<T>);
///
#[allow(clippy::ptr_arg)] #[allow(clippy::ptr_arg)]
fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T { fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T {
let mut sum = T::zero(); let mut sum = T::zero();
@@ -261,8 +258,8 @@ impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y:
} }
} }
impl<'a, T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X> impl<T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X>
for BinaryObjectiveFunction<'a, T, X> for BinaryObjectiveFunction<'_, T, X>
{ {
fn f(&self, w_bias: &[T]) -> T { fn f(&self, w_bias: &[T]) -> T {
let mut f = T::zero(); let mut f = T::zero();
@@ -316,8 +313,8 @@ struct MultiClassObjectiveFunction<'a, T: Number + FloatNumber, X: Array2<T>> {
_phantom_t: PhantomData<T>, _phantom_t: PhantomData<T>,
} }
impl<'a, T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X> impl<T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X>
for MultiClassObjectiveFunction<'a, T, X> for MultiClassObjectiveFunction<'_, T, X>
{ {
fn f(&self, w_bias: &[T]) -> T { fn f(&self, w_bias: &[T]) -> T {
let mut f = T::zero(); let mut f = T::zero();
@@ -611,7 +608,8 @@ mod tests {
&[10., -2.], &[10., -2.],
&[8., 2.], &[8., 2.],
&[9., 0.], &[9., 0.],
]); ])
.unwrap();
let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
@@ -628,11 +626,11 @@ mod tests {
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]); objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]); objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
assert!((g[0] + 33.000068218163484).abs() < std::f64::EPSILON); assert!((g[0] + 33.000068218163484).abs() < f64::EPSILON);
let f = objective.f(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]); let f = objective.f(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]);
assert!((f - 408.0052230582765).abs() < std::f64::EPSILON); assert!((f - 408.0052230582765).abs() < f64::EPSILON);
let objective_reg = MultiClassObjectiveFunction { let objective_reg = MultiClassObjectiveFunction {
x: &x, x: &x,
@@ -671,7 +669,8 @@ mod tests {
&[10., -2.], &[10., -2.],
&[8., 2.], &[8., 2.],
&[9., 0.], &[9., 0.],
]); ])
.unwrap();
let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1]; let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1];
@@ -687,13 +686,13 @@ mod tests {
objective.df(&mut g, &vec![1., 2., 3.]); objective.df(&mut g, &vec![1., 2., 3.]);
objective.df(&mut g, &vec![1., 2., 3.]); objective.df(&mut g, &vec![1., 2., 3.]);
assert!((g[0] - 26.051064349381285).abs() < std::f64::EPSILON); assert!((g[0] - 26.051064349381285).abs() < f64::EPSILON);
assert!((g[1] - 10.239000702928523).abs() < std::f64::EPSILON); assert!((g[1] - 10.239000702928523).abs() < f64::EPSILON);
assert!((g[2] - 3.869294270156324).abs() < std::f64::EPSILON); assert!((g[2] - 3.869294270156324).abs() < f64::EPSILON);
let f = objective.f(&[1., 2., 3.]); let f = objective.f(&[1., 2., 3.]);
assert!((f - 59.76994756647412).abs() < std::f64::EPSILON); assert!((f - 59.76994756647412).abs() < f64::EPSILON);
let objective_reg = BinaryObjectiveFunction { let objective_reg = BinaryObjectiveFunction {
x: &x, x: &x,
@@ -733,7 +732,8 @@ mod tests {
&[10., -2.], &[10., -2.],
&[8., 2.], &[8., 2.],
&[9., 0.], &[9., 0.],
]); ])
.unwrap();
let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
@@ -818,37 +818,41 @@ mod tests {
assert!(reg_coeff_sum < coeff); assert!(reg_coeff_sum < coeff);
} }
// TODO: serialization for the new DenseMatrix needs to be implemented //TODO: serialization for the new DenseMatrix needs to be implemented
// #[cfg_attr(all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test)] #[cfg_attr(
// #[test] all(target_arch = "wasm32", not(target_os = "wasi")),
// #[cfg(feature = "serde")] wasm_bindgen_test::wasm_bindgen_test
// fn serde() { )]
// let x = DenseMatrix::from_2d_array(&[ #[test]
// &[1., -5.], #[cfg(feature = "serde")]
// &[2., 5.], fn serde() {
// &[3., -2.], let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
// &[1., 2.], &[1., -5.],
// &[2., 0.], &[2., 5.],
// &[6., -5.], &[3., -2.],
// &[7., 5.], &[1., 2.],
// &[6., -2.], &[2., 0.],
// &[7., 2.], &[6., -5.],
// &[6., 0.], &[7., 5.],
// &[8., -5.], &[6., -2.],
// &[9., 5.], &[7., 2.],
// &[10., -2.], &[6., 0.],
// &[8., 2.], &[8., -5.],
// &[9., 0.], &[9., 5.],
// ]); &[10., -2.],
// let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; &[8., 2.],
&[9., 0.],
])
.unwrap();
let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
// let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
// let deserialized_lr: LogisticRegression<f64, i32, DenseMatrix<f64>, Vec<i32>> = let deserialized_lr: LogisticRegression<f64, i32, DenseMatrix<f64>, Vec<i32>> =
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
// assert_eq!(lr, deserialized_lr); assert_eq!(lr, deserialized_lr);
// } }
#[cfg_attr( #[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")), all(target_arch = "wasm32", not(target_os = "wasi")),
@@ -877,7 +881,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
@@ -890,11 +895,7 @@ mod tests {
let y_hat = lr.predict(&x).unwrap(); let y_hat = lr.predict(&x).unwrap();
let error: i32 = y let error: i32 = y.into_iter().zip(y_hat).map(|(a, b)| (a - b).abs()).sum();
.into_iter()
.zip(y_hat.into_iter())
.map(|(a, b)| (a - b).abs())
.sum();
assert!(error <= 1); assert!(error <= 1);
@@ -903,4 +904,46 @@ mod tests {
assert!(reg_coeff_sum < coeff); assert!(reg_coeff_sum < coeff);
} }
/// Smoke test on a large, imbalanced dataset: fits logistic regression once
/// with default parameters and once with `alpha = 1.0` on a 52181 x 94 matrix
/// built by `DenseMatrix::rand`, then checks that each model returns exactly
/// one prediction per input label.
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn lr_fit_predict_random() {
    let x: DenseMatrix<f32> = DenseMatrix::rand(52181, 94);

    // 2181 labels of class 1 followed by 50000 labels of class 0,
    // one label per row of `x`.
    let mut y: Vec<i32> = vec![1; 2181];
    y.resize(2181 + 50000, 0);

    let model = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
    let regularized_params = LogisticRegressionParameters::default().with_alpha(1.0);
    let model_reg = LogisticRegression::fit(&x, &y, regularized_params).unwrap();

    let predicted = model.predict(&x).unwrap();
    let predicted_reg = model_reg.predict(&x).unwrap();

    assert_eq!(y.len(), predicted.len());
    assert_eq!(y.len(), predicted_reg.len());
}
/// Fits a default logistic regression on a 52181 x 94 `f64` matrix built by
/// `DenseMatrix::rand`, labelled with 2181 ones followed by 50000 zeros, and
/// checks that `predict` yields 52181 labels.
#[test]
fn test_logit() {
    let features: DenseMatrix<f64> = DenseMatrix::rand(52181, 94);

    // 2181 labels of class 1 followed by 50000 labels of class 0.
    let mut labels: Vec<u32> = vec![1; 2181];
    labels.resize(2181 + 50000, 0);

    // The rest of the test works through shared references only.
    let x = &features;
    let y = &labels;
    println!("y vec height: {:?}", y.len());
    println!("x matrix shape: {:?}", x.shape());

    let model = LogisticRegression::fit(x, y, Default::default()).unwrap();
    let y_hat = model.predict(x).unwrap();
    println!("y_hat shape: {:?}", y_hat.shape());

    assert_eq!(y_hat.shape(), 52181);
}
} }
+4 -3
View File
@@ -40,7 +40,7 @@
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]); //! ]).unwrap();
//! //!
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
@@ -455,7 +455,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
@@ -513,7 +514,7 @@ mod tests {
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
// ]); // ]).unwrap();
// let y = vec![ // let y = vec![
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
+3 -2
View File
@@ -25,7 +25,7 @@
//! &[68., 590., 37.], //! &[68., 590., 37.],
//! &[69., 660., 46.], //! &[69., 660., 46.],
//! &[73., 600., 55.], //! &[73., 600., 55.],
//! ]); //! ]).unwrap();
//! //!
//! let a = data.mean_by(0); //! let a = data.mean_by(0);
//! let b = vec![66., 640., 44.]; //! let b = vec![66., 640., 44.];
@@ -151,7 +151,8 @@ mod tests {
&[68., 590., 37.], &[68., 590., 37.],
&[69., 660., 46.], &[69., 660., 46.],
&[73., 600., 55.], &[73., 600., 55.],
]); ])
.unwrap();
let a = data.mean_by(0); let a = data.mean_by(0);
let b = vec![66., 640., 44.]; let b = vec![66., 640., 44.];
+1 -1
View File
@@ -37,7 +37,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y: Vec<i8> = vec![ //! let y: Vec<i8> = vec![
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//! ]; //! ];
@@ -3,9 +3,9 @@
use crate::{ use crate::{
api::{Predictor, SupervisedEstimator}, api::{Predictor, SupervisedEstimator},
error::{Failed, FailedError}, error::{Failed, FailedError},
linalg::basic::arrays::{Array2, Array1}, linalg::basic::arrays::{Array1, Array2},
numbers::realnum::RealNumber,
numbers::basenum::Number, numbers::basenum::Number,
numbers::realnum::RealNumber,
}; };
use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult}; use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
+2 -6
View File
@@ -283,9 +283,7 @@ mod tests {
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
]; ];
for ((train, test), (expected_train, expected_test)) in for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
k.split(&x).into_iter().zip(expected)
{
assert_eq!(test, expected_test); assert_eq!(test, expected_test);
assert_eq!(train, expected_train); assert_eq!(train, expected_train);
} }
@@ -307,9 +305,7 @@ mod tests {
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
]; ];
for ((train, test), (expected_train, expected_test)) in for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
k.split(&x).into_iter().zip(expected)
{
assert_eq!(test.len(), expected_test.len()); assert_eq!(test.len(), expected_test.len());
assert_eq!(train.len(), expected_train.len()); assert_eq!(train.len(), expected_train.len());
} }
+10 -6
View File
@@ -36,7 +36,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y: Vec<f64> = vec![ //! let y: Vec<f64> = vec![
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., //! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
//! ]; //! ];
@@ -84,7 +84,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y: Vec<i32> = vec![ //! let y: Vec<i32> = vec![
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
//! ]; //! ];
@@ -396,7 +396,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
let cv = KFold { let cv = KFold {
@@ -441,7 +442,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y = vec![ let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,
@@ -489,7 +491,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,
@@ -539,7 +542,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
let cv = KFold::default().with_n_splits(3); let cv = KFold::default().with_n_splits(3);
+13 -9
View File
@@ -19,14 +19,14 @@
//! &[0, 1, 0, 0, 1, 0], //! &[0, 1, 0, 0, 1, 0],
//! &[0, 1, 0, 1, 0, 0], //! &[0, 1, 0, 1, 0, 0],
//! &[0, 1, 1, 0, 0, 1], //! &[0, 1, 1, 0, 0, 1],
//! ]); //! ]).unwrap();
//! let y: Vec<u32> = vec![0, 0, 0, 1]; //! let y: Vec<u32> = vec![0, 0, 0, 1];
//! //!
//! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); //! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
//! //!
//! // Testing data point is: //! // Testing data point is:
//! // Chinese Chinese Chinese Tokyo Japan //! // Chinese Chinese Chinese Tokyo Japan
//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]); //! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]).unwrap();
//! let y_hat = nb.predict(&x_test).unwrap(); //! let y_hat = nb.predict(&x_test).unwrap();
//! ``` //! ```
//! //!
@@ -258,7 +258,7 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
/// * `x` - training data. /// * `x` - training data.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
/// priors are adjusted according to the data. /// priors are adjusted according to the data.
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
/// * `binarize` - Threshold for binarizing. /// * `binarize` - Threshold for binarizing.
fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>( fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>(
@@ -402,10 +402,10 @@ impl<TX: Number + PartialOrd, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
{ {
/// Fits BernoulliNB with given data /// Fits BernoulliNB with given data
/// * `x` - training data of size NxM where N is the number of samples and M is the number of /// * `x` - training data of size NxM where N is the number of samples and M is the number of
/// features. /// features.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `parameters` - additional parameters like class priors, alpha for smoothing and /// * `parameters` - additional parameters like class priors, alpha for smoothing and
/// binarizing threshold. /// binarizing threshold.
pub fn fit(x: &X, y: &Y, parameters: BernoulliNBParameters<TX>) -> Result<Self, Failed> { pub fn fit(x: &X, y: &Y, parameters: BernoulliNBParameters<TX>) -> Result<Self, Failed> {
let distribution = if let Some(threshold) = parameters.binarize { let distribution = if let Some(threshold) = parameters.binarize {
BernoulliNBDistribution::fit( BernoulliNBDistribution::fit(
@@ -427,6 +427,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
/// Estimates the class labels for the provided data. /// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with class estimates. /// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
if let Some(threshold) = self.binarize { if let Some(threshold) = self.binarize {
@@ -527,7 +528,8 @@ mod tests {
&[0.0, 1.0, 0.0, 0.0, 1.0, 0.0], &[0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
&[0.0, 1.0, 0.0, 1.0, 0.0, 0.0], &[0.0, 1.0, 0.0, 1.0, 0.0, 0.0],
&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0], &[0.0, 1.0, 1.0, 0.0, 0.0, 1.0],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 0, 1]; let y: Vec<u32> = vec![0, 0, 0, 1];
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
@@ -558,7 +560,7 @@ mod tests {
// Testing data point is: // Testing data point is:
// Chinese Chinese Chinese Tokyo Japan // Chinese Chinese Chinese Tokyo Japan
let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]); let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]).unwrap();
let y_hat = bnb.predict(&x_test).unwrap(); let y_hat = bnb.predict(&x_test).unwrap();
assert_eq!(y_hat, &[1]); assert_eq!(y_hat, &[1]);
@@ -586,7 +588,8 @@ mod tests {
&[2, 0, 3, 3, 1, 2, 0, 2, 4, 1], &[2, 0, 3, 3, 1, 2, 0, 2, 4, 1],
&[2, 4, 0, 4, 2, 4, 1, 3, 1, 4], &[2, 4, 0, 4, 2, 4, 1, 3, 1, 4],
&[0, 2, 2, 3, 4, 0, 4, 4, 4, 4], &[0, 2, 2, 3, 4, 0, 4, 4, 4, 4],
]); ])
.unwrap();
let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2]; let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2];
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
@@ -643,7 +646,8 @@ mod tests {
&[0, 1, 0, 0, 1, 0], &[0, 1, 0, 0, 1, 0],
&[0, 1, 0, 1, 0, 0], &[0, 1, 0, 1, 0, 0],
&[0, 1, 1, 0, 0, 1], &[0, 1, 1, 0, 0, 1],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 0, 1]; let y: Vec<u32> = vec![0, 0, 0, 1];
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
+11 -7
View File
@@ -24,7 +24,7 @@
//! &[3, 4, 2, 4], //! &[3, 4, 2, 4],
//! &[0, 3, 1, 2], //! &[0, 3, 1, 2],
//! &[0, 4, 1, 2], //! &[0, 4, 1, 2],
//! ]); //! ]).unwrap();
//! let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; //! let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
//! //!
//! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); //! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
@@ -95,7 +95,7 @@ impl<T: Number + Unsigned> PartialEq for CategoricalNBDistribution<T> {
return false; return false;
} }
for (a_i_j, b_i_j) in a_i.iter().zip(b_i.iter()) { for (a_i_j, b_i_j) in a_i.iter().zip(b_i.iter()) {
if (*a_i_j - *b_i_j).abs() > std::f64::EPSILON { if (*a_i_j - *b_i_j).abs() > f64::EPSILON {
return false; return false;
} }
} }
@@ -363,7 +363,7 @@ impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> Predictor<X, Y> for Categ
impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> CategoricalNB<T, X, Y> { impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> CategoricalNB<T, X, Y> {
/// Fits CategoricalNB with given data /// Fits CategoricalNB with given data
/// * `x` - training data of size NxM where N is the number of samples and M is the number of /// * `x` - training data of size NxM where N is the number of samples and M is the number of
/// features. /// features.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `parameters` - additional parameters like alpha for smoothing /// * `parameters` - additional parameters like alpha for smoothing
pub fn fit(x: &X, y: &Y, parameters: CategoricalNBParameters) -> Result<Self, Failed> { pub fn fit(x: &X, y: &Y, parameters: CategoricalNBParameters) -> Result<Self, Failed> {
@@ -375,6 +375,7 @@ impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> CategoricalNB<T, X, Y> {
/// Estimates the class labels for the provided data. /// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with class estimates. /// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
self.inner.as_ref().unwrap().predict(x) self.inner.as_ref().unwrap().predict(x)
@@ -455,7 +456,8 @@ mod tests {
&[1, 1, 1, 1], &[1, 1, 1, 1],
&[1, 2, 0, 0], &[1, 2, 0, 0],
&[2, 1, 1, 1], &[2, 1, 1, 1],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
@@ -513,7 +515,7 @@ mod tests {
] ]
); );
let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]); let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]).unwrap();
let y_hat = cnb.predict(&x_test).unwrap(); let y_hat = cnb.predict(&x_test).unwrap();
assert_eq!(y_hat, vec![0, 1]); assert_eq!(y_hat, vec![0, 1]);
} }
@@ -539,7 +541,8 @@ mod tests {
&[3, 4, 2, 4], &[3, 4, 2, 4],
&[0, 3, 1, 2], &[0, 3, 1, 2],
&[0, 4, 1, 2], &[0, 4, 1, 2],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
@@ -571,7 +574,8 @@ mod tests {
&[3, 4, 2, 4], &[3, 4, 2, 4],
&[0, 3, 1, 2], &[0, 3, 1, 2],
&[0, 4, 1, 2], &[0, 4, 1, 2],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
+10 -6
View File
@@ -16,7 +16,7 @@
//! &[ 1., 1.], //! &[ 1., 1.],
//! &[ 2., 1.], //! &[ 2., 1.],
//! &[ 3., 2.], //! &[ 3., 2.],
//! ]); //! ]).unwrap();
//! let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2]; //! let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
//! //!
//! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); //! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
@@ -175,7 +175,7 @@ impl<TY: Number + Ord + Unsigned> GaussianNBDistribution<TY> {
/// * `x` - training data. /// * `x` - training data.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
/// priors are adjusted according to the data. /// priors are adjusted according to the data.
pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>( pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>(
x: &X, x: &X,
y: &Y, y: &Y,
@@ -317,7 +317,7 @@ impl<TX: Number + RealNumber, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
{ {
/// Fits GaussianNB with given data /// Fits GaussianNB with given data
/// * `x` - training data of size NxM where N is the number of samples and M is the number of /// * `x` - training data of size NxM where N is the number of samples and M is the number of
/// features. /// features.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `parameters` - additional parameters like class priors. /// * `parameters` - additional parameters like class priors.
pub fn fit(x: &X, y: &Y, parameters: GaussianNBParameters) -> Result<Self, Failed> { pub fn fit(x: &X, y: &Y, parameters: GaussianNBParameters) -> Result<Self, Failed> {
@@ -328,6 +328,7 @@ impl<TX: Number + RealNumber, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
/// Estimates the class labels for the provided data. /// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with class estimates. /// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
self.inner.as_ref().unwrap().predict(x) self.inner.as_ref().unwrap().predict(x)
@@ -395,7 +396,8 @@ mod tests {
&[1., 1.], &[1., 1.],
&[2., 1.], &[2., 1.],
&[3., 2.], &[3., 2.],
]); ])
.unwrap();
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2]; let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
@@ -435,7 +437,8 @@ mod tests {
&[1., 1.], &[1., 1.],
&[2., 1.], &[2., 1.],
&[3., 2.], &[3., 2.],
]); ])
.unwrap();
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2]; let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
let priors = vec![0.3, 0.7]; let priors = vec![0.3, 0.7];
@@ -462,7 +465,8 @@ mod tests {
&[1., 1.], &[1., 1.],
&[2., 1.], &[2., 1.],
&[3., 2.], &[3., 2.],
]); ])
.unwrap();
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2]; let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
+532 -20
View File
@@ -89,33 +89,545 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: NBDistribution<TX,
/// Estimates the class labels for the provided data. /// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with class estimates. /// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
let y_classes = self.distribution.classes(); let y_classes = self.distribution.classes();
if y_classes.is_empty() {
return Err(Failed::predict("Failed to predict, no classes available"));
}
let (rows, _) = x.shape(); let (rows, _) = x.shape();
let predictions = (0..rows) let mut predictions = Vec::with_capacity(rows);
.map(|row_index| { let mut all_probs_nan = true;
let row = x.get_row(row_index);
let (prediction, _probability) = y_classes for row_index in 0..rows {
.iter() let row = x.get_row(row_index);
.enumerate() let mut max_log_prob = f64::NEG_INFINITY;
.map(|(class_index, class)| { let mut max_class = None;
(
class, for (class_index, class) in y_classes.iter().enumerate() {
self.distribution.log_likelihood(class_index, &row) let log_likelihood = self.distribution.log_likelihood(class_index, &row);
+ self.distribution.prior(class_index).ln(), let log_prob = log_likelihood + self.distribution.prior(class_index).ln();
)
}) if !log_prob.is_nan() && log_prob > max_log_prob {
.max_by(|(_, p1), (_, p2)| p1.partial_cmp(p2).unwrap()) max_log_prob = log_prob;
.unwrap(); max_class = Some(*class);
*prediction all_probs_nan = false;
}) }
.collect::<Vec<TY>>(); }
let y_hat = Y::from_vec_slice(&predictions);
Ok(y_hat) predictions.push(max_class.unwrap_or(y_classes[0]));
}
if all_probs_nan {
Err(Failed::predict(
"Failed to predict, all probabilities were NaN",
))
} else {
Ok(Y::from_vec_slice(&predictions))
}
} }
} }
pub mod bernoulli; pub mod bernoulli;
pub mod categorical; pub mod categorical;
pub mod gaussian; pub mod gaussian;
pub mod multinomial; pub mod multinomial;
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::basic::arrays::Array;
use crate::linalg::basic::matrix::DenseMatrix;
use num_traits::float::Float;
/// Naive Bayes model over `i32` samples and labels, driven by [`TestDistribution`].
type Model<'d> = BaseNaiveBayes<i32, i32, DenseMatrix<i32>, Vec<i32>, TestDistribution<'d>>;

/// Scripted distribution for exercising `predict`.
///
/// The wrapped vector is used as the class labels. A label of 2, 10 or 20 is
/// also its own log-likelihood; every other label yields NaN.
#[derive(Debug, PartialEq, Clone)]
struct TestDistribution<'d>(&'d Vec<i32>);

impl NBDistribution<i32, i32> for TestDistribution<'_> {
    fn classes(&self) -> &Vec<i32> {
        self.0
    }

    /// Constant prior of 1 for every class.
    fn prior(&self, _class_index: usize) -> f64 {
        1.
    }

    /// Ignores the sample; the score depends only on the class label.
    fn log_likelihood<'a>(
        &'a self,
        class_index: usize,
        _j: &'a Box<dyn ArrayView1<i32> + 'a>,
    ) -> f64 {
        // `get` must hand back a plain `&i32` here (presumably the `Array`
        // trait accessor imported by this module, not the slice method that
        // returns an `Option`).
        let label = *self.0.get(class_index);
        if matches!(label, 2 | 10 | 20) {
            label as f64
        } else {
            f64::nan()
        }
    }
}
#[test]
fn test_predict() {
    let matrix = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap();

    // No classes at all: prediction must fail with the dedicated message.
    let no_classes = vec![];
    let outcome = Model::fit(TestDistribution(&no_classes))
        .unwrap()
        .predict(&matrix);
    if let Err(err) = outcome {
        assert_eq!(
            err.to_string(),
            "Predict failed: Failed to predict, no classes available"
        );
    } else {
        panic!("Should return error in case of empty classes");
    }

    // Labels 1 and 3 score NaN in `TestDistribution`, so label 2 is expected
    // for every row.
    let some_nan = vec![1, 2, 3];
    let outcome = Model::fit(TestDistribution(&some_nan))
        .unwrap()
        .predict(&matrix);
    if let Ok(predicted) = outcome {
        assert_eq!(predicted, vec![2, 2, 2]);
    } else {
        panic!("Should success in normal case with NaNs");
    }

    // All three labels have a finite score; the largest one (20) is expected.
    let all_finite = vec![20, 2, 10];
    let outcome = Model::fit(TestDistribution(&all_finite))
        .unwrap()
        .predict(&matrix);
    if let Ok(predicted) = outcome {
        assert_eq!(predicted, vec![20, 20, 20]);
    } else {
        panic!("Should success in normal case without NaNs");
    }
}
// Float-based stub distribution: `probs[i]` serves both as the prior of class
// `i` and, through its natural logarithm, as the log-likelihood of any sample
// under that class. The sample itself is never inspected.
#[derive(Debug, PartialEq, Clone)]
struct TestDistributionAgain {
    classes: Vec<u32>,
    probs: Vec<f64>,
}

impl NBDistribution<f64, u32> for TestDistributionAgain {
    fn prior(&self, class_index: usize) -> f64 {
        self.probs[class_index]
    }

    fn log_likelihood<'a>(
        &'a self,
        class_index: usize,
        _j: &'a Box<dyn ArrayView1<f64> + 'a>,
    ) -> f64 {
        self.prior(class_index).ln()
    }

    fn classes(&self) -> &Vec<u32> {
        &self.classes
    }
}

// Naive Bayes model over `f64` samples and `u32` labels backed by the stub above.
type TestNB = BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, TestDistributionAgain>;
#[test]
fn test_predict_empty_classes() {
    // A distribution without any class gives `predict` nothing to choose from.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![],
        probs: vec![],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    assert!(model.predict(&samples).is_err());
}
#[test]
fn test_predict_single_class() {
    // With a single class available, every row must be assigned to it.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1],
        probs: vec![1.0],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    let predicted = model.predict(&samples).unwrap();
    assert_eq!(predicted, vec![1, 1]);
}
#[test]
fn test_predict_multiple_classes() {
    // Class 2 carries the largest probability (0.5), so it is expected for
    // each of the three rows.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1, 2, 3],
        probs: vec![0.2, 0.5, 0.3],
    })
    .unwrap();
    let samples =
        DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0]]).unwrap();

    let predicted = model.predict(&samples).unwrap();
    assert_eq!(predicted, vec![2, 2, 2]);
}
#[test]
fn test_predict_with_nans() {
    // Class 1 has a NaN probability; the prediction is expected to skip it
    // and settle on class 2.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1, 2],
        probs: vec![f64::NAN, 0.5],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    let predicted = model.predict(&samples).unwrap();
    assert_eq!(predicted, vec![2, 2]);
}
#[test]
fn test_predict_all_nans() {
    // Every class has a NaN probability, so no class can be chosen and
    // `predict` is expected to fail.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1, 2],
        probs: vec![f64::NAN, f64::NAN],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    assert!(model.predict(&samples).is_err());
}
#[test]
fn test_predict_extreme_probabilities() {
    // Both probabilities are tiny but still ordered (1e-300 > 1e-301), so
    // class 1 is expected for every row.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1, 2],
        probs: vec![1e-300, 1e-301],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    let predicted = model.predict(&samples).unwrap();
    assert_eq!(predicted, vec![1, 1]);
}
#[test]
fn test_predict_with_infinity() {
    // Class 1 is given an infinite probability and is expected to beat the
    // finite scores of classes 2 and 3.
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1, 2, 3],
        probs: vec![f64::INFINITY, 1.0, 2.0],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    let predicted = model.predict(&samples).unwrap();
    assert_eq!(predicted, vec![1, 1]);
}
#[test]
fn test_predict_with_negative_infinity() {
    // Class 1 has a negative-infinite probability, whose logarithm is NaN, so
    // it cannot win; class 3 (probability 2.0) is expected over class 2 (1.0).
    let model = TestNB::fit(TestDistributionAgain {
        classes: vec![1, 2, 3],
        probs: vec![f64::NEG_INFINITY, 1.0, 2.0],
    })
    .unwrap();
    let samples = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();

    let predicted = model.predict(&samples).unwrap();
    assert_eq!(predicted, vec![3, 3]);
}
// End-to-end check of `BaseNaiveBayes::predict` with a hand-rolled Gaussian
// distribution: trains on deterministic synthetic data, then verifies that
// predictions are valid class labels, repeatable, and that extreme / NaN /
// tiny inputs either produce valid labels or a clean error.
#[test]
fn test_gaussian_naive_bayes_numerical_stability() {
// Minimal Gaussian model local to this test. `means` and `variances` are
// indexed as [class index][feature index]; `priors` is indexed by class index.
#[derive(Debug, PartialEq, Clone)]
struct GaussianTestDistribution {
classes: Vec<u32>,
means: Vec<Vec<f64>>,
variances: Vec<Vec<f64>>,
priors: Vec<f64>,
}
impl NBDistribution<f64, u32> for GaussianTestDistribution {
fn classes(&self) -> &Vec<u32> {
&self.classes
}
fn prior(&self, class_index: usize) -> f64 {
self.priors[class_index]
}
// Sum over features of the log of the normal density:
// -0.5 * ln(2 * pi * var) - (x - mean)^2 / (2 * var).
fn log_likelihood<'a>(
&'a self,
class_index: usize,
j: &'a Box<dyn ArrayView1<f64> + 'a>,
) -> f64 {
let means = &self.means[class_index];
let variances = &self.variances[class_index];
j.iterator(0)
.enumerate()
.map(|(i, &xi)| {
let mean = means[i];
// NOTE: `train_distribution` below already adds the same 1e-9 epsilon to
// every stored variance, so the smoothing is effectively applied twice.
let var = variances[i] + 1e-9; // Small smoothing for numerical stability
let coeff = -0.5 * (2.0 * std::f64::consts::PI * var).ln();
let exponent = -(xi - mean).powi(2) / (2.0 * var);
coeff + exponent
})
.sum()
}
}
// Estimates the distribution from data: sorted distinct labels become the
// classes, per-class feature means and variances are computed in two passes
// over the rows (variance divides by the class count, not count - 1), and
// priors are the class frequencies.
fn train_distribution(x: &DenseMatrix<f64>, y: &[u32]) -> GaussianTestDistribution {
// Deduplicate labels through a HashSet, then sort so class indices are
// deterministic regardless of hash iteration order.
let mut classes: Vec<u32> = y
.iter()
.cloned()
.collect::<std::collections::HashSet<u32>>()
.into_iter()
.collect();
classes.sort();
let n_classes = classes.len();
let n_features = x.shape().1;
let mut means = vec![vec![0.0; n_features]; n_classes];
let mut variances = vec![vec![0.0; n_features]; n_classes];
let mut class_counts = vec![0; n_classes];
// Calculate means and count samples per class
for (sample, &class) in x.row_iter().zip(y.iter()) {
// Linear search for the class index; every label in `y` is in `classes`
// by construction, so the `unwrap` cannot fail.
let class_idx = classes.iter().position(|&c| c == class).unwrap();
class_counts[class_idx] += 1;
for (i, &value) in sample.iterator(0).enumerate() {
means[class_idx][i] += value;
}
}
// Normalize means
for (class_idx, mean) in means.iter_mut().enumerate() {
for value in mean.iter_mut() {
*value /= class_counts[class_idx] as f64;
}
}
// Calculate variances
for (sample, &class) in x.row_iter().zip(y.iter()) {
let class_idx = classes.iter().position(|&c| c == class).unwrap();
for (i, &value) in sample.iterator(0).enumerate() {
let diff = value - means[class_idx][i];
variances[class_idx][i] += diff * diff;
}
}
// Normalize variances and add small epsilon to avoid zero variance
let epsilon = 1e-9;
for (class_idx, variance) in variances.iter_mut().enumerate() {
for value in variance.iter_mut() {
*value = *value / class_counts[class_idx] as f64 + epsilon;
}
}
// Calculate priors
let total_samples = y.len() as f64;
let priors: Vec<f64> = class_counts
.iter()
.map(|&count| count as f64 / total_samples)
.collect();
GaussianTestDistribution {
classes,
means,
variances,
priors,
}
}
type TestNBGaussian =
BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, GaussianTestDistribution>;
// Create a constant training dataset
// (deterministic: values are (i * j) % 10, labels cycle through 0..n_classes).
let n_samples = 1000;
let n_features = 5;
let n_classes = 4;
let mut x_data = Vec::with_capacity(n_samples * n_features);
let mut y_data = Vec::with_capacity(n_samples);
for i in 0..n_samples {
for j in 0..n_features {
x_data.push((i * j) as f64 % 10.0);
}
y_data.push((i % n_classes) as u32);
}
// NOTE(review): the buffer above is filled sample by sample. The meaning of the
// trailing `true` flag is not visible here; if it selects column-major storage,
// element (i, j) of the matrix is not the value pushed for sample i / feature j.
// The assertions below do not depend on that, but confirm it is intended.
let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
let y = y_data;
// Train the model
let dist = train_distribution(&x, &y);
let nb = TestNBGaussian::fit(dist).unwrap();
// Create constant test data
let n_test_samples = 100;
let mut test_x_data = Vec::with_capacity(n_test_samples * n_features);
for i in 0..n_test_samples {
for j in 0..n_features {
test_x_data.push((i * j * 2) as f64 % 15.0);
}
}
let test_x = DenseMatrix::new(n_test_samples, n_features, test_x_data, true).unwrap();
// Make predictions
// (`map_err` only reformats the error so the panic from `unwrap` is readable).
let predictions = nb
.predict(&test_x)
.map_err(|e| format!("Prediction failed: {}", e))
.unwrap();
// Check numerical stability
assert_eq!(
predictions.len(),
n_test_samples,
"Number of predictions should match number of test samples"
);
// Check that all predictions are valid class labels
for &pred in predictions.iter() {
assert!(pred < n_classes as u32, "Predicted class should be valid");
}
// Check consistency of predictions
let repeated_predictions = nb
.predict(&test_x)
.map_err(|e| format!("Repeated prediction failed: {}", e))
.unwrap();
assert_eq!(
predictions, repeated_predictions,
"Predictions should be consistent when repeated"
);
// Check extreme values
// With every feature at f64::MAX the squared distance to any mean overflows,
// so no class gets a usable score; the test pins that `predict` reports this
// with its "all probabilities were NaN" error.
let extreme_x =
DenseMatrix::new(2, n_features, vec![f64::MAX; n_features * 2], true).unwrap();
let extreme_predictions = nb.predict(&extreme_x);
assert!(
extreme_predictions.is_err(),
"Extreme value input should result in an error"
);
assert_eq!(
extreme_predictions.unwrap_err().to_string(),
"Predict failed: Failed to predict, all probabilities were NaN",
"Incorrect error message for extreme values"
);
// Check for NaN handling
let nan_x = DenseMatrix::new(2, n_features, vec![f64::NAN; n_features * 2], true).unwrap();
let nan_predictions = nb.predict(&nan_x);
assert!(
nan_predictions.is_err(),
"NaN input should result in an error"
);
// Check for very small values
let small_x =
DenseMatrix::new(2, n_features, vec![f64::MIN_POSITIVE; n_features * 2], true).unwrap();
let small_predictions = nb
.predict(&small_x)
.map_err(|e| format!("Small value prediction failed: {}", e))
.unwrap();
for &pred in small_predictions.iter() {
assert!(
pred < n_classes as u32,
"Predictions for very small values should be valid"
);
}
// Check for values close to zero
let near_zero_x =
DenseMatrix::new(2, n_features, vec![1e-300; n_features * 2], true).unwrap();
let near_zero_predictions = nb
.predict(&near_zero_x)
.map_err(|e| format!("Near-zero value prediction failed: {}", e))
.unwrap();
for &pred in near_zero_predictions.iter() {
assert!(
pred < n_classes as u32,
"Predictions for near-zero values should be valid"
);
}
println!("All numerical stability checks passed!");
}
#[test]
fn test_gaussian_naive_bayes_numerical_stability_random_data() {
    use crate::naive_bayes::gaussian::GaussianNB;

    /// Minimal linear congruential generator, local to this test, so the
    /// "random" data set is identical on every run.
    #[derive(Debug)]
    struct Lcg {
        state: u64,
    }

    impl Lcg {
        /// Starts the sequence at `seed`.
        fn new(seed: u64) -> Self {
            Self { state: seed }
        }

        /// Advances the generator one step and returns the new state.
        fn next_u64(&mut self) -> u64 {
            // state <- state * multiplier + 1, wrapping on overflow.
            let advanced = self
                .state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1);
            self.state = advanced;
            advanced
        }

        /// Maps the next `u64` linearly onto the interval from `min` to `max`.
        ///
        /// The interval is nominally half-open, but the `u64 -> f64`
        /// conversion rounds, so the fraction can come out as exactly 1.0 and
        /// `max` itself may be returned.
        fn next_f64(&mut self, min: f64, max: f64) -> f64 {
            let span = max - min;
            let unit = (self.next_u64() as f64) / (u64::MAX as f64);
            min + unit * span
        }

        /// Draws an integer in `[min, max)` by flooring a float sample.
        fn gen_range_usize(&mut self, min: usize, max: usize) -> usize {
            let floored = self.next_f64(min as f64, max as f64).floor() as isize;
            // Clamp on both sides: float rounding may push the sample just
            // outside the requested range (see `next_f64`).
            let lowest = min as isize;
            let highest = (max - 1) as isize;
            floored.max(lowest).min(highest) as usize
        }
    }

    // Problem dimensions.
    let n_samples = 1000;
    let n_features = 5;
    let n_classes = 4;

    // Fixed seed => reproducible data.
    let mut rng = Lcg::new(42);

    // Flat feature buffer and one label per sample. For every sample the
    // feature values are drawn first and the class label afterwards, so the
    // generator is consumed in a fixed order.
    let mut x_data: Vec<f64> = Vec::with_capacity(n_samples * n_features);
    let mut y_data: Vec<u32> = Vec::with_capacity(n_samples);
    for _ in 0..n_samples {
        // Feature values are drawn from the range -10 to 10.
        x_data.extend((0..n_features).map(|_| rng.next_f64(-10.0, 10.0)));
        y_data.push(rng.gen_range_usize(0, n_classes) as u32);
    }

    // NOTE(review): the trailing `true` is presumably the column-major flag
    // (the Python snippet below reshapes with order='F') - confirm against
    // `DenseMatrix::new`.
    let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();

    // Fit on the random data, then predict on the very same matrix; neither
    // step is allowed to fail.
    let gnb = GaussianNB::fit(&x, &y_data, Default::default())
        .expect("Fitting GaussianNB with random data failed.");
    let predictions = gnb.predict(&x).expect("Prediction on random data failed.");

    // Sanity check: one prediction per sample.
    assert_eq!(
        predictions.len(),
        n_samples,
        "Prediction size must match n_samples"
    );
    // Sanity check: every prediction is one of the known class labels.
    for label in predictions.iter().copied() {
        assert!(
            (label as usize) < n_classes,
            "Predicted class {} is out of range [0..n_classes).",
            label
        );
    }

    // To cross-check against scikit-learn, dump the data from this test:
    //     println!("X = {:?}", &x);
    //     println!("Y = {:?}", &y_data);
    //     println!("predictions = {:?}", &predictions);
    // and then, in Python:
    //     import numpy as np
    //     from sklearn.naive_bayes import GaussianNB
    //     X = np.reshape(np.array(x), (1000, 5), order='F')
    //     Y = np.array(y)
    //     gnb = GaussianNB().fit(X, Y)
    //     preds = gnb.predict(X)
    //     expected = np.array(predictions)
    //     assert (expected == preds).all()
    // The two should match closely (or exactly), depending on floating-point
    // rounding.
}
}
+13 -9
View File
@@ -20,13 +20,13 @@
//! &[0, 2, 0, 0, 1, 0], //! &[0, 2, 0, 0, 1, 0],
//! &[0, 1, 0, 1, 0, 0], //! &[0, 1, 0, 1, 0, 0],
//! &[0, 1, 1, 0, 0, 1], //! &[0, 1, 1, 0, 0, 1],
//! ]); //! ]).unwrap();
//! let y: Vec<u32> = vec![0, 0, 0, 1]; //! let y: Vec<u32> = vec![0, 0, 0, 1];
//! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); //! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
//! //!
//! // Testing data point is: //! // Testing data point is:
//! // Chinese Chinese Chinese Tokyo Japan //! // Chinese Chinese Chinese Tokyo Japan
//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]); //! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap();
//! let y_hat = nb.predict(&x_test).unwrap(); //! let y_hat = nb.predict(&x_test).unwrap();
//! ``` //! ```
//! //!
@@ -208,7 +208,7 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
/// * `x` - training data. /// * `x` - training data.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
/// priors are adjusted according to the data. /// priors are adjusted according to the data.
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>( pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>(
x: &X, x: &X,
@@ -345,10 +345,10 @@ impl<TX: Number + Unsigned, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Array
{ {
/// Fits MultinomialNB with given data /// Fits MultinomialNB with given data
/// * `x` - training data of size NxM where N is the number of samples and M is the number of /// * `x` - training data of size NxM where N is the number of samples and M is the number of
/// features. /// features.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `parameters` - additional parameters like class priors, alpha for smoothing and /// * `parameters` - additional parameters like class priors, alpha for smoothing and
/// binarizing threshold. /// binarizing threshold.
pub fn fit(x: &X, y: &Y, parameters: MultinomialNBParameters) -> Result<Self, Failed> { pub fn fit(x: &X, y: &Y, parameters: MultinomialNBParameters) -> Result<Self, Failed> {
let distribution = let distribution =
MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?; MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?;
@@ -358,6 +358,7 @@ impl<TX: Number + Unsigned, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Array
/// Estimates the class labels for the provided data. /// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with class estimates. /// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
self.inner.as_ref().unwrap().predict(x) self.inner.as_ref().unwrap().predict(x)
@@ -433,7 +434,8 @@ mod tests {
&[0, 2, 0, 0, 1, 0], &[0, 2, 0, 0, 1, 0],
&[0, 1, 0, 1, 0, 0], &[0, 1, 0, 1, 0, 0],
&[0, 1, 1, 0, 0, 1], &[0, 1, 1, 0, 0, 1],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 0, 1]; let y: Vec<u32> = vec![0, 0, 0, 1];
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
@@ -467,7 +469,7 @@ mod tests {
// Testing data point is: // Testing data point is:
// Chinese Chinese Chinese Tokyo Japan // Chinese Chinese Chinese Tokyo Japan
let x_test = DenseMatrix::<u32>::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]); let x_test = DenseMatrix::<u32>::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap();
let y_hat = mnb.predict(&x_test).unwrap(); let y_hat = mnb.predict(&x_test).unwrap();
assert_eq!(y_hat, &[0]); assert_eq!(y_hat, &[0]);
@@ -495,7 +497,8 @@ mod tests {
&[2, 0, 3, 3, 1, 2, 0, 2, 4, 1], &[2, 0, 3, 3, 1, 2, 0, 2, 4, 1],
&[2, 4, 0, 4, 2, 4, 1, 3, 1, 4], &[2, 4, 0, 4, 2, 4, 1, 3, 1, 4],
&[0, 2, 2, 3, 4, 0, 4, 4, 4, 4], &[0, 2, 2, 3, 4, 0, 4, 4, 4, 4],
]); ])
.unwrap();
let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2]; let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2];
let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
@@ -554,7 +557,8 @@ mod tests {
&[0, 1, 0, 0, 1, 0], &[0, 1, 0, 0, 1, 0],
&[0, 1, 0, 1, 0, 0], &[0, 1, 0, 1, 0, 0],
&[0, 1, 1, 0, 0, 1], &[0, 1, 1, 0, 0, 1],
]); ])
.unwrap();
let y = vec![0, 0, 0, 1]; let y = vec![0, 0, 0, 1];
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
+10 -5
View File
@@ -22,7 +22,7 @@
//! &[3., 4.], //! &[3., 4.],
//! &[5., 6.], //! &[5., 6.],
//! &[7., 8.], //! &[7., 8.],
//! &[9., 10.]]); //! &[9., 10.]]).unwrap();
//! let y = vec![2, 2, 2, 3, 3]; //your class labels //! let y = vec![2, 2, 2, 3, 3]; //your class labels
//! //!
//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); //! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
@@ -261,6 +261,7 @@ impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec
/// Estimates the class labels for the provided data. /// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with class estimates. /// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
let mut result = Y::zeros(x.shape().0); let mut result = Y::zeros(x.shape().0);
@@ -311,7 +312,8 @@ mod tests {
#[test] #[test]
fn knn_fit_predict() { fn knn_fit_predict() {
let x = let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
.unwrap();
let y = vec![2, 2, 2, 3, 3]; let y = vec![2, 2, 2, 3, 3];
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
let y_hat = knn.predict(&x).unwrap(); let y_hat = knn.predict(&x).unwrap();
@@ -325,7 +327,7 @@ mod tests {
)] )]
#[test] #[test]
fn knn_fit_predict_weighted() { fn knn_fit_predict_weighted() {
let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]); let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]).unwrap();
let y = vec![2, 2, 2, 3, 3]; let y = vec![2, 2, 2, 3, 3];
let knn = KNNClassifier::fit( let knn = KNNClassifier::fit(
&x, &x,
@@ -336,7 +338,9 @@ mod tests {
.with_weight(KNNWeightFunction::Distance), .with_weight(KNNWeightFunction::Distance),
) )
.unwrap(); .unwrap();
let y_hat = knn.predict(&DenseMatrix::from_2d_array(&[&[4.1]])).unwrap(); let y_hat = knn
.predict(&DenseMatrix::from_2d_array(&[&[4.1]]).unwrap())
.unwrap();
assert_eq!(vec![3], y_hat); assert_eq!(vec![3], y_hat);
} }
@@ -348,7 +352,8 @@ mod tests {
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
fn serde() { fn serde() {
let x = let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
.unwrap();
let y = vec![2, 2, 2, 3, 3]; let y = vec![2, 2, 2, 3, 3];
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
+11 -11
View File
@@ -24,7 +24,7 @@
//! &[2., 2.], //! &[2., 2.],
//! &[3., 3.], //! &[3., 3.],
//! &[4., 4.], //! &[4., 4.],
//! &[5., 5.]]); //! &[5., 5.]]).unwrap();
//! let y = vec![1., 2., 3., 4., 5.]; //your target values //! let y = vec![1., 2., 3., 4., 5.]; //your target values
//! //!
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); //! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
@@ -88,25 +88,21 @@ pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D:
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>> impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
KNNRegressor<TX, TY, X, Y, D> KNNRegressor<TX, TY, X, Y, D>
{ {
///
fn y(&self) -> &Y { fn y(&self) -> &Y {
self.y.as_ref().unwrap() self.y.as_ref().unwrap()
} }
///
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> { fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
self.knn_algorithm self.knn_algorithm
.as_ref() .as_ref()
.expect("Missing parameter: KNNAlgorithm") .expect("Missing parameter: KNNAlgorithm")
} }
///
fn weight(&self) -> &KNNWeightFunction { fn weight(&self) -> &KNNWeightFunction {
self.weight.as_ref().expect("Missing parameter: weight") self.weight.as_ref().expect("Missing parameter: weight")
} }
#[allow(dead_code)] #[allow(dead_code)]
///
fn k(&self) -> usize { fn k(&self) -> usize {
self.k.unwrap() self.k.unwrap()
} }
@@ -250,6 +246,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
/// Predict the target for the provided data. /// Predict the target for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
///
/// Returns a vector of size N with estimates. /// Returns a vector of size N with estimates.
pub fn predict(&self, x: &X) -> Result<Y, Failed> { pub fn predict(&self, x: &X) -> Result<Y, Failed> {
let mut result = Y::zeros(x.shape().0); let mut result = Y::zeros(x.shape().0);
@@ -295,9 +292,10 @@ mod tests {
#[test] #[test]
fn knn_fit_predict_weighted() { fn knn_fit_predict_weighted() {
let x = let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
.unwrap();
let y: Vec<f64> = vec![1., 2., 3., 4., 5.]; let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
let y_exp = vec![1., 2., 3., 4., 5.]; let y_exp = [1., 2., 3., 4., 5.];
let knn = KNNRegressor::fit( let knn = KNNRegressor::fit(
&x, &x,
&y, &y,
@@ -311,7 +309,7 @@ mod tests {
let y_hat = knn.predict(&x).unwrap(); let y_hat = knn.predict(&x).unwrap();
assert_eq!(5, Vec::len(&y_hat)); assert_eq!(5, Vec::len(&y_hat));
for i in 0..y_hat.len() { for i in 0..y_hat.len() {
assert!((y_hat[i] - y_exp[i]).abs() < std::f64::EPSILON); assert!((y_hat[i] - y_exp[i]).abs() < f64::EPSILON);
} }
} }
@@ -322,9 +320,10 @@ mod tests {
#[test] #[test]
fn knn_fit_predict_uniform() { fn knn_fit_predict_uniform() {
let x = let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
.unwrap();
let y: Vec<f64> = vec![1., 2., 3., 4., 5.]; let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
let y_exp = vec![2., 2., 3., 4., 4.]; let y_exp = [2., 2., 3., 4., 4.];
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
let y_hat = knn.predict(&x).unwrap(); let y_hat = knn.predict(&x).unwrap();
assert_eq!(5, Vec::len(&y_hat)); assert_eq!(5, Vec::len(&y_hat));
@@ -341,7 +340,8 @@ mod tests {
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
fn serde() { fn serde() {
let x = let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
.unwrap();
let y = vec![1., 2., 3., 4., 5.]; let y = vec![1., 2., 3., 4., 5.];
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
@@ -1,5 +1,3 @@
// TODO: missing documentation
use std::default::Default; use std::default::Default;
use crate::linalg::basic::arrays::Array1; use crate::linalg::basic::arrays::Array1;
@@ -8,30 +6,27 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
use crate::optimization::line_search::LineSearchMethod; use crate::optimization::line_search::LineSearchMethod;
use crate::optimization::{DF, F}; use crate::optimization::{DF, F};
/// /// Gradient Descent optimization algorithm
pub struct GradientDescent { pub struct GradientDescent {
/// /// Maximum number of iterations
pub max_iter: usize, pub max_iter: usize,
/// /// Relative tolerance for the gradient norm
pub g_rtol: f64, pub g_rtol: f64,
/// /// Absolute tolerance for the gradient norm
pub g_atol: f64, pub g_atol: f64,
} }
///
impl Default for GradientDescent { impl Default for GradientDescent {
fn default() -> Self { fn default() -> Self {
GradientDescent { GradientDescent {
max_iter: 10000, max_iter: 10000,
g_rtol: std::f64::EPSILON.sqrt(), g_rtol: f64::EPSILON.sqrt(),
g_atol: std::f64::EPSILON, g_atol: f64::EPSILON,
} }
} }
} }
///
impl<T: FloatNumber> FirstOrderOptimizer<T> for GradientDescent { impl<T: FloatNumber> FirstOrderOptimizer<T> for GradientDescent {
///
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>( fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
&self, &self,
f: &'a F<'_, T, X>, f: &'a F<'_, T, X>,
+14 -25
View File
@@ -11,31 +11,29 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
use crate::optimization::line_search::LineSearchMethod; use crate::optimization::line_search::LineSearchMethod;
use crate::optimization::{DF, F}; use crate::optimization::{DF, F};
/// /// Limited-memory BFGS optimization algorithm
pub struct LBFGS { pub struct LBFGS {
/// /// Maximum number of iterations
pub max_iter: usize, pub max_iter: usize,
/// /// TODO: Add documentation
pub g_rtol: f64, pub g_rtol: f64,
/// /// TODO: Add documentation
pub g_atol: f64, pub g_atol: f64,
/// /// TODO: Add documentation
pub x_atol: f64, pub x_atol: f64,
/// /// TODO: Add documentation
pub x_rtol: f64, pub x_rtol: f64,
/// /// TODO: Add documentation
pub f_abstol: f64, pub f_abstol: f64,
/// /// TODO: Add documentation
pub f_reltol: f64, pub f_reltol: f64,
/// /// TODO: Add documentation
pub successive_f_tol: usize, pub successive_f_tol: usize,
/// /// TODO: Add documentation
pub m: usize, pub m: usize,
} }
///
impl Default for LBFGS { impl Default for LBFGS {
///
fn default() -> Self { fn default() -> Self {
LBFGS { LBFGS {
max_iter: 1000, max_iter: 1000,
@@ -51,9 +49,7 @@ impl Default for LBFGS {
} }
} }
///
impl LBFGS { impl LBFGS {
///
fn two_loops<T: FloatNumber + RealNumber, X: Array1<T>>(&self, state: &mut LBFGSState<T, X>) { fn two_loops<T: FloatNumber + RealNumber, X: Array1<T>>(&self, state: &mut LBFGSState<T, X>) {
let lower = state.iteration.max(self.m) - self.m; let lower = state.iteration.max(self.m) - self.m;
let upper = state.iteration; let upper = state.iteration;
@@ -95,7 +91,6 @@ impl LBFGS {
state.s.mul_scalar_mut(-T::one()); state.s.mul_scalar_mut(-T::one());
} }
///
fn init_state<T: FloatNumber + RealNumber, X: Array1<T>>(&self, x: &X) -> LBFGSState<T, X> { fn init_state<T: FloatNumber + RealNumber, X: Array1<T>>(&self, x: &X) -> LBFGSState<T, X> {
LBFGSState { LBFGSState {
x: x.clone(), x: x.clone(),
@@ -119,7 +114,6 @@ impl LBFGS {
} }
} }
///
fn update_state<'a, T: FloatNumber + RealNumber, X: Array1<T>, LS: LineSearchMethod<T>>( fn update_state<'a, T: FloatNumber + RealNumber, X: Array1<T>, LS: LineSearchMethod<T>>(
&self, &self,
f: &'a F<'_, T, X>, f: &'a F<'_, T, X>,
@@ -161,7 +155,6 @@ impl LBFGS {
df(&mut state.x_df, &state.x); df(&mut state.x_df, &state.x);
} }
///
fn assess_convergence<T: FloatNumber, X: Array1<T>>( fn assess_convergence<T: FloatNumber, X: Array1<T>>(
&self, &self,
state: &mut LBFGSState<T, X>, state: &mut LBFGSState<T, X>,
@@ -173,7 +166,7 @@ impl LBFGS {
} }
if state.x.max_diff(&state.x_prev) if state.x.max_diff(&state.x_prev)
<= T::from_f64(self.x_rtol * state.x.norm(std::f64::INFINITY)).unwrap() <= T::from_f64(self.x_rtol * state.x.norm(f64::INFINITY)).unwrap()
{ {
x_converged = true; x_converged = true;
} }
@@ -188,14 +181,13 @@ impl LBFGS {
state.counter_f_tol += 1; state.counter_f_tol += 1;
} }
if state.x_df.norm(std::f64::INFINITY) <= self.g_atol { if state.x_df.norm(f64::INFINITY) <= self.g_atol {
g_converged = true; g_converged = true;
} }
g_converged || x_converged || state.counter_f_tol > self.successive_f_tol g_converged || x_converged || state.counter_f_tol > self.successive_f_tol
} }
///
fn update_hessian<T: FloatNumber, X: Array1<T>>( fn update_hessian<T: FloatNumber, X: Array1<T>>(
&self, &self,
_: &DF<'_, X>, _: &DF<'_, X>,
@@ -212,7 +204,6 @@ impl LBFGS {
} }
} }
///
#[derive(Debug)] #[derive(Debug)]
struct LBFGSState<T: FloatNumber, X: Array1<T>> { struct LBFGSState<T: FloatNumber, X: Array1<T>> {
x: X, x: X,
@@ -234,9 +225,7 @@ struct LBFGSState<T: FloatNumber, X: Array1<T>> {
alpha: T, alpha: T,
} }
///
impl<T: FloatNumber + RealNumber> FirstOrderOptimizer<T> for LBFGS { impl<T: FloatNumber + RealNumber> FirstOrderOptimizer<T> for LBFGS {
///
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>( fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
&self, &self,
f: &F<'_, T, X>, f: &F<'_, T, X>,
@@ -248,7 +237,7 @@ impl<T: FloatNumber + RealNumber> FirstOrderOptimizer<T> for LBFGS {
df(&mut state.x_df, x0); df(&mut state.x_df, x0);
let g_converged = state.x_df.norm(std::f64::INFINITY) < self.g_atol; let g_converged = state.x_df.norm(f64::INFINITY) < self.g_atol;
let mut converged = g_converged; let mut converged = g_converged;
let stopped = false; let stopped = false;
@@ -299,7 +288,7 @@ mod tests {
let result = optimizer.optimize(&f, &df, &x0, &ls); let result = optimizer.optimize(&f, &df, &x0, &ls);
assert!((result.f_x - 0.0).abs() < std::f64::EPSILON); assert!((result.f_x - 0.0).abs() < f64::EPSILON);
assert!((result.x[0] - 1.0).abs() < 1e-8); assert!((result.x[0] - 1.0).abs() < 1e-8);
assert!((result.x[1] - 1.0).abs() < 1e-8); assert!((result.x[1] - 1.0).abs() < 1e-8);
assert!(result.iterations <= 24); assert!(result.iterations <= 24);
+8 -8
View File
@@ -1,6 +1,6 @@
/// /// Gradient descent optimization algorithm
pub mod gradient_descent; pub mod gradient_descent;
/// /// Limited-memory BFGS optimization algorithm
pub mod lbfgs; pub mod lbfgs;
use std::clone::Clone; use std::clone::Clone;
@@ -11,9 +11,9 @@ use crate::numbers::floatnum::FloatNumber;
use crate::optimization::line_search::LineSearchMethod; use crate::optimization::line_search::LineSearchMethod;
use crate::optimization::{DF, F}; use crate::optimization::{DF, F};
/// /// First-order optimization is a class of algorithms that use the first derivative of a function to find optimal solutions.
pub trait FirstOrderOptimizer<T: FloatNumber> { pub trait FirstOrderOptimizer<T: FloatNumber> {
/// /// run first order optimization
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>( fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
&self, &self,
f: &F<'_, T, X>, f: &F<'_, T, X>,
@@ -23,13 +23,13 @@ pub trait FirstOrderOptimizer<T: FloatNumber> {
) -> OptimizerResult<T, X>; ) -> OptimizerResult<T, X>;
} }
/// /// Result of optimization
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct OptimizerResult<T: FloatNumber, X: Array1<T>> { pub struct OptimizerResult<T: FloatNumber, X: Array1<T>> {
/// /// Solution
pub x: X, pub x: X,
/// /// f(x) value
pub f_x: T, pub f_x: T,
/// /// number of iterations
pub iterations: usize, pub iterations: usize,
} }
+12 -17
View File
@@ -1,11 +1,9 @@
// TODO: missing documentation
use crate::optimization::FunctionOrder; use crate::optimization::FunctionOrder;
use num_traits::Float; use num_traits::Float;
/// /// Line search optimization.
pub trait LineSearchMethod<T: Float> { pub trait LineSearchMethod<T: Float> {
/// /// Find alpha that satisfies strong Wolfe conditions.
fn search( fn search(
&self, &self,
f: &(dyn Fn(T) -> T), f: &(dyn Fn(T) -> T),
@@ -16,32 +14,31 @@ pub trait LineSearchMethod<T: Float> {
) -> LineSearchResult<T>; ) -> LineSearchResult<T>;
} }
/// /// Line search result
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct LineSearchResult<T: Float> { pub struct LineSearchResult<T: Float> {
/// /// Alpha value
pub alpha: T, pub alpha: T,
/// /// f(alpha) value
pub f_x: T, pub f_x: T,
} }
/// /// Backtracking line search method.
pub struct Backtracking<T: Float> { pub struct Backtracking<T: Float> {
/// /// TODO: Add documentation
pub c1: T, pub c1: T,
/// /// Maximum number of iterations for Backtracking single run
pub max_iterations: usize, pub max_iterations: usize,
/// /// TODO: Add documentation
pub max_infinity_iterations: usize, pub max_infinity_iterations: usize,
/// /// TODO: Add documentation
pub phi: T, pub phi: T,
/// /// TODO: Add documentation
pub plo: T, pub plo: T,
/// /// function order
pub order: FunctionOrder, pub order: FunctionOrder,
} }
///
impl<T: Float> Default for Backtracking<T> { impl<T: Float> Default for Backtracking<T> {
fn default() -> Self { fn default() -> Self {
Backtracking { Backtracking {
@@ -55,9 +52,7 @@ impl<T: Float> Default for Backtracking<T> {
} }
} }
///
impl<T: Float> LineSearchMethod<T> for Backtracking<T> { impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
///
fn search( fn search(
&self, &self,
f: &(dyn Fn(T) -> T), f: &(dyn Fn(T) -> T),
+7 -9
View File
@@ -1,21 +1,19 @@
// TODO: missing documentation /// first order optimization algorithms
///
pub mod first_order; pub mod first_order;
/// /// line search algorithms
pub mod line_search; pub mod line_search;
/// /// Function f(x) = y
pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a; pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a;
/// /// Function df(x)
pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a; pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
/// /// Function order
#[allow(clippy::upper_case_acronyms)] #[allow(clippy::upper_case_acronyms)]
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum FunctionOrder { pub enum FunctionOrder {
/// /// Second order
SECOND, SECOND,
/// /// Third order
THIRD, THIRD,
} }
+12 -7
View File
@@ -12,7 +12,7 @@
//! &[1.5, 2.0, 1.5, 4.0], //! &[1.5, 2.0, 1.5, 4.0],
//! &[1.5, 1.0, 1.5, 5.0], //! &[1.5, 1.0, 1.5, 5.0],
//! &[1.5, 2.0, 1.5, 6.0], //! &[1.5, 2.0, 1.5, 6.0],
//! ]); //! ]).unwrap();
//! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]); //! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
//! // Infer number of categories from data and return a reusable encoder //! // Infer number of categories from data and return a reusable encoder
//! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap(); //! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap();
@@ -240,14 +240,16 @@ mod tests {
&[2.0, 1.5, 4.0], &[2.0, 1.5, 4.0],
&[1.0, 1.5, 5.0], &[1.0, 1.5, 5.0],
&[2.0, 1.5, 6.0], &[2.0, 1.5, 6.0],
]); ])
.unwrap();
let oh_enc = DenseMatrix::from_2d_array(&[ let oh_enc = DenseMatrix::from_2d_array(&[
&[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0], &[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
&[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0], &[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
&[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0], &[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
&[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0], &[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
]); ])
.unwrap();
(orig, oh_enc) (orig, oh_enc)
} }
@@ -259,14 +261,16 @@ mod tests {
&[1.5, 2.0, 1.5, 4.0], &[1.5, 2.0, 1.5, 4.0],
&[1.5, 1.0, 1.5, 5.0], &[1.5, 1.0, 1.5, 5.0],
&[1.5, 2.0, 1.5, 6.0], &[1.5, 2.0, 1.5, 6.0],
]); ])
.unwrap();
let oh_enc = DenseMatrix::from_2d_array(&[ let oh_enc = DenseMatrix::from_2d_array(&[
&[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0], &[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
&[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0], &[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
&[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0], &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
&[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0], &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
]); ])
.unwrap();
(orig, oh_enc) (orig, oh_enc)
} }
@@ -277,7 +281,7 @@ mod tests {
)] )]
#[test] #[test]
fn hash_encode_f64_series() { fn hash_encode_f64_series() {
let series = vec![3.0, 1.0, 2.0, 1.0]; let series = [3.0, 1.0, 2.0, 1.0];
let hashable_series: Vec<CategoricalFloat> = let hashable_series: Vec<CategoricalFloat> =
series.iter().map(|v| v.to_category()).collect(); series.iter().map(|v| v.to_category()).collect();
let enc = CategoryMapper::from_positional_category_vec(hashable_series); let enc = CategoryMapper::from_positional_category_vec(hashable_series);
@@ -334,7 +338,8 @@ mod tests {
&[2.0, 1.5, 4.0], &[2.0, 1.5, 4.0],
&[1.0, 1.5, 5.0], &[1.0, 1.5, 5.0],
&[2.0, 1.5, 6.0], &[2.0, 1.5, 6.0],
]); ])
.unwrap();
let params = OneHotEncoderParams::from_cat_idx(&[1]); let params = OneHotEncoderParams::from_cat_idx(&[1]);
let result = OneHotEncoder::fit(&m, params); let result = OneHotEncoder::fit(&m, params);
+55 -50
View File
@@ -11,7 +11,7 @@
//! vec![0.0, 0.0], //! vec![0.0, 0.0],
//! vec![1.0, 1.0], //! vec![1.0, 1.0],
//! vec![1.0, 1.0], //! vec![1.0, 1.0],
//! ]); //! ]).unwrap();
//! //!
//! let standard_scaler = //! let standard_scaler =
//! numerical::StandardScaler::fit(&data, numerical::StandardScalerParameters::default()) //! numerical::StandardScaler::fit(&data, numerical::StandardScalerParameters::default())
@@ -24,7 +24,7 @@
//! vec![-1.0, -1.0], //! vec![-1.0, -1.0],
//! vec![1.0, 1.0], //! vec![1.0, 1.0],
//! vec![1.0, 1.0], //! vec![1.0, 1.0],
//! ]) //! ]).unwrap()
//! ); //! );
//! ``` //! ```
use std::marker::PhantomData; use std::marker::PhantomData;
@@ -172,18 +172,14 @@ where
T: Number + RealNumber, T: Number + RealNumber,
M: Array2<T>, M: Array2<T>,
{ {
if let Some(output_matrix) = columns.first().cloned() { columns.first().cloned().map(|output_matrix| {
return Some( columns
columns .iter()
.iter() .skip(1)
.skip(1) .fold(output_matrix, |current_matrix, new_colum| {
.fold(output_matrix, |current_matrix, new_colum| { current_matrix.h_stack(new_colum)
current_matrix.h_stack(new_colum) })
}), })
);
} else {
None
}
} }
#[cfg(test)] #[cfg(test)]
@@ -197,15 +193,18 @@ mod tests {
fn combine_three_columns() { fn combine_three_columns() {
assert_eq!( assert_eq!(
build_matrix_from_columns(vec![ build_matrix_from_columns(vec![
DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]), DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]).unwrap(),
DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]), DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]).unwrap(),
DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]) DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]).unwrap()
]), ]),
Some(DenseMatrix::from_2d_vec(&vec![ Some(
vec![1.0, 2.0, 3.0], DenseMatrix::from_2d_vec(&vec![
vec![1.0, 2.0, 3.0], vec![1.0, 2.0, 3.0],
vec![1.0, 2.0, 3.0] vec![1.0, 2.0, 3.0],
])) vec![1.0, 2.0, 3.0]
])
.unwrap()
)
) )
} }
@@ -287,13 +286,15 @@ mod tests {
/// sklearn. /// sklearn.
#[test] #[test]
fn fit_transform_random_values() { fn fit_transform_random_values() {
let transformed_values = let transformed_values = fit_transform_with_default_standard_scaler(
fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[ &DenseMatrix::from_2d_array(&[
&[0.1004222429, 0.2194113576, 0.9310663354, 0.3313593793], &[0.1004222429, 0.2194113576, 0.9310663354, 0.3313593793],
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
])); ])
.unwrap(),
);
println!("{transformed_values}"); println!("{transformed_values}");
assert!(transformed_values.approximate_eq( assert!(transformed_values.approximate_eq(
&DenseMatrix::from_2d_array(&[ &DenseMatrix::from_2d_array(&[
@@ -301,7 +302,8 @@ mod tests {
&[-0.7615464283, -0.7076698384, -1.1075452562, 1.2632979631], &[-0.7615464283, -0.7076698384, -1.1075452562, 1.2632979631],
&[0.4832504303, -0.6106747444, 1.0630075435, 0.5494084257], &[0.4832504303, -0.6106747444, 1.0630075435, 0.5494084257],
&[1.3936980634, 1.7215431158, -0.8839228078, -1.3855590021], &[1.3936980634, 1.7215431158, -0.8839228078, -1.3855590021],
]), ])
.unwrap(),
1.0 1.0
)) ))
} }
@@ -310,13 +312,10 @@ mod tests {
#[test] #[test]
fn fit_transform_with_zero_variance() { fn fit_transform_with_zero_variance() {
assert_eq!( assert_eq!(
fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[ fit_transform_with_default_standard_scaler(
&[1.0], &DenseMatrix::from_2d_array(&[&[1.0], &[1.0], &[1.0], &[1.0]]).unwrap()
&[1.0], ),
&[1.0], DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]).unwrap(),
&[1.0]
])),
DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]),
"When scaling values with zero variance, zero is expected as return value" "When scaling values with zero variance, zero is expected as return value"
) )
} }
@@ -331,7 +330,8 @@ mod tests {
&[1.0, 2.0, 5.0], &[1.0, 2.0, 5.0],
&[1.0, 1.0, 1.0], &[1.0, 1.0, 1.0],
&[1.0, 2.0, 5.0] &[1.0, 2.0, 5.0]
]), ])
.unwrap(),
StandardScalerParameters::default(), StandardScalerParameters::default(),
), ),
Ok(StandardScaler { Ok(StandardScaler {
@@ -354,7 +354,8 @@ mod tests {
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
]), ])
.unwrap(),
StandardScalerParameters::default(), StandardScalerParameters::default(),
) )
.unwrap(); .unwrap();
@@ -364,17 +365,18 @@ mod tests {
vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625], vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625],
); );
assert!( assert!(&DenseMatrix::<f64>::from_2d_vec(&vec![fitted_scaler.stds])
&DenseMatrix::<f64>::from_2d_vec(&vec![fitted_scaler.stds]).approximate_eq( .unwrap()
.approximate_eq(
&DenseMatrix::from_2d_array(&[&[ &DenseMatrix::from_2d_array(&[&[
0.29426447500954, 0.29426447500954,
0.16758497615485, 0.16758497615485,
0.20820945786863, 0.20820945786863,
0.23329718831165 0.23329718831165
],]), ],])
.unwrap(),
0.00000000000001 0.00000000000001
) ))
)
} }
/// If `with_std` is set to `false` the values should not be /// If `with_std` is set to `false` the values should not be
@@ -392,8 +394,9 @@ mod tests {
}; };
assert_eq!( assert_eq!(
standard_scaler.transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]])), standard_scaler
Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]])) .transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]]).unwrap()),
Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]]).unwrap())
) )
} }
@@ -413,8 +416,8 @@ mod tests {
assert_eq!( assert_eq!(
standard_scaler standard_scaler
.transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]])), .transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]]).unwrap()),
Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]])) Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]]).unwrap())
) )
} }
@@ -433,7 +436,8 @@ mod tests {
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
]), ])
.unwrap(),
StandardScalerParameters::default(), StandardScalerParameters::default(),
) )
.unwrap(); .unwrap();
@@ -446,17 +450,18 @@ mod tests {
vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625], vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625],
); );
assert!( assert!(&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds])
&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds]).approximate_eq( .unwrap()
.approximate_eq(
&DenseMatrix::from_2d_array(&[&[ &DenseMatrix::from_2d_array(&[&[
0.29426447500954, 0.29426447500954,
0.16758497615485, 0.16758497615485,
0.20820945786863, 0.20820945786863,
0.23329718831165 0.23329718831165
],]), ],])
.unwrap(),
0.00000000000001 0.00000000000001
) ))
)
} }
} }
} }
+7 -12
View File
@@ -30,7 +30,7 @@ pub struct CSVDefinition<'a> {
/// What seperates the fields in your csv-file? /// What seperates the fields in your csv-file?
field_seperator: &'a str, field_seperator: &'a str,
} }
impl<'a> Default for CSVDefinition<'a> { impl Default for CSVDefinition<'_> {
fn default() -> Self { fn default() -> Self {
Self { Self {
n_rows_header: 1, n_rows_header: 1,
@@ -83,7 +83,7 @@ where
Matrix: Array2<T>, Matrix: Array2<T>,
{ {
let csv_text = read_string_from_source(source)?; let csv_text = read_string_from_source(source)?;
let rows: Vec<Vec<T>> = extract_row_vectors_from_csv_text::<T, RowVector, Matrix>( let rows: Vec<Vec<T>> = extract_row_vectors_from_csv_text(
&csv_text, &csv_text,
&definition, &definition,
detect_row_format(&csv_text, &definition)?, detect_row_format(&csv_text, &definition)?,
@@ -103,12 +103,7 @@ where
/// Given a string containing the contents of a csv file, extract its value /// Given a string containing the contents of a csv file, extract its value
/// into row-vectors. /// into row-vectors.
fn extract_row_vectors_from_csv_text< fn extract_row_vectors_from_csv_text<'a, T: Number + RealNumber + std::str::FromStr>(
'a,
T: Number + RealNumber + std::str::FromStr,
RowVector: Array1<T>,
Matrix: Array2<T>,
>(
csv_text: &'a str, csv_text: &'a str,
definition: &'a CSVDefinition<'_>, definition: &'a CSVDefinition<'_>,
row_format: CSVRowFormat<'_>, row_format: CSVRowFormat<'_>,
@@ -243,7 +238,8 @@ mod tests {
&[5.1, 3.5, 1.4, 0.2], &[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2],
&[4.7, 3.2, 1.3, 0.2], &[4.7, 3.2, 1.3, 0.2],
])) ])
.unwrap())
) )
} }
#[test] #[test]
@@ -266,7 +262,7 @@ mod tests {
&[5.1, 3.5, 1.4, 0.2], &[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2],
&[4.7, 3.2, 1.3, 0.2], &[4.7, 3.2, 1.3, 0.2],
])) ]).unwrap())
) )
} }
#[test] #[test]
@@ -305,12 +301,11 @@ mod tests {
} }
mod extract_row_vectors_from_csv_text { mod extract_row_vectors_from_csv_text {
use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat}; use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat};
use crate::linalg::basic::matrix::DenseMatrix;
#[test] #[test]
fn read_default_csv() { fn read_default_csv() {
assert_eq!( assert_eq!(
extract_row_vectors_from_csv_text::<f64, Vec<_>, DenseMatrix<_>>( extract_row_vectors_from_csv_text::<f64>(
"column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0", "column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0",
&CSVDefinition::default(), &CSVDefinition::default(),
CSVRowFormat { CSVRowFormat {
+2 -2
View File
@@ -56,7 +56,7 @@ pub struct Kernels;
impl Kernels { impl Kernels {
/// Return a default linear /// Return a default linear
pub fn linear() -> LinearKernel { pub fn linear() -> LinearKernel {
LinearKernel::default() LinearKernel
} }
/// Return a default RBF /// Return a default RBF
pub fn rbf() -> RBFKernel { pub fn rbf() -> RBFKernel {
@@ -292,7 +292,7 @@ mod tests {
.unwrap() .unwrap()
.abs(); .abs();
assert!((4913f64 - result) < std::f64::EPSILON); assert!((4913f64 - result).abs() < f64::EPSILON);
} }
#[cfg_attr( #[cfg_attr(
+69 -73
View File
@@ -53,7 +53,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y = vec![ -1, -1, -1, -1, -1, -1, -1, -1, //! let y = vec![ -1, -1, -1, -1, -1, -1, -1, -1,
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
//! //!
@@ -322,19 +322,26 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
let (n, _) = x.shape(); let (n, _) = x.shape();
let mut y_hat: Vec<TX> = Array1::zeros(n); let mut y_hat: Vec<TX> = Array1::zeros(n);
let mut row = Vec::with_capacity(n);
for i in 0..n { for i in 0..n {
let row_pred: TX = row.clear();
self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)); row.extend(x.get_row(i).iterator(0).copied());
let row_pred: TX = self.predict_for_row(&row);
y_hat.set(i, row_pred); y_hat.set(i, row_pred);
} }
Ok(y_hat) Ok(y_hat)
} }
fn predict_for_row(&self, x: Vec<TX>) -> TX { fn predict_for_row(&self, x: &[TX]) -> TX {
let mut f = self.b.unwrap(); let mut f = self.b.unwrap();
let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
for i in 0..self.instances.as_ref().unwrap().len() { for i in 0..self.instances.as_ref().unwrap().len() {
let xj: Vec<_> = self.instances.as_ref().unwrap()[i]
.iter()
.map(|e| e.to_f64().unwrap())
.collect();
f += self.w.as_ref().unwrap()[i] f += self.w.as_ref().unwrap()[i]
* TX::from( * TX::from(
self.parameters self.parameters
@@ -343,13 +350,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi, &xj)
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
&self.instances.as_ref().unwrap()[i]
.iter()
.map(|e| e.to_f64().unwrap())
.collect(),
)
.unwrap(), .unwrap(),
) )
.unwrap(); .unwrap();
@@ -359,8 +360,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
} }
} }
impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq impl<TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
for SVC<'a, TX, TY, X, Y> for SVC<'_, TX, TY, X, Y>
{ {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two() if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two()
@@ -472,14 +473,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
let tol = self.parameters.tol; let tol = self.parameters.tol;
let good_enough = TX::from_i32(1000).unwrap(); let good_enough = TX::from_i32(1000).unwrap();
let mut x = Vec::with_capacity(n);
for _ in 0..self.parameters.epoch { for _ in 0..self.parameters.epoch {
for i in self.permutate(n) { for i in self.permutate(n) {
self.process( x.clear();
i, x.extend(self.x.get_row(i).iterator(0).take(n).copied());
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), self.process(i, &x, *self.y.get(i), &mut cache);
*self.y.get(i),
&mut cache,
);
loop { loop {
self.reprocess(tol, &mut cache); self.reprocess(tol, &mut cache);
self.find_min_max_gradient(); self.find_min_max_gradient();
@@ -511,24 +510,17 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
let mut cp = 0; let mut cp = 0;
let mut cn = 0; let mut cn = 0;
let mut x = Vec::with_capacity(n);
for i in self.permutate(n) { for i in self.permutate(n) {
x.clear();
x.extend(self.x.get_row(i).iterator(0).take(n).copied());
if *self.y.get(i) == TY::one() && cp < few { if *self.y.get(i) == TY::one() && cp < few {
if self.process( if self.process(i, &x, *self.y.get(i), cache) {
i,
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
*self.y.get(i),
cache,
) {
cp += 1; cp += 1;
} }
} else if *self.y.get(i) == TY::from(-1).unwrap() } else if *self.y.get(i) == TY::from(-1).unwrap()
&& cn < few && cn < few
&& self.process( && self.process(i, &x, *self.y.get(i), cache)
i,
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
*self.y.get(i),
cache,
)
{ {
cn += 1; cn += 1;
} }
@@ -539,7 +531,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
} }
} }
fn process(&mut self, i: usize, x: Vec<TX>, y: TY, cache: &mut Cache<TX, TY, X, Y>) -> bool { fn process(&mut self, i: usize, x: &[TX], y: TY, cache: &mut Cache<TX, TY, X, Y>) -> bool {
for j in 0..self.sv.len() { for j in 0..self.sv.len() {
if self.sv[j].index == i { if self.sv[j].index == i {
return true; return true;
@@ -551,15 +543,14 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
let mut cache_values: Vec<((usize, usize), TX)> = Vec::new(); let mut cache_values: Vec<((usize, usize), TX)> = Vec::new();
for v in self.sv.iter() { for v in self.sv.iter() {
let xi: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
let xj: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
let k = self let k = self
.parameters .parameters
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi, &xj)
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
)
.unwrap(); .unwrap();
cache_values.push(((i, v.index), TX::from(k).unwrap())); cache_values.push(((i, v.index), TX::from(k).unwrap()));
g -= v.alpha * k; g -= v.alpha * k;
@@ -578,7 +569,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
cache.insert(v.0, v.1.to_f64().unwrap()); cache.insert(v.0, v.1.to_f64().unwrap());
} }
let x_f64 = x.iter().map(|e| e.to_f64().unwrap()).collect(); let x_f64: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
let k_v = self let k_v = self
.parameters .parameters
.kernel .kernel
@@ -701,8 +692,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
let km = sv1.k; let km = sv1.k;
let gm = sv1.grad; let gm = sv1.grad;
let mut best = 0f64; let mut best = 0f64;
let xi: Vec<_> = sv1.x.iter().map(|e| e.to_f64().unwrap()).collect();
for i in 0..self.sv.len() { for i in 0..self.sv.len() {
let v = &self.sv[i]; let v = &self.sv[i];
let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
let z = v.grad - gm; let z = v.grad - gm;
let k = cache.get( let k = cache.get(
sv1, sv1,
@@ -711,10 +704,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi, &xj)
&sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(),
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
)
.unwrap(), .unwrap(),
); );
let mut curv = km + v.k - 2f64 * k; let mut curv = km + v.k - 2f64 * k;
@@ -732,6 +722,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
} }
} }
let xi: Vec<_> = self.sv[idx_1]
.x
.iter()
.map(|e| e.to_f64().unwrap())
.collect::<Vec<_>>();
idx_2.map(|idx_2| { idx_2.map(|idx_2| {
( (
idx_1, idx_1,
@@ -742,16 +738,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(
&self.sv[idx_1] &xi,
.x
.iter()
.map(|e| e.to_f64().unwrap())
.collect(),
&self.sv[idx_2] &self.sv[idx_2]
.x .x
.iter() .iter()
.map(|e| e.to_f64().unwrap()) .map(|e| e.to_f64().unwrap())
.collect(), .collect::<Vec<_>>(),
) )
.unwrap() .unwrap()
}), }),
@@ -765,8 +757,11 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
let km = sv2.k; let km = sv2.k;
let gm = sv2.grad; let gm = sv2.grad;
let mut best = 0f64; let mut best = 0f64;
let xi: Vec<_> = sv2.x.iter().map(|e| e.to_f64().unwrap()).collect();
for i in 0..self.sv.len() { for i in 0..self.sv.len() {
let v = &self.sv[i]; let v = &self.sv[i];
let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
let z = gm - v.grad; let z = gm - v.grad;
let k = cache.get( let k = cache.get(
sv2, sv2,
@@ -775,10 +770,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi, &xj)
&sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(),
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
)
.unwrap(), .unwrap(),
); );
let mut curv = km + v.k - 2f64 * k; let mut curv = km + v.k - 2f64 * k;
@@ -797,6 +789,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
} }
} }
let xj: Vec<_> = self.sv[idx_2]
.x
.iter()
.map(|e| e.to_f64().unwrap())
.collect();
idx_1.map(|idx_1| { idx_1.map(|idx_1| {
( (
idx_1, idx_1,
@@ -811,12 +809,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.x .x
.iter() .iter()
.map(|e| e.to_f64().unwrap()) .map(|e| e.to_f64().unwrap())
.collect(), .collect::<Vec<_>>(),
&self.sv[idx_2] &xj,
.x
.iter()
.map(|e| e.to_f64().unwrap())
.collect(),
) )
.unwrap() .unwrap()
}), }),
@@ -835,12 +829,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.x .x
.iter() .iter()
.map(|e| e.to_f64().unwrap()) .map(|e| e.to_f64().unwrap())
.collect(), .collect::<Vec<_>>(),
&self.sv[idx_2] &self.sv[idx_2]
.x .x
.iter() .iter()
.map(|e| e.to_f64().unwrap()) .map(|e| e.to_f64().unwrap())
.collect(), .collect::<Vec<_>>(),
) )
.unwrap(), .unwrap(),
)), )),
@@ -895,7 +889,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
self.sv[v1].alpha -= step.to_f64().unwrap(); self.sv[v1].alpha -= step.to_f64().unwrap();
self.sv[v2].alpha += step.to_f64().unwrap(); self.sv[v2].alpha += step.to_f64().unwrap();
let xi_v1: Vec<_> = self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect();
let xi_v2: Vec<_> = self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect();
for i in 0..self.sv.len() { for i in 0..self.sv.len() {
let xj: Vec<_> = self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect();
let k2 = cache.get( let k2 = cache.get(
&self.sv[v2], &self.sv[v2],
&self.sv[i], &self.sv[i],
@@ -903,10 +900,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi_v2, &xj)
&self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(),
&self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(),
)
.unwrap(), .unwrap(),
); );
let k1 = cache.get( let k1 = cache.get(
@@ -916,10 +910,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi_v1, &xj)
&self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(),
&self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(),
)
.unwrap(), .unwrap(),
); );
self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1); self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1);
@@ -966,7 +957,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<i32> = vec![ let y: Vec<i32> = vec![
-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -992,7 +984,8 @@ mod tests {
)] )]
#[test] #[test]
fn svc_fit_decision_function() { fn svc_fit_decision_function() {
let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]); let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]])
.unwrap();
let x2 = DenseMatrix::from_2d_array(&[ let x2 = DenseMatrix::from_2d_array(&[
&[3.0, 3.0], &[3.0, 3.0],
@@ -1001,7 +994,8 @@ mod tests {
&[10.0, 10.0], &[10.0, 10.0],
&[1.0, 1.0], &[1.0, 1.0],
&[0.0, 0.0], &[0.0, 0.0],
]); ])
.unwrap();
let y: Vec<i32> = vec![-1, -1, 1, 1]; let y: Vec<i32> = vec![-1, -1, 1, 1];
@@ -1054,7 +1048,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<i32> = vec![ let y: Vec<i32> = vec![
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -1103,7 +1098,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<i32> = vec![ let y: Vec<i32> = vec![
-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -1114,7 +1110,7 @@ mod tests {
let svc = SVC::fit(&x, &y, &params).unwrap(); let svc = SVC::fit(&x, &y, &params).unwrap();
// serialization // serialization
let deserialized_svc: SVC<f64, i32, _, _> = let deserialized_svc: SVC<'_, f64, i32, _, _> =
serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap(); serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap();
assert_eq!(svc, deserialized_svc); assert_eq!(svc, deserialized_svc);
+15 -15
View File
@@ -44,7 +44,7 @@
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]); //! ]).unwrap();
//! //!
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
@@ -248,19 +248,20 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
let mut y_hat: Vec<T> = Vec::<T>::zeros(n); let mut y_hat: Vec<T> = Vec::<T>::zeros(n);
let mut x_i = Vec::with_capacity(n);
for i in 0..n { for i in 0..n {
y_hat.set( x_i.clear();
i, x_i.extend(x.get_row(i).iterator(0).copied());
self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)), y_hat.set(i, self.predict_for_row(&x_i));
);
} }
Ok(y_hat) Ok(y_hat)
} }
pub(crate) fn predict_for_row(&self, x: Vec<T>) -> T { pub(crate) fn predict_for_row(&self, x: &[T]) -> T {
let mut f = self.b; let mut f = self.b;
let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
for i in 0..self.instances.as_ref().unwrap().len() { for i in 0..self.instances.as_ref().unwrap().len() {
f += self.w.as_ref().unwrap()[i] f += self.w.as_ref().unwrap()[i]
* T::from( * T::from(
@@ -270,10 +271,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
.kernel .kernel
.as_ref() .as_ref()
.unwrap() .unwrap()
.apply( .apply(&xi, &self.instances.as_ref().unwrap()[i])
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
&self.instances.as_ref().unwrap()[i],
)
.unwrap(), .unwrap(),
) )
.unwrap() .unwrap()
@@ -283,8 +281,8 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
} }
} }
impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> PartialEq impl<T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> PartialEq
for SVR<'a, T, X, Y> for SVR<'_, T, X, Y>
{ {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
if (self.b - other.b).abs() > T::epsilon() * T::two() if (self.b - other.b).abs() > T::epsilon() * T::two()
@@ -642,7 +640,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
@@ -690,7 +689,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
@@ -702,7 +702,7 @@ mod tests {
let svr = SVR::fit(&x, &y, &params).unwrap(); let svr = SVR::fit(&x, &y, &params).unwrap();
let deserialized_svr: SVR<f64, DenseMatrix<f64>, _> = let deserialized_svr: SVR<'_, f64, DenseMatrix<f64>, _> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap(); serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
assert_eq!(svr, deserialized_svr); assert_eq!(svr, deserialized_svr);
+214 -37
View File
@@ -48,7 +48,7 @@
//! &[4.9, 2.4, 3.3, 1.0], //! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3], //! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4], //! &[5.2, 2.7, 3.9, 1.4],
//! ]); //! ]).unwrap();
//! let y = vec![ 0, 0, 0, 0, 0, 0, 0, 0, //! let y = vec![ 0, 0, 0, 0, 0, 0, 0, 0,
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
//! //!
@@ -77,7 +77,9 @@ use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator}; use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed; use crate::error::Failed;
use crate::linalg::basic::arrays::MutArray;
use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1}; use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1};
use crate::linalg::basic::matrix::DenseMatrix;
use crate::numbers::basenum::Number; use crate::numbers::basenum::Number;
use crate::rand_custom::get_rng_impl; use crate::rand_custom::get_rng_impl;
@@ -116,6 +118,7 @@ pub struct DecisionTreeClassifier<
num_classes: usize, num_classes: usize,
classes: Vec<TY>, classes: Vec<TY>,
depth: u16, depth: u16,
num_features: usize,
_phantom_tx: PhantomData<TX>, _phantom_tx: PhantomData<TX>,
_phantom_x: PhantomData<X>, _phantom_x: PhantomData<X>,
_phantom_y: PhantomData<Y>, _phantom_y: PhantomData<Y>,
@@ -159,11 +162,13 @@ pub enum SplitCriterion {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct Node { struct Node {
output: usize, output: usize,
n_node_samples: usize,
split_feature: usize, split_feature: usize,
split_value: Option<f64>, split_value: Option<f64>,
split_score: Option<f64>, split_score: Option<f64>,
true_child: Option<usize>, true_child: Option<usize>,
false_child: Option<usize>, false_child: Option<usize>,
impurity: Option<f64>,
} }
impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
@@ -194,12 +199,12 @@ impl PartialEq for Node {
self.output == other.output self.output == other.output
&& self.split_feature == other.split_feature && self.split_feature == other.split_feature
&& match (self.split_value, other.split_value) { && match (self.split_value, other.split_value) {
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
(None, None) => true, (None, None) => true,
_ => false, _ => false,
} }
&& match (self.split_score, other.split_score) { && match (self.split_score, other.split_score) {
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
(None, None) => true, (None, None) => true,
_ => false, _ => false,
} }
@@ -400,14 +405,16 @@ impl Default for DecisionTreeClassifierSearchParameters {
} }
impl Node { impl Node {
fn new(output: usize) -> Self { fn new(output: usize, n_node_samples: usize) -> Self {
Node { Node {
output, output,
n_node_samples,
split_feature: 0, split_feature: 0,
split_value: Option::None, split_value: Option::None,
split_score: Option::None, split_score: Option::None,
true_child: Option::None, true_child: Option::None,
false_child: Option::None, false_child: Option::None,
impurity: Option::None,
} }
} }
} }
@@ -507,6 +514,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
num_classes: 0usize, num_classes: 0usize,
classes: vec![], classes: vec![],
depth: 0u16, depth: 0u16,
num_features: 0usize,
_phantom_tx: PhantomData, _phantom_tx: PhantomData,
_phantom_x: PhantomData, _phantom_x: PhantomData,
_phantom_y: PhantomData, _phantom_y: PhantomData,
@@ -578,7 +586,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
count[yi[i]] += samples[i]; count[yi[i]] += samples[i];
} }
let root = Node::new(which_max(&count)); let root = Node::new(which_max(&count), y_ncols);
change_nodes.push(root); change_nodes.push(root);
let mut order: Vec<Vec<usize>> = Vec::new(); let mut order: Vec<Vec<usize>> = Vec::new();
@@ -593,6 +601,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
num_classes: k, num_classes: k,
classes, classes,
depth: 0u16, depth: 0u16,
num_features: num_attributes,
_phantom_tx: PhantomData, _phantom_tx: PhantomData,
_phantom_x: PhantomData, _phantom_x: PhantomData,
_phantom_y: PhantomData, _phantom_y: PhantomData,
@@ -606,7 +615,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
visitor_queue.push_back(visitor); visitor_queue.push_back(visitor);
} }
while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) { while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) {
match visitor_queue.pop_front() { match visitor_queue.pop_front() {
Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng), Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng),
None => break, None => break,
@@ -643,7 +652,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
if node.true_child.is_none() && node.false_child.is_none() { if node.true_child.is_none() && node.false_child.is_none() {
result = node.output; result = node.output;
} else if x.get((row, node.split_feature)).to_f64().unwrap() } else if x.get((row, node.split_feature)).to_f64().unwrap()
<= node.split_value.unwrap_or(std::f64::NAN) <= node.split_value.unwrap_or(f64::NAN)
{ {
queue.push_back(node.true_child.unwrap()); queue.push_back(node.true_child.unwrap());
} else { } else {
@@ -678,16 +687,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
} }
} }
if is_pure {
return false;
}
let n = visitor.samples.iter().sum(); let n = visitor.samples.iter().sum();
if n <= self.parameters().min_samples_split {
return false;
}
let mut count = vec![0; self.num_classes]; let mut count = vec![0; self.num_classes];
let mut false_count = vec![0; self.num_classes]; let mut false_count = vec![0; self.num_classes];
for i in 0..n_rows { for i in 0..n_rows {
@@ -696,7 +696,15 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
} }
} }
let parent_impurity = impurity(&self.parameters().criterion, &count, n); self.nodes[visitor.node].impurity = Some(impurity(&self.parameters().criterion, &count, n));
if is_pure {
return false;
}
if n <= self.parameters().min_samples_split {
return false;
}
let mut variables = (0..n_attr).collect::<Vec<_>>(); let mut variables = (0..n_attr).collect::<Vec<_>>();
@@ -705,14 +713,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
} }
for variable in variables.iter().take(mtry) { for variable in variables.iter().take(mtry) {
self.find_best_split( self.find_best_split(visitor, n, &count, &mut false_count, *variable);
visitor,
n,
&count,
&mut false_count,
parent_impurity,
*variable,
);
} }
self.nodes()[visitor.node].split_score.is_some() self.nodes()[visitor.node].split_score.is_some()
@@ -724,7 +725,6 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
n: usize, n: usize,
count: &[usize], count: &[usize],
false_count: &mut [usize], false_count: &mut [usize],
parent_impurity: f64,
j: usize, j: usize,
) { ) {
let mut true_count = vec![0; self.num_classes]; let mut true_count = vec![0; self.num_classes];
@@ -760,6 +760,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
let true_label = which_max(&true_count); let true_label = which_max(&true_count);
let false_label = which_max(false_count); let false_label = which_max(false_count);
let parent_impurity = self.nodes()[visitor.node].impurity.unwrap();
let gain = parent_impurity let gain = parent_impurity
- tc as f64 / n as f64 - tc as f64 / n as f64
* impurity(&self.parameters().criterion, &true_count, tc) * impurity(&self.parameters().criterion, &true_count, tc)
@@ -804,9 +805,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
.get((i, self.nodes()[visitor.node].split_feature)) .get((i, self.nodes()[visitor.node].split_feature))
.to_f64() .to_f64()
.unwrap() .unwrap()
<= self.nodes()[visitor.node] <= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN)
.split_value
.unwrap_or(std::f64::NAN)
{ {
*true_sample = visitor.samples[i]; *true_sample = visitor.samples[i];
tc += *true_sample; tc += *true_sample;
@@ -827,9 +826,9 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
let true_child_idx = self.nodes().len(); let true_child_idx = self.nodes().len();
self.nodes.push(Node::new(visitor.true_child_output)); self.nodes.push(Node::new(visitor.true_child_output, tc));
let false_child_idx = self.nodes().len(); let false_child_idx = self.nodes().len();
self.nodes.push(Node::new(visitor.false_child_output)); self.nodes.push(Node::new(visitor.false_child_output, fc));
self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].true_child = Some(true_child_idx);
self.nodes[visitor.node].false_child = Some(false_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx);
@@ -863,11 +862,104 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
true true
} }
/// Compute feature importances for the fitted tree.
///
/// Every internal node adds its weighted impurity decrease, i.e.
/// `n_node_samples * impurity` of the node minus the same product for its
/// true and false children, to the entry of the feature it splits on.
/// The accumulated values are then divided by the sample count stored on
/// the root node (node 0).
///
/// # Arguments
///
/// * `normalize` - when `true`, the importances are rescaled so that they
///   sum to 1. The rescaling is skipped when the sum is not strictly
///   positive (for example when the tree has no split at all), so that
///   normalization never introduces a `0 / 0` division.
///
/// # Returns
///
/// A vector with `self.num_features` entries, one per feature.
///
/// # Panics
///
/// Panics if an internal node has only one child, or if an internal node
/// or one of its children has no stored impurity.
pub fn compute_feature_importances(&self, normalize: bool) -> Vec<f64> {
    let nodes = self.nodes();
    let mut importances = vec![0f64; self.num_features];

    for node in nodes.iter() {
        // Leaves do not split on any feature and contribute nothing.
        if node.true_child.is_none() && node.false_child.is_none() {
            continue;
        }
        let left = &nodes[node.true_child.unwrap()];
        let right = &nodes[node.false_child.unwrap()];

        importances[node.split_feature] += node.n_node_samples as f64 * node.impurity.unwrap()
            - left.n_node_samples as f64 * left.impurity.unwrap()
            - right.n_node_samples as f64 * right.impurity.unwrap();
    }

    // Express every decrease relative to the number of samples at the root.
    let root_samples = nodes[0].n_node_samples as f64;
    for item in importances.iter_mut() {
        *item /= root_samples;
    }

    if normalize {
        let sum = importances.iter().sum::<f64>();
        // Without this guard a tree with no contributing split would turn
        // every importance into NaN.
        if sum > 0.0 {
            for importance in importances.iter_mut() {
                *importance /= sum;
            }
        }
    }

    importances
}
/// Predict class probabilities for every row of `x`.
///
/// # Arguments
///
/// * `x` - The input samples; the first component of `x.shape()` is used as
///   the number of samples.
///
/// # Returns
///
/// A `DenseMatrix<f64>` with one row per sample and one column per entry of
/// `self.classes()`. Row `i` holds the vector produced by
/// `predict_proba_for_row` for sample `i`.
///
/// # Errors
///
/// Propagates the first error returned by `predict_proba_for_row`.
pub fn predict_proba(&self, x: &X) -> Result<DenseMatrix<f64>, Failed> {
    let n_classes = self.classes().len();
    let n_samples = x.shape().0;
    let mut probabilities = DenseMatrix::<f64>::zeros(n_samples, n_classes);

    for sample in 0..n_samples {
        let row_probs = self.predict_proba_for_row(x, sample)?;
        // Copy the per-class values of this sample into its matrix row.
        row_probs.iter().enumerate().for_each(|(class_idx, &p)| {
            probabilities.set((sample, class_idx), p);
        });
    }

    Ok(probabilities)
}
/// Predict class probabilities for one row of `x`.
///
/// The tree is walked starting at node 0. At each internal node the row's
/// value for the node's split feature is compared with the node's split
/// value (`NaN` when none is stored, which makes the comparison false):
/// `<=` follows the true child, anything else follows the false child.
///
/// # Arguments
///
/// * `x` - The input matrix containing all samples.
/// * `row` - The index of the row in `x` to evaluate.
///
/// # Returns
///
/// A vector with `self.classes().len()` entries that is `1.0` at the index
/// given by the reached leaf's `output` and `0.0` everywhere else.
///
/// # Errors
///
/// Returns `Failed::predict` if a node index is not present in the node
/// list before a leaf is reached.
fn predict_proba_for_row(&self, x: &X, row: usize) -> Result<Vec<f64>, Failed> {
    let mut idx = 0;
    loop {
        let current = match self.nodes().get(idx) {
            Some(found) => found,
            // This should never happen if the tree is properly constructed
            None => return Err(Failed::predict("Nodes iteration did not reach leaf")),
        };

        let is_leaf = current.true_child.is_none() && current.false_child.is_none();
        if is_leaf {
            // All probability mass goes to the class stored on the leaf.
            let mut one_hot = vec![0.0; self.classes().len()];
            one_hot[current.output] = 1.0;
            return Ok(one_hot);
        }

        let threshold = current.split_value.unwrap_or(f64::NAN);
        let value = x.get((row, current.split_feature)).to_f64().unwrap();
        idx = if value <= threshold {
            current.true_child.unwrap()
        } else {
            current.false_child.unwrap()
        };
    }
}
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::linalg::basic::arrays::Array;
use crate::linalg::basic::matrix::DenseMatrix; use crate::linalg::basic::matrix::DenseMatrix;
#[test] #[test]
@@ -899,17 +991,62 @@ mod tests {
)] )]
#[test] #[test]
fn gini_impurity() { fn gini_impurity() {
assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < std::f64::EPSILON); assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < f64::EPSILON);
assert!( assert!(
(impurity(&SplitCriterion::Entropy, &[7, 3], 10) - 0.8812908992306927).abs() (impurity(&SplitCriterion::Entropy, &[7, 3], 10) - 0.8812908992306927).abs()
< std::f64::EPSILON < f64::EPSILON
); );
assert!( assert!(
(impurity(&SplitCriterion::ClassificationError, &[7, 3], 10) - 0.3).abs() (impurity(&SplitCriterion::ClassificationError, &[7, 3], 10) - 0.3).abs()
< std::f64::EPSILON < f64::EPSILON
); );
} }
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn test_predict_proba() {
    // Ten samples with four features each: the first five are labelled 0,
    // the last five are labelled 1.
    let features: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[4.7, 3.2, 1.3, 0.2],
        &[4.6, 3.1, 1.5, 0.2],
        &[5.0, 3.6, 1.4, 0.2],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
        &[6.9, 3.1, 4.9, 1.5],
        &[5.5, 2.3, 4.0, 1.3],
        &[6.5, 2.8, 4.6, 1.5],
    ])
    .unwrap();
    let labels: Vec<usize> = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];

    let classifier = DecisionTreeClassifier::fit(&features, &labels, Default::default()).unwrap();
    let proba = classifier.predict_proba(&features).unwrap();

    // One row per sample, one column per class.
    assert_eq!(proba.shape(), (10, 2));

    for sample in 0..10 {
        let total: f64 = proba.get_row(sample).sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Row probabilities should sum to 1"
        );
    }

    // Every training sample must favour its own label over the other class:
    // class 0 for the first five rows, class 1 for the last five.
    for (sample, expected) in labels.iter().enumerate() {
        let other = 1 - *expected;
        assert!(proba.get((sample, *expected)) > proba.get((sample, other)));
    }
}
#[cfg_attr( #[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")), all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test::wasm_bindgen_test wasm_bindgen_test::wasm_bindgen_test
@@ -938,7 +1075,8 @@ mod tests {
&[4.9, 2.4, 3.3, 1.0], &[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3], &[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4], &[5.2, 2.7, 3.9, 1.4],
]); ])
.unwrap();
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
assert_eq!( assert_eq!(
@@ -1005,7 +1143,8 @@ mod tests {
&[0., 0., 1., 1.], &[0., 0., 1., 1.],
&[0., 0., 0., 0.], &[0., 0., 0., 0.],
&[0., 0., 0., 1.], &[0., 0., 0., 1.],
]); ])
.unwrap();
let y: Vec<u32> = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; let y: Vec<u32> = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
assert_eq!( assert_eq!(
@@ -1016,6 +1155,43 @@ mod tests {
); );
} }
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn test_compute_feature_importances() {
    // Twenty binary samples with four features each.
    let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
        &[1., 1., 1., 0.],
        &[1., 1., 1., 0.],
        &[1., 1., 1., 1.],
        &[1., 1., 0., 0.],
        &[1., 1., 0., 1.],
        &[1., 0., 1., 0.],
        &[1., 0., 1., 0.],
        &[1., 0., 1., 1.],
        &[1., 0., 0., 0.],
        &[1., 0., 0., 1.],
        &[0., 1., 1., 0.],
        &[0., 1., 1., 0.],
        &[0., 1., 1., 1.],
        &[0., 1., 0., 0.],
        &[0., 1., 0., 1.],
        &[0., 0., 1., 0.],
        &[0., 0., 1., 0.],
        &[0., 0., 1., 1.],
        &[0., 0., 0., 0.],
        &[0., 0., 0., 1.],
    ])
    .unwrap();
    let y: Vec<u32> = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
    let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();

    // Importances are computed floats, so compare element-wise within a
    // tolerance (as the impurity test in this module does) instead of
    // requiring bit-exact equality.
    let assert_close = |actual: Vec<f64>, expected: &[f64]| {
        assert_eq!(actual.len(), expected.len());
        for (a, e) in actual.iter().zip(expected.iter()) {
            assert!((a - e).abs() < f64::EPSILON, "expected {}, got {}", e, a);
        }
    };

    // Raw importances.
    assert_close(
        tree.compute_feature_importances(false),
        &[0., 0., 0.21333333333333332, 0.26666666666666666],
    );
    // Normalized importances sum to 1.
    assert_close(
        tree.compute_feature_importances(true),
        &[0., 0., 0.4444444444444444, 0.5555555555555556],
    );
}
#[cfg_attr( #[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")), all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test::wasm_bindgen_test wasm_bindgen_test::wasm_bindgen_test
@@ -1044,7 +1220,8 @@ mod tests {
&[0., 0., 1., 1.], &[0., 0., 1., 1.],
&[0., 0., 0., 0.], &[0., 0., 0., 0.],
&[0., 0., 0., 1.], &[0., 0., 0., 1.],
]); ])
.unwrap();
let y = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; let y = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
+13 -13
View File
@@ -39,7 +39,7 @@
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]); //! ]).unwrap();
//! let y: Vec<f64> = vec![ //! let y: Vec<f64> = vec![
//! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, //! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0,
//! 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, //! 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9,
@@ -311,15 +311,15 @@ impl Node {
impl PartialEq for Node { impl PartialEq for Node {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
(self.output - other.output).abs() < std::f64::EPSILON (self.output - other.output).abs() < f64::EPSILON
&& self.split_feature == other.split_feature && self.split_feature == other.split_feature
&& match (self.split_value, other.split_value) { && match (self.split_value, other.split_value) {
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
(None, None) => true, (None, None) => true,
_ => false, _ => false,
} }
&& match (self.split_score, other.split_score) { && match (self.split_score, other.split_score) {
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
(None, None) => true, (None, None) => true,
_ => false, _ => false,
} }
@@ -478,7 +478,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
visitor_queue.push_back(visitor); visitor_queue.push_back(visitor);
} }
while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) { while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) {
match visitor_queue.pop_front() { match visitor_queue.pop_front() {
Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng), Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng),
None => break, None => break,
@@ -515,7 +515,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
if node.true_child.is_none() && node.false_child.is_none() { if node.true_child.is_none() && node.false_child.is_none() {
result = node.output; result = node.output;
} else if x.get((row, node.split_feature)).to_f64().unwrap() } else if x.get((row, node.split_feature)).to_f64().unwrap()
<= node.split_value.unwrap_or(std::f64::NAN) <= node.split_value.unwrap_or(f64::NAN)
{ {
queue.push_back(node.true_child.unwrap()); queue.push_back(node.true_child.unwrap());
} else { } else {
@@ -640,9 +640,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
.get((i, self.nodes()[visitor.node].split_feature)) .get((i, self.nodes()[visitor.node].split_feature))
.to_f64() .to_f64()
.unwrap() .unwrap()
<= self.nodes()[visitor.node] <= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN)
.split_value
.unwrap_or(std::f64::NAN)
{ {
*true_sample = visitor.samples[i]; *true_sample = visitor.samples[i];
tc += *true_sample; tc += *true_sample;
@@ -753,7 +751,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,
@@ -767,7 +766,7 @@ mod tests {
assert!((y_hat[i] - y[i]).abs() < 0.1); assert!((y_hat[i] - y[i]).abs() < 0.1);
} }
let expected_y = vec![ let expected_y = [
87.3, 87.3, 87.3, 87.3, 98.9, 98.9, 98.9, 98.9, 98.9, 107.9, 107.9, 107.9, 114.85, 87.3, 87.3, 87.3, 87.3, 98.9, 98.9, 98.9, 98.9, 98.9, 107.9, 107.9, 107.9, 114.85,
114.85, 114.85, 114.85, 114.85, 114.85, 114.85,
]; ];
@@ -788,7 +787,7 @@ mod tests {
assert!((y_hat[i] - expected_y[i]).abs() < 0.1); assert!((y_hat[i] - expected_y[i]).abs() < 0.1);
} }
let expected_y = vec![ let expected_y = [
83.0, 88.35, 88.35, 89.5, 97.15, 97.15, 99.5, 99.5, 101.2, 104.6, 109.6, 109.6, 113.4, 83.0, 88.35, 88.35, 89.5, 97.15, 97.15, 99.5, 99.5, 101.2, 104.6, 109.6, 109.6, 113.4,
113.4, 116.30, 116.30, 113.4, 116.30, 116.30,
]; ];
@@ -834,7 +833,8 @@ mod tests {
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]); ])
.unwrap();
let y: Vec<f64> = vec![ let y: Vec<f64> = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9, 114.2, 115.7, 116.9,