Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
76d1ef610d | ||
|
|
4092e24c2a | ||
|
|
17dc9f3bbf | ||
|
|
c8ec8fec00 | ||
|
|
3da433f757 |
@@ -19,14 +19,13 @@ jobs:
|
||||
{ os: "ubuntu", target: "i686-unknown-linux-gnu" },
|
||||
{ os: "ubuntu", target: "wasm32-unknown-unknown" },
|
||||
{ os: "macos", target: "aarch64-apple-darwin" },
|
||||
{ os: "ubuntu", target: "wasm32-wasi" },
|
||||
]
|
||||
env:
|
||||
TZ: "/usr/share/zoneinfo/your/location"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache .cargo and target
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo
|
||||
@@ -36,16 +35,13 @@ jobs:
|
||||
- name: Install Rust toolchain
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: 1.81 # 1.82 seems to break wasm32 tests https://github.com/rustwasm/wasm-bindgen/issues/4274
|
||||
toolchain: stable
|
||||
target: ${{ matrix.platform.target }}
|
||||
profile: minimal
|
||||
default: true
|
||||
- name: Install test runner for wasm
|
||||
if: matrix.platform.target == 'wasm32-unknown-unknown'
|
||||
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
|
||||
- name: Install test runner for wasi
|
||||
if: matrix.platform.target == 'wasm32-wasi'
|
||||
run: curl https://wasmtime.dev/install.sh -sSf | bash
|
||||
- name: Stable Build with all features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -65,12 +61,6 @@ jobs:
|
||||
- name: Tests in WASM
|
||||
if: matrix.platform.target == 'wasm32-unknown-unknown'
|
||||
run: wasm-pack test --node -- --all-features
|
||||
- name: Tests in WASI
|
||||
if: matrix.platform.target == 'wasm32-wasi'
|
||||
run: |
|
||||
export WASMTIME_HOME="$HOME/.wasmtime"
|
||||
export PATH="$WASMTIME_HOME/bin:$PATH"
|
||||
cargo install cargo-wasi && cargo wasi test
|
||||
|
||||
check_features:
|
||||
runs-on: "${{ matrix.platform.os }}-latest"
|
||||
@@ -81,9 +71,9 @@ jobs:
|
||||
env:
|
||||
TZ: "/usr/share/zoneinfo/your/location"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache .cargo and target
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo
|
||||
|
||||
@@ -12,9 +12,9 @@ jobs:
|
||||
env:
|
||||
TZ: "/usr/share/zoneinfo/your/location"
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache .cargo
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo
|
||||
|
||||
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Cache .cargo and target
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo
|
||||
|
||||
+1
-1
@@ -2,7 +2,7 @@
|
||||
name = "smartcore"
|
||||
description = "Machine Learning in Rust."
|
||||
homepage = "https://smartcorelib.org"
|
||||
version = "0.4.0"
|
||||
version = "0.4.1"
|
||||
authors = ["smartcore Developers"]
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -18,4 +18,4 @@
|
||||
-----
|
||||
[](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
|
||||
|
||||
To start getting familiar with the new smartcore v0.3 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
|
||||
To start getting familiar with the new smartcore v0.4 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
|
||||
|
||||
@@ -173,6 +173,21 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Return order dissimilarities from closest to furthest
|
||||
///
|
||||
#[allow(dead_code)]
|
||||
pub fn ordered_pairs(&self) -> std::vec::IntoIter<&PairwiseDistance<T>> {
|
||||
// improvement: implement this to return `impl Iterator<Item = &PairwiseDistance<T>>`
|
||||
// need to implement trait `Iterator` for `Vec<&PairwiseDistance<T>>`
|
||||
let mut distances = self
|
||||
.distances
|
||||
.values()
|
||||
.collect::<Vec<&PairwiseDistance<T>>>();
|
||||
distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
distances.into_iter()
|
||||
}
|
||||
|
||||
//
|
||||
// Compute distances from input to all other points in data-structure.
|
||||
// input is the row index of the sample matrix
|
||||
@@ -212,7 +227,9 @@ mod tests_fastpair {
|
||||
use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix};
|
||||
|
||||
/// Brute force algorithm, used only for comparison and testing
|
||||
pub fn closest_pair_brute(fastpair: &FastPair<f64, DenseMatrix<f64>>) -> PairwiseDistance<f64> {
|
||||
pub fn closest_pair_brute(
|
||||
fastpair: &FastPair<'_, f64, DenseMatrix<f64>>,
|
||||
) -> PairwiseDistance<f64> {
|
||||
use itertools::Itertools;
|
||||
let m = fastpair.samples.shape().0;
|
||||
|
||||
@@ -586,4 +603,103 @@ mod tests_fastpair {
|
||||
|
||||
assert_eq!(closest, min_dissimilarity);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fastpair_ordered_pairs() {
|
||||
let x = DenseMatrix::<f64>::from_2d_array(&[
|
||||
&[5.1, 3.5, 1.4, 0.2],
|
||||
&[4.9, 3.0, 1.4, 0.2],
|
||||
&[4.7, 3.2, 1.3, 0.2],
|
||||
&[4.6, 3.1, 1.5, 0.2],
|
||||
&[5.0, 3.6, 1.4, 0.2],
|
||||
&[5.4, 3.9, 1.7, 0.4],
|
||||
&[4.9, 3.1, 1.5, 0.1],
|
||||
&[7.0, 3.2, 4.7, 1.4],
|
||||
&[6.4, 3.2, 4.5, 1.5],
|
||||
&[6.9, 3.1, 4.9, 1.5],
|
||||
&[5.5, 2.3, 4.0, 1.3],
|
||||
&[6.5, 2.8, 4.6, 1.5],
|
||||
&[4.6, 3.4, 1.4, 0.3],
|
||||
&[5.0, 3.4, 1.5, 0.2],
|
||||
&[4.4, 2.9, 1.4, 0.2],
|
||||
])
|
||||
.unwrap();
|
||||
let fastpair = FastPair::new(&x).unwrap();
|
||||
|
||||
let ordered = fastpair.ordered_pairs();
|
||||
|
||||
let mut previous: f64 = -1.0;
|
||||
for p in ordered {
|
||||
if previous == -1.0 {
|
||||
previous = p.distance.unwrap();
|
||||
} else {
|
||||
let current = p.distance.unwrap();
|
||||
assert!(current >= previous);
|
||||
previous = current;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_set() {
|
||||
let empty_matrix = DenseMatrix::<f64>::zeros(0, 0);
|
||||
let result = FastPair::new(&empty_matrix);
|
||||
assert!(result.is_err());
|
||||
if let Err(e) = result {
|
||||
assert_eq!(
|
||||
e,
|
||||
Failed::because(FailedError::FindFailed, "min number of rows should be 3")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_point() {
|
||||
let single_point = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap();
|
||||
let result = FastPair::new(&single_point);
|
||||
assert!(result.is_err());
|
||||
if let Err(e) = result {
|
||||
assert_eq!(
|
||||
e,
|
||||
Failed::because(FailedError::FindFailed, "min number of rows should be 3")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_two_points() {
|
||||
let two_points = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
let result = FastPair::new(&two_points);
|
||||
assert!(result.is_err());
|
||||
if let Err(e) = result {
|
||||
assert_eq!(
|
||||
e,
|
||||
Failed::because(FailedError::FindFailed, "min number of rows should be 3")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_three_identical_points() {
|
||||
let identical_points =
|
||||
DenseMatrix::from_2d_array(&[&[1.0, 1.0], &[1.0, 1.0], &[1.0, 1.0]]).unwrap();
|
||||
let result = FastPair::new(&identical_points);
|
||||
assert!(result.is_ok());
|
||||
let fastpair = result.unwrap();
|
||||
let closest_pair = fastpair.closest_pair();
|
||||
assert_eq!(closest_pair.distance, Some(0.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_result_unwrapping() {
|
||||
let valid_matrix =
|
||||
DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0], &[7.0, 8.0]])
|
||||
.unwrap();
|
||||
|
||||
let result = FastPair::new(&valid_matrix);
|
||||
assert!(result.is_ok());
|
||||
|
||||
// This should not panic
|
||||
let _fastpair = result.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
clippy::approx_constant
|
||||
)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(rustdoc::missing_doc_code_examples)]
|
||||
|
||||
//! # smartcore
|
||||
//!
|
||||
|
||||
+13
-13
@@ -91,7 +91,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'_, T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
@@ -142,7 +142,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_mut<'b>(&'b mut self, axis: u8) -> Box<dyn Iterator<Item = &mut T> + 'b> {
|
||||
fn iter_mut<'b>(&'b mut self, axis: u8) -> Box<dyn Iterator<Item = &'b mut T> + 'b> {
|
||||
let column_major = self.column_major;
|
||||
let stride = self.stride;
|
||||
let ptr = self.values.as_mut_ptr();
|
||||
@@ -169,7 +169,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'_, T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
@@ -493,7 +493,7 @@ impl<T: Number + RealNumber> EVDDecomposable<T> for DenseMatrix<T> {}
|
||||
impl<T: Number + RealNumber> LUDecomposable<T> for DenseMatrix<T> {}
|
||||
impl<T: Number + RealNumber> SVDDecomposable<T> for DenseMatrix<T> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'_, T> {
|
||||
fn get(&self, pos: (usize, usize)) -> &T {
|
||||
if self.column_major {
|
||||
&self.values[pos.0 + pos.1 * self.stride]
|
||||
@@ -515,7 +515,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMa
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<'_, T> {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
if self.nrows == 1 {
|
||||
if self.column_major {
|
||||
@@ -553,11 +553,11 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixView<'a, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixView<'_, T> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'a, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'_, T> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'_, T> {
|
||||
fn get(&self, pos: (usize, usize)) -> &T {
|
||||
if self.column_major {
|
||||
&self.values[pos.0 + pos.1 * self.stride]
|
||||
@@ -579,9 +579,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMa
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
||||
for DenseMatrixMutView<'a, T>
|
||||
{
|
||||
impl<T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> for DenseMatrixMutView<'_, T> {
|
||||
fn set(&mut self, pos: (usize, usize), x: T) {
|
||||
if self.column_major {
|
||||
self.values[pos.0 + pos.1 * self.stride] = x;
|
||||
@@ -595,15 +593,16 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrixMutView<'a, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrixMutView<'_, T> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixMutView<'a, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixMutView<'_, T> {}
|
||||
|
||||
impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {}
|
||||
|
||||
impl<T: RealNumber> MatrixPreprocessing<T> for DenseMatrix<T> {}
|
||||
|
||||
#[cfg(test)]
|
||||
#[warn(clippy::reversed_empty_ranges)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use approx::relative_eq;
|
||||
@@ -664,6 +663,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_instantiate_err_view3() {
|
||||
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||
#[allow(clippy::reversed_empty_ranges)]
|
||||
let v = DenseMatrixView::new(&x, 0..3, 4..3);
|
||||
assert!(v.is_err());
|
||||
}
|
||||
|
||||
@@ -119,7 +119,7 @@ impl<T: Debug + Display + Copy + Sized> Array1<T> for Vec<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'_, T> {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
&self.ptr[i]
|
||||
}
|
||||
@@ -138,7 +138,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'a, T
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'_, T> {
|
||||
fn set(&mut self, i: usize, x: T) {
|
||||
self.ptr[i] = x;
|
||||
}
|
||||
@@ -149,10 +149,10 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'a
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for VecMutView<'a, T> {}
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1<T> for VecMutView<'a, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for VecMutView<'_, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for VecMutView<'_, T> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'a, T> {
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'_, T> {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
&self.ptr[i]
|
||||
}
|
||||
@@ -171,7 +171,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for VecView<'a, T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for VecView<'_, T> {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -68,7 +68,7 @@ impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayBase<OwnedRepr<T>
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayView<'a, T, Ix2> {
|
||||
impl<T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayView<'_, T, Ix2> {
|
||||
fn get(&self, pos: (usize, usize)) -> &T {
|
||||
&self[[pos.0, pos.1]]
|
||||
}
|
||||
@@ -144,11 +144,9 @@ impl<T: Number + RealNumber> EVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2>
|
||||
impl<T: Number + RealNumber> LUDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
||||
impl<T: Number + RealNumber> SVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayView<'a, T, Ix2> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayView<'_, T, Ix2> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)>
|
||||
for ArrayViewMut<'a, T, Ix2>
|
||||
{
|
||||
impl<T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayViewMut<'_, T, Ix2> {
|
||||
fn get(&self, pos: (usize, usize)) -> &T {
|
||||
&self[[pos.0, pos.1]]
|
||||
}
|
||||
@@ -175,9 +173,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)>
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
||||
for ArrayViewMut<'a, T, Ix2>
|
||||
{
|
||||
impl<T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> for ArrayViewMut<'_, T, Ix2> {
|
||||
fn set(&mut self, pos: (usize, usize), x: T) {
|
||||
self[[pos.0, pos.1]] = x
|
||||
}
|
||||
@@ -195,9 +191,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayViewMut<'a, T, Ix2> {}
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayViewMut<'_, T, Ix2> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayViewMut<'a, T, Ix2> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayViewMut<'_, T, Ix2> {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -41,7 +41,7 @@ impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayBase<OwnedRepr<T>
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayBase<OwnedRepr<T>, Ix1> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'a, T, Ix1> {
|
||||
impl<T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'_, T, Ix1> {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
&self[i]
|
||||
}
|
||||
@@ -60,9 +60,9 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'a
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayView<'a, T, Ix1> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayView<'_, T, Ix1> {}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut<'a, T, Ix1> {
|
||||
impl<T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut<'_, T, Ix1> {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
&self[i]
|
||||
}
|
||||
@@ -81,7 +81,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<'a, T, Ix1> {
|
||||
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<'_, T, Ix1> {
|
||||
fn set(&mut self, i: usize, x: T) {
|
||||
self[i] = x;
|
||||
}
|
||||
@@ -92,8 +92,8 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayViewMut<'a, T, Ix1> {}
|
||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayViewMut<'a, T, Ix1> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayViewMut<'_, T, Ix1> {}
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayViewMut<'_, T, Ix1> {}
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> Array1<T> for ArrayBase<OwnedRepr<T>, Ix1> {
|
||||
fn slice<'a>(&'a self, range: Range<usize>) -> Box<dyn ArrayView1<T> + 'a> {
|
||||
|
||||
@@ -142,7 +142,6 @@ pub trait MatrixPreprocessing<T: RealNumber>: MutArrayView2<T> + Clone {
|
||||
///
|
||||
/// assert_eq!(a, expected);
|
||||
/// ```
|
||||
|
||||
fn binarize_mut(&mut self, threshold: T) {
|
||||
let (nrows, ncols) = self.shape();
|
||||
for row in 0..nrows {
|
||||
|
||||
@@ -258,8 +258,8 @@ impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y:
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||
for BinaryObjectiveFunction<'a, T, X>
|
||||
impl<T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||
for BinaryObjectiveFunction<'_, T, X>
|
||||
{
|
||||
fn f(&self, w_bias: &[T]) -> T {
|
||||
let mut f = T::zero();
|
||||
@@ -313,8 +313,8 @@ struct MultiClassObjectiveFunction<'a, T: Number + FloatNumber, X: Array2<T>> {
|
||||
_phantom_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<'a, T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||
for MultiClassObjectiveFunction<'a, T, X>
|
||||
impl<T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||
for MultiClassObjectiveFunction<'_, T, X>
|
||||
{
|
||||
fn f(&self, w_bias: &[T]) -> T {
|
||||
let mut f = T::zero();
|
||||
|
||||
@@ -257,8 +257,7 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
|
||||
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
||||
/// * `x` - training data.
|
||||
/// * `y` - vector with target values (classes) of length N.
|
||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
|
||||
/// priors are adjusted according to the data.
|
||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
|
||||
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
|
||||
/// * `binarize` - Threshold for binarizing.
|
||||
fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>(
|
||||
|
||||
@@ -174,8 +174,7 @@ impl<TY: Number + Ord + Unsigned> GaussianNBDistribution<TY> {
|
||||
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
||||
/// * `x` - training data.
|
||||
/// * `y` - vector with target values (classes) of length N.
|
||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
|
||||
/// priors are adjusted according to the data.
|
||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
|
||||
pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>(
|
||||
x: &X,
|
||||
y: &Y,
|
||||
|
||||
+473
-36
@@ -40,7 +40,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1};
|
||||
use crate::numbers::basenum::Number;
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{cmp::Ordering, marker::PhantomData};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// Distribution used in the Naive Bayes classifier.
|
||||
pub(crate) trait NBDistribution<X: Number, Y: Number>: Clone {
|
||||
@@ -93,42 +93,42 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: NBDistribution<TX,
|
||||
/// Returns a vector of size N with class estimates.
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
let y_classes = self.distribution.classes();
|
||||
let predictions = x
|
||||
.row_iter()
|
||||
.map(|row| {
|
||||
y_classes
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(class_index, class)| {
|
||||
(
|
||||
class,
|
||||
self.distribution.log_likelihood(class_index, &row)
|
||||
+ self.distribution.prior(class_index).ln(),
|
||||
)
|
||||
})
|
||||
// For some reason, the max_by method cannot use NaNs for finding the maximum value, it panics.
|
||||
// NaN must be considered as minimum values,
|
||||
// therefore it's like NaNs would not be considered for choosing the maximum value.
|
||||
// So we need to handle this case for avoiding panicking by using `Option::unwrap`.
|
||||
.max_by(|(_, p1), (_, p2)| match p1.partial_cmp(p2) {
|
||||
Some(ordering) => ordering,
|
||||
None => {
|
||||
if p1.is_nan() {
|
||||
Ordering::Less
|
||||
} else if p2.is_nan() {
|
||||
Ordering::Greater
|
||||
|
||||
if y_classes.is_empty() {
|
||||
return Err(Failed::predict("Failed to predict, no classes available"));
|
||||
}
|
||||
|
||||
let (rows, _) = x.shape();
|
||||
let mut predictions = Vec::with_capacity(rows);
|
||||
let mut all_probs_nan = true;
|
||||
|
||||
for row_index in 0..rows {
|
||||
let row = x.get_row(row_index);
|
||||
let mut max_log_prob = f64::NEG_INFINITY;
|
||||
let mut max_class = None;
|
||||
|
||||
for (class_index, class) in y_classes.iter().enumerate() {
|
||||
let log_likelihood = self.distribution.log_likelihood(class_index, &row);
|
||||
let log_prob = log_likelihood + self.distribution.prior(class_index).ln();
|
||||
|
||||
if !log_prob.is_nan() && log_prob > max_log_prob {
|
||||
max_log_prob = log_prob;
|
||||
max_class = Some(*class);
|
||||
all_probs_nan = false;
|
||||
}
|
||||
}
|
||||
|
||||
predictions.push(max_class.unwrap_or(y_classes[0]));
|
||||
}
|
||||
|
||||
if all_probs_nan {
|
||||
Err(Failed::predict(
|
||||
"Failed to predict, all probabilities were NaN",
|
||||
))
|
||||
} else {
|
||||
Ordering::Equal
|
||||
Ok(Y::from_vec_slice(&predictions))
|
||||
}
|
||||
}
|
||||
})
|
||||
.map(|(prediction, _probability)| *prediction)
|
||||
.ok_or_else(|| Failed::predict("Failed to predict, there is no result"))
|
||||
})
|
||||
.collect::<Result<Vec<TY>, Failed>>()?;
|
||||
let y_hat = Y::from_vec_slice(&predictions);
|
||||
Ok(y_hat)
|
||||
}
|
||||
}
|
||||
pub mod bernoulli;
|
||||
pub mod categorical;
|
||||
@@ -147,7 +147,7 @@ mod tests {
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
struct TestDistribution<'d>(&'d Vec<i32>);
|
||||
|
||||
impl<'d> NBDistribution<i32, i32> for TestDistribution<'d> {
|
||||
impl NBDistribution<i32, i32> for TestDistribution<'_> {
|
||||
fn prior(&self, _class_index: usize) -> f64 {
|
||||
1.
|
||||
}
|
||||
@@ -177,7 +177,7 @@ mod tests {
|
||||
Ok(_) => panic!("Should return error in case of empty classes"),
|
||||
Err(err) => assert_eq!(
|
||||
err.to_string(),
|
||||
"Predict failed: Failed to predict, there is no result"
|
||||
"Predict failed: Failed to predict, no classes available"
|
||||
),
|
||||
}
|
||||
|
||||
@@ -193,4 +193,441 @@ mod tests {
|
||||
Err(_) => panic!("Should success in normal case without NaNs"),
|
||||
}
|
||||
}
|
||||
|
||||
// A simple test distribution using float
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
struct TestDistributionAgain {
|
||||
classes: Vec<u32>,
|
||||
probs: Vec<f64>,
|
||||
}
|
||||
|
||||
impl NBDistribution<f64, u32> for TestDistributionAgain {
|
||||
fn classes(&self) -> &Vec<u32> {
|
||||
&self.classes
|
||||
}
|
||||
fn prior(&self, class_index: usize) -> f64 {
|
||||
self.probs[class_index]
|
||||
}
|
||||
fn log_likelihood<'a>(
|
||||
&'a self,
|
||||
class_index: usize,
|
||||
_j: &'a Box<dyn ArrayView1<f64> + 'a>,
|
||||
) -> f64 {
|
||||
self.probs[class_index].ln()
|
||||
}
|
||||
}
|
||||
|
||||
type TestNB = BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, TestDistributionAgain>;
|
||||
|
||||
#[test]
|
||||
fn test_predict_empty_classes() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![],
|
||||
probs: vec![],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
assert!(nb.predict(&x).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_single_class() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1],
|
||||
probs: vec![1.0],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
let result = nb.predict(&x).unwrap();
|
||||
assert_eq!(result, vec![1, 1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_multiple_classes() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1, 2, 3],
|
||||
probs: vec![0.2, 0.5, 0.3],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0]]).unwrap();
|
||||
let result = nb.predict(&x).unwrap();
|
||||
assert_eq!(result, vec![2, 2, 2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_with_nans() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1, 2],
|
||||
probs: vec![f64::NAN, 0.5],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
let result = nb.predict(&x).unwrap();
|
||||
assert_eq!(result, vec![2, 2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_all_nans() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1, 2],
|
||||
probs: vec![f64::NAN, f64::NAN],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
assert!(nb.predict(&x).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_extreme_probabilities() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1, 2],
|
||||
probs: vec![1e-300, 1e-301],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
let result = nb.predict(&x).unwrap();
|
||||
assert_eq!(result, vec![1, 1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_with_infinity() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1, 2, 3],
|
||||
probs: vec![f64::INFINITY, 1.0, 2.0],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
let result = nb.predict(&x).unwrap();
|
||||
assert_eq!(result, vec![1, 1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predict_with_negative_infinity() {
|
||||
let dist = TestDistributionAgain {
|
||||
classes: vec![1, 2, 3],
|
||||
probs: vec![f64::NEG_INFINITY, 1.0, 2.0],
|
||||
};
|
||||
let nb = TestNB::fit(dist).unwrap();
|
||||
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||
let result = nb.predict(&x).unwrap();
|
||||
assert_eq!(result, vec![3, 3]);
|
||||
}
|
||||
|
||||
/// End-to-end numerical-stability test for `BaseNaiveBayes` driven by a
/// hand-rolled Gaussian likelihood distribution.
///
/// It trains on a fully deterministic dataset, then probes prediction with:
/// repeated calls (determinism), `f64::MAX` inputs (must error, not panic),
/// `f64::NAN` inputs (must error), `f64::MIN_POSITIVE` and `1e-300` inputs
/// (must still yield valid class labels).
#[test]
fn test_gaussian_naive_bayes_numerical_stability() {
    // Test-local distribution: per-class per-feature Gaussian parameters.
    #[derive(Debug, PartialEq, Clone)]
    struct GaussianTestDistribution {
        classes: Vec<u32>,        // sorted, distinct class labels
        means: Vec<Vec<f64>>,     // means[class][feature]
        variances: Vec<Vec<f64>>, // variances[class][feature]
        priors: Vec<f64>,         // priors[class], sums to 1
    }

    impl NBDistribution<f64, u32> for GaussianTestDistribution {
        fn classes(&self) -> &Vec<u32> {
            &self.classes
        }

        fn prior(&self, class_index: usize) -> f64 {
            self.priors[class_index]
        }

        /// Sum over features of the Gaussian log-density
        /// ln N(x_i; mean_i, var_i), assuming conditional independence.
        fn log_likelihood<'a>(
            &'a self,
            class_index: usize,
            j: &'a Box<dyn ArrayView1<f64> + 'a>,
        ) -> f64 {
            let means = &self.means[class_index];
            let variances = &self.variances[class_index];
            j.iterator(0)
                .enumerate()
                .map(|(i, &xi)| {
                    let mean = means[i];
                    let var = variances[i] + 1e-9; // Small smoothing for numerical stability
                    // ln(1 / sqrt(2*pi*var)) term of the Gaussian log-pdf.
                    let coeff = -0.5 * (2.0 * std::f64::consts::PI * var).ln();
                    let exponent = -(xi - mean).powi(2) / (2.0 * var);
                    coeff + exponent
                })
                .sum()
        }
    }

    /// Maximum-likelihood fit of per-class means/variances and priors.
    /// Variances are the biased (divide-by-n) estimates plus a small epsilon.
    fn train_distribution(x: &DenseMatrix<f64>, y: &[u32]) -> GaussianTestDistribution {
        // Distinct labels, sorted so class indices are stable.
        let mut classes: Vec<u32> = y
            .iter()
            .cloned()
            .collect::<std::collections::HashSet<u32>>()
            .into_iter()
            .collect();
        classes.sort();
        let n_classes = classes.len();
        let n_features = x.shape().1;

        let mut means = vec![vec![0.0; n_features]; n_classes];
        let mut variances = vec![vec![0.0; n_features]; n_classes];
        let mut class_counts = vec![0; n_classes];

        // Calculate means and count samples per class
        for (sample, &class) in x.row_iter().zip(y.iter()) {
            let class_idx = classes.iter().position(|&c| c == class).unwrap();
            class_counts[class_idx] += 1;
            for (i, &value) in sample.iterator(0).enumerate() {
                means[class_idx][i] += value;
            }
        }

        // Normalize means
        for (class_idx, mean) in means.iter_mut().enumerate() {
            for value in mean.iter_mut() {
                *value /= class_counts[class_idx] as f64;
            }
        }

        // Calculate variances (second pass: sum of squared deviations)
        for (sample, &class) in x.row_iter().zip(y.iter()) {
            let class_idx = classes.iter().position(|&c| c == class).unwrap();
            for (i, &value) in sample.iterator(0).enumerate() {
                let diff = value - means[class_idx][i];
                variances[class_idx][i] += diff * diff;
            }
        }

        // Normalize variances and add small epsilon to avoid zero variance
        let epsilon = 1e-9;
        for (class_idx, variance) in variances.iter_mut().enumerate() {
            for value in variance.iter_mut() {
                *value = *value / class_counts[class_idx] as f64 + epsilon;
            }
        }

        // Calculate priors (relative class frequencies)
        let total_samples = y.len() as f64;
        let priors: Vec<f64> = class_counts
            .iter()
            .map(|&count| count as f64 / total_samples)
            .collect();

        GaussianTestDistribution {
            classes,
            means,
            variances,
            priors,
        }
    }

    type TestNBGaussian =
        BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, GaussianTestDistribution>;

    // Create a constant training dataset (no RNG: fully reproducible).
    let n_samples = 1000;
    let n_features = 5;
    let n_classes = 4;

    let mut x_data = Vec::with_capacity(n_samples * n_features);
    let mut y_data = Vec::with_capacity(n_samples);

    // Deterministic feature pattern; labels cycle through the classes.
    for i in 0..n_samples {
        for j in 0..n_features {
            x_data.push((i * j) as f64 % 10.0);
        }
        y_data.push((i % n_classes) as u32);
    }

    // NOTE(review): the trailing `true` flag presumably selects the memory
    // layout (column-major?) of DenseMatrix::new — confirm against its docs.
    let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
    let y = y_data;

    // Train the model
    let dist = train_distribution(&x, &y);
    let nb = TestNBGaussian::fit(dist).unwrap();

    // Create constant test data
    let n_test_samples = 100;
    let mut test_x_data = Vec::with_capacity(n_test_samples * n_features);
    for i in 0..n_test_samples {
        for j in 0..n_features {
            test_x_data.push((i * j * 2) as f64 % 15.0);
        }
    }
    let test_x = DenseMatrix::new(n_test_samples, n_features, test_x_data, true).unwrap();

    // Make predictions
    let predictions = nb
        .predict(&test_x)
        .map_err(|e| format!("Prediction failed: {}", e))
        .unwrap();

    // Check numerical stability: one prediction per test row.
    assert_eq!(
        predictions.len(),
        n_test_samples,
        "Number of predictions should match number of test samples"
    );

    // Check that all predictions are valid class labels
    for &pred in predictions.iter() {
        assert!(pred < n_classes as u32, "Predicted class should be valid");
    }

    // Check consistency of predictions (same input must give same output)
    let repeated_predictions = nb
        .predict(&test_x)
        .map_err(|e| format!("Repeated prediction failed: {}", e))
        .unwrap();
    assert_eq!(
        predictions, repeated_predictions,
        "Predictions should be consistent when repeated"
    );

    // Check extreme values: f64::MAX overflows the likelihood to NaN and the
    // predictor is expected to surface that as an error, not a panic.
    let extreme_x =
        DenseMatrix::new(2, n_features, vec![f64::MAX; n_features * 2], true).unwrap();
    let extreme_predictions = nb.predict(&extreme_x);
    assert!(
        extreme_predictions.is_err(),
        "Extreme value input should result in an error"
    );
    assert_eq!(
        extreme_predictions.unwrap_err().to_string(),
        "Predict failed: Failed to predict, all probabilities were NaN",
        "Incorrect error message for extreme values"
    );

    // Check for NaN handling
    let nan_x = DenseMatrix::new(2, n_features, vec![f64::NAN; n_features * 2], true).unwrap();
    let nan_predictions = nb.predict(&nan_x);
    assert!(
        nan_predictions.is_err(),
        "NaN input should result in an error"
    );

    // Check for very small values (smallest positive normal f64)
    let small_x =
        DenseMatrix::new(2, n_features, vec![f64::MIN_POSITIVE; n_features * 2], true).unwrap();
    let small_predictions = nb
        .predict(&small_x)
        .map_err(|e| format!("Small value prediction failed: {}", e))
        .unwrap();
    for &pred in small_predictions.iter() {
        assert!(
            pred < n_classes as u32,
            "Predictions for very small values should be valid"
        );
    }

    // Check for values close to zero (denormal-adjacent magnitude)
    let near_zero_x =
        DenseMatrix::new(2, n_features, vec![1e-300; n_features * 2], true).unwrap();
    let near_zero_predictions = nb
        .predict(&near_zero_x)
        .map_err(|e| format!("Near-zero value prediction failed: {}", e))
        .unwrap();
    for &pred in near_zero_predictions.iter() {
        assert!(
            pred < n_classes as u32,
            "Predictions for near-zero values should be valid"
        );
    }

    println!("All numerical stability checks passed!");
}
|
||||
|
||||
/// Numerical-stability test for `GaussianNB` on reproducible pseudo-random
/// data: fit on 1000 samples drawn from a fixed-seed LCG, predict on the
/// same samples, and verify every prediction is a valid class label.
#[test]
fn test_gaussian_naive_bayes_numerical_stability_random_data() {
    /// Minimal deterministic linear congruential generator so the test needs
    /// no external RNG crates and is reproducible across runs.
    #[derive(Debug)]
    struct Lcg64 {
        state: u64,
    }

    impl Lcg64 {
        fn seeded(seed: u64) -> Self {
            Lcg64 { state: seed }
        }

        /// Advance the generator and return the next raw 64-bit value.
        fn raw(&mut self) -> u64 {
            // Widely used LCG multiplier with increment 1.
            self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
            self.state
        }

        /// Uniform `f64` in `[lo, hi)`.
        fn uniform(&mut self, lo: f64, hi: f64) -> f64 {
            let fraction = (self.raw() as f64) / (u64::MAX as f64);
            lo + fraction * (hi - lo)
        }

        /// Uniform `usize` in `[lo, hi)`; floors the float sample and clamps
        /// to guard against floating-point rounding at the edges.
        fn pick(&mut self, lo: usize, hi: usize) -> usize {
            let sampled = self.uniform(lo as f64, hi as f64).floor() as isize;
            sampled.max(lo as isize).min((hi - 1) as isize) as usize
        }
    }

    use crate::naive_bayes::gaussian::GaussianNB;

    // Fixed seed => identical data and predictions on every run.
    let mut rng = Lcg64::seeded(42);

    let n_samples = 1000;
    let n_features = 5;
    let n_classes = 4;

    let mut x_data = Vec::with_capacity(n_samples * n_features);
    let mut y_data = Vec::with_capacity(n_samples);

    // Features uniform in [-10, 10); labels uniform over the class ids.
    for _ in 0..n_samples {
        x_data.extend((0..n_features).map(|_| rng.uniform(-10.0, 10.0)));
        y_data.push(rng.pick(0, n_classes) as u32);
    }

    let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();

    // Fit and then predict on the training data itself; any NaN/overflow in
    // the likelihoods would surface as an error here.
    let gnb = GaussianNB::fit(&x, &y_data, Default::default())
        .expect("Fitting GaussianNB with random data failed.");
    let predictions = gnb.predict(&x).expect("Prediction on random data failed.");

    // One prediction per sample, each a valid class label.
    assert_eq!(
        predictions.len(),
        n_samples,
        "Prediction size must match n_samples"
    );
    for &pred_class in &predictions {
        assert!(
            (pred_class as usize) < n_classes,
            "Predicted class {} is out of range [0..n_classes).",
            pred_class
        );
    }
}
|
||||
}
|
||||
|
||||
@@ -207,8 +207,7 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
|
||||
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
||||
/// * `x` - training data.
|
||||
/// * `y` - vector with target values (classes) of length N.
|
||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
|
||||
/// priors are adjusted according to the data.
|
||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
|
||||
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
|
||||
pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>(
|
||||
x: &X,
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
//! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
|
||||
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
|
||||
//! ```
|
||||
use std::iter;
|
||||
use std::iter::repeat_n;
|
||||
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::basic::arrays::Array2;
|
||||
@@ -75,11 +75,7 @@ fn find_new_idxs(num_params: usize, cat_sizes: &[usize], cat_idxs: &[usize]) ->
|
||||
let offset = (0..1).chain(offset_);
|
||||
|
||||
let new_param_idxs: Vec<usize> = (0..num_params)
|
||||
.zip(
|
||||
repeats
|
||||
.zip(offset)
|
||||
.flat_map(|(r, o)| iter::repeat(o).take(r)),
|
||||
)
|
||||
.zip(repeats.zip(offset).flat_map(|(r, o)| repeat_n(o, r)))
|
||||
.map(|(idx, ofst)| idx + ofst)
|
||||
.collect();
|
||||
new_param_idxs
|
||||
@@ -124,7 +120,7 @@ impl OneHotEncoder {
|
||||
let (nrows, _) = data.shape();
|
||||
|
||||
// col buffer to avoid allocations
|
||||
let mut col_buf: Vec<T> = iter::repeat(T::zero()).take(nrows).collect();
|
||||
let mut col_buf: Vec<T> = repeat_n(T::zero(), nrows).collect();
|
||||
|
||||
let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());
|
||||
|
||||
|
||||
@@ -172,18 +172,14 @@ where
|
||||
T: Number + RealNumber,
|
||||
M: Array2<T>,
|
||||
{
|
||||
if let Some(output_matrix) = columns.first().cloned() {
|
||||
return Some(
|
||||
columns.first().cloned().map(|output_matrix| {
|
||||
columns
|
||||
.iter()
|
||||
.skip(1)
|
||||
.fold(output_matrix, |current_matrix, new_colum| {
|
||||
current_matrix.h_stack(new_colum)
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
+1
-1
@@ -30,7 +30,7 @@ pub struct CSVDefinition<'a> {
|
||||
/// What seperates the fields in your csv-file?
|
||||
field_seperator: &'a str,
|
||||
}
|
||||
impl<'a> Default for CSVDefinition<'a> {
|
||||
impl Default for CSVDefinition<'_> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
n_rows_header: 1,
|
||||
|
||||
+3
-3
@@ -360,8 +360,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
||||
for SVC<'a, TX, TY, X, Y>
|
||||
impl<TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
||||
for SVC<'_, TX, TY, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two()
|
||||
@@ -1110,7 +1110,7 @@ mod tests {
|
||||
let svc = SVC::fit(&x, &y, ¶ms).unwrap();
|
||||
|
||||
// serialization
|
||||
let deserialized_svc: SVC<f64, i32, _, _> =
|
||||
let deserialized_svc: SVC<'_, f64, i32, _, _> =
|
||||
serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(svc, deserialized_svc);
|
||||
|
||||
+3
-3
@@ -281,8 +281,8 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> PartialEq
|
||||
for SVR<'a, T, X, Y>
|
||||
impl<T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> PartialEq
|
||||
for SVR<'_, T, X, Y>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
if (self.b - other.b).abs() > T::epsilon() * T::two()
|
||||
@@ -702,7 +702,7 @@ mod tests {
|
||||
|
||||
let svr = SVR::fit(&x, &y, ¶ms).unwrap();
|
||||
|
||||
let deserialized_svr: SVR<f64, DenseMatrix<f64>, _> =
|
||||
let deserialized_svr: SVR<'_, f64, DenseMatrix<f64>, _> =
|
||||
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
|
||||
|
||||
assert_eq!(svr, deserialized_svr);
|
||||
|
||||
@@ -77,7 +77,9 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::{Predictor, SupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::basic::arrays::MutArray;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1};
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::rand_custom::get_rng_impl;
|
||||
|
||||
@@ -887,11 +889,77 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
}
|
||||
importances
|
||||
}
|
||||
|
||||
/// Predict class probabilities for the input samples.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `x` - The input samples as a matrix where each row is a sample and each column is a feature.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `Result` containing a `DenseMatrix<f64>` where each row corresponds to a sample and each column
|
||||
/// corresponds to a class. The values represent the probability of the sample belonging to each class.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if at least one row prediction process fails.
|
||||
pub fn predict_proba(&self, x: &X) -> Result<DenseMatrix<f64>, Failed> {
|
||||
let (n_samples, _) = x.shape();
|
||||
let n_classes = self.classes().len();
|
||||
let mut result = DenseMatrix::<f64>::zeros(n_samples, n_classes);
|
||||
|
||||
for i in 0..n_samples {
|
||||
let probs = self.predict_proba_for_row(x, i)?;
|
||||
for (j, &prob) in probs.iter().enumerate() {
|
||||
result.set((i, j), prob);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Predict class probabilities for a single input sample.
///
/// Walks the tree from the root, following `true_child` when the sample's
/// split-feature value is `<=` the node's split value, until a leaf is
/// reached. The returned distribution is degenerate: probability 1.0 for the
/// leaf's output class and 0.0 for every other class (no smoothing by leaf
/// sample counts).
///
/// # Arguments
///
/// * `x` - The input matrix containing all samples.
/// * `row` - The index of the row in `x` for which to predict probabilities.
///
/// # Returns
///
/// A vector of probabilities, one for each class, representing the probability
/// of the input sample belonging to each class.
///
/// # Errors
///
/// Returns a predict `Failed` if traversal walks off the node array without
/// reaching a leaf (malformed tree).
fn predict_proba_for_row(&self, x: &X, row: usize) -> Result<Vec<f64>, Failed> {
    // Start at the root; node indices index into self.nodes().
    let mut node = 0;

    while let Some(current_node) = self.nodes().get(node) {
        if current_node.true_child.is_none() && current_node.false_child.is_none() {
            // Leaf node reached: one-hot distribution on the leaf's class.
            let mut probs = vec![0.0; self.classes().len()];
            probs[current_node.output] = 1.0;
            return Ok(probs);
        }

        let split_feature = current_node.split_feature;
        // A missing split value becomes NaN; any `<=` comparison with NaN is
        // false, so such nodes always route to the false branch.
        let split_value = current_node.split_value.unwrap_or(f64::NAN);

        // NOTE(review): the `unwrap()`s assume an internal node always has the
        // child it routes to — panics if the tree is malformed; confirm the
        // builder guarantees this invariant.
        if x.get((row, split_feature)).to_f64().unwrap() <= split_value {
            node = current_node.true_child.unwrap();
        } else {
            node = current_node.false_child.unwrap();
        }
    }

    // This should never happen if the tree is properly constructed
    Err(Failed::predict("Nodes iteration did not reach leaf"))
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::basic::arrays::Array;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
#[test]
|
||||
@@ -934,6 +1002,51 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn test_predict_proba() {
    // Two linearly separable iris-like clusters: rows 0-4 are class 0,
    // rows 5-9 are class 1.
    let features: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[4.7, 3.2, 1.3, 0.2],
        &[4.6, 3.1, 1.5, 0.2],
        &[5.0, 3.6, 1.4, 0.2],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
        &[6.9, 3.1, 4.9, 1.5],
        &[5.5, 2.3, 4.0, 1.3],
        &[6.5, 2.8, 4.6, 1.5],
    ])
    .unwrap();
    let labels: Vec<usize> = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];

    let model = DecisionTreeClassifier::fit(&features, &labels, Default::default()).unwrap();
    let probs = model.predict_proba(&features).unwrap();

    // One row per sample, one column per class.
    assert_eq!(probs.shape(), (10, 2));

    // Every row must be a valid probability distribution.
    for r in 0..10 {
        let total: f64 = probs.get_row(r).sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Row probabilities should sum to 1"
        );
    }

    // The first half of the samples should favour class 0...
    for r in 0..5 {
        assert!(probs.get((r, 0)) > probs.get((r, 1)));
    }

    // ...and the second half class 1.
    for r in 5..10 {
        assert!(probs.get((r, 1)) > probs.get((r, 0)));
    }
}
|
||||
|
||||
#[cfg_attr(
|
||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||
wasm_bindgen_test::wasm_bindgen_test
|
||||
|
||||
Reference in New Issue
Block a user