Compare commits
25 Commits
release-0.3
...
v0.4.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
76d1ef610d | ||
|
|
4092e24c2a | ||
|
|
17dc9f3bbf | ||
|
|
c8ec8fec00 | ||
|
|
3da433f757 | ||
|
|
4523ac73ff | ||
|
|
ba75f9ffad | ||
|
|
239c00428f | ||
|
|
80a93c1a0e | ||
|
|
4eadd16ce4 | ||
|
|
886b5631b7 | ||
|
|
9c07925d8a | ||
|
|
6f22bbd150 | ||
|
|
dbdc2b2a77 | ||
|
|
2d7c055154 | ||
|
|
545ed6ce2b | ||
|
|
8939ed93b9 | ||
|
|
9cd7348403 | ||
|
|
d52830a818 | ||
|
|
d15ea43975 | ||
|
|
f498f9629e | ||
|
|
7d059c4fb1 | ||
|
|
c7353d0b57 | ||
|
|
83dcf9a8ac | ||
|
|
3126ee87d3 |
@@ -37,6 +37,8 @@ $ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213
|
|||||||
```
|
```
|
||||||
* find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file.
|
* find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file.
|
||||||
|
|
||||||
|
* Please take a look at the output of a profiler to spot the most evident performance problems; see [this guide about using a profiler](http://www.codeofview.com/fix-rs/2017/01/24/how-to-optimize-rust-programs-on-linux/).
|
||||||
|
|
||||||
## Issue Report Process
|
## Issue Report Process
|
||||||
|
|
||||||
1. Go to the project's issues.
|
1. Go to the project's issues.
|
||||||
|
|||||||
@@ -19,14 +19,13 @@ jobs:
|
|||||||
{ os: "ubuntu", target: "i686-unknown-linux-gnu" },
|
{ os: "ubuntu", target: "i686-unknown-linux-gnu" },
|
||||||
{ os: "ubuntu", target: "wasm32-unknown-unknown" },
|
{ os: "ubuntu", target: "wasm32-unknown-unknown" },
|
||||||
{ os: "macos", target: "aarch64-apple-darwin" },
|
{ os: "macos", target: "aarch64-apple-darwin" },
|
||||||
{ os: "ubuntu", target: "wasm32-wasi" },
|
|
||||||
]
|
]
|
||||||
env:
|
env:
|
||||||
TZ: "/usr/share/zoneinfo/your/location"
|
TZ: "/usr/share/zoneinfo/your/location"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- name: Cache .cargo and target
|
- name: Cache .cargo and target
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cargo
|
~/.cargo
|
||||||
@@ -43,9 +42,6 @@ jobs:
|
|||||||
- name: Install test runner for wasm
|
- name: Install test runner for wasm
|
||||||
if: matrix.platform.target == 'wasm32-unknown-unknown'
|
if: matrix.platform.target == 'wasm32-unknown-unknown'
|
||||||
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
|
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
|
||||||
- name: Install test runner for wasi
|
|
||||||
if: matrix.platform.target == 'wasm32-wasi'
|
|
||||||
run: curl https://wasmtime.dev/install.sh -sSf | bash
|
|
||||||
- name: Stable Build with all features
|
- name: Stable Build with all features
|
||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
@@ -65,12 +61,6 @@ jobs:
|
|||||||
- name: Tests in WASM
|
- name: Tests in WASM
|
||||||
if: matrix.platform.target == 'wasm32-unknown-unknown'
|
if: matrix.platform.target == 'wasm32-unknown-unknown'
|
||||||
run: wasm-pack test --node -- --all-features
|
run: wasm-pack test --node -- --all-features
|
||||||
- name: Tests in WASI
|
|
||||||
if: matrix.platform.target == 'wasm32-wasi'
|
|
||||||
run: |
|
|
||||||
export WASMTIME_HOME="$HOME/.wasmtime"
|
|
||||||
export PATH="$WASMTIME_HOME/bin:$PATH"
|
|
||||||
cargo install cargo-wasi && cargo wasi test
|
|
||||||
|
|
||||||
check_features:
|
check_features:
|
||||||
runs-on: "${{ matrix.platform.os }}-latest"
|
runs-on: "${{ matrix.platform.os }}-latest"
|
||||||
@@ -81,9 +71,9 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
TZ: "/usr/share/zoneinfo/your/location"
|
TZ: "/usr/share/zoneinfo/your/location"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- name: Cache .cargo and target
|
- name: Cache .cargo and target
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cargo
|
~/.cargo
|
||||||
|
|||||||
@@ -12,9 +12,9 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
TZ: "/usr/share/zoneinfo/your/location"
|
TZ: "/usr/share/zoneinfo/your/location"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Cache .cargo
|
- name: Cache .cargo
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cargo
|
~/.cargo
|
||||||
@@ -41,4 +41,4 @@ jobs:
|
|||||||
- name: Upload to codecov.io
|
- name: Upload to codecov.io
|
||||||
uses: codecov/codecov-action@v2
|
uses: codecov/codecov-action@v2
|
||||||
with:
|
with:
|
||||||
fail_ci_if_error: true
|
fail_ci_if_error: false
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Cache .cargo and target
|
- name: Cache .cargo and target
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.cargo
|
~/.cargo
|
||||||
|
|||||||
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.4.0] - 2023-04-05
|
||||||
|
|
||||||
|
## Added
|
||||||
|
- WARNING: Breaking changes!
|
||||||
|
- `DenseMatrix` constructor now returns `Result` to avoid user instantiating inconsistent rows/cols count. Their return values need to be unwrapped with `unwrap()`, see tests
|
||||||
|
|
||||||
## [0.3.0] - 2022-11-09
|
## [0.3.0] - 2022-11-09
|
||||||
|
|
||||||
## Added
|
## Added
|
||||||
|
|||||||
+3
-3
@@ -2,7 +2,7 @@
|
|||||||
name = "smartcore"
|
name = "smartcore"
|
||||||
description = "Machine Learning in Rust."
|
description = "Machine Learning in Rust."
|
||||||
homepage = "https://smartcorelib.org"
|
homepage = "https://smartcorelib.org"
|
||||||
version = "0.3.0"
|
version = "0.4.1"
|
||||||
authors = ["smartcore Developers"]
|
authors = ["smartcore Developers"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
@@ -42,13 +42,13 @@ std_rand = ["rand/std_rng", "rand/std"]
|
|||||||
js = ["getrandom/js"]
|
js = ["getrandom/js"]
|
||||||
|
|
||||||
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
||||||
getrandom = { version = "*", optional = true }
|
getrandom = { version = "0.2.8", optional = true }
|
||||||
|
|
||||||
[target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies]
|
[target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies]
|
||||||
wasm-bindgen-test = "0.3"
|
wasm-bindgen-test = "0.3"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
itertools = "*"
|
itertools = "0.13.0"
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
bincode = "1.3.1"
|
bincode = "1.3.1"
|
||||||
|
|
||||||
|
|||||||
@@ -18,4 +18,4 @@
|
|||||||
-----
|
-----
|
||||||
[](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
|
[](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
|
||||||
|
|
||||||
To start getting familiar with the new smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
|
To start getting familiar with the new smartcore v0.4 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
|
||||||
|
|||||||
@@ -1,15 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<module type="RUST_MODULE" version="4">
|
|
||||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
|
||||||
<exclude-output />
|
|
||||||
<content url="file://$MODULE_DIR$">
|
|
||||||
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
|
||||||
<sourceFolder url="file://$MODULE_DIR$/examples" isTestSource="false" />
|
|
||||||
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
|
|
||||||
<sourceFolder url="file://$MODULE_DIR$/benches" isTestSource="true" />
|
|
||||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
|
||||||
</content>
|
|
||||||
<orderEntry type="inheritedJdk" />
|
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
|
||||||
</component>
|
|
||||||
</module>
|
|
||||||
@@ -40,11 +40,11 @@ impl BBDTreeNode {
|
|||||||
|
|
||||||
impl BBDTree {
|
impl BBDTree {
|
||||||
pub fn new<T: Number, M: Array2<T>>(data: &M) -> BBDTree {
|
pub fn new<T: Number, M: Array2<T>>(data: &M) -> BBDTree {
|
||||||
let nodes = Vec::new();
|
let nodes: Vec<BBDTreeNode> = Vec::new();
|
||||||
|
|
||||||
let (n, _) = data.shape();
|
let (n, _) = data.shape();
|
||||||
|
|
||||||
let index = (0..n).collect::<Vec<_>>();
|
let index = (0..n).collect::<Vec<usize>>();
|
||||||
|
|
||||||
let mut tree = BBDTree {
|
let mut tree = BBDTree {
|
||||||
nodes,
|
nodes,
|
||||||
@@ -343,7 +343,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let tree = BBDTree::new(&data);
|
let tree = BBDTree::new(&data);
|
||||||
|
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
|||||||
current_cover_set.push((d, &self.root));
|
current_cover_set.push((d, &self.root));
|
||||||
|
|
||||||
let mut heap = HeapSelection::with_capacity(k);
|
let mut heap = HeapSelection::with_capacity(k);
|
||||||
heap.add(std::f64::MAX);
|
heap.add(f64::MAX);
|
||||||
|
|
||||||
let mut empty_heap = true;
|
let mut empty_heap = true;
|
||||||
if !self.identical_excluded || self.get_data_value(self.root.idx) != p {
|
if !self.identical_excluded || self.get_data_value(self.root.idx) != p {
|
||||||
@@ -145,7 +145,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let upper_bound = if empty_heap {
|
let upper_bound = if empty_heap {
|
||||||
std::f64::INFINITY
|
f64::INFINITY
|
||||||
} else {
|
} else {
|
||||||
*heap.peek()
|
*heap.peek()
|
||||||
};
|
};
|
||||||
@@ -291,7 +291,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
|||||||
} else {
|
} else {
|
||||||
let max_dist = self.max(point_set);
|
let max_dist = self.max(point_set);
|
||||||
let next_scale = (max_scale - 1).min(self.get_scale(max_dist));
|
let next_scale = (max_scale - 1).min(self.get_scale(max_dist));
|
||||||
if next_scale == std::i64::MIN {
|
if next_scale == i64::MIN {
|
||||||
let mut children: Vec<Node> = Vec::new();
|
let mut children: Vec<Node> = Vec::new();
|
||||||
let mut leaf = self.new_leaf(p);
|
let mut leaf = self.new_leaf(p);
|
||||||
children.push(leaf);
|
children.push(leaf);
|
||||||
@@ -435,7 +435,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
|||||||
|
|
||||||
fn get_scale(&self, d: f64) -> i64 {
|
fn get_scale(&self, d: f64) -> i64 {
|
||||||
if d == 0f64 {
|
if d == 0f64 {
|
||||||
std::i64::MIN
|
i64::MIN
|
||||||
} else {
|
} else {
|
||||||
(self.inv_log_base * d.ln()).ceil() as i64
|
(self.inv_log_base * d.ln()).ceil() as i64
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
/// &[4.6, 3.1, 1.5, 0.2],
|
/// &[4.6, 3.1, 1.5, 0.2],
|
||||||
/// &[5.0, 3.6, 1.4, 0.2],
|
/// &[5.0, 3.6, 1.4, 0.2],
|
||||||
/// &[5.4, 3.9, 1.7, 0.4],
|
/// &[5.4, 3.9, 1.7, 0.4],
|
||||||
/// ]);
|
/// ]).unwrap();
|
||||||
/// let fastpair = FastPair::new(&x);
|
/// let fastpair = FastPair::new(&x);
|
||||||
/// let closest_pair: PairwiseDistance<f64> = fastpair.unwrap().closest_pair();
|
/// let closest_pair: PairwiseDistance<f64> = fastpair.unwrap().closest_pair();
|
||||||
/// ```
|
/// ```
|
||||||
@@ -52,10 +52,8 @@ pub struct FastPair<'a, T: RealNumber + FloatNumber, M: Array2<T>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
||||||
///
|
|
||||||
/// Constructor
|
/// Constructor
|
||||||
/// Instantiate and inizialise the algorithm
|
/// Instantiate and initialize the algorithm
|
||||||
///
|
|
||||||
pub fn new(m: &'a M) -> Result<Self, Failed> {
|
pub fn new(m: &'a M) -> Result<Self, Failed> {
|
||||||
if m.shape().0 < 3 {
|
if m.shape().0 < 3 {
|
||||||
return Err(Failed::because(
|
return Err(Failed::because(
|
||||||
@@ -74,10 +72,8 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
|||||||
Ok(init)
|
Ok(init)
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
/// Initialise `FastPair` by passing an `Array2`.
|
/// Initialise `FastPair` by passing an `Array2`.
|
||||||
/// Build a FastPairs data-structure from a set of (new) points.
|
/// Build a FastPairs data-structure from a set of (new) points.
|
||||||
///
|
|
||||||
fn init(&mut self) {
|
fn init(&mut self) {
|
||||||
// basic measures
|
// basic measures
|
||||||
let len = self.samples.shape().0;
|
let len = self.samples.shape().0;
|
||||||
@@ -158,9 +154,7 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
|||||||
self.neighbours = neighbours;
|
self.neighbours = neighbours;
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
/// Find closest pair by scanning list of nearest neighbors.
|
/// Find closest pair by scanning list of nearest neighbors.
|
||||||
///
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn closest_pair(&self) -> PairwiseDistance<T> {
|
pub fn closest_pair(&self) -> PairwiseDistance<T> {
|
||||||
let mut a = self.neighbours[0]; // Start with first point
|
let mut a = self.neighbours[0]; // Start with first point
|
||||||
@@ -179,6 +173,21 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Return order dissimilarities from closest to furthest
|
||||||
|
///
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn ordered_pairs(&self) -> std::vec::IntoIter<&PairwiseDistance<T>> {
|
||||||
|
// improvement: implement this to return `impl Iterator<Item = &PairwiseDistance<T>>`
|
||||||
|
// need to implement trait `Iterator` for `Vec<&PairwiseDistance<T>>`
|
||||||
|
let mut distances = self
|
||||||
|
.distances
|
||||||
|
.values()
|
||||||
|
.collect::<Vec<&PairwiseDistance<T>>>();
|
||||||
|
distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||||
|
distances.into_iter()
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Compute distances from input to all other points in data-structure.
|
// Compute distances from input to all other points in data-structure.
|
||||||
// input is the row index of the sample matrix
|
// input is the row index of the sample matrix
|
||||||
@@ -217,10 +226,10 @@ mod tests_fastpair {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix};
|
use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix};
|
||||||
|
|
||||||
///
|
|
||||||
/// Brute force algorithm, used only for comparison and testing
|
/// Brute force algorithm, used only for comparison and testing
|
||||||
///
|
pub fn closest_pair_brute(
|
||||||
pub fn closest_pair_brute(fastpair: &FastPair<f64, DenseMatrix<f64>>) -> PairwiseDistance<f64> {
|
fastpair: &FastPair<'_, f64, DenseMatrix<f64>>,
|
||||||
|
) -> PairwiseDistance<f64> {
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
let m = fastpair.samples.shape().0;
|
let m = fastpair.samples.shape().0;
|
||||||
|
|
||||||
@@ -260,8 +269,8 @@ mod tests_fastpair {
|
|||||||
let distances = fastpair.distances;
|
let distances = fastpair.distances;
|
||||||
let neighbours = fastpair.neighbours;
|
let neighbours = fastpair.neighbours;
|
||||||
|
|
||||||
assert!(distances.len() != 0);
|
assert!(!distances.is_empty());
|
||||||
assert!(neighbours.len() != 0);
|
assert!(!neighbours.is_empty());
|
||||||
|
|
||||||
assert_eq!(10, neighbours.len());
|
assert_eq!(10, neighbours.len());
|
||||||
assert_eq!(10, distances.len());
|
assert_eq!(10, distances.len());
|
||||||
@@ -271,28 +280,24 @@ mod tests_fastpair {
|
|||||||
fn dataset_has_at_least_three_points() {
|
fn dataset_has_at_least_three_points() {
|
||||||
// Create a dataset which consists of only two points:
|
// Create a dataset which consists of only two points:
|
||||||
// A(0.0, 0.0) and B(1.0, 1.0).
|
// A(0.0, 0.0) and B(1.0, 1.0).
|
||||||
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]);
|
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]).unwrap();
|
||||||
|
|
||||||
// We expect an error when we run `FastPair` on this dataset,
|
// We expect an error when we run `FastPair` on this dataset,
|
||||||
// because `FastPair` currently only works on a minimum of 3
|
// because `FastPair` currently only works on a minimum of 3
|
||||||
// points.
|
// points.
|
||||||
let _fastpair = FastPair::new(&dataset);
|
let fastpair = FastPair::new(&dataset);
|
||||||
|
assert!(fastpair.is_err());
|
||||||
|
|
||||||
match _fastpair {
|
if let Err(e) = fastpair {
|
||||||
Err(e) => {
|
let expected_error =
|
||||||
let expected_error =
|
Failed::because(FailedError::FindFailed, "min number of rows should be 3");
|
||||||
Failed::because(FailedError::FindFailed, "min number of rows should be 3");
|
assert_eq!(e, expected_error)
|
||||||
assert_eq!(e, expected_error)
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
assert!(false);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn one_dimensional_dataset_minimal() {
|
fn one_dimensional_dataset_minimal() {
|
||||||
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0], &[2.0], &[9.0]]);
|
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[0.0], &[2.0], &[9.0]]).unwrap();
|
||||||
|
|
||||||
let result = FastPair::new(&dataset);
|
let result = FastPair::new(&dataset);
|
||||||
assert!(result.is_ok());
|
assert!(result.is_ok());
|
||||||
@@ -312,7 +317,8 @@ mod tests_fastpair {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn one_dimensional_dataset_2() {
|
fn one_dimensional_dataset_2() {
|
||||||
let dataset = DenseMatrix::<f64>::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]);
|
let dataset =
|
||||||
|
DenseMatrix::<f64>::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]).unwrap();
|
||||||
|
|
||||||
let result = FastPair::new(&dataset);
|
let result = FastPair::new(&dataset);
|
||||||
assert!(result.is_ok());
|
assert!(result.is_ok());
|
||||||
@@ -347,7 +353,8 @@ mod tests_fastpair {
|
|||||||
&[6.9, 3.1, 4.9, 1.5],
|
&[6.9, 3.1, 4.9, 1.5],
|
||||||
&[5.5, 2.3, 4.0, 1.3],
|
&[5.5, 2.3, 4.0, 1.3],
|
||||||
&[6.5, 2.8, 4.6, 1.5],
|
&[6.5, 2.8, 4.6, 1.5],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let fastpair = FastPair::new(&x);
|
let fastpair = FastPair::new(&x);
|
||||||
assert!(fastpair.is_ok());
|
assert!(fastpair.is_ok());
|
||||||
|
|
||||||
@@ -520,7 +527,8 @@ mod tests_fastpair {
|
|||||||
&[6.9, 3.1, 4.9, 1.5],
|
&[6.9, 3.1, 4.9, 1.5],
|
||||||
&[5.5, 2.3, 4.0, 1.3],
|
&[5.5, 2.3, 4.0, 1.3],
|
||||||
&[6.5, 2.8, 4.6, 1.5],
|
&[6.5, 2.8, 4.6, 1.5],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
// compute
|
// compute
|
||||||
let fastpair = FastPair::new(&x);
|
let fastpair = FastPair::new(&x);
|
||||||
assert!(fastpair.is_ok());
|
assert!(fastpair.is_ok());
|
||||||
@@ -568,7 +576,8 @@ mod tests_fastpair {
|
|||||||
&[6.9, 3.1, 4.9, 1.5],
|
&[6.9, 3.1, 4.9, 1.5],
|
||||||
&[5.5, 2.3, 4.0, 1.3],
|
&[5.5, 2.3, 4.0, 1.3],
|
||||||
&[6.5, 2.8, 4.6, 1.5],
|
&[6.5, 2.8, 4.6, 1.5],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
// compute
|
// compute
|
||||||
let fastpair = FastPair::new(&x);
|
let fastpair = FastPair::new(&x);
|
||||||
assert!(fastpair.is_ok());
|
assert!(fastpair.is_ok());
|
||||||
@@ -582,7 +591,7 @@ mod tests_fastpair {
|
|||||||
};
|
};
|
||||||
for p in dissimilarities.iter() {
|
for p in dissimilarities.iter() {
|
||||||
if p.distance.unwrap() < min_dissimilarity.distance.unwrap() {
|
if p.distance.unwrap() < min_dissimilarity.distance.unwrap() {
|
||||||
min_dissimilarity = p.clone()
|
min_dissimilarity = *p
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -594,4 +603,103 @@ mod tests_fastpair {
|
|||||||
|
|
||||||
assert_eq!(closest, min_dissimilarity);
|
assert_eq!(closest, min_dissimilarity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fastpair_ordered_pairs() {
|
||||||
|
let x = DenseMatrix::<f64>::from_2d_array(&[
|
||||||
|
&[5.1, 3.5, 1.4, 0.2],
|
||||||
|
&[4.9, 3.0, 1.4, 0.2],
|
||||||
|
&[4.7, 3.2, 1.3, 0.2],
|
||||||
|
&[4.6, 3.1, 1.5, 0.2],
|
||||||
|
&[5.0, 3.6, 1.4, 0.2],
|
||||||
|
&[5.4, 3.9, 1.7, 0.4],
|
||||||
|
&[4.9, 3.1, 1.5, 0.1],
|
||||||
|
&[7.0, 3.2, 4.7, 1.4],
|
||||||
|
&[6.4, 3.2, 4.5, 1.5],
|
||||||
|
&[6.9, 3.1, 4.9, 1.5],
|
||||||
|
&[5.5, 2.3, 4.0, 1.3],
|
||||||
|
&[6.5, 2.8, 4.6, 1.5],
|
||||||
|
&[4.6, 3.4, 1.4, 0.3],
|
||||||
|
&[5.0, 3.4, 1.5, 0.2],
|
||||||
|
&[4.4, 2.9, 1.4, 0.2],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let fastpair = FastPair::new(&x).unwrap();
|
||||||
|
|
||||||
|
let ordered = fastpair.ordered_pairs();
|
||||||
|
|
||||||
|
let mut previous: f64 = -1.0;
|
||||||
|
for p in ordered {
|
||||||
|
if previous == -1.0 {
|
||||||
|
previous = p.distance.unwrap();
|
||||||
|
} else {
|
||||||
|
let current = p.distance.unwrap();
|
||||||
|
assert!(current >= previous);
|
||||||
|
previous = current;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_set() {
|
||||||
|
let empty_matrix = DenseMatrix::<f64>::zeros(0, 0);
|
||||||
|
let result = FastPair::new(&empty_matrix);
|
||||||
|
assert!(result.is_err());
|
||||||
|
if let Err(e) = result {
|
||||||
|
assert_eq!(
|
||||||
|
e,
|
||||||
|
Failed::because(FailedError::FindFailed, "min number of rows should be 3")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_single_point() {
|
||||||
|
let single_point = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap();
|
||||||
|
let result = FastPair::new(&single_point);
|
||||||
|
assert!(result.is_err());
|
||||||
|
if let Err(e) = result {
|
||||||
|
assert_eq!(
|
||||||
|
e,
|
||||||
|
Failed::because(FailedError::FindFailed, "min number of rows should be 3")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_two_points() {
|
||||||
|
let two_points = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
let result = FastPair::new(&two_points);
|
||||||
|
assert!(result.is_err());
|
||||||
|
if let Err(e) = result {
|
||||||
|
assert_eq!(
|
||||||
|
e,
|
||||||
|
Failed::because(FailedError::FindFailed, "min number of rows should be 3")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_three_identical_points() {
|
||||||
|
let identical_points =
|
||||||
|
DenseMatrix::from_2d_array(&[&[1.0, 1.0], &[1.0, 1.0], &[1.0, 1.0]]).unwrap();
|
||||||
|
let result = FastPair::new(&identical_points);
|
||||||
|
assert!(result.is_ok());
|
||||||
|
let fastpair = result.unwrap();
|
||||||
|
let closest_pair = fastpair.closest_pair();
|
||||||
|
assert_eq!(closest_pair.distance, Some(0.0));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_result_unwrapping() {
|
||||||
|
let valid_matrix =
|
||||||
|
DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0], &[7.0, 8.0]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let result = FastPair::new(&valid_matrix);
|
||||||
|
assert!(result.is_ok());
|
||||||
|
|
||||||
|
// This should not panic
|
||||||
|
let _fastpair = result.unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ impl<T, D: Distance<T>> LinearKNNSearch<T, D> {
|
|||||||
|
|
||||||
for _ in 0..k {
|
for _ in 0..k {
|
||||||
heap.add(KNNPoint {
|
heap.add(KNNPoint {
|
||||||
distance: std::f64::INFINITY,
|
distance: f64::INFINITY,
|
||||||
index: None,
|
index: None,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -215,7 +215,7 @@ mod tests {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let point_inf = KNNPoint {
|
let point_inf = KNNPoint {
|
||||||
distance: std::f64::INFINITY,
|
distance: f64::INFINITY,
|
||||||
index: Some(3),
|
index: Some(3),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -49,20 +49,15 @@ pub mod linear_search;
|
|||||||
/// Both, KNN classifier and regressor benefits from underlying search algorithms that helps to speed up queries.
|
/// Both, KNN classifier and regressor benefits from underlying search algorithms that helps to speed up queries.
|
||||||
/// `KNNAlgorithmName` maintains a list of supported search algorithms, see [KNN algorithms](../algorithm/neighbour/index.html)
|
/// `KNNAlgorithmName` maintains a list of supported search algorithms, see [KNN algorithms](../algorithm/neighbour/index.html)
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub enum KNNAlgorithmName {
|
pub enum KNNAlgorithmName {
|
||||||
/// Heap Search algorithm, see [`LinearSearch`](../algorithm/neighbour/linear_search/index.html)
|
/// Heap Search algorithm, see [`LinearSearch`](../algorithm/neighbour/linear_search/index.html)
|
||||||
LinearSearch,
|
LinearSearch,
|
||||||
/// Cover Tree Search algorithm, see [`CoverTree`](../algorithm/neighbour/cover_tree/index.html)
|
/// Cover Tree Search algorithm, see [`CoverTree`](../algorithm/neighbour/cover_tree/index.html)
|
||||||
|
#[default]
|
||||||
CoverTree,
|
CoverTree,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for KNNAlgorithmName {
|
|
||||||
fn default() -> Self {
|
|
||||||
KNNAlgorithmName::CoverTree
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) enum KNNAlgorithm<T: Number, D: Distance<Vec<T>>> {
|
pub(crate) enum KNNAlgorithm<T: Number, D: Distance<Vec<T>>> {
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_add1() {
|
fn test_add1() {
|
||||||
let mut heap = HeapSelection::with_capacity(3);
|
let mut heap = HeapSelection::with_capacity(3);
|
||||||
heap.add(std::f64::INFINITY);
|
heap.add(f64::INFINITY);
|
||||||
heap.add(-5f64);
|
heap.add(-5f64);
|
||||||
heap.add(4f64);
|
heap.add(4f64);
|
||||||
heap.add(-1f64);
|
heap.add(-1f64);
|
||||||
@@ -151,7 +151,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_add2() {
|
fn test_add2() {
|
||||||
let mut heap = HeapSelection::with_capacity(3);
|
let mut heap = HeapSelection::with_capacity(3);
|
||||||
heap.add(std::f64::INFINITY);
|
heap.add(f64::INFINITY);
|
||||||
heap.add(0.0);
|
heap.add(0.0);
|
||||||
heap.add(8.4852);
|
heap.add(8.4852);
|
||||||
heap.add(5.6568);
|
heap.add(5.6568);
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use num_traits::Num;
|
|||||||
pub trait QuickArgSort {
|
pub trait QuickArgSort {
|
||||||
fn quick_argsort_mut(&mut self) -> Vec<usize>;
|
fn quick_argsort_mut(&mut self) -> Vec<usize>;
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
fn quick_argsort(&self) -> Vec<usize>;
|
fn quick_argsort(&self) -> Vec<usize>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
//!
|
//!
|
||||||
//! Example:
|
//! Example:
|
||||||
//!
|
//!
|
||||||
//! ```
|
//! ```ignore
|
||||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||||
//! use smartcore::linalg::basic::arrays::Array2;
|
//! use smartcore::linalg::basic::arrays::Array2;
|
||||||
//! use smartcore::cluster::dbscan::*;
|
//! use smartcore::cluster::dbscan::*;
|
||||||
@@ -315,8 +315,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while !neighbors.is_empty() {
|
while let Some(neighbor) = neighbors.pop() {
|
||||||
let neighbor = neighbors.pop().unwrap();
|
|
||||||
let index = neighbor.0;
|
let index = neighbor.0;
|
||||||
|
|
||||||
if y[index] == outlier {
|
if y[index] == outlier {
|
||||||
@@ -443,7 +442,8 @@ mod tests {
|
|||||||
&[2.2, 1.2],
|
&[2.2, 1.2],
|
||||||
&[1.8, 0.8],
|
&[1.8, 0.8],
|
||||||
&[3.0, 5.0],
|
&[3.0, 5.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let expected_labels = vec![1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0];
|
let expected_labels = vec![1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0];
|
||||||
|
|
||||||
@@ -488,7 +488,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let dbscan = DBSCAN::fit(&x, Default::default()).unwrap();
|
let dbscan = DBSCAN::fit(&x, Default::default()).unwrap();
|
||||||
|
|
||||||
@@ -511,6 +512,6 @@ mod tests {
|
|||||||
.and_then(|dbscan| dbscan.predict(&x))
|
.and_then(|dbscan| dbscan.predict(&x))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
println!("{:?}", labels);
|
println!("{labels:?}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+12
-10
@@ -41,7 +41,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters
|
//! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters
|
||||||
//! let y_hat: Vec<u8> = kmeans.predict(&x).unwrap(); // use the same points for prediction
|
//! let y_hat: Vec<u8> = kmeans.predict(&x).unwrap(); // use the same points for prediction
|
||||||
@@ -96,7 +96,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> PartialEq for KMeans<
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for j in 0..self.centroids[i].len() {
|
for j in 0..self.centroids[i].len() {
|
||||||
if (self.centroids[i][j] - other.centroids[i][j]).abs() > std::f64::EPSILON {
|
if (self.centroids[i][j] - other.centroids[i][j]).abs() > f64::EPSILON {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -270,7 +270,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
|
|||||||
|
|
||||||
let (n, d) = data.shape();
|
let (n, d) = data.shape();
|
||||||
|
|
||||||
let mut distortion = std::f64::MAX;
|
let mut distortion = f64::MAX;
|
||||||
let mut y = KMeans::<TX, TY, X, Y>::kmeans_plus_plus(data, parameters.k, parameters.seed);
|
let mut y = KMeans::<TX, TY, X, Y>::kmeans_plus_plus(data, parameters.k, parameters.seed);
|
||||||
let mut size = vec![0; parameters.k];
|
let mut size = vec![0; parameters.k];
|
||||||
let mut centroids = vec![vec![0f64; d]; parameters.k];
|
let mut centroids = vec![vec![0f64; d]; parameters.k];
|
||||||
@@ -331,7 +331,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
|
|||||||
let mut row = vec![0f64; x.shape().1];
|
let mut row = vec![0f64; x.shape().1];
|
||||||
|
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
let mut min_dist = std::f64::MAX;
|
let mut min_dist = f64::MAX;
|
||||||
let mut best_cluster = 0;
|
let mut best_cluster = 0;
|
||||||
|
|
||||||
for j in 0..self.k {
|
for j in 0..self.k {
|
||||||
@@ -361,7 +361,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
|
|||||||
.cloned()
|
.cloned()
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut d = vec![std::f64::MAX; n];
|
let mut d = vec![f64::MAX; n];
|
||||||
let mut row = vec![TX::zero(); data.shape().1];
|
let mut row = vec![TX::zero(); data.shape().1];
|
||||||
|
|
||||||
for j in 1..k {
|
for j in 1..k {
|
||||||
@@ -424,7 +424,7 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn invalid_k() {
|
fn invalid_k() {
|
||||||
let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]);
|
let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap();
|
||||||
|
|
||||||
assert!(KMeans::<i32, i32, DenseMatrix<i32>, Vec<i32>>::fit(
|
assert!(KMeans::<i32, i32, DenseMatrix<i32>, Vec<i32>>::fit(
|
||||||
&x,
|
&x,
|
||||||
@@ -492,14 +492,15 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let kmeans = KMeans::fit(&x, Default::default()).unwrap();
|
let kmeans = KMeans::fit(&x, Default::default()).unwrap();
|
||||||
|
|
||||||
let y: Vec<usize> = kmeans.predict(&x).unwrap();
|
let y: Vec<usize> = kmeans.predict(&x).unwrap();
|
||||||
|
|
||||||
for i in 0..y.len() {
|
for (i, _y_i) in y.iter().enumerate() {
|
||||||
assert_eq!(y[i] as usize, kmeans._y[i]);
|
assert_eq!({ y[i] }, kmeans._y[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -531,7 +532,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let kmeans: KMeans<f32, f32, DenseMatrix<f32>, Vec<f32>> =
|
let kmeans: KMeans<f32, f32, DenseMatrix<f32>, Vec<f32>> =
|
||||||
KMeans::fit(&x, Default::default()).unwrap();
|
KMeans::fit(&x, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ use crate::dataset::Dataset;
|
|||||||
pub fn load_dataset() -> Dataset<f32, f32> {
|
pub fn load_dataset() -> Dataset<f32, f32> {
|
||||||
let (x, y, num_samples, num_features) = match deserialize_data(std::include_bytes!("boston.xy"))
|
let (x, y, num_samples, num_features) = match deserialize_data(std::include_bytes!("boston.xy"))
|
||||||
{
|
{
|
||||||
Err(why) => panic!("Can't deserialize boston.xy. {}", why),
|
Err(why) => panic!("Can't deserialize boston.xy. {why}"),
|
||||||
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ use crate::dataset::Dataset;
|
|||||||
pub fn load_dataset() -> Dataset<f32, u32> {
|
pub fn load_dataset() -> Dataset<f32, u32> {
|
||||||
let (x, y, num_samples, num_features) =
|
let (x, y, num_samples, num_features) =
|
||||||
match deserialize_data(std::include_bytes!("breast_cancer.xy")) {
|
match deserialize_data(std::include_bytes!("breast_cancer.xy")) {
|
||||||
Err(why) => panic!("Can't deserialize breast_cancer.xy. {}", why),
|
Err(why) => panic!("Can't deserialize breast_cancer.xy. {why}"),
|
||||||
Ok((x, y, num_samples, num_features)) => (
|
Ok((x, y, num_samples, num_features)) => (
|
||||||
x,
|
x,
|
||||||
y.into_iter().map(|x| x as u32).collect(),
|
y.into_iter().map(|x| x as u32).collect(),
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ use crate::dataset::Dataset;
|
|||||||
pub fn load_dataset() -> Dataset<f32, u32> {
|
pub fn load_dataset() -> Dataset<f32, u32> {
|
||||||
let (x, y, num_samples, num_features) =
|
let (x, y, num_samples, num_features) =
|
||||||
match deserialize_data(std::include_bytes!("diabetes.xy")) {
|
match deserialize_data(std::include_bytes!("diabetes.xy")) {
|
||||||
Err(why) => panic!("Can't deserialize diabetes.xy. {}", why),
|
Err(why) => panic!("Can't deserialize diabetes.xy. {why}"),
|
||||||
Ok((x, y, num_samples, num_features)) => (
|
Ok((x, y, num_samples, num_features)) => (
|
||||||
x,
|
x,
|
||||||
y.into_iter().map(|x| x as u32).collect(),
|
y.into_iter().map(|x| x as u32).collect(),
|
||||||
@@ -40,7 +40,7 @@ pub fn load_dataset() -> Dataset<f32, u32> {
|
|||||||
target: y,
|
target: y,
|
||||||
num_samples,
|
num_samples,
|
||||||
num_features,
|
num_features,
|
||||||
feature_names: vec![
|
feature_names: [
|
||||||
"Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6",
|
"Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6",
|
||||||
]
|
]
|
||||||
.iter()
|
.iter()
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ use crate::dataset::Dataset;
|
|||||||
pub fn load_dataset() -> Dataset<f32, f32> {
|
pub fn load_dataset() -> Dataset<f32, f32> {
|
||||||
let (x, y, num_samples, num_features) = match deserialize_data(std::include_bytes!("digits.xy"))
|
let (x, y, num_samples, num_features) = match deserialize_data(std::include_bytes!("digits.xy"))
|
||||||
{
|
{
|
||||||
Err(why) => panic!("Can't deserialize digits.xy. {}", why),
|
Err(why) => panic!("Can't deserialize digits.xy. {why}"),
|
||||||
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -25,16 +25,14 @@ pub fn load_dataset() -> Dataset<f32, f32> {
|
|||||||
target: y,
|
target: y,
|
||||||
num_samples,
|
num_samples,
|
||||||
num_features,
|
num_features,
|
||||||
feature_names: vec![
|
feature_names: ["sepal length (cm)",
|
||||||
"sepal length (cm)",
|
|
||||||
"sepal width (cm)",
|
"sepal width (cm)",
|
||||||
"petal length (cm)",
|
"petal length (cm)",
|
||||||
"petal width (cm)",
|
"petal width (cm)"]
|
||||||
]
|
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect(),
|
.collect(),
|
||||||
target_names: vec!["setosa", "versicolor", "virginica"]
|
target_names: ["setosa", "versicolor", "virginica"]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect(),
|
.collect(),
|
||||||
|
|||||||
+3
-3
@@ -22,7 +22,7 @@ use crate::dataset::Dataset;
|
|||||||
pub fn load_dataset() -> Dataset<f32, u32> {
|
pub fn load_dataset() -> Dataset<f32, u32> {
|
||||||
let (x, y, num_samples, num_features): (Vec<f32>, Vec<u32>, usize, usize) =
|
let (x, y, num_samples, num_features): (Vec<f32>, Vec<u32>, usize, usize) =
|
||||||
match deserialize_data(std::include_bytes!("iris.xy")) {
|
match deserialize_data(std::include_bytes!("iris.xy")) {
|
||||||
Err(why) => panic!("Can't deserialize iris.xy. {}", why),
|
Err(why) => panic!("Can't deserialize iris.xy. {why}"),
|
||||||
Ok((x, y, num_samples, num_features)) => (
|
Ok((x, y, num_samples, num_features)) => (
|
||||||
x,
|
x,
|
||||||
y.into_iter().map(|x| x as u32).collect(),
|
y.into_iter().map(|x| x as u32).collect(),
|
||||||
@@ -36,7 +36,7 @@ pub fn load_dataset() -> Dataset<f32, u32> {
|
|||||||
target: y,
|
target: y,
|
||||||
num_samples,
|
num_samples,
|
||||||
num_features,
|
num_features,
|
||||||
feature_names: vec![
|
feature_names: [
|
||||||
"sepal length (cm)",
|
"sepal length (cm)",
|
||||||
"sepal width (cm)",
|
"sepal width (cm)",
|
||||||
"petal length (cm)",
|
"petal length (cm)",
|
||||||
@@ -45,7 +45,7 @@ pub fn load_dataset() -> Dataset<f32, u32> {
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect(),
|
.collect(),
|
||||||
target_names: vec!["setosa", "versicolor", "virginica"]
|
target_names: ["setosa", "versicolor", "virginica"]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect(),
|
.collect(),
|
||||||
|
|||||||
+1
-1
@@ -78,7 +78,7 @@ pub(crate) fn serialize_data<X: Number + RealNumber, Y: RealNumber>(
|
|||||||
.collect();
|
.collect();
|
||||||
file.write_all(&y)?;
|
file.write_all(&y)?;
|
||||||
}
|
}
|
||||||
Err(why) => panic!("couldn't create {}: {}", filename, why),
|
Err(why) => panic!("couldn't create {filename}: {why}"),
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
+22
-18
@@ -35,7 +35,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2
|
//! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2
|
||||||
//!
|
//!
|
||||||
@@ -231,8 +231,7 @@ impl<T: Number + RealNumber, X: Array2<T> + SVDDecomposable<T> + EVDDecomposable
|
|||||||
|
|
||||||
if parameters.n_components > n {
|
if parameters.n_components > n {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Number of components, n_components should be <= number of attributes ({})",
|
"Number of components, n_components should be <= number of attributes ({n})"
|
||||||
n
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -374,21 +373,20 @@ mod tests {
|
|||||||
let parameters = PCASearchParameters {
|
let parameters = PCASearchParameters {
|
||||||
n_components: vec![2, 4],
|
n_components: vec![2, 4],
|
||||||
use_correlation_matrix: vec![true, false],
|
use_correlation_matrix: vec![true, false],
|
||||||
..Default::default()
|
|
||||||
};
|
};
|
||||||
let mut iter = parameters.into_iter();
|
let mut iter = parameters.into_iter();
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
assert_eq!(next.n_components, 2);
|
assert_eq!(next.n_components, 2);
|
||||||
assert_eq!(next.use_correlation_matrix, true);
|
assert!(next.use_correlation_matrix);
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
assert_eq!(next.n_components, 4);
|
assert_eq!(next.n_components, 4);
|
||||||
assert_eq!(next.use_correlation_matrix, true);
|
assert!(next.use_correlation_matrix);
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
assert_eq!(next.n_components, 2);
|
assert_eq!(next.n_components, 2);
|
||||||
assert_eq!(next.use_correlation_matrix, false);
|
assert!(!next.use_correlation_matrix);
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
assert_eq!(next.n_components, 4);
|
assert_eq!(next.n_components, 4);
|
||||||
assert_eq!(next.use_correlation_matrix, false);
|
assert!(!next.use_correlation_matrix);
|
||||||
assert!(iter.next().is_none());
|
assert!(iter.next().is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -445,6 +443,7 @@ mod tests {
|
|||||||
&[2.6, 53.0, 66.0, 10.8],
|
&[2.6, 53.0, 66.0, 10.8],
|
||||||
&[6.8, 161.0, 60.0, 15.6],
|
&[6.8, 161.0, 60.0, 15.6],
|
||||||
])
|
])
|
||||||
|
.unwrap()
|
||||||
}
|
}
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
@@ -459,7 +458,8 @@ mod tests {
|
|||||||
&[0.9952, 0.0588],
|
&[0.9952, 0.0588],
|
||||||
&[0.0463, 0.9769],
|
&[0.0463, 0.9769],
|
||||||
&[0.0752, 0.2007],
|
&[0.0752, 0.2007],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let pca = PCA::fit(&us_arrests, Default::default()).unwrap();
|
let pca = PCA::fit(&us_arrests, Default::default()).unwrap();
|
||||||
|
|
||||||
@@ -502,7 +502,8 @@ mod tests {
|
|||||||
-0.974080592182491,
|
-0.974080592182491,
|
||||||
0.0723250196376097,
|
0.0723250196376097,
|
||||||
],
|
],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let expected_projection = DenseMatrix::from_2d_array(&[
|
let expected_projection = DenseMatrix::from_2d_array(&[
|
||||||
&[-64.8022, -11.448, 2.4949, -2.4079],
|
&[-64.8022, -11.448, 2.4949, -2.4079],
|
||||||
@@ -555,7 +556,8 @@ mod tests {
|
|||||||
&[91.5446, -22.9529, 0.402, -0.7369],
|
&[91.5446, -22.9529, 0.402, -0.7369],
|
||||||
&[118.1763, 5.5076, 2.7113, -0.205],
|
&[118.1763, 5.5076, 2.7113, -0.205],
|
||||||
&[10.4345, -5.9245, 3.7944, 0.5179],
|
&[10.4345, -5.9245, 3.7944, 0.5179],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let expected_eigenvalues: Vec<f64> = vec![
|
let expected_eigenvalues: Vec<f64> = vec![
|
||||||
343544.6277001563,
|
343544.6277001563,
|
||||||
@@ -572,8 +574,8 @@ mod tests {
|
|||||||
epsilon = 1e-4
|
epsilon = 1e-4
|
||||||
));
|
));
|
||||||
|
|
||||||
for i in 0..pca.eigenvalues.len() {
|
for (i, pca_eigenvalues_i) in pca.eigenvalues.iter().enumerate() {
|
||||||
assert!((pca.eigenvalues[i].abs() - expected_eigenvalues[i].abs()).abs() < 1e-8);
|
assert!((pca_eigenvalues_i.abs() - expected_eigenvalues[i].abs()).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
|
|
||||||
let us_arrests_t = pca.transform(&us_arrests).unwrap();
|
let us_arrests_t = pca.transform(&us_arrests).unwrap();
|
||||||
@@ -618,7 +620,8 @@ mod tests {
|
|||||||
-0.0881962972508558,
|
-0.0881962972508558,
|
||||||
-0.0096011588898465,
|
-0.0096011588898465,
|
||||||
],
|
],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let expected_projection = DenseMatrix::from_2d_array(&[
|
let expected_projection = DenseMatrix::from_2d_array(&[
|
||||||
&[0.9856, -1.1334, 0.4443, -0.1563],
|
&[0.9856, -1.1334, 0.4443, -0.1563],
|
||||||
@@ -671,7 +674,8 @@ mod tests {
|
|||||||
&[-2.1086, -1.4248, -0.1048, -0.1319],
|
&[-2.1086, -1.4248, -0.1048, -0.1319],
|
||||||
&[-2.0797, 0.6113, 0.1389, -0.1841],
|
&[-2.0797, 0.6113, 0.1389, -0.1841],
|
||||||
&[-0.6294, -0.321, 0.2407, 0.1667],
|
&[-0.6294, -0.321, 0.2407, 0.1667],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let expected_eigenvalues: Vec<f64> = vec![
|
let expected_eigenvalues: Vec<f64> = vec![
|
||||||
2.480241579149493,
|
2.480241579149493,
|
||||||
@@ -694,8 +698,8 @@ mod tests {
|
|||||||
epsilon = 1e-4
|
epsilon = 1e-4
|
||||||
));
|
));
|
||||||
|
|
||||||
for i in 0..pca.eigenvalues.len() {
|
for (i, pca_eigenvalues_i) in pca.eigenvalues.iter().enumerate() {
|
||||||
assert!((pca.eigenvalues[i].abs() - expected_eigenvalues[i].abs()).abs() < 1e-8);
|
assert!((pca_eigenvalues_i.abs() - expected_eigenvalues[i].abs()).abs() < 1e-8);
|
||||||
}
|
}
|
||||||
|
|
||||||
let us_arrests_t = pca.transform(&us_arrests).unwrap();
|
let us_arrests_t = pca.transform(&us_arrests).unwrap();
|
||||||
@@ -734,7 +738,7 @@ mod tests {
|
|||||||
// &[4.9, 2.4, 3.3, 1.0],
|
// &[4.9, 2.4, 3.3, 1.0],
|
||||||
// &[6.6, 2.9, 4.6, 1.3],
|
// &[6.6, 2.9, 4.6, 1.3],
|
||||||
// &[5.2, 2.7, 3.9, 1.4],
|
// &[5.2, 2.7, 3.9, 1.4],
|
||||||
// ]);
|
// ]).unwrap();
|
||||||
|
|
||||||
// let pca = PCA::fit(&iris, Default::default()).unwrap();
|
// let pca = PCA::fit(&iris, Default::default()).unwrap();
|
||||||
|
|
||||||
|
|||||||
@@ -32,7 +32,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let svd = SVD::fit(&iris, SVDParameters::default().
|
//! let svd = SVD::fit(&iris, SVDParameters::default().
|
||||||
//! with_n_components(2)).unwrap(); // Reduce number of features to 2
|
//! with_n_components(2)).unwrap(); // Reduce number of features to 2
|
||||||
@@ -180,8 +180,7 @@ impl<T: Number + RealNumber, X: Array2<T> + SVDDecomposable<T> + EVDDecomposable
|
|||||||
|
|
||||||
if parameters.n_components >= p {
|
if parameters.n_components >= p {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Number of components, n_components should be < number of attributes ({})",
|
"Number of components, n_components should be < number of attributes ({p})"
|
||||||
p
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -202,8 +201,7 @@ impl<T: Number + RealNumber, X: Array2<T> + SVDDecomposable<T> + EVDDecomposable
|
|||||||
let (p_c, k) = self.components.shape();
|
let (p_c, k) = self.components.shape();
|
||||||
if p_c != p {
|
if p_c != p {
|
||||||
return Err(Failed::transform(&format!(
|
return Err(Failed::transform(&format!(
|
||||||
"Can not transform a {}x{} matrix into {}x{} matrix, incorrect input dimentions",
|
"Can not transform a {n}x{p} matrix into {n}x{k} matrix, incorrect input dimentions"
|
||||||
n, p, n, k
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,7 +225,6 @@ mod tests {
|
|||||||
fn search_parameters() {
|
fn search_parameters() {
|
||||||
let parameters = SVDSearchParameters {
|
let parameters = SVDSearchParameters {
|
||||||
n_components: vec![10, 100],
|
n_components: vec![10, 100],
|
||||||
..Default::default()
|
|
||||||
};
|
};
|
||||||
let mut iter = parameters.into_iter();
|
let mut iter = parameters.into_iter();
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
@@ -295,7 +292,8 @@ mod tests {
|
|||||||
&[5.7, 81.0, 39.0, 9.3],
|
&[5.7, 81.0, 39.0, 9.3],
|
||||||
&[2.6, 53.0, 66.0, 10.8],
|
&[2.6, 53.0, 66.0, 10.8],
|
||||||
&[6.8, 161.0, 60.0, 15.6],
|
&[6.8, 161.0, 60.0, 15.6],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let expected = DenseMatrix::from_2d_array(&[
|
let expected = DenseMatrix::from_2d_array(&[
|
||||||
&[243.54655757, -18.76673788],
|
&[243.54655757, -18.76673788],
|
||||||
@@ -303,7 +301,8 @@ mod tests {
|
|||||||
&[305.93972467, -15.39087376],
|
&[305.93972467, -15.39087376],
|
||||||
&[197.28420365, -11.66808306],
|
&[197.28420365, -11.66808306],
|
||||||
&[293.43187394, 1.91163633],
|
&[293.43187394, 1.91163633],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let svd = SVD::fit(&x, Default::default()).unwrap();
|
let svd = SVD::fit(&x, Default::default()).unwrap();
|
||||||
|
|
||||||
let x_transformed = svd.transform(&x).unwrap();
|
let x_transformed = svd.transform(&x).unwrap();
|
||||||
@@ -344,7 +343,7 @@ mod tests {
|
|||||||
// &[4.9, 2.4, 3.3, 1.0],
|
// &[4.9, 2.4, 3.3, 1.0],
|
||||||
// &[6.6, 2.9, 4.6, 1.3],
|
// &[6.6, 2.9, 4.6, 1.3],
|
||||||
// &[5.2, 2.7, 3.9, 1.4],
|
// &[5.2, 2.7, 3.9, 1.4],
|
||||||
// ]);
|
// ]).unwrap();
|
||||||
|
|
||||||
// let svd = SVD::fit(&iris, Default::default()).unwrap();
|
// let svd = SVD::fit(&iris, Default::default()).unwrap();
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y = vec![
|
//! let y = vec![
|
||||||
//! 0, 0, 0, 0, 0, 0, 0, 0,
|
//! 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
@@ -454,8 +454,12 @@ impl<TX: FloatNumber + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY
|
|||||||
y: &Y,
|
y: &Y,
|
||||||
parameters: RandomForestClassifierParameters,
|
parameters: RandomForestClassifierParameters,
|
||||||
) -> Result<RandomForestClassifier<TX, TY, X, Y>, Failed> {
|
) -> Result<RandomForestClassifier<TX, TY, X, Y>, Failed> {
|
||||||
let (_, num_attributes) = x.shape();
|
let (x_nrows, num_attributes) = x.shape();
|
||||||
let y_ncols = y.shape();
|
let y_ncols = y.shape();
|
||||||
|
if x_nrows != y_ncols {
|
||||||
|
return Err(Failed::fit("Number of rows in X should = len(y)"));
|
||||||
|
}
|
||||||
|
|
||||||
let mut yi: Vec<usize> = vec![0; y_ncols];
|
let mut yi: Vec<usize> = vec![0; y_ncols];
|
||||||
let classes = y.unique();
|
let classes = y.unique();
|
||||||
|
|
||||||
@@ -656,7 +660,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
let classifier = RandomForestClassifier::fit(
|
let classifier = RandomForestClassifier::fit(
|
||||||
@@ -678,6 +683,30 @@ mod tests {
|
|||||||
assert!(accuracy(&y, &classifier.predict(&x).unwrap()) >= 0.95);
|
assert!(accuracy(&y, &classifier.predict(&x).unwrap()) >= 0.95);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_random_matrix_with_wrong_rownum() {
|
||||||
|
let x_rand: DenseMatrix<f64> = DenseMatrix::<f64>::rand(21, 200);
|
||||||
|
|
||||||
|
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
|
let fail = RandomForestClassifier::fit(
|
||||||
|
&x_rand,
|
||||||
|
&y,
|
||||||
|
RandomForestClassifierParameters {
|
||||||
|
criterion: SplitCriterion::Gini,
|
||||||
|
max_depth: Option::None,
|
||||||
|
min_samples_leaf: 1,
|
||||||
|
min_samples_split: 2,
|
||||||
|
n_trees: 100,
|
||||||
|
m: Option::None,
|
||||||
|
keep_samples: false,
|
||||||
|
seed: 87,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
assert!(fail.is_err());
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
wasm_bindgen_test::wasm_bindgen_test
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
@@ -705,7 +734,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
let classifier = RandomForestClassifier::fit(
|
let classifier = RandomForestClassifier::fit(
|
||||||
@@ -758,7 +788,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap();
|
let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -29,7 +29,7 @@
|
|||||||
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y = vec![
|
//! let y = vec![
|
||||||
//! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2,
|
//! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2,
|
||||||
//! 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9
|
//! 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9
|
||||||
@@ -399,6 +399,10 @@ impl<TX: Number + FloatNumber + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1
|
|||||||
) -> Result<RandomForestRegressor<TX, TY, X, Y>, Failed> {
|
) -> Result<RandomForestRegressor<TX, TY, X, Y>, Failed> {
|
||||||
let (n_rows, num_attributes) = x.shape();
|
let (n_rows, num_attributes) = x.shape();
|
||||||
|
|
||||||
|
if n_rows != y.shape() {
|
||||||
|
return Err(Failed::fit("Number of rows in X should = len(y)"));
|
||||||
|
}
|
||||||
|
|
||||||
let mtry = parameters
|
let mtry = parameters
|
||||||
.m
|
.m
|
||||||
.unwrap_or((num_attributes as f64).sqrt().floor() as usize);
|
.unwrap_or((num_attributes as f64).sqrt().floor() as usize);
|
||||||
@@ -570,7 +574,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![
|
let y = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
@@ -595,6 +600,32 @@ mod tests {
|
|||||||
assert!(mean_absolute_error(&y, &y_hat) < 1.0);
|
assert!(mean_absolute_error(&y, &y_hat) < 1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_random_matrix_with_wrong_rownum() {
|
||||||
|
let x_rand: DenseMatrix<f64> = DenseMatrix::<f64>::rand(17, 200);
|
||||||
|
|
||||||
|
let y = vec![
|
||||||
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
|
114.2, 115.7, 116.9,
|
||||||
|
];
|
||||||
|
|
||||||
|
let fail = RandomForestRegressor::fit(
|
||||||
|
&x_rand,
|
||||||
|
&y,
|
||||||
|
RandomForestRegressorParameters {
|
||||||
|
max_depth: Option::None,
|
||||||
|
min_samples_leaf: 1,
|
||||||
|
min_samples_split: 2,
|
||||||
|
n_trees: 1000,
|
||||||
|
m: Option::None,
|
||||||
|
keep_samples: false,
|
||||||
|
seed: 87,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
assert!(fail.is_err());
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
wasm_bindgen_test::wasm_bindgen_test
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
@@ -618,7 +649,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![
|
let y = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
@@ -672,7 +704,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![
|
let y = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
|
|||||||
+21
-2
@@ -30,8 +30,10 @@ pub enum FailedError {
|
|||||||
DecompositionFailed,
|
DecompositionFailed,
|
||||||
/// Can't solve for x
|
/// Can't solve for x
|
||||||
SolutionFailed,
|
SolutionFailed,
|
||||||
/// Erro in input
|
/// Error in input parameters
|
||||||
ParametersError,
|
ParametersError,
|
||||||
|
/// Invalid state error (should never happen)
|
||||||
|
InvalidStateError,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Failed {
|
impl Failed {
|
||||||
@@ -64,6 +66,22 @@ impl Failed {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// new instance of `FailedError::ParametersError`
|
||||||
|
pub fn input(msg: &str) -> Self {
|
||||||
|
Failed {
|
||||||
|
err: FailedError::ParametersError,
|
||||||
|
msg: msg.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// new instance of `FailedError::InvalidStateError`
|
||||||
|
pub fn invalid_state(msg: &str) -> Self {
|
||||||
|
Failed {
|
||||||
|
err: FailedError::InvalidStateError,
|
||||||
|
msg: msg.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// new instance of `err`
|
/// new instance of `err`
|
||||||
pub fn because(err: FailedError, msg: &str) -> Self {
|
pub fn because(err: FailedError, msg: &str) -> Self {
|
||||||
Failed {
|
Failed {
|
||||||
@@ -97,8 +115,9 @@ impl fmt::Display for FailedError {
|
|||||||
FailedError::DecompositionFailed => "Decomposition failed",
|
FailedError::DecompositionFailed => "Decomposition failed",
|
||||||
FailedError::SolutionFailed => "Can't find solution",
|
FailedError::SolutionFailed => "Can't find solution",
|
||||||
FailedError::ParametersError => "Error in input, check parameters",
|
FailedError::ParametersError => "Error in input, check parameters",
|
||||||
|
FailedError::InvalidStateError => "Invalid state, this should never happen", // useful in development phase of lib
|
||||||
};
|
};
|
||||||
write!(f, "{}", failed_err_str)
|
write!(f, "{failed_err_str}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+3
-3
@@ -3,10 +3,10 @@
|
|||||||
clippy::too_many_arguments,
|
clippy::too_many_arguments,
|
||||||
clippy::many_single_char_names,
|
clippy::many_single_char_names,
|
||||||
clippy::unnecessary_wraps,
|
clippy::unnecessary_wraps,
|
||||||
clippy::upper_case_acronyms
|
clippy::upper_case_acronyms,
|
||||||
|
clippy::approx_constant
|
||||||
)]
|
)]
|
||||||
#![warn(missing_docs)]
|
#![warn(missing_docs)]
|
||||||
#![warn(rustdoc::missing_doc_code_examples)]
|
|
||||||
|
|
||||||
//! # smartcore
|
//! # smartcore
|
||||||
//!
|
//!
|
||||||
@@ -63,7 +63,7 @@
|
|||||||
//! &[3., 4.],
|
//! &[3., 4.],
|
||||||
//! &[5., 6.],
|
//! &[5., 6.],
|
||||||
//! &[7., 8.],
|
//! &[7., 8.],
|
||||||
//! &[9., 10.]]);
|
//! &[9., 10.]]).unwrap();
|
||||||
//! // Our classes are defined as a vector
|
//! // Our classes are defined as a vector
|
||||||
//! let y = vec![2, 2, 2, 3, 3];
|
//! let y = vec![2, 2, 2, 3, 3];
|
||||||
//!
|
//!
|
||||||
|
|||||||
+213
-194
File diff suppressed because it is too large
Load Diff
+236
-107
@@ -19,6 +19,8 @@ use crate::linalg::traits::svd::SVDDecomposable;
|
|||||||
use crate::numbers::basenum::Number;
|
use crate::numbers::basenum::Number;
|
||||||
use crate::numbers::realnum::RealNumber;
|
use crate::numbers::realnum::RealNumber;
|
||||||
|
|
||||||
|
use crate::error::Failed;
|
||||||
|
|
||||||
/// Dense matrix
|
/// Dense matrix
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -50,26 +52,26 @@ pub struct DenseMatrixMutView<'a, T: Debug + Display + Copy + Sized> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
|
impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
|
||||||
fn new(m: &'a DenseMatrix<T>, rows: Range<usize>, cols: Range<usize>) -> Self {
|
fn new(
|
||||||
let (start, end, stride) = if m.column_major {
|
m: &'a DenseMatrix<T>,
|
||||||
(
|
vrows: Range<usize>,
|
||||||
rows.start + cols.start * m.nrows,
|
vcols: Range<usize>,
|
||||||
rows.end + (cols.end - 1) * m.nrows,
|
) -> Result<Self, Failed> {
|
||||||
m.nrows,
|
if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) {
|
||||||
)
|
Err(Failed::input(
|
||||||
|
"The specified view is outside of the matrix range",
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
(
|
let (start, end, stride) =
|
||||||
rows.start * m.ncols + cols.start,
|
m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major);
|
||||||
(rows.end - 1) * m.ncols + cols.end,
|
|
||||||
m.ncols,
|
Ok(DenseMatrixView {
|
||||||
)
|
values: &m.values[start..end],
|
||||||
};
|
stride,
|
||||||
DenseMatrixView {
|
nrows: vrows.end - vrows.start,
|
||||||
values: &m.values[start..end],
|
ncols: vcols.end - vcols.start,
|
||||||
stride,
|
column_major: m.column_major,
|
||||||
nrows: rows.end - rows.start,
|
})
|
||||||
ncols: cols.end - cols.start,
|
|
||||||
column_major: m.column_major,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,7 +91,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'_, T> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
writeln!(
|
writeln!(
|
||||||
f,
|
f,
|
||||||
@@ -102,26 +104,26 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a,
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
|
impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
|
||||||
fn new(m: &'a mut DenseMatrix<T>, rows: Range<usize>, cols: Range<usize>) -> Self {
|
fn new(
|
||||||
let (start, end, stride) = if m.column_major {
|
m: &'a mut DenseMatrix<T>,
|
||||||
(
|
vrows: Range<usize>,
|
||||||
rows.start + cols.start * m.nrows,
|
vcols: Range<usize>,
|
||||||
rows.end + (cols.end - 1) * m.nrows,
|
) -> Result<Self, Failed> {
|
||||||
m.nrows,
|
if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) {
|
||||||
)
|
Err(Failed::input(
|
||||||
|
"The specified view is outside of the matrix range",
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
(
|
let (start, end, stride) =
|
||||||
rows.start * m.ncols + cols.start,
|
m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major);
|
||||||
(rows.end - 1) * m.ncols + cols.end,
|
|
||||||
m.ncols,
|
Ok(DenseMatrixMutView {
|
||||||
)
|
values: &mut m.values[start..end],
|
||||||
};
|
stride,
|
||||||
DenseMatrixMutView {
|
nrows: vrows.end - vrows.start,
|
||||||
values: &mut m.values[start..end],
|
ncols: vcols.end - vcols.start,
|
||||||
stride,
|
column_major: m.column_major,
|
||||||
nrows: rows.end - rows.start,
|
})
|
||||||
ncols: cols.end - cols.start,
|
|
||||||
column_major: m.column_major,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -140,7 +142,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn iter_mut<'b>(&'b mut self, axis: u8) -> Box<dyn Iterator<Item = &mut T> + 'b> {
|
fn iter_mut<'b>(&'b mut self, axis: u8) -> Box<dyn Iterator<Item = &'b mut T> + 'b> {
|
||||||
let column_major = self.column_major;
|
let column_major = self.column_major;
|
||||||
let stride = self.stride;
|
let stride = self.stride;
|
||||||
let ptr = self.values.as_mut_ptr();
|
let ptr = self.values.as_mut_ptr();
|
||||||
@@ -167,7 +169,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'_, T> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
writeln!(
|
writeln!(
|
||||||
f,
|
f,
|
||||||
@@ -182,42 +184,102 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<
|
|||||||
impl<T: Debug + Display + Copy + Sized> DenseMatrix<T> {
|
impl<T: Debug + Display + Copy + Sized> DenseMatrix<T> {
|
||||||
/// Create new instance of `DenseMatrix` without copying data.
|
/// Create new instance of `DenseMatrix` without copying data.
|
||||||
/// `values` should be in column-major order.
|
/// `values` should be in column-major order.
|
||||||
pub fn new(nrows: usize, ncols: usize, values: Vec<T>, column_major: bool) -> Self {
|
pub fn new(
|
||||||
DenseMatrix {
|
nrows: usize,
|
||||||
ncols,
|
ncols: usize,
|
||||||
nrows,
|
values: Vec<T>,
|
||||||
values,
|
column_major: bool,
|
||||||
column_major,
|
) -> Result<Self, Failed> {
|
||||||
|
let data_len = values.len();
|
||||||
|
if nrows * ncols != values.len() {
|
||||||
|
Err(Failed::input(&format!(
|
||||||
|
"The specified shape: (cols: {ncols}, rows: {nrows}) does not align with data len: {data_len}"
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
Ok(DenseMatrix {
|
||||||
|
ncols,
|
||||||
|
nrows,
|
||||||
|
values,
|
||||||
|
column_major,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// New instance of `DenseMatrix` from 2d array.
|
/// New instance of `DenseMatrix` from 2d array.
|
||||||
pub fn from_2d_array(values: &[&[T]]) -> Self {
|
pub fn from_2d_array(values: &[&[T]]) -> Result<Self, Failed> {
|
||||||
DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect())
|
DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// New instance of `DenseMatrix` from 2d vector.
|
/// New instance of `DenseMatrix` from 2d vector.
|
||||||
pub fn from_2d_vec(values: &Vec<Vec<T>>) -> Self {
|
#[allow(clippy::ptr_arg)]
|
||||||
let nrows = values.len();
|
pub fn from_2d_vec(values: &Vec<Vec<T>>) -> Result<Self, Failed> {
|
||||||
let ncols = values
|
if values.is_empty() || values[0].is_empty() {
|
||||||
.first()
|
Err(Failed::input(
|
||||||
.unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector"))
|
"The 2d vec provided is empty; cannot instantiate the matrix",
|
||||||
.len();
|
))
|
||||||
let mut m_values = Vec::with_capacity(nrows * ncols);
|
} else {
|
||||||
|
let nrows = values.len();
|
||||||
|
let ncols = values
|
||||||
|
.first()
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
panic!("Invalid state: Cannot create 2d matrix from an empty vector")
|
||||||
|
})
|
||||||
|
.len();
|
||||||
|
let mut m_values = Vec::with_capacity(nrows * ncols);
|
||||||
|
|
||||||
for c in 0..ncols {
|
for c in 0..ncols {
|
||||||
for r in values.iter().take(nrows) {
|
for r in values.iter().take(nrows) {
|
||||||
m_values.push(r[c])
|
m_values.push(r[c])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
DenseMatrix::new(nrows, ncols, m_values, true)
|
DenseMatrix::new(nrows, ncols, m_values, true)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over values of matrix
|
/// Iterate over values of matrix
|
||||||
pub fn iter(&self) -> Iter<'_, T> {
|
pub fn iter(&self) -> Iter<'_, T> {
|
||||||
self.values.iter()
|
self.values.iter()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if the size of the requested view is bounded to matrix rows/cols count
|
||||||
|
fn is_valid_view(
|
||||||
|
&self,
|
||||||
|
n_rows: usize,
|
||||||
|
n_cols: usize,
|
||||||
|
vrows: &Range<usize>,
|
||||||
|
vcols: &Range<usize>,
|
||||||
|
) -> bool {
|
||||||
|
!(vrows.end <= n_rows
|
||||||
|
&& vcols.end <= n_cols
|
||||||
|
&& vrows.start <= n_rows
|
||||||
|
&& vcols.start <= n_cols)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the range of the requested view: start, end, size of the slice
|
||||||
|
fn stride_range(
|
||||||
|
&self,
|
||||||
|
n_rows: usize,
|
||||||
|
n_cols: usize,
|
||||||
|
vrows: &Range<usize>,
|
||||||
|
vcols: &Range<usize>,
|
||||||
|
column_major: bool,
|
||||||
|
) -> (usize, usize, usize) {
|
||||||
|
let (start, end, stride) = if column_major {
|
||||||
|
(
|
||||||
|
vrows.start + vcols.start * n_rows,
|
||||||
|
vrows.end + (vcols.end - 1) * n_rows,
|
||||||
|
n_rows,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
vrows.start * n_cols + vcols.start,
|
||||||
|
(vrows.end - 1) * n_cols + vcols.end,
|
||||||
|
n_cols,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
(start, end, stride)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrix<T> {
|
impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrix<T> {
|
||||||
@@ -304,6 +366,7 @@ where
|
|||||||
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrix<T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrix<T> {
|
||||||
fn get(&self, pos: (usize, usize)) -> &T {
|
fn get(&self, pos: (usize, usize)) -> &T {
|
||||||
let (row, col) = pos;
|
let (row, col) = pos;
|
||||||
|
|
||||||
if row >= self.nrows || col >= self.ncols {
|
if row >= self.nrows || col >= self.ncols {
|
||||||
panic!(
|
panic!(
|
||||||
"Invalid index ({},{}) for {}x{} matrix",
|
"Invalid index ({},{}) for {}x{} matrix",
|
||||||
@@ -383,15 +446,15 @@ impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrix<T> {}
|
|||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
|
impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
|
||||||
fn get_row<'a>(&'a self, row: usize) -> Box<dyn ArrayView1<T> + 'a> {
|
fn get_row<'a>(&'a self, row: usize) -> Box<dyn ArrayView1<T> + 'a> {
|
||||||
Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols))
|
Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols).unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_col<'a>(&'a self, col: usize) -> Box<dyn ArrayView1<T> + 'a> {
|
fn get_col<'a>(&'a self, col: usize) -> Box<dyn ArrayView1<T> + 'a> {
|
||||||
Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1))
|
Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1).unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn slice<'a>(&'a self, rows: Range<usize>, cols: Range<usize>) -> Box<dyn ArrayView2<T> + 'a> {
|
fn slice<'a>(&'a self, rows: Range<usize>, cols: Range<usize>) -> Box<dyn ArrayView2<T> + 'a> {
|
||||||
Box::new(DenseMatrixView::new(self, rows, cols))
|
Box::new(DenseMatrixView::new(self, rows, cols).unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn slice_mut<'a>(
|
fn slice_mut<'a>(
|
||||||
@@ -402,15 +465,17 @@ impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
|
|||||||
where
|
where
|
||||||
Self: Sized,
|
Self: Sized,
|
||||||
{
|
{
|
||||||
Box::new(DenseMatrixMutView::new(self, rows, cols))
|
Box::new(DenseMatrixMutView::new(self, rows, cols).unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private function so for now assume infallible
|
||||||
fn fill(nrows: usize, ncols: usize, value: T) -> Self {
|
fn fill(nrows: usize, ncols: usize, value: T) -> Self {
|
||||||
DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true)
|
DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private function so for now assume infallible
|
||||||
fn from_iterator<I: Iterator<Item = T>>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self {
|
fn from_iterator<I: Iterator<Item = T>>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self {
|
||||||
DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0)
|
DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transpose(&self) -> Self {
|
fn transpose(&self) -> Self {
|
||||||
@@ -428,12 +493,12 @@ impl<T: Number + RealNumber> EVDDecomposable<T> for DenseMatrix<T> {}
|
|||||||
impl<T: Number + RealNumber> LUDecomposable<T> for DenseMatrix<T> {}
|
impl<T: Number + RealNumber> LUDecomposable<T> for DenseMatrix<T> {}
|
||||||
impl<T: Number + RealNumber> SVDDecomposable<T> for DenseMatrix<T> {}
|
impl<T: Number + RealNumber> SVDDecomposable<T> for DenseMatrix<T> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'_, T> {
|
||||||
fn get(&self, pos: (usize, usize)) -> &T {
|
fn get(&self, pos: (usize, usize)) -> &T {
|
||||||
if self.column_major {
|
if self.column_major {
|
||||||
&self.values[(pos.0 + pos.1 * self.stride)]
|
&self.values[pos.0 + pos.1 * self.stride]
|
||||||
} else {
|
} else {
|
||||||
&self.values[(pos.0 * self.stride + pos.1)]
|
&self.values[pos.0 * self.stride + pos.1]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -450,7 +515,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<'_, T> {
|
||||||
fn get(&self, i: usize) -> &T {
|
fn get(&self, i: usize) -> &T {
|
||||||
if self.nrows == 1 {
|
if self.nrows == 1 {
|
||||||
if self.column_major {
|
if self.column_major {
|
||||||
@@ -488,16 +553,16 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for DenseMatrixView<
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixView<'_, T> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'_, T> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'_, T> {
|
||||||
fn get(&self, pos: (usize, usize)) -> &T {
|
fn get(&self, pos: (usize, usize)) -> &T {
|
||||||
if self.column_major {
|
if self.column_major {
|
||||||
&self.values[(pos.0 + pos.1 * self.stride)]
|
&self.values[pos.0 + pos.1 * self.stride]
|
||||||
} else {
|
} else {
|
||||||
&self.values[(pos.0 * self.stride + pos.1)]
|
&self.values[pos.0 * self.stride + pos.1]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -514,14 +579,12 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
impl<T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> for DenseMatrixMutView<'_, T> {
|
||||||
for DenseMatrixMutView<'a, T>
|
|
||||||
{
|
|
||||||
fn set(&mut self, pos: (usize, usize), x: T) {
|
fn set(&mut self, pos: (usize, usize), x: T) {
|
||||||
if self.column_major {
|
if self.column_major {
|
||||||
self.values[(pos.0 + pos.1 * self.stride)] = x;
|
self.values[pos.0 + pos.1 * self.stride] = x;
|
||||||
} else {
|
} else {
|
||||||
self.values[(pos.0 * self.stride + pos.1)] = x;
|
self.values[pos.0 * self.stride + pos.1] = x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -530,29 +593,90 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrixMutView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrixMutView<'_, T> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixMutView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for DenseMatrixMutView<'_, T> {}
|
||||||
|
|
||||||
impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {}
|
impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {}
|
||||||
|
|
||||||
impl<T: RealNumber> MatrixPreprocessing<T> for DenseMatrix<T> {}
|
impl<T: RealNumber> MatrixPreprocessing<T> for DenseMatrix<T> {}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
#[warn(clippy::reversed_empty_ranges)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use approx::relative_eq;
|
use approx::relative_eq;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_display() {
|
fn test_instantiate_from_2d() {
|
||||||
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
|
||||||
|
assert!(x.is_ok());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_from_2d_empty() {
|
||||||
|
let input: &[&[f64]] = &[&[]];
|
||||||
|
let x = DenseMatrix::from_2d_array(input);
|
||||||
|
assert!(x.is_err());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_from_2d_empty2() {
|
||||||
|
let input: &[&[f64]] = &[&[], &[]];
|
||||||
|
let x = DenseMatrix::from_2d_array(input);
|
||||||
|
assert!(x.is_err());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_ok_view1() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
let v = DenseMatrixView::new(&x, 0..2, 0..2);
|
||||||
|
assert!(v.is_ok());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_ok_view2() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
let v = DenseMatrixView::new(&x, 0..3, 0..3);
|
||||||
|
assert!(v.is_ok());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_ok_view3() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
let v = DenseMatrixView::new(&x, 2..3, 0..3);
|
||||||
|
assert!(v.is_ok());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_ok_view4() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
let v = DenseMatrixView::new(&x, 3..3, 0..3);
|
||||||
|
assert!(v.is_ok());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_err_view1() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
let v = DenseMatrixView::new(&x, 3..4, 0..3);
|
||||||
|
assert!(v.is_err());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_err_view2() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
let v = DenseMatrixView::new(&x, 0..3, 3..4);
|
||||||
|
assert!(v.is_err());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_instantiate_err_view3() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
#[allow(clippy::reversed_empty_ranges)]
|
||||||
|
let v = DenseMatrixView::new(&x, 0..3, 4..3);
|
||||||
|
assert!(v.is_err());
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_display() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
|
||||||
println!("{}", &x);
|
println!("{}", &x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_row_col() {
|
fn test_get_row_col() {
|
||||||
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
|
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
|
|
||||||
assert_eq!(15.0, x.get_col(1).sum());
|
assert_eq!(15.0, x.get_col(1).sum());
|
||||||
assert_eq!(15.0, x.get_row(1).sum());
|
assert_eq!(15.0, x.get_row(1).sum());
|
||||||
@@ -561,7 +685,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_row_major() {
|
fn test_row_major() {
|
||||||
let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false);
|
let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false).unwrap();
|
||||||
|
|
||||||
assert_eq!(5, *x.get_col(1).get(1));
|
assert_eq!(5, *x.get_col(1).get(1));
|
||||||
assert_eq!(7, x.get_col(1).sum());
|
assert_eq!(7, x.get_col(1).sum());
|
||||||
@@ -575,21 +699,22 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_slice() {
|
fn test_get_slice() {
|
||||||
let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]);
|
let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
vec![4, 5, 6],
|
vec![4, 5, 6],
|
||||||
DenseMatrix::from_slice(&(*x.slice(1..2, 0..3))).values
|
DenseMatrix::from_slice(&(*x.slice(1..2, 0..3))).values
|
||||||
);
|
);
|
||||||
let second_row: Vec<i32> = x.slice(1..2, 0..3).iterator(0).map(|x| *x).collect();
|
let second_row: Vec<i32> = x.slice(1..2, 0..3).iterator(0).copied().collect();
|
||||||
assert_eq!(vec![4, 5, 6], second_row);
|
assert_eq!(vec![4, 5, 6], second_row);
|
||||||
let second_col: Vec<i32> = x.slice(0..3, 1..2).iterator(0).map(|x| *x).collect();
|
let second_col: Vec<i32> = x.slice(0..3, 1..2).iterator(0).copied().collect();
|
||||||
assert_eq!(vec![2, 5, 8], second_col);
|
assert_eq!(vec![2, 5, 8], second_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_iter_mut() {
|
fn test_iter_mut() {
|
||||||
let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]);
|
let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap();
|
||||||
|
|
||||||
assert_eq!(vec![1, 4, 7, 2, 5, 8, 3, 6, 9], x.values);
|
assert_eq!(vec![1, 4, 7, 2, 5, 8, 3, 6, 9], x.values);
|
||||||
// add +2 to some elements
|
// add +2 to some elements
|
||||||
@@ -625,7 +750,8 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_str_array() {
|
fn test_str_array() {
|
||||||
let mut x =
|
let mut x =
|
||||||
DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]]);
|
DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values);
|
assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values);
|
||||||
x.iterator_mut(0).for_each(|v| *v = "str");
|
x.iterator_mut(0).for_each(|v| *v = "str");
|
||||||
@@ -637,20 +763,20 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_transpose() {
|
fn test_transpose() {
|
||||||
let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]);
|
let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]).unwrap();
|
||||||
|
|
||||||
assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values);
|
assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values);
|
||||||
assert!(x.column_major == true);
|
assert!(x.column_major);
|
||||||
|
|
||||||
// transpose
|
// transpose
|
||||||
let x = x.transpose();
|
let x = x.transpose();
|
||||||
assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values);
|
assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values);
|
||||||
assert!(x.column_major == false); // should change column_major
|
assert!(!x.column_major); // should change column_major
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_from_iterator() {
|
fn test_from_iterator() {
|
||||||
let data = vec![1, 2, 3, 4, 5, 6];
|
let data = [1, 2, 3, 4, 5, 6];
|
||||||
|
|
||||||
let m = DenseMatrix::from_iterator(data.iter(), 2, 3, 0);
|
let m = DenseMatrix::from_iterator(data.iter(), 2, 3, 0);
|
||||||
|
|
||||||
@@ -659,25 +785,25 @@ mod tests {
|
|||||||
vec![1, 2, 3, 4, 5, 6],
|
vec![1, 2, 3, 4, 5, 6],
|
||||||
m.values.iter().map(|e| **e).collect::<Vec<i32>>()
|
m.values.iter().map(|e| **e).collect::<Vec<i32>>()
|
||||||
);
|
);
|
||||||
assert!(m.column_major == false);
|
assert!(!m.column_major);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_take() {
|
fn test_take() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]);
|
let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap();
|
||||||
let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]);
|
let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap();
|
||||||
|
|
||||||
println!("{}", a);
|
println!("{a}");
|
||||||
// take column 0 and 2
|
// take column 0 and 2
|
||||||
assert_eq!(vec![1, 3, 4, 6], a.take(&[0, 2], 1).values);
|
assert_eq!(vec![1, 3, 4, 6], a.take(&[0, 2], 1).values);
|
||||||
println!("{}", b);
|
println!("{b}");
|
||||||
// take rows 0 and 2
|
// take rows 0 and 2
|
||||||
assert_eq!(vec![1, 2, 5, 6], b.take(&[0, 2], 0).values);
|
assert_eq!(vec![1, 2, 5, 6], b.take(&[0, 2], 0).values);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mut() {
|
fn test_mut() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]);
|
let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]).unwrap();
|
||||||
|
|
||||||
let a = a.abs();
|
let a = a.abs();
|
||||||
assert_eq!(vec![1.3, 4.0, 2.1, 5.3, 3.4, 6.1], a.values);
|
assert_eq!(vec![1.3, 4.0, 2.1, 5.3, 3.4, 6.1], a.values);
|
||||||
@@ -688,26 +814,29 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_reshape() {
|
fn test_reshape() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]);
|
let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let a = a.reshape(2, 6, 0);
|
let a = a.reshape(2, 6, 0);
|
||||||
assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values);
|
assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values);
|
||||||
assert!(a.ncols == 6 && a.nrows == 2 && a.column_major == false);
|
assert!(a.ncols == 6 && a.nrows == 2 && !a.column_major);
|
||||||
|
|
||||||
let a = a.reshape(3, 4, 1);
|
let a = a.reshape(3, 4, 1);
|
||||||
assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values);
|
assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values);
|
||||||
assert!(a.ncols == 4 && a.nrows == 3 && a.column_major == true);
|
assert!(a.ncols == 4 && a.nrows == 3 && a.column_major);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_eq() {
|
fn test_eq() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
|
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).unwrap();
|
||||||
let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
|
let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
|
||||||
let c = DenseMatrix::from_2d_array(&[
|
let c = DenseMatrix::from_2d_array(&[
|
||||||
&[1. + f32::EPSILON, 2., 3.],
|
&[1. + f32::EPSILON, 2., 3.],
|
||||||
&[4., 5., 6. + f32::EPSILON],
|
&[4., 5., 6. + f32::EPSILON],
|
||||||
]);
|
])
|
||||||
let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]);
|
.unwrap();
|
||||||
|
let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert!(!relative_eq!(a, b));
|
assert!(!relative_eq!(a, b));
|
||||||
assert!(!relative_eq!(a, d));
|
assert!(!relative_eq!(a, d));
|
||||||
|
|||||||
+31
-10
@@ -15,6 +15,25 @@ pub struct VecView<'a, T: Debug + Display + Copy + Sized> {
|
|||||||
ptr: &'a [T],
|
ptr: &'a [T],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for &[T] {
|
||||||
|
fn get(&self, i: usize) -> &T {
|
||||||
|
&self[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shape(&self) -> usize {
|
||||||
|
self.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_empty(&self) -> bool {
|
||||||
|
self.len() > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
fn iterator<'b>(&'b self, axis: u8) -> Box<dyn Iterator<Item = &'b T> + 'b> {
|
||||||
|
assert!(axis == 0, "For one dimensional array `axis` should == 0");
|
||||||
|
Box::new(self.iter())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> {
|
||||||
fn get(&self, i: usize) -> &T {
|
fn get(&self, i: usize) -> &T {
|
||||||
&self[i]
|
&self[i]
|
||||||
@@ -36,6 +55,7 @@ impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> {
|
|||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> {
|
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> {
|
||||||
fn set(&mut self, i: usize, x: T) {
|
fn set(&mut self, i: usize, x: T) {
|
||||||
|
// NOTE: this panics in case of out of bounds index
|
||||||
self[i] = x
|
self[i] = x
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -46,6 +66,7 @@ impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for Vec<T> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for Vec<T> {}
|
||||||
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for &[T] {}
|
||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for Vec<T> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for Vec<T> {}
|
||||||
|
|
||||||
@@ -98,7 +119,7 @@ impl<T: Debug + Display + Copy + Sized> Array1<T> for Vec<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'_, T> {
|
||||||
fn get(&self, i: usize) -> &T {
|
fn get(&self, i: usize) -> &T {
|
||||||
&self.ptr[i]
|
&self.ptr[i]
|
||||||
}
|
}
|
||||||
@@ -117,7 +138,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecMutView<'a, T
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'_, T> {
|
||||||
fn set(&mut self, i: usize, x: T) {
|
fn set(&mut self, i: usize, x: T) {
|
||||||
self.ptr[i] = x;
|
self.ptr[i] = x;
|
||||||
}
|
}
|
||||||
@@ -128,10 +149,10 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for VecMutView<'a
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for VecMutView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for VecMutView<'_, T> {}
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1<T> for VecMutView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for VecMutView<'_, T> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'a, T> {
|
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'_, T> {
|
||||||
fn get(&self, i: usize) -> &T {
|
fn get(&self, i: usize) -> &T {
|
||||||
&self.ptr[i]
|
&self.ptr[i]
|
||||||
}
|
}
|
||||||
@@ -150,7 +171,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array<T, usize> for VecView<'a, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for VecView<'a, T> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for VecView<'_, T> {}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
@@ -160,8 +181,8 @@ mod tests {
|
|||||||
fn dot_product<T: Number, V: Array1<T>>(v: &V) -> T {
|
fn dot_product<T: Number, V: Array1<T>>(v: &V) -> T {
|
||||||
let vv = V::zeros(10);
|
let vv = V::zeros(10);
|
||||||
let v_s = vv.slice(0..3);
|
let v_s = vv.slice(0..3);
|
||||||
let dot = v_s.dot(v);
|
|
||||||
dot
|
v_s.dot(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn vector_ops<T: Number + PartialOrd, V: Array1<T>>(_: &V) -> T {
|
fn vector_ops<T: Number + PartialOrd, V: Array1<T>>(_: &V) -> T {
|
||||||
@@ -191,7 +212,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_len() {
|
fn test_len() {
|
||||||
let x = vec![1, 2, 3];
|
let x = [1, 2, 3];
|
||||||
assert_eq!(3, x.len());
|
assert_eq!(3, x.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -216,7 +237,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_mut_iterator() {
|
fn test_mut_iterator() {
|
||||||
let mut x = vec![1, 2, 3];
|
let mut x = vec![1, 2, 3];
|
||||||
x.iterator_mut(0).for_each(|v| *v = *v * 2);
|
x.iterator_mut(0).for_each(|v| *v *= 2);
|
||||||
assert_eq!(vec![2, 4, 6], x);
|
assert_eq!(vec![2, 4, 6], x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayBase<OwnedRepr<T>
|
|||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayView<'a, T, Ix2> {
|
impl<T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayView<'_, T, Ix2> {
|
||||||
fn get(&self, pos: (usize, usize)) -> &T {
|
fn get(&self, pos: (usize, usize)) -> &T {
|
||||||
&self[[pos.0, pos.1]]
|
&self[[pos.0, pos.1]]
|
||||||
}
|
}
|
||||||
@@ -144,11 +144,9 @@ impl<T: Number + RealNumber> EVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2>
|
|||||||
impl<T: Number + RealNumber> LUDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
impl<T: Number + RealNumber> LUDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
||||||
impl<T: Number + RealNumber> SVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
impl<T: Number + RealNumber> SVDDecomposable<T> for ArrayBase<OwnedRepr<T>, Ix2> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayView<'a, T, Ix2> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayView<'_, T, Ix2> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)>
|
impl<T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)> for ArrayViewMut<'_, T, Ix2> {
|
||||||
for ArrayViewMut<'a, T, Ix2>
|
|
||||||
{
|
|
||||||
fn get(&self, pos: (usize, usize)) -> &T {
|
fn get(&self, pos: (usize, usize)) -> &T {
|
||||||
&self[[pos.0, pos.1]]
|
&self[[pos.0, pos.1]]
|
||||||
}
|
}
|
||||||
@@ -175,9 +173,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, (usize, usize)>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
impl<T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)> for ArrayViewMut<'_, T, Ix2> {
|
||||||
for ArrayViewMut<'a, T, Ix2>
|
|
||||||
{
|
|
||||||
fn set(&mut self, pos: (usize, usize), x: T) {
|
fn set(&mut self, pos: (usize, usize), x: T) {
|
||||||
self[[pos.0, pos.1]] = x
|
self[[pos.0, pos.1]] = x
|
||||||
}
|
}
|
||||||
@@ -195,9 +191,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayViewMut<'a, T, Ix2> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for ArrayViewMut<'_, T, Ix2> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayViewMut<'a, T, Ix2> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView2<T> for ArrayViewMut<'_, T, Ix2> {}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
@@ -217,7 +213,7 @@ mod tests {
|
|||||||
fn test_iterator() {
|
fn test_iterator() {
|
||||||
let a = arr2(&[[1, 2, 3], [4, 5, 6]]);
|
let a = arr2(&[[1, 2, 3], [4, 5, 6]]);
|
||||||
|
|
||||||
let v: Vec<i32> = a.iterator(0).map(|&v| v).collect();
|
let v: Vec<i32> = a.iterator(0).copied().collect();
|
||||||
assert_eq!(v, vec!(1, 2, 3, 4, 5, 6));
|
assert_eq!(v, vec!(1, 2, 3, 4, 5, 6));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,7 +232,7 @@ mod tests {
|
|||||||
let x = arr2(&[[1, 2, 3], [4, 5, 6]]);
|
let x = arr2(&[[1, 2, 3], [4, 5, 6]]);
|
||||||
let x_slice = Array2::slice(&x, 0..2, 1..2);
|
let x_slice = Array2::slice(&x, 0..2, 1..2);
|
||||||
assert_eq!((2, 1), x_slice.shape());
|
assert_eq!((2, 1), x_slice.shape());
|
||||||
let v: Vec<i32> = x_slice.iterator(0).map(|&v| v).collect();
|
let v: Vec<i32> = x_slice.iterator(0).copied().collect();
|
||||||
assert_eq!(v, [2, 5]);
|
assert_eq!(v, [2, 5]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -245,11 +241,11 @@ mod tests {
|
|||||||
let x = arr2(&[[1, 2, 3], [4, 5, 6]]);
|
let x = arr2(&[[1, 2, 3], [4, 5, 6]]);
|
||||||
let x_slice = Array2::slice(&x, 0..2, 0..3);
|
let x_slice = Array2::slice(&x, 0..2, 0..3);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
x_slice.iterator(0).map(|&v| v).collect::<Vec<i32>>(),
|
x_slice.iterator(0).copied().collect::<Vec<i32>>(),
|
||||||
vec![1, 2, 3, 4, 5, 6]
|
vec![1, 2, 3, 4, 5, 6]
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
x_slice.iterator(1).map(|&v| v).collect::<Vec<i32>>(),
|
x_slice.iterator(1).copied().collect::<Vec<i32>>(),
|
||||||
vec![1, 4, 2, 5, 3, 6]
|
vec![1, 4, 2, 5, 3, 6]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -279,8 +275,8 @@ mod tests {
|
|||||||
fn test_c_from_iterator() {
|
fn test_c_from_iterator() {
|
||||||
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||||
let a: NDArray2<i32> = Array2::from_iterator(data.clone().into_iter(), 4, 3, 0);
|
let a: NDArray2<i32> = Array2::from_iterator(data.clone().into_iter(), 4, 3, 0);
|
||||||
println!("{}", a);
|
println!("{a}");
|
||||||
let a: NDArray2<i32> = Array2::from_iterator(data.into_iter(), 4, 3, 1);
|
let a: NDArray2<i32> = Array2::from_iterator(data.into_iter(), 4, 3, 1);
|
||||||
println!("{}", a);
|
println!("{a}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayBase<OwnedRepr<T>
|
|||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayBase<OwnedRepr<T>, Ix1> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayBase<OwnedRepr<T>, Ix1> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'a, T, Ix1> {
|
impl<T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'_, T, Ix1> {
|
||||||
fn get(&self, i: usize) -> &T {
|
fn get(&self, i: usize) -> &T {
|
||||||
&self[i]
|
&self[i]
|
||||||
}
|
}
|
||||||
@@ -60,9 +60,9 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayView<'a
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayView<'a, T, Ix1> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayView<'_, T, Ix1> {}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut<'a, T, Ix1> {
|
impl<T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut<'_, T, Ix1> {
|
||||||
fn get(&self, i: usize) -> &T {
|
fn get(&self, i: usize) -> &T {
|
||||||
&self[i]
|
&self[i]
|
||||||
}
|
}
|
||||||
@@ -81,7 +81,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray<T, usize> for ArrayViewMut
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<'a, T, Ix1> {
|
impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<'_, T, Ix1> {
|
||||||
fn set(&mut self, i: usize, x: T) {
|
fn set(&mut self, i: usize, x: T) {
|
||||||
self[i] = x;
|
self[i] = x;
|
||||||
}
|
}
|
||||||
@@ -92,8 +92,8 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, usize> for ArrayViewMut<
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayViewMut<'a, T, Ix1> {}
|
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for ArrayViewMut<'_, T, Ix1> {}
|
||||||
impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayViewMut<'a, T, Ix1> {}
|
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for ArrayViewMut<'_, T, Ix1> {}
|
||||||
|
|
||||||
impl<T: Debug + Display + Copy + Sized> Array1<T> for ArrayBase<OwnedRepr<T>, Ix1> {
|
impl<T: Debug + Display + Copy + Sized> Array1<T> for ArrayBase<OwnedRepr<T>, Ix1> {
|
||||||
fn slice<'a>(&'a self, range: Range<usize>) -> Box<dyn ArrayView1<T> + 'a> {
|
fn slice<'a>(&'a self, range: Range<usize>) -> Box<dyn ArrayView1<T> + 'a> {
|
||||||
@@ -152,7 +152,7 @@ mod tests {
|
|||||||
fn test_iterator() {
|
fn test_iterator() {
|
||||||
let a = arr1(&[1, 2, 3]);
|
let a = arr1(&[1, 2, 3]);
|
||||||
|
|
||||||
let v: Vec<i32> = a.iterator(0).map(|&v| v).collect();
|
let v: Vec<i32> = a.iterator(0).copied().collect();
|
||||||
assert_eq!(v, vec!(1, 2, 3));
|
assert_eq!(v, vec!(1, 2, 3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
//! &[25., 15., -5.],
|
//! &[25., 15., -5.],
|
||||||
//! &[15., 18., 0.],
|
//! &[15., 18., 0.],
|
||||||
//! &[-5., 0., 11.]
|
//! &[-5., 0., 11.]
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let cholesky = A.cholesky().unwrap();
|
//! let cholesky = A.cholesky().unwrap();
|
||||||
//! let lower_triangular: DenseMatrix<f64> = cholesky.L();
|
//! let lower_triangular: DenseMatrix<f64> = cholesky.L();
|
||||||
@@ -175,11 +175,14 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn cholesky_decompose() {
|
fn cholesky_decompose() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
|
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]])
|
||||||
|
.unwrap();
|
||||||
let l =
|
let l =
|
||||||
DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]]);
|
DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]])
|
||||||
|
.unwrap();
|
||||||
let u =
|
let u =
|
||||||
DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]]);
|
DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]])
|
||||||
|
.unwrap();
|
||||||
let cholesky = a.cholesky().unwrap();
|
let cholesky = a.cholesky().unwrap();
|
||||||
|
|
||||||
assert!(relative_eq!(cholesky.L().abs(), l.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(cholesky.L().abs(), l.abs(), epsilon = 1e-4));
|
||||||
@@ -197,9 +200,10 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn cholesky_solve_mut() {
|
fn cholesky_solve_mut() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
|
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]])
|
||||||
let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]);
|
.unwrap();
|
||||||
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]);
|
let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]).unwrap();
|
||||||
|
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap();
|
||||||
|
|
||||||
let cholesky = a.cholesky().unwrap();
|
let cholesky = a.cholesky().unwrap();
|
||||||
|
|
||||||
|
|||||||
+24
-22
@@ -19,7 +19,7 @@
|
|||||||
//! &[0.9000, 0.4000, 0.7000],
|
//! &[0.9000, 0.4000, 0.7000],
|
||||||
//! &[0.4000, 0.5000, 0.3000],
|
//! &[0.4000, 0.5000, 0.3000],
|
||||||
//! &[0.7000, 0.3000, 0.8000],
|
//! &[0.7000, 0.3000, 0.8000],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let evd = A.evd(true).unwrap();
|
//! let evd = A.evd(true).unwrap();
|
||||||
//! let eigenvectors: DenseMatrix<f64> = evd.V;
|
//! let eigenvectors: DenseMatrix<f64> = evd.V;
|
||||||
@@ -66,7 +66,7 @@ pub trait EVDDecomposable<T: Number + RealNumber>: Array2<T> {
|
|||||||
fn evd_mut(mut self, symmetric: bool) -> Result<EVD<T, Self>, Failed> {
|
fn evd_mut(mut self, symmetric: bool) -> Result<EVD<T, Self>, Failed> {
|
||||||
let (nrows, ncols) = self.shape();
|
let (nrows, ncols) = self.shape();
|
||||||
if ncols != nrows {
|
if ncols != nrows {
|
||||||
panic!("Matrix is not square: {} x {}", nrows, ncols);
|
panic!("Matrix is not square: {nrows} x {ncols}");
|
||||||
}
|
}
|
||||||
|
|
||||||
let n = nrows;
|
let n = nrows;
|
||||||
@@ -820,7 +820,8 @@ mod tests {
|
|||||||
&[0.9000, 0.4000, 0.7000],
|
&[0.9000, 0.4000, 0.7000],
|
||||||
&[0.4000, 0.5000, 0.3000],
|
&[0.4000, 0.5000, 0.3000],
|
||||||
&[0.7000, 0.3000, 0.8000],
|
&[0.7000, 0.3000, 0.8000],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let eigen_values: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834];
|
let eigen_values: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834];
|
||||||
|
|
||||||
@@ -828,7 +829,8 @@ mod tests {
|
|||||||
&[0.6881997, -0.07121225, 0.7220180],
|
&[0.6881997, -0.07121225, 0.7220180],
|
||||||
&[0.3700456, 0.89044952, -0.2648886],
|
&[0.3700456, 0.89044952, -0.2648886],
|
||||||
&[0.6240573, -0.44947578, -0.6391588],
|
&[0.6240573, -0.44947578, -0.6391588],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let evd = A.evd(true).unwrap();
|
let evd = A.evd(true).unwrap();
|
||||||
|
|
||||||
@@ -837,11 +839,9 @@ mod tests {
|
|||||||
evd.V.abs(),
|
evd.V.abs(),
|
||||||
epsilon = 1e-4
|
epsilon = 1e-4
|
||||||
));
|
));
|
||||||
for i in 0..eigen_values.len() {
|
for (i, eigen_values_i) in eigen_values.iter().enumerate() {
|
||||||
assert!((eigen_values[i] - evd.d[i]).abs() < 1e-4);
|
assert!((eigen_values_i - evd.d[i]).abs() < 1e-4);
|
||||||
}
|
assert!((0f64 - evd.e[i]).abs() < f64::EPSILON);
|
||||||
for i in 0..eigen_values.len() {
|
|
||||||
assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -854,7 +854,8 @@ mod tests {
|
|||||||
&[0.9000, 0.4000, 0.7000],
|
&[0.9000, 0.4000, 0.7000],
|
||||||
&[0.4000, 0.5000, 0.3000],
|
&[0.4000, 0.5000, 0.3000],
|
||||||
&[0.8000, 0.3000, 0.8000],
|
&[0.8000, 0.3000, 0.8000],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let eigen_values: Vec<f64> = vec![1.79171122, 0.31908143, 0.08920735];
|
let eigen_values: Vec<f64> = vec![1.79171122, 0.31908143, 0.08920735];
|
||||||
|
|
||||||
@@ -862,7 +863,8 @@ mod tests {
|
|||||||
&[0.7178958, 0.05322098, 0.6812010],
|
&[0.7178958, 0.05322098, 0.6812010],
|
||||||
&[0.3837711, -0.84702111, -0.1494582],
|
&[0.3837711, -0.84702111, -0.1494582],
|
||||||
&[0.6952105, 0.43984484, -0.7036135],
|
&[0.6952105, 0.43984484, -0.7036135],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let evd = A.evd(false).unwrap();
|
let evd = A.evd(false).unwrap();
|
||||||
|
|
||||||
@@ -871,11 +873,9 @@ mod tests {
|
|||||||
evd.V.abs(),
|
evd.V.abs(),
|
||||||
epsilon = 1e-4
|
epsilon = 1e-4
|
||||||
));
|
));
|
||||||
for i in 0..eigen_values.len() {
|
for (i, eigen_values_i) in eigen_values.iter().enumerate() {
|
||||||
assert!((eigen_values[i] - evd.d[i]).abs() < 1e-4);
|
assert!((eigen_values_i - evd.d[i]).abs() < 1e-4);
|
||||||
}
|
assert!((0f64 - evd.e[i]).abs() < f64::EPSILON);
|
||||||
for i in 0..eigen_values.len() {
|
|
||||||
assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -889,7 +889,8 @@ mod tests {
|
|||||||
&[4.0, -1.0, 1.0, 1.0],
|
&[4.0, -1.0, 1.0, 1.0],
|
||||||
&[1.0, 1.0, 3.0, -2.0],
|
&[1.0, 1.0, 3.0, -2.0],
|
||||||
&[1.0, 1.0, 4.0, -1.0],
|
&[1.0, 1.0, 4.0, -1.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let eigen_values_d: Vec<f64> = vec![0.0, 2.0, 2.0, 0.0];
|
let eigen_values_d: Vec<f64> = vec![0.0, 2.0, 2.0, 0.0];
|
||||||
let eigen_values_e: Vec<f64> = vec![2.2361, 0.9999, -0.9999, -2.2361];
|
let eigen_values_e: Vec<f64> = vec![2.2361, 0.9999, -0.9999, -2.2361];
|
||||||
@@ -899,7 +900,8 @@ mod tests {
|
|||||||
&[-0.6707, 0.1059, 0.901, 0.6289],
|
&[-0.6707, 0.1059, 0.901, 0.6289],
|
||||||
&[0.9159, -0.1378, 0.3816, 0.0806],
|
&[0.9159, -0.1378, 0.3816, 0.0806],
|
||||||
&[0.6707, 0.1059, 0.901, -0.6289],
|
&[0.6707, 0.1059, 0.901, -0.6289],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let evd = A.evd(false).unwrap();
|
let evd = A.evd(false).unwrap();
|
||||||
|
|
||||||
@@ -908,11 +910,11 @@ mod tests {
|
|||||||
evd.V.abs(),
|
evd.V.abs(),
|
||||||
epsilon = 1e-4
|
epsilon = 1e-4
|
||||||
));
|
));
|
||||||
for i in 0..eigen_values_d.len() {
|
for (i, eigen_values_d_i) in eigen_values_d.iter().enumerate() {
|
||||||
assert!((eigen_values_d[i] - evd.d[i]).abs() < 1e-4);
|
assert!((eigen_values_d_i - evd.d[i]).abs() < 1e-4);
|
||||||
}
|
}
|
||||||
for i in 0..eigen_values_e.len() {
|
for (i, eigen_values_e_i) in eigen_values_e.iter().enumerate() {
|
||||||
assert!((eigen_values_e[i] - evd.e[i]).abs() < 1e-4);
|
assert!((eigen_values_e_i - evd.e[i]).abs() < 1e-4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,9 +12,9 @@ pub trait HighOrderOperations<T: Number>: Array2<T> {
|
|||||||
/// use smartcore::linalg::traits::high_order::HighOrderOperations;
|
/// use smartcore::linalg::traits::high_order::HighOrderOperations;
|
||||||
/// use smartcore::linalg::basic::arrays::Array2;
|
/// use smartcore::linalg::basic::arrays::Array2;
|
||||||
///
|
///
|
||||||
/// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
|
/// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]).unwrap();
|
||||||
/// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]);
|
/// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]).unwrap();
|
||||||
/// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]);
|
/// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]).unwrap();
|
||||||
///
|
///
|
||||||
/// assert_eq!(a.ab(true, &b, false), expected);
|
/// assert_eq!(a.ab(true, &b, false), expected);
|
||||||
/// ```
|
/// ```
|
||||||
|
|||||||
+10
-12
@@ -18,7 +18,7 @@
|
|||||||
//! &[1., 2., 3.],
|
//! &[1., 2., 3.],
|
||||||
//! &[0., 1., 5.],
|
//! &[0., 1., 5.],
|
||||||
//! &[5., 6., 0.]
|
//! &[5., 6., 0.]
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let lu = A.lu().unwrap();
|
//! let lu = A.lu().unwrap();
|
||||||
//! let lower: DenseMatrix<f64> = lu.L();
|
//! let lower: DenseMatrix<f64> = lu.L();
|
||||||
@@ -126,7 +126,7 @@ impl<T: Number + RealNumber, M: Array2<T>> LU<T, M> {
|
|||||||
let (m, n) = self.LU.shape();
|
let (m, n) = self.LU.shape();
|
||||||
|
|
||||||
if m != n {
|
if m != n {
|
||||||
panic!("Matrix is not square: {}x{}", m, n);
|
panic!("Matrix is not square: {m}x{n}");
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut inv = M::zeros(n, n);
|
let mut inv = M::zeros(n, n);
|
||||||
@@ -143,10 +143,7 @@ impl<T: Number + RealNumber, M: Array2<T>> LU<T, M> {
|
|||||||
let (b_m, b_n) = b.shape();
|
let (b_m, b_n) = b.shape();
|
||||||
|
|
||||||
if b_m != m {
|
if b_m != m {
|
||||||
panic!(
|
panic!("Row dimensions do not agree: A is {m} x {n}, but B is {b_m} x {b_n}");
|
||||||
"Row dimensions do not agree: A is {} x {}, but B is {} x {}",
|
|
||||||
m, n, b_m, b_n
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.singular {
|
if self.singular {
|
||||||
@@ -266,13 +263,13 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn decompose() {
|
fn decompose() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]);
|
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap();
|
||||||
let expected_L =
|
let expected_L =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]);
|
DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]).unwrap();
|
||||||
let expected_U =
|
let expected_U =
|
||||||
DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]);
|
DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]).unwrap();
|
||||||
let expected_pivot =
|
let expected_pivot =
|
||||||
DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]);
|
DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]).unwrap();
|
||||||
let lu = a.lu().unwrap();
|
let lu = a.lu().unwrap();
|
||||||
assert!(relative_eq!(lu.L(), expected_L, epsilon = 1e-4));
|
assert!(relative_eq!(lu.L(), expected_L, epsilon = 1e-4));
|
||||||
assert!(relative_eq!(lu.U(), expected_U, epsilon = 1e-4));
|
assert!(relative_eq!(lu.U(), expected_U, epsilon = 1e-4));
|
||||||
@@ -284,9 +281,10 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn inverse() {
|
fn inverse() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]);
|
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap();
|
||||||
let expected =
|
let expected =
|
||||||
DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]]);
|
DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]])
|
||||||
|
.unwrap();
|
||||||
let a_inv = a.lu().and_then(|lu| lu.inverse()).unwrap();
|
let a_inv = a.lu().and_then(|lu| lu.inverse()).unwrap();
|
||||||
assert!(relative_eq!(a_inv, expected, epsilon = 1e-4));
|
assert!(relative_eq!(a_inv, expected, epsilon = 1e-4));
|
||||||
}
|
}
|
||||||
|
|||||||
+13
-11
@@ -13,7 +13,7 @@
|
|||||||
//! &[0.9, 0.4, 0.7],
|
//! &[0.9, 0.4, 0.7],
|
||||||
//! &[0.4, 0.5, 0.3],
|
//! &[0.4, 0.5, 0.3],
|
||||||
//! &[0.7, 0.3, 0.8]
|
//! &[0.7, 0.3, 0.8]
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let qr = A.qr().unwrap();
|
//! let qr = A.qr().unwrap();
|
||||||
//! let orthogonal: DenseMatrix<f64> = qr.Q();
|
//! let orthogonal: DenseMatrix<f64> = qr.Q();
|
||||||
@@ -102,10 +102,7 @@ impl<T: Number + RealNumber, M: Array2<T>> QR<T, M> {
|
|||||||
let (b_nrows, b_ncols) = b.shape();
|
let (b_nrows, b_ncols) = b.shape();
|
||||||
|
|
||||||
if b_nrows != m {
|
if b_nrows != m {
|
||||||
panic!(
|
panic!("Row dimensions do not agree: A is {m} x {n}, but B is {b_nrows} x {b_ncols}");
|
||||||
"Row dimensions do not agree: A is {} x {}, but B is {} x {}",
|
|
||||||
m, n, b_nrows, b_ncols
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.singular {
|
if self.singular {
|
||||||
@@ -204,17 +201,20 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn decompose() {
|
fn decompose() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
|
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]])
|
||||||
|
.unwrap();
|
||||||
let q = DenseMatrix::from_2d_array(&[
|
let q = DenseMatrix::from_2d_array(&[
|
||||||
&[-0.7448, 0.2436, 0.6212],
|
&[-0.7448, 0.2436, 0.6212],
|
||||||
&[-0.331, -0.9432, -0.027],
|
&[-0.331, -0.9432, -0.027],
|
||||||
&[-0.5793, 0.2257, -0.7832],
|
&[-0.5793, 0.2257, -0.7832],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let r = DenseMatrix::from_2d_array(&[
|
let r = DenseMatrix::from_2d_array(&[
|
||||||
&[-1.2083, -0.6373, -1.0842],
|
&[-1.2083, -0.6373, -1.0842],
|
||||||
&[0.0, -0.3064, 0.0682],
|
&[0.0, -0.3064, 0.0682],
|
||||||
&[0.0, 0.0, -0.1999],
|
&[0.0, 0.0, -0.1999],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let qr = a.qr().unwrap();
|
let qr = a.qr().unwrap();
|
||||||
assert!(relative_eq!(qr.Q().abs(), q.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(qr.Q().abs(), q.abs(), epsilon = 1e-4));
|
||||||
assert!(relative_eq!(qr.R().abs(), r.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(qr.R().abs(), r.abs(), epsilon = 1e-4));
|
||||||
@@ -226,13 +226,15 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn qr_solve_mut() {
|
fn qr_solve_mut() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
|
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]])
|
||||||
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]);
|
.unwrap();
|
||||||
|
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap();
|
||||||
let expected_w = DenseMatrix::from_2d_array(&[
|
let expected_w = DenseMatrix::from_2d_array(&[
|
||||||
&[-0.2027027, -1.2837838],
|
&[-0.2027027, -1.2837838],
|
||||||
&[0.8783784, 2.2297297],
|
&[0.8783784, 2.2297297],
|
||||||
&[0.4729730, 0.6621622],
|
&[0.4729730, 0.6621622],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let w = a.qr_solve_mut(b).unwrap();
|
let w = a.qr_solve_mut(b).unwrap();
|
||||||
assert!(relative_eq!(w, expected_w, epsilon = 1e-2));
|
assert!(relative_eq!(w, expected_w, epsilon = 1e-2));
|
||||||
}
|
}
|
||||||
|
|||||||
+18
-15
@@ -136,13 +136,12 @@ pub trait MatrixPreprocessing<T: RealNumber>: MutArrayView2<T> + Clone {
|
|||||||
/// ```rust
|
/// ```rust
|
||||||
/// use smartcore::linalg::basic::matrix::DenseMatrix;
|
/// use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||||
/// use smartcore::linalg::traits::stats::MatrixPreprocessing;
|
/// use smartcore::linalg::traits::stats::MatrixPreprocessing;
|
||||||
/// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]);
|
/// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap();
|
||||||
/// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]);
|
/// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap();
|
||||||
/// a.binarize_mut(0.);
|
/// a.binarize_mut(0.);
|
||||||
///
|
///
|
||||||
/// assert_eq!(a, expected);
|
/// assert_eq!(a, expected);
|
||||||
/// ```
|
/// ```
|
||||||
|
|
||||||
fn binarize_mut(&mut self, threshold: T) {
|
fn binarize_mut(&mut self, threshold: T) {
|
||||||
let (nrows, ncols) = self.shape();
|
let (nrows, ncols) = self.shape();
|
||||||
for row in 0..nrows {
|
for row in 0..nrows {
|
||||||
@@ -159,8 +158,8 @@ pub trait MatrixPreprocessing<T: RealNumber>: MutArrayView2<T> + Clone {
|
|||||||
/// ```rust
|
/// ```rust
|
||||||
/// use smartcore::linalg::basic::matrix::DenseMatrix;
|
/// use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||||
/// use smartcore::linalg::traits::stats::MatrixPreprocessing;
|
/// use smartcore::linalg::traits::stats::MatrixPreprocessing;
|
||||||
/// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]);
|
/// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap();
|
||||||
/// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]);
|
/// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap();
|
||||||
///
|
///
|
||||||
/// assert_eq!(a.binarize(0.), expected);
|
/// assert_eq!(a.binarize(0.), expected);
|
||||||
/// ```
|
/// ```
|
||||||
@@ -186,7 +185,8 @@ mod tests {
|
|||||||
&[1., 2., 3., 1., 2.],
|
&[1., 2., 3., 1., 2.],
|
||||||
&[4., 5., 6., 3., 4.],
|
&[4., 5., 6., 3., 4.],
|
||||||
&[7., 8., 9., 5., 6.],
|
&[7., 8., 9., 5., 6.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let expected_0 = vec![4., 5., 6., 3., 4.];
|
let expected_0 = vec![4., 5., 6., 3., 4.];
|
||||||
let expected_1 = vec![1.8, 4.4, 7.];
|
let expected_1 = vec![1.8, 4.4, 7.];
|
||||||
|
|
||||||
@@ -196,7 +196,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_var() {
|
fn test_var() {
|
||||||
let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]);
|
let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap();
|
||||||
let expected_0 = vec![4., 4., 4., 4.];
|
let expected_0 = vec![4., 4., 4., 4.];
|
||||||
let expected_1 = vec![1.25, 1.25];
|
let expected_1 = vec![1.25, 1.25];
|
||||||
|
|
||||||
@@ -211,12 +211,13 @@ mod tests {
|
|||||||
let m = DenseMatrix::from_2d_array(&[
|
let m = DenseMatrix::from_2d_array(&[
|
||||||
&[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25],
|
&[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25],
|
||||||
&[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25],
|
&[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
|
let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
|
||||||
let expected_1 = vec![1.25, 1.25];
|
let expected_1 = vec![1.25, 1.25];
|
||||||
|
|
||||||
assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON));
|
assert!(m.var(0).approximate_eq(&expected_0, f64::EPSILON));
|
||||||
assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON));
|
assert!(m.var(1).approximate_eq(&expected_1, f64::EPSILON));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
m.mean(0),
|
m.mean(0),
|
||||||
vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
|
vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
|
||||||
@@ -230,7 +231,8 @@ mod tests {
|
|||||||
&[1., 2., 3., 1., 2.],
|
&[1., 2., 3., 1., 2.],
|
||||||
&[4., 5., 6., 3., 4.],
|
&[4., 5., 6., 3., 4.],
|
||||||
&[7., 8., 9., 5., 6.],
|
&[7., 8., 9., 5., 6.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let expected_0 = vec![
|
let expected_0 = vec![
|
||||||
2.449489742783178,
|
2.449489742783178,
|
||||||
2.449489742783178,
|
2.449489742783178,
|
||||||
@@ -251,10 +253,10 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_scale() {
|
fn test_scale() {
|
||||||
let m: DenseMatrix<f64> =
|
let m: DenseMatrix<f64> =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]);
|
DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap();
|
||||||
|
|
||||||
let expected_0: DenseMatrix<f64> =
|
let expected_0: DenseMatrix<f64> =
|
||||||
DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]);
|
DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]).unwrap();
|
||||||
let expected_1: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
let expected_1: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
||||||
&[
|
&[
|
||||||
-1.3416407864998738,
|
-1.3416407864998738,
|
||||||
@@ -268,7 +270,8 @@ mod tests {
|
|||||||
0.4472135954999579,
|
0.4472135954999579,
|
||||||
1.3416407864998738,
|
1.3416407864998738,
|
||||||
],
|
],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(m.mean(0), vec![3.0, 4.0, 5.0, 6.0]);
|
assert_eq!(m.mean(0), vec![3.0, 4.0, 5.0, 6.0]);
|
||||||
assert_eq!(m.mean(1), vec![2.5, 6.5]);
|
assert_eq!(m.mean(1), vec![2.5, 6.5]);
|
||||||
@@ -286,7 +289,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut m = m.clone();
|
let mut m = m;
|
||||||
m.standard_scale_mut(&m.mean(1), &m.std(1), 1);
|
m.standard_scale_mut(&m.mean(1), &m.std(1), 1);
|
||||||
assert_eq!(&m, &expected_1);
|
assert_eq!(&m, &expected_1);
|
||||||
}
|
}
|
||||||
|
|||||||
+24
-18
@@ -17,7 +17,7 @@
|
|||||||
//! &[0.9, 0.4, 0.7],
|
//! &[0.9, 0.4, 0.7],
|
||||||
//! &[0.4, 0.5, 0.3],
|
//! &[0.4, 0.5, 0.3],
|
||||||
//! &[0.7, 0.3, 0.8]
|
//! &[0.7, 0.3, 0.8]
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let svd = A.svd().unwrap();
|
//! let svd = A.svd().unwrap();
|
||||||
//! let u: DenseMatrix<f64> = svd.U;
|
//! let u: DenseMatrix<f64> = svd.U;
|
||||||
@@ -48,11 +48,9 @@ pub struct SVD<T: Number + RealNumber, M: SVDDecomposable<T>> {
|
|||||||
pub V: M,
|
pub V: M,
|
||||||
/// Singular values of the original matrix
|
/// Singular values of the original matrix
|
||||||
pub s: Vec<T>,
|
pub s: Vec<T>,
|
||||||
///
|
|
||||||
m: usize,
|
m: usize,
|
||||||
///
|
|
||||||
n: usize,
|
n: usize,
|
||||||
///
|
/// Tolerance
|
||||||
tol: T,
|
tol: T,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -489,7 +487,8 @@ mod tests {
|
|||||||
&[0.9000, 0.4000, 0.7000],
|
&[0.9000, 0.4000, 0.7000],
|
||||||
&[0.4000, 0.5000, 0.3000],
|
&[0.4000, 0.5000, 0.3000],
|
||||||
&[0.7000, 0.3000, 0.8000],
|
&[0.7000, 0.3000, 0.8000],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let s: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834];
|
let s: Vec<f64> = vec![1.7498382, 0.3165784, 0.1335834];
|
||||||
|
|
||||||
@@ -497,20 +496,22 @@ mod tests {
|
|||||||
&[0.6881997, -0.07121225, 0.7220180],
|
&[0.6881997, -0.07121225, 0.7220180],
|
||||||
&[0.3700456, 0.89044952, -0.2648886],
|
&[0.3700456, 0.89044952, -0.2648886],
|
||||||
&[0.6240573, -0.44947578, -0.639158],
|
&[0.6240573, -0.44947578, -0.639158],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let V = DenseMatrix::from_2d_array(&[
|
let V = DenseMatrix::from_2d_array(&[
|
||||||
&[0.6881997, -0.07121225, 0.7220180],
|
&[0.6881997, -0.07121225, 0.7220180],
|
||||||
&[0.3700456, 0.89044952, -0.2648886],
|
&[0.3700456, 0.89044952, -0.2648886],
|
||||||
&[0.6240573, -0.44947578, -0.6391588],
|
&[0.6240573, -0.44947578, -0.6391588],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let svd = A.svd().unwrap();
|
let svd = A.svd().unwrap();
|
||||||
|
|
||||||
assert!(relative_eq!(V.abs(), svd.V.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(V.abs(), svd.V.abs(), epsilon = 1e-4));
|
||||||
assert!(relative_eq!(U.abs(), svd.U.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(U.abs(), svd.U.abs(), epsilon = 1e-4));
|
||||||
for i in 0..s.len() {
|
for (i, s_i) in s.iter().enumerate() {
|
||||||
assert!((s[i] - svd.s[i]).abs() < 1e-4);
|
assert!((s_i - svd.s[i]).abs() < 1e-4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -577,7 +578,8 @@ mod tests {
|
|||||||
-0.2158704,
|
-0.2158704,
|
||||||
-0.27529472,
|
-0.27529472,
|
||||||
],
|
],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let s: Vec<f64> = vec![
|
let s: Vec<f64> = vec![
|
||||||
3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515,
|
3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515,
|
||||||
@@ -647,7 +649,8 @@ mod tests {
|
|||||||
0.73034065,
|
0.73034065,
|
||||||
-0.43965505,
|
-0.43965505,
|
||||||
],
|
],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let V = DenseMatrix::from_2d_array(&[
|
let V = DenseMatrix::from_2d_array(&[
|
||||||
&[
|
&[
|
||||||
@@ -707,14 +710,15 @@ mod tests {
|
|||||||
0.1654796,
|
0.1654796,
|
||||||
-0.32346758,
|
-0.32346758,
|
||||||
],
|
],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let svd = A.svd().unwrap();
|
let svd = A.svd().unwrap();
|
||||||
|
|
||||||
assert!(relative_eq!(V.abs(), svd.V.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(V.abs(), svd.V.abs(), epsilon = 1e-4));
|
||||||
assert!(relative_eq!(U.abs(), svd.U.abs(), epsilon = 1e-4));
|
assert!(relative_eq!(U.abs(), svd.U.abs(), epsilon = 1e-4));
|
||||||
for i in 0..s.len() {
|
for (i, s_i) in s.iter().enumerate() {
|
||||||
assert!((s[i] - svd.s[i]).abs() < 1e-4);
|
assert!((s_i - svd.s[i]).abs() < 1e-4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -723,10 +727,11 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn solve() {
|
fn solve() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
|
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]])
|
||||||
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]);
|
.unwrap();
|
||||||
|
let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap();
|
||||||
let expected_w =
|
let expected_w =
|
||||||
DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]);
|
DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]).unwrap();
|
||||||
let w = a.svd_solve_mut(b).unwrap();
|
let w = a.svd_solve_mut(b).unwrap();
|
||||||
assert!(relative_eq!(w, expected_w, epsilon = 1e-2));
|
assert!(relative_eq!(w, expected_w, epsilon = 1e-2));
|
||||||
}
|
}
|
||||||
@@ -737,7 +742,8 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn decompose_restore() {
|
fn decompose_restore() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]);
|
let a =
|
||||||
|
DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]).unwrap();
|
||||||
let svd = a.svd().unwrap();
|
let svd = a.svd().unwrap();
|
||||||
let u: &DenseMatrix<f32> = &svd.U; //U
|
let u: &DenseMatrix<f32> = &svd.U; //U
|
||||||
let v: &DenseMatrix<f32> = &svd.V; // V
|
let v: &DenseMatrix<f32> = &svd.V; // V
|
||||||
|
|||||||
@@ -12,7 +12,8 @@
|
|||||||
//! pub struct BGSolver {}
|
//! pub struct BGSolver {}
|
||||||
//! impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X> for BGSolver {}
|
//! impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X> for BGSolver {}
|
||||||
//!
|
//!
|
||||||
//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
|
//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0.,
|
||||||
|
//! 11.]]).unwrap();
|
||||||
//! let b = vec![40., 51., 28.];
|
//! let b = vec![40., 51., 28.];
|
||||||
//! let expected = vec![1.0, 2.0, 3.0];
|
//! let expected = vec![1.0, 2.0, 3.0];
|
||||||
//! let mut x = Vec::zeros(3);
|
//! let mut x = Vec::zeros(3);
|
||||||
@@ -26,9 +27,9 @@ use crate::error::Failed;
|
|||||||
use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1};
|
use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1};
|
||||||
use crate::numbers::floatnum::FloatNumber;
|
use crate::numbers::floatnum::FloatNumber;
|
||||||
|
|
||||||
///
|
/// Trait for Biconjugate Gradient Solver
|
||||||
pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
|
pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
|
||||||
///
|
/// Solve Ax = b
|
||||||
fn solve_mut(
|
fn solve_mut(
|
||||||
&self,
|
&self,
|
||||||
a: &'a X,
|
a: &'a X,
|
||||||
@@ -108,7 +109,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
|
|||||||
Ok(err)
|
Ok(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
/// solve preconditioner
|
||||||
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
|
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
|
||||||
let diag = Self::diag(a);
|
let diag = Self::diag(a);
|
||||||
let n = diag.len();
|
let n = diag.len();
|
||||||
@@ -132,7 +133,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2<T>> {
|
|||||||
y.copy_from(&x.xa(true, a));
|
y.copy_from(&x.xa(true, a));
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
/// Extract the diagonal from a matrix
|
||||||
fn diag(a: &X) -> Vec<T> {
|
fn diag(a: &X) -> Vec<T> {
|
||||||
let (nrows, ncols) = a.shape();
|
let (nrows, ncols) = a.shape();
|
||||||
let n = nrows.min(ncols);
|
let n = nrows.min(ncols);
|
||||||
@@ -158,9 +159,10 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bg_solver() {
|
fn bg_solver() {
|
||||||
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
|
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]])
|
||||||
|
.unwrap();
|
||||||
let b = vec![40., 51., 28.];
|
let b = vec![40., 51., 28.];
|
||||||
let expected = vec![1.0, 2.0, 3.0];
|
let expected = [1.0, 2.0, 3.0];
|
||||||
|
|
||||||
let mut x = Vec::zeros(3);
|
let mut x = Vec::zeros(3);
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@
|
|||||||
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
||||||
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
||||||
@@ -425,10 +425,7 @@ impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
|
|
||||||
for (i, col_std_i) in col_std.iter().enumerate() {
|
for (i, col_std_i) in col_std.iter().enumerate() {
|
||||||
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!("Cannot rescale constant column {i}")));
|
||||||
"Cannot rescale constant column {}",
|
|
||||||
i
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -514,7 +511,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
@@ -565,7 +563,8 @@ mod tests {
|
|||||||
&[17.0, 1918.0, 1.4054969025700674],
|
&[17.0, 1918.0, 1.4054969025700674],
|
||||||
&[18.0, 1929.0, 1.3271699396384906],
|
&[18.0, 1929.0, 1.3271699396384906],
|
||||||
&[19.0, 1915.0, 1.1373332337674806],
|
&[19.0, 1915.0, 1.1373332337674806],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42,
|
1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42,
|
||||||
@@ -630,7 +629,7 @@ mod tests {
|
|||||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
// ]);
|
// ]).unwrap();
|
||||||
|
|
||||||
// let y = vec![
|
// let y = vec![
|
||||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
|
|||||||
+3
-5
@@ -356,10 +356,7 @@ impl<TX: FloatNumber + RealNumber, TY: Number, X: Array2<TX>, Y: Array1<TY>> Las
|
|||||||
|
|
||||||
for (i, col_std_i) in col_std.iter().enumerate() {
|
for (i, col_std_i) in col_std.iter().enumerate() {
|
||||||
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!("Cannot rescale constant column {i}")));
|
||||||
"Cannot rescale constant column {}",
|
|
||||||
i
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -421,7 +418,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1, MutArray, MutArra
|
|||||||
use crate::linear::bg_solver::BiconjugateGradientSolver;
|
use crate::linear::bg_solver::BiconjugateGradientSolver;
|
||||||
use crate::numbers::floatnum::FloatNumber;
|
use crate::numbers::floatnum::FloatNumber;
|
||||||
|
|
||||||
///
|
/// Interior Point Optimizer
|
||||||
pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> {
|
pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> {
|
||||||
ata: X,
|
ata: X,
|
||||||
d1: Vec<T>,
|
d1: Vec<T>,
|
||||||
@@ -25,9 +25,8 @@ pub struct InteriorPointOptimizer<T: FloatNumber, X: Array2<T>> {
|
|||||||
prs: Vec<T>,
|
prs: Vec<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
||||||
///
|
/// Initialize a new Interior Point Optimizer
|
||||||
pub fn new(a: &X, n: usize) -> InteriorPointOptimizer<T, X> {
|
pub fn new(a: &X, n: usize) -> InteriorPointOptimizer<T, X> {
|
||||||
InteriorPointOptimizer {
|
InteriorPointOptimizer {
|
||||||
ata: a.ab(true, a, false),
|
ata: a.ab(true, a, false),
|
||||||
@@ -38,7 +37,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
/// Run the optimization
|
||||||
pub fn optimize(
|
pub fn optimize(
|
||||||
&mut self,
|
&mut self,
|
||||||
x: &X,
|
x: &X,
|
||||||
@@ -101,7 +100,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
|
|
||||||
// CALCULATE DUALITY GAP
|
// CALCULATE DUALITY GAP
|
||||||
let xnu = nu.xa(false, x);
|
let xnu = nu.xa(false, x);
|
||||||
let max_xnu = xnu.norm(std::f64::INFINITY);
|
let max_xnu = xnu.norm(f64::INFINITY);
|
||||||
if max_xnu > lambda_f64 {
|
if max_xnu > lambda_f64 {
|
||||||
let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap();
|
let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap();
|
||||||
nu.mul_scalar_mut(lnu);
|
nu.mul_scalar_mut(lnu);
|
||||||
@@ -208,7 +207,6 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
Ok(w)
|
Ok(w)
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn sumlogneg(f: &X) -> T {
|
fn sumlogneg(f: &X) -> T {
|
||||||
let (n, _) = f.shape();
|
let (n, _) = f.shape();
|
||||||
let mut sum = T::zero();
|
let mut sum = T::zero();
|
||||||
@@ -220,11 +218,9 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
|
impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
|
||||||
for InteriorPointOptimizer<T, X>
|
for InteriorPointOptimizer<T, X>
|
||||||
{
|
{
|
||||||
///
|
|
||||||
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
|
fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) {
|
||||||
let (_, p) = a.shape();
|
let (_, p) = a.shape();
|
||||||
|
|
||||||
@@ -234,7 +230,6 @@ impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn mat_vec_mul(&self, _: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
fn mat_vec_mul(&self, _: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
||||||
let (_, p) = self.ata.shape();
|
let (_, p) = self.ata.shape();
|
||||||
let x_slice = Vec::from_slice(x.slice(0..p).as_ref());
|
let x_slice = Vec::from_slice(x.slice(0..p).as_ref());
|
||||||
@@ -246,7 +241,6 @@ impl<'a, T: FloatNumber, X: Array2<T>> BiconjugateGradientSolver<'a, T, X>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn mat_t_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
fn mat_t_vec_mul(&self, a: &X, x: &Vec<T>, y: &mut Vec<T>) {
|
||||||
self.mat_vec_mul(a, x, y);
|
self.mat_vec_mul(a, x, y);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
||||||
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
||||||
@@ -341,7 +341,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8,
|
||||||
@@ -393,7 +394,7 @@ mod tests {
|
|||||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
// ]);
|
// ]).unwrap();
|
||||||
|
|
||||||
// let y = vec![
|
// let y = vec![
|
||||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
|
|||||||
@@ -35,7 +35,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<i32> = vec![
|
//! let y: Vec<i32> = vec![
|
||||||
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
//! ];
|
//! ];
|
||||||
@@ -71,19 +71,14 @@ use crate::optimization::line_search::Backtracking;
|
|||||||
use crate::optimization::FunctionOrder;
|
use crate::optimization::FunctionOrder;
|
||||||
|
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
#[derive(Debug, Clone, Eq, PartialEq, Default)]
|
||||||
/// Solver options for Logistic regression. Right now only LBFGS solver is supported.
|
/// Solver options for Logistic regression. Right now only LBFGS solver is supported.
|
||||||
pub enum LogisticRegressionSolverName {
|
pub enum LogisticRegressionSolverName {
|
||||||
/// Limited-memory Broyden–Fletcher–Goldfarb–Shanno method, see [LBFGS paper](http://users.iems.northwestern.edu/~nocedal/lbfgsb.html)
|
/// Limited-memory Broyden–Fletcher–Goldfarb–Shanno method, see [LBFGS paper](http://users.iems.northwestern.edu/~nocedal/lbfgsb.html)
|
||||||
|
#[default]
|
||||||
LBFGS,
|
LBFGS,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for LogisticRegressionSolverName {
|
|
||||||
fn default() -> Self {
|
|
||||||
LogisticRegressionSolverName::LBFGS
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Logistic Regression parameters
|
/// Logistic Regression parameters
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -188,14 +183,11 @@ pub struct LogisticRegression<
|
|||||||
}
|
}
|
||||||
|
|
||||||
trait ObjectiveFunction<T: Number + FloatNumber, X: Array2<T>> {
|
trait ObjectiveFunction<T: Number + FloatNumber, X: Array2<T>> {
|
||||||
///
|
|
||||||
fn f(&self, w_bias: &[T]) -> T;
|
fn f(&self, w_bias: &[T]) -> T;
|
||||||
|
|
||||||
///
|
|
||||||
#[allow(clippy::ptr_arg)]
|
#[allow(clippy::ptr_arg)]
|
||||||
fn df(&self, g: &mut Vec<T>, w_bias: &Vec<T>);
|
fn df(&self, g: &mut Vec<T>, w_bias: &Vec<T>);
|
||||||
|
|
||||||
///
|
|
||||||
#[allow(clippy::ptr_arg)]
|
#[allow(clippy::ptr_arg)]
|
||||||
fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T {
|
fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T {
|
||||||
let mut sum = T::zero();
|
let mut sum = T::zero();
|
||||||
@@ -266,8 +258,8 @@ impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
impl<T: Number + FloatNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||||
for BinaryObjectiveFunction<'a, T, X>
|
for BinaryObjectiveFunction<'_, T, X>
|
||||||
{
|
{
|
||||||
fn f(&self, w_bias: &[T]) -> T {
|
fn f(&self, w_bias: &[T]) -> T {
|
||||||
let mut f = T::zero();
|
let mut f = T::zero();
|
||||||
@@ -321,8 +313,8 @@ struct MultiClassObjectiveFunction<'a, T: Number + FloatNumber, X: Array2<T>> {
|
|||||||
_phantom_t: PhantomData<T>,
|
_phantom_t: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
impl<T: Number + FloatNumber + RealNumber, X: Array2<T>> ObjectiveFunction<T, X>
|
||||||
for MultiClassObjectiveFunction<'a, T, X>
|
for MultiClassObjectiveFunction<'_, T, X>
|
||||||
{
|
{
|
||||||
fn f(&self, w_bias: &[T]) -> T {
|
fn f(&self, w_bias: &[T]) -> T {
|
||||||
let mut f = T::zero();
|
let mut f = T::zero();
|
||||||
@@ -449,8 +441,7 @@ impl<TX: Number + FloatNumber + RealNumber, TY: Number + Ord, X: Array2<TX>, Y:
|
|||||||
|
|
||||||
match k.cmp(&2) {
|
match k.cmp(&2) {
|
||||||
Ordering::Less => Err(Failed::fit(&format!(
|
Ordering::Less => Err(Failed::fit(&format!(
|
||||||
"incorrect number of classes: {}. Should be >= 2.",
|
"incorrect number of classes: {k}. Should be >= 2."
|
||||||
k
|
|
||||||
))),
|
))),
|
||||||
Ordering::Equal => {
|
Ordering::Equal => {
|
||||||
let x0 = Vec::zeros(num_attributes + 1);
|
let x0 = Vec::zeros(num_attributes + 1);
|
||||||
@@ -617,7 +608,8 @@ mod tests {
|
|||||||
&[10., -2.],
|
&[10., -2.],
|
||||||
&[8., 2.],
|
&[8., 2.],
|
||||||
&[9., 0.],
|
&[9., 0.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
||||||
|
|
||||||
@@ -634,21 +626,21 @@ mod tests {
|
|||||||
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||||
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||||
|
|
||||||
assert!((g[0] + 33.000068218163484).abs() < std::f64::EPSILON);
|
assert!((g[0] + 33.000068218163484).abs() < f64::EPSILON);
|
||||||
|
|
||||||
let f = objective.f(&vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
let f = objective.f(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||||
|
|
||||||
assert!((f - 408.0052230582765).abs() < std::f64::EPSILON);
|
assert!((f - 408.0052230582765).abs() < f64::EPSILON);
|
||||||
|
|
||||||
let objective_reg = MultiClassObjectiveFunction {
|
let objective_reg = MultiClassObjectiveFunction {
|
||||||
x: &x,
|
x: &x,
|
||||||
y: y.clone(),
|
y,
|
||||||
k: 3,
|
k: 3,
|
||||||
alpha: 1.0,
|
alpha: 1.0,
|
||||||
_phantom_t: PhantomData,
|
_phantom_t: PhantomData,
|
||||||
};
|
};
|
||||||
|
|
||||||
let f = objective_reg.f(&vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
let f = objective_reg.f(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||||
assert!((f - 487.5052).abs() < 1e-4);
|
assert!((f - 487.5052).abs() < 1e-4);
|
||||||
|
|
||||||
objective_reg.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
objective_reg.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]);
|
||||||
@@ -677,7 +669,8 @@ mod tests {
|
|||||||
&[10., -2.],
|
&[10., -2.],
|
||||||
&[8., 2.],
|
&[8., 2.],
|
||||||
&[9., 0.],
|
&[9., 0.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1];
|
let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1];
|
||||||
|
|
||||||
@@ -693,22 +686,22 @@ mod tests {
|
|||||||
objective.df(&mut g, &vec![1., 2., 3.]);
|
objective.df(&mut g, &vec![1., 2., 3.]);
|
||||||
objective.df(&mut g, &vec![1., 2., 3.]);
|
objective.df(&mut g, &vec![1., 2., 3.]);
|
||||||
|
|
||||||
assert!((g[0] - 26.051064349381285).abs() < std::f64::EPSILON);
|
assert!((g[0] - 26.051064349381285).abs() < f64::EPSILON);
|
||||||
assert!((g[1] - 10.239000702928523).abs() < std::f64::EPSILON);
|
assert!((g[1] - 10.239000702928523).abs() < f64::EPSILON);
|
||||||
assert!((g[2] - 3.869294270156324).abs() < std::f64::EPSILON);
|
assert!((g[2] - 3.869294270156324).abs() < f64::EPSILON);
|
||||||
|
|
||||||
let f = objective.f(&vec![1., 2., 3.]);
|
let f = objective.f(&[1., 2., 3.]);
|
||||||
|
|
||||||
assert!((f - 59.76994756647412).abs() < std::f64::EPSILON);
|
assert!((f - 59.76994756647412).abs() < f64::EPSILON);
|
||||||
|
|
||||||
let objective_reg = BinaryObjectiveFunction {
|
let objective_reg = BinaryObjectiveFunction {
|
||||||
x: &x,
|
x: &x,
|
||||||
y: y.clone(),
|
y,
|
||||||
alpha: 1.0,
|
alpha: 1.0,
|
||||||
_phantom_t: PhantomData,
|
_phantom_t: PhantomData,
|
||||||
};
|
};
|
||||||
|
|
||||||
let f = objective_reg.f(&vec![1., 2., 3.]);
|
let f = objective_reg.f(&[1., 2., 3.]);
|
||||||
assert!((f - 62.2699).abs() < 1e-4);
|
assert!((f - 62.2699).abs() < 1e-4);
|
||||||
|
|
||||||
objective_reg.df(&mut g, &vec![1., 2., 3.]);
|
objective_reg.df(&mut g, &vec![1., 2., 3.]);
|
||||||
@@ -739,7 +732,8 @@ mod tests {
|
|||||||
&[10., -2.],
|
&[10., -2.],
|
||||||
&[8., 2.],
|
&[8., 2.],
|
||||||
&[9., 0.],
|
&[9., 0.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
||||||
|
|
||||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -824,37 +818,41 @@ mod tests {
|
|||||||
assert!(reg_coeff_sum < coeff);
|
assert!(reg_coeff_sum < coeff);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: serialization for the new DenseMatrix needs to be implemented
|
//TODO: serialization for the new DenseMatrix needs to be implemented
|
||||||
// #[cfg_attr(all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test)]
|
#[cfg_attr(
|
||||||
// #[test]
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
// #[cfg(feature = "serde")]
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
// fn serde() {
|
)]
|
||||||
// let x = DenseMatrix::from_2d_array(&[
|
#[test]
|
||||||
// &[1., -5.],
|
#[cfg(feature = "serde")]
|
||||||
// &[2., 5.],
|
fn serde() {
|
||||||
// &[3., -2.],
|
let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
||||||
// &[1., 2.],
|
&[1., -5.],
|
||||||
// &[2., 0.],
|
&[2., 5.],
|
||||||
// &[6., -5.],
|
&[3., -2.],
|
||||||
// &[7., 5.],
|
&[1., 2.],
|
||||||
// &[6., -2.],
|
&[2., 0.],
|
||||||
// &[7., 2.],
|
&[6., -5.],
|
||||||
// &[6., 0.],
|
&[7., 5.],
|
||||||
// &[8., -5.],
|
&[6., -2.],
|
||||||
// &[9., 5.],
|
&[7., 2.],
|
||||||
// &[10., -2.],
|
&[6., 0.],
|
||||||
// &[8., 2.],
|
&[8., -5.],
|
||||||
// &[9., 0.],
|
&[9., 5.],
|
||||||
// ]);
|
&[10., -2.],
|
||||||
// let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
&[8., 2.],
|
||||||
|
&[9., 0.],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let y: Vec<i32> = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1];
|
||||||
|
|
||||||
// let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
// let deserialized_lr: LogisticRegression<f64, i32, DenseMatrix<f64>, Vec<i32>> =
|
let deserialized_lr: LogisticRegression<f64, i32, DenseMatrix<f64>, Vec<i32>> =
|
||||||
// serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
|
||||||
|
|
||||||
// assert_eq!(lr, deserialized_lr);
|
assert_eq!(lr, deserialized_lr);
|
||||||
// }
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
@@ -883,7 +881,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -896,11 +895,7 @@ mod tests {
|
|||||||
|
|
||||||
let y_hat = lr.predict(&x).unwrap();
|
let y_hat = lr.predict(&x).unwrap();
|
||||||
|
|
||||||
let error: i32 = y
|
let error: i32 = y.into_iter().zip(y_hat).map(|(a, b)| (a - b).abs()).sum();
|
||||||
.into_iter()
|
|
||||||
.zip(y_hat.into_iter())
|
|
||||||
.map(|(a, b)| (a - b).abs())
|
|
||||||
.sum();
|
|
||||||
|
|
||||||
assert!(error <= 1);
|
assert!(error <= 1);
|
||||||
|
|
||||||
@@ -909,4 +904,46 @@ mod tests {
|
|||||||
|
|
||||||
assert!(reg_coeff_sum < coeff);
|
assert!(reg_coeff_sum < coeff);
|
||||||
}
|
}
|
||||||
|
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn lr_fit_predict_random() {
|
||||||
|
let x: DenseMatrix<f32> = DenseMatrix::rand(52181, 94);
|
||||||
|
let y1: Vec<i32> = vec![1; 2181];
|
||||||
|
let y2: Vec<i32> = vec![0; 50000];
|
||||||
|
let y: Vec<i32> = y1.into_iter().chain(y2).collect();
|
||||||
|
|
||||||
|
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
let lr_reg = LogisticRegression::fit(
|
||||||
|
&x,
|
||||||
|
&y,
|
||||||
|
LogisticRegressionParameters::default().with_alpha(1.0),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let y_hat = lr.predict(&x).unwrap();
|
||||||
|
let y_hat_reg = lr_reg.predict(&x).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(y.len(), y_hat.len());
|
||||||
|
assert_eq!(y.len(), y_hat_reg.len());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_logit() {
|
||||||
|
let x: &DenseMatrix<f64> = &DenseMatrix::rand(52181, 94);
|
||||||
|
let y1: Vec<u32> = vec![1; 2181];
|
||||||
|
let y2: Vec<u32> = vec![0; 50000];
|
||||||
|
let y: &Vec<u32> = &(y1.into_iter().chain(y2).collect());
|
||||||
|
println!("y vec height: {:?}", y.len());
|
||||||
|
println!("x matrix shape: {:?}", x.shape());
|
||||||
|
|
||||||
|
let lr = LogisticRegression::fit(x, y, Default::default()).unwrap();
|
||||||
|
let y_hat = lr.predict(x).unwrap();
|
||||||
|
|
||||||
|
println!("y_hat shape: {:?}", y_hat.shape());
|
||||||
|
|
||||||
|
assert_eq!(y_hat.shape(), 52181);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
||||||
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
||||||
@@ -71,21 +71,16 @@ use crate::numbers::basenum::Number;
|
|||||||
use crate::numbers::realnum::RealNumber;
|
use crate::numbers::realnum::RealNumber;
|
||||||
|
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
#[derive(Debug, Clone, Eq, PartialEq, Default)]
|
||||||
/// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable.
|
/// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable.
|
||||||
pub enum RidgeRegressionSolverName {
|
pub enum RidgeRegressionSolverName {
|
||||||
/// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html)
|
/// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html)
|
||||||
|
#[default]
|
||||||
Cholesky,
|
Cholesky,
|
||||||
/// SVD decomposition, see [SVD](../../linalg/svd/index.html)
|
/// SVD decomposition, see [SVD](../../linalg/svd/index.html)
|
||||||
SVD,
|
SVD,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for RidgeRegressionSolverName {
|
|
||||||
fn default() -> Self {
|
|
||||||
RidgeRegressionSolverName::Cholesky
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Ridge Regression parameters
|
/// Ridge Regression parameters
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -384,10 +379,7 @@ impl<
|
|||||||
|
|
||||||
for (i, col_std_i) in col_std.iter().enumerate() {
|
for (i, col_std_i) in col_std.iter().enumerate() {
|
||||||
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
if (*col_std_i - TX::zero()).abs() < TX::epsilon() {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!("Cannot rescale constant column {i}")));
|
||||||
"Cannot rescale constant column {}",
|
|
||||||
i
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -463,7 +455,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
@@ -521,7 +514,7 @@ mod tests {
|
|||||||
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
// &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
// &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
// &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
// ]);
|
// ]).unwrap();
|
||||||
|
|
||||||
// let y = vec![
|
// let y = vec![
|
||||||
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
// 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
|
|||||||
@@ -98,8 +98,8 @@ mod tests {
|
|||||||
let mut scores = HCVScore::new();
|
let mut scores = HCVScore::new();
|
||||||
scores.compute(&v1, &v2);
|
scores.compute(&v1, &v2);
|
||||||
|
|
||||||
assert!((0.2548 - scores.homogeneity.unwrap() as f64).abs() < 1e-4);
|
assert!((0.2548 - scores.homogeneity.unwrap()).abs() < 1e-4);
|
||||||
assert!((0.5440 - scores.completeness.unwrap() as f64).abs() < 1e-4);
|
assert!((0.5440 - scores.completeness.unwrap()).abs() < 1e-4);
|
||||||
assert!((0.3471 - scores.v_measure.unwrap() as f64).abs() < 1e-4);
|
assert!((0.3471 - scores.v_measure.unwrap()).abs() < 1e-4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ mod tests {
|
|||||||
fn entropy_test() {
|
fn entropy_test() {
|
||||||
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
let v1 = vec![0, 0, 1, 1, 2, 0, 4];
|
||||||
|
|
||||||
assert!((1.2770 - entropy(&v1).unwrap() as f64).abs() < 1e-4);
|
assert!((1.2770 - entropy(&v1).unwrap()).abs() < 1e-4);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
|
|||||||
@@ -25,7 +25,7 @@
|
|||||||
//! &[68., 590., 37.],
|
//! &[68., 590., 37.],
|
||||||
//! &[69., 660., 46.],
|
//! &[69., 660., 46.],
|
||||||
//! &[73., 600., 55.],
|
//! &[73., 600., 55.],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let a = data.mean_by(0);
|
//! let a = data.mean_by(0);
|
||||||
//! let b = vec![66., 640., 44.];
|
//! let b = vec![66., 640., 44.];
|
||||||
@@ -151,7 +151,8 @@ mod tests {
|
|||||||
&[68., 590., 37.],
|
&[68., 590., 37.],
|
||||||
&[69., 660., 46.],
|
&[69., 660., 46.],
|
||||||
&[73., 600., 55.],
|
&[73., 600., 55.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let a = data.mean_by(0);
|
let a = data.mean_by(0);
|
||||||
let b = vec![66., 640., 44.];
|
let b = vec![66., 640., 44.];
|
||||||
|
|||||||
+2
-2
@@ -95,8 +95,8 @@ mod tests {
|
|||||||
let score1: f64 = F1::new_with(beta).get_score(&y_true, &y_pred);
|
let score1: f64 = F1::new_with(beta).get_score(&y_true, &y_pred);
|
||||||
let score2: f64 = F1::new_with(beta).get_score(&y_true, &y_true);
|
let score2: f64 = F1::new_with(beta).get_score(&y_true, &y_true);
|
||||||
|
|
||||||
println!("{:?}", score1);
|
println!("{score1:?}");
|
||||||
println!("{:?}", score2);
|
println!("{score2:?}");
|
||||||
|
|
||||||
assert!((score1 - 0.57142857).abs() < 1e-8);
|
assert!((score1 - 0.57142857).abs() < 1e-8);
|
||||||
assert!((score2 - 1.0).abs() < 1e-8);
|
assert!((score2 - 1.0).abs() < 1e-8);
|
||||||
|
|||||||
+1
-1
@@ -37,7 +37,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<i8> = vec![
|
//! let y: Vec<i8> = vec![
|
||||||
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
//! ];
|
//! ];
|
||||||
|
|||||||
@@ -3,9 +3,9 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
api::{Predictor, SupervisedEstimator},
|
api::{Predictor, SupervisedEstimator},
|
||||||
error::{Failed, FailedError},
|
error::{Failed, FailedError},
|
||||||
linalg::basic::arrays::{Array2, Array1},
|
linalg::basic::arrays::{Array1, Array2},
|
||||||
numbers::realnum::RealNumber,
|
|
||||||
numbers::basenum::Number,
|
numbers::basenum::Number,
|
||||||
|
numbers::realnum::RealNumber,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
|
use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult};
|
||||||
|
|||||||
@@ -213,17 +213,17 @@ mod tests {
|
|||||||
|
|
||||||
for t in &test_masks[0][0..11] {
|
for t in &test_masks[0][0..11] {
|
||||||
// TODO: this can be prob done better
|
// TODO: this can be prob done better
|
||||||
assert_eq!(*t, true)
|
assert!(*t)
|
||||||
}
|
}
|
||||||
for t in &test_masks[0][11..22] {
|
for t in &test_masks[0][11..22] {
|
||||||
assert_eq!(*t, false)
|
assert!(!*t)
|
||||||
}
|
}
|
||||||
|
|
||||||
for t in &test_masks[1][0..11] {
|
for t in &test_masks[1][0..11] {
|
||||||
assert_eq!(*t, false)
|
assert!(!*t)
|
||||||
}
|
}
|
||||||
for t in &test_masks[1][11..22] {
|
for t in &test_masks[1][11..22] {
|
||||||
assert_eq!(*t, true)
|
assert!(*t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -283,9 +283,7 @@ mod tests {
|
|||||||
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
|
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
|
||||||
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
|
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
|
||||||
];
|
];
|
||||||
for ((train, test), (expected_train, expected_test)) in
|
for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
|
||||||
k.split(&x).into_iter().zip(expected)
|
|
||||||
{
|
|
||||||
assert_eq!(test, expected_test);
|
assert_eq!(test, expected_test);
|
||||||
assert_eq!(train, expected_train);
|
assert_eq!(train, expected_train);
|
||||||
}
|
}
|
||||||
@@ -307,9 +305,7 @@ mod tests {
|
|||||||
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
|
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
|
||||||
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
|
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
|
||||||
];
|
];
|
||||||
for ((train, test), (expected_train, expected_test)) in
|
for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
|
||||||
k.split(&x).into_iter().zip(expected)
|
|
||||||
{
|
|
||||||
assert_eq!(test.len(), expected_test.len());
|
assert_eq!(test.len(), expected_test.len());
|
||||||
assert_eq!(train.len(), expected_train.len());
|
assert_eq!(train.len(), expected_train.len());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,7 +36,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<f64> = vec![
|
//! let y: Vec<f64> = vec![
|
||||||
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
|
||||||
//! ];
|
//! ];
|
||||||
@@ -84,7 +84,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<i32> = vec![
|
//! let y: Vec<i32> = vec![
|
||||||
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
//! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
//! ];
|
//! ];
|
||||||
@@ -169,7 +169,7 @@ pub fn train_test_split<
|
|||||||
let n_test = ((n as f32) * test_size) as usize;
|
let n_test = ((n as f32) * test_size) as usize;
|
||||||
|
|
||||||
if n_test < 1 {
|
if n_test < 1 {
|
||||||
panic!("number of sample is too small {}", n);
|
panic!("number of sample is too small {n}");
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut indices: Vec<usize> = (0..n).collect();
|
let mut indices: Vec<usize> = (0..n).collect();
|
||||||
@@ -396,7 +396,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
let cv = KFold {
|
let cv = KFold {
|
||||||
@@ -441,7 +442,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![
|
let y = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
@@ -489,7 +491,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
@@ -539,7 +542,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y: Vec<i32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
let cv = KFold::default().with_n_splits(3);
|
let cv = KFold::default().with_n_splits(3);
|
||||||
@@ -553,6 +557,6 @@ mod tests {
|
|||||||
&accuracy,
|
&accuracy,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
println!("{:?}", results);
|
println!("{results:?}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,14 +19,14 @@
|
|||||||
//! &[0, 1, 0, 0, 1, 0],
|
//! &[0, 1, 0, 0, 1, 0],
|
||||||
//! &[0, 1, 0, 1, 0, 0],
|
//! &[0, 1, 0, 1, 0, 0],
|
||||||
//! &[0, 1, 1, 0, 0, 1],
|
//! &[0, 1, 1, 0, 0, 1],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<u32> = vec![0, 0, 0, 1];
|
//! let y: Vec<u32> = vec![0, 0, 0, 1];
|
||||||
//!
|
//!
|
||||||
//! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
//! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
//!
|
//!
|
||||||
//! // Testing data point is:
|
//! // Testing data point is:
|
||||||
//! // Chinese Chinese Chinese Tokyo Japan
|
//! // Chinese Chinese Chinese Tokyo Japan
|
||||||
//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]);
|
//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]).unwrap();
|
||||||
//! let y_hat = nb.predict(&x_test).unwrap();
|
//! let y_hat = nb.predict(&x_test).unwrap();
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
@@ -257,8 +257,7 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
|
|||||||
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
||||||
/// * `x` - training data.
|
/// * `x` - training data.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
|
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
|
||||||
/// priors are adjusted according to the data.
|
|
||||||
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
|
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
|
||||||
/// * `binarize` - Threshold for binarizing.
|
/// * `binarize` - Threshold for binarizing.
|
||||||
fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>(
|
fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>(
|
||||||
@@ -271,21 +270,18 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
|
|||||||
let y_samples = y.shape();
|
let y_samples = y.shape();
|
||||||
if y_samples != n_samples {
|
if y_samples != n_samples {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x should equal size of y; |x|=[{}], |y|=[{}]",
|
"Size of x should equal size of y; |x|=[{n_samples}], |y|=[{y_samples}]"
|
||||||
n_samples, y_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if n_samples == 0 {
|
if n_samples == 0 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x and y should greater than 0; |x|=[{}]",
|
"Size of x and y should greater than 0; |x|=[{n_samples}]"
|
||||||
n_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
if alpha < 0f64 {
|
if alpha < 0f64 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Alpha should be greater than 0; |alpha|=[{}]",
|
"Alpha should be greater than 0; |alpha|=[{alpha}]"
|
||||||
alpha
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -318,8 +314,7 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
|
|||||||
feature_in_class_counter[class_index][idx] +=
|
feature_in_class_counter[class_index][idx] +=
|
||||||
row_i.to_usize().ok_or_else(|| {
|
row_i.to_usize().ok_or_else(|| {
|
||||||
Failed::fit(&format!(
|
Failed::fit(&format!(
|
||||||
"Elements of the matrix should be 1.0 or 0.0 |found|=[{}]",
|
"Elements of the matrix should be 1.0 or 0.0 |found|=[{row_i}]"
|
||||||
row_i
|
|
||||||
))
|
))
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
@@ -406,10 +401,10 @@ impl<TX: Number + PartialOrd, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
|
|||||||
{
|
{
|
||||||
/// Fits BernoulliNB with given data
|
/// Fits BernoulliNB with given data
|
||||||
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
||||||
/// features.
|
/// features.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `parameters` - additional parameters like class priors, alpha for smoothing and
|
/// * `parameters` - additional parameters like class priors, alpha for smoothing and
|
||||||
/// binarizing threshold.
|
/// binarizing threshold.
|
||||||
pub fn fit(x: &X, y: &Y, parameters: BernoulliNBParameters<TX>) -> Result<Self, Failed> {
|
pub fn fit(x: &X, y: &Y, parameters: BernoulliNBParameters<TX>) -> Result<Self, Failed> {
|
||||||
let distribution = if let Some(threshold) = parameters.binarize {
|
let distribution = if let Some(threshold) = parameters.binarize {
|
||||||
BernoulliNBDistribution::fit(
|
BernoulliNBDistribution::fit(
|
||||||
@@ -431,6 +426,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
|
|||||||
|
|
||||||
/// Estimates the class labels for the provided data.
|
/// Estimates the class labels for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with class estimates.
|
/// Returns a vector of size N with class estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
if let Some(threshold) = self.binarize {
|
if let Some(threshold) = self.binarize {
|
||||||
@@ -531,7 +527,8 @@ mod tests {
|
|||||||
&[0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
&[0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||||
&[0.0, 1.0, 0.0, 1.0, 0.0, 0.0],
|
&[0.0, 1.0, 0.0, 1.0, 0.0, 0.0],
|
||||||
&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0],
|
&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 0, 1];
|
let y: Vec<u32> = vec![0, 0, 0, 1];
|
||||||
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
@@ -562,7 +559,7 @@ mod tests {
|
|||||||
|
|
||||||
// Testing data point is:
|
// Testing data point is:
|
||||||
// Chinese Chinese Chinese Tokyo Japan
|
// Chinese Chinese Chinese Tokyo Japan
|
||||||
let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]);
|
let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]).unwrap();
|
||||||
let y_hat = bnb.predict(&x_test).unwrap();
|
let y_hat = bnb.predict(&x_test).unwrap();
|
||||||
|
|
||||||
assert_eq!(y_hat, &[1]);
|
assert_eq!(y_hat, &[1]);
|
||||||
@@ -590,7 +587,8 @@ mod tests {
|
|||||||
&[2, 0, 3, 3, 1, 2, 0, 2, 4, 1],
|
&[2, 0, 3, 3, 1, 2, 0, 2, 4, 1],
|
||||||
&[2, 4, 0, 4, 2, 4, 1, 3, 1, 4],
|
&[2, 4, 0, 4, 2, 4, 1, 3, 1, 4],
|
||||||
&[0, 2, 2, 3, 4, 0, 4, 4, 4, 4],
|
&[0, 2, 2, 3, 4, 0, 4, 4, 4, 4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2];
|
let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2];
|
||||||
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
@@ -647,7 +645,8 @@ mod tests {
|
|||||||
&[0, 1, 0, 0, 1, 0],
|
&[0, 1, 0, 0, 1, 0],
|
||||||
&[0, 1, 0, 1, 0, 0],
|
&[0, 1, 0, 1, 0, 0],
|
||||||
&[0, 1, 1, 0, 0, 1],
|
&[0, 1, 1, 0, 0, 1],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 0, 1];
|
let y: Vec<u32> = vec![0, 0, 0, 1];
|
||||||
|
|
||||||
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -24,7 +24,7 @@
|
|||||||
//! &[3, 4, 2, 4],
|
//! &[3, 4, 2, 4],
|
||||||
//! &[0, 3, 1, 2],
|
//! &[0, 3, 1, 2],
|
||||||
//! &[0, 4, 1, 2],
|
//! &[0, 4, 1, 2],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
//! let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
||||||
//!
|
//!
|
||||||
//! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
//! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -95,7 +95,7 @@ impl<T: Number + Unsigned> PartialEq for CategoricalNBDistribution<T> {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (a_i_j, b_i_j) in a_i.iter().zip(b_i.iter()) {
|
for (a_i_j, b_i_j) in a_i.iter().zip(b_i.iter()) {
|
||||||
if (*a_i_j - *b_i_j).abs() > std::f64::EPSILON {
|
if (*a_i_j - *b_i_j).abs() > f64::EPSILON {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -158,8 +158,7 @@ impl<T: Number + Unsigned> CategoricalNBDistribution<T> {
|
|||||||
pub fn fit<X: Array2<T>, Y: Array1<T>>(x: &X, y: &Y, alpha: f64) -> Result<Self, Failed> {
|
pub fn fit<X: Array2<T>, Y: Array1<T>>(x: &X, y: &Y, alpha: f64) -> Result<Self, Failed> {
|
||||||
if alpha < 0f64 {
|
if alpha < 0f64 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"alpha should be >= 0, alpha=[{}]",
|
"alpha should be >= 0, alpha=[{alpha}]"
|
||||||
alpha
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,15 +166,13 @@ impl<T: Number + Unsigned> CategoricalNBDistribution<T> {
|
|||||||
let y_samples = y.shape();
|
let y_samples = y.shape();
|
||||||
if y_samples != n_samples {
|
if y_samples != n_samples {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x should equal size of y; |x|=[{}], |y|=[{}]",
|
"Size of x should equal size of y; |x|=[{n_samples}], |y|=[{y_samples}]"
|
||||||
n_samples, y_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if n_samples == 0 {
|
if n_samples == 0 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x and y should greater than 0; |x|=[{}]",
|
"Size of x and y should greater than 0; |x|=[{n_samples}]"
|
||||||
n_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
let y: Vec<usize> = y.iterator(0).map(|y_i| y_i.to_usize().unwrap()).collect();
|
let y: Vec<usize> = y.iterator(0).map(|y_i| y_i.to_usize().unwrap()).collect();
|
||||||
@@ -202,8 +199,7 @@ impl<T: Number + Unsigned> CategoricalNBDistribution<T> {
|
|||||||
.max()
|
.max()
|
||||||
.ok_or_else(|| {
|
.ok_or_else(|| {
|
||||||
Failed::fit(&format!(
|
Failed::fit(&format!(
|
||||||
"Failed to get the categories for feature = {}",
|
"Failed to get the categories for feature = {feature}"
|
||||||
feature
|
|
||||||
))
|
))
|
||||||
})?;
|
})?;
|
||||||
n_categories.push(feature_max + 1);
|
n_categories.push(feature_max + 1);
|
||||||
@@ -367,7 +363,7 @@ impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> Predictor<X, Y> for Categ
|
|||||||
impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> CategoricalNB<T, X, Y> {
|
impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> CategoricalNB<T, X, Y> {
|
||||||
/// Fits CategoricalNB with given data
|
/// Fits CategoricalNB with given data
|
||||||
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
||||||
/// features.
|
/// features.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `parameters` - additional parameters like alpha for smoothing
|
/// * `parameters` - additional parameters like alpha for smoothing
|
||||||
pub fn fit(x: &X, y: &Y, parameters: CategoricalNBParameters) -> Result<Self, Failed> {
|
pub fn fit(x: &X, y: &Y, parameters: CategoricalNBParameters) -> Result<Self, Failed> {
|
||||||
@@ -379,6 +375,7 @@ impl<T: Number + Unsigned, X: Array2<T>, Y: Array1<T>> CategoricalNB<T, X, Y> {
|
|||||||
|
|
||||||
/// Estimates the class labels for the provided data.
|
/// Estimates the class labels for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with class estimates.
|
/// Returns a vector of size N with class estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
self.inner.as_ref().unwrap().predict(x)
|
self.inner.as_ref().unwrap().predict(x)
|
||||||
@@ -429,7 +426,6 @@ mod tests {
|
|||||||
fn search_parameters() {
|
fn search_parameters() {
|
||||||
let parameters = CategoricalNBSearchParameters {
|
let parameters = CategoricalNBSearchParameters {
|
||||||
alpha: vec![1., 2.],
|
alpha: vec![1., 2.],
|
||||||
..Default::default()
|
|
||||||
};
|
};
|
||||||
let mut iter = parameters.into_iter();
|
let mut iter = parameters.into_iter();
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
@@ -460,7 +456,8 @@ mod tests {
|
|||||||
&[1, 1, 1, 1],
|
&[1, 1, 1, 1],
|
||||||
&[1, 2, 0, 0],
|
&[1, 2, 0, 0],
|
||||||
&[2, 1, 1, 1],
|
&[2, 1, 1, 1],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
||||||
|
|
||||||
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -518,7 +515,7 @@ mod tests {
|
|||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]);
|
let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]).unwrap();
|
||||||
let y_hat = cnb.predict(&x_test).unwrap();
|
let y_hat = cnb.predict(&x_test).unwrap();
|
||||||
assert_eq!(y_hat, vec![0, 1]);
|
assert_eq!(y_hat, vec![0, 1]);
|
||||||
}
|
}
|
||||||
@@ -544,7 +541,8 @@ mod tests {
|
|||||||
&[3, 4, 2, 4],
|
&[3, 4, 2, 4],
|
||||||
&[0, 3, 1, 2],
|
&[0, 3, 1, 2],
|
||||||
&[0, 4, 1, 2],
|
&[0, 4, 1, 2],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
||||||
|
|
||||||
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -576,7 +574,8 @@ mod tests {
|
|||||||
&[3, 4, 2, 4],
|
&[3, 4, 2, 4],
|
||||||
&[0, 3, 1, 2],
|
&[0, 3, 1, 2],
|
||||||
&[0, 4, 1, 2],
|
&[0, 4, 1, 2],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
let y: Vec<u32> = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0];
|
||||||
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
+12
-12
@@ -16,7 +16,7 @@
|
|||||||
//! &[ 1., 1.],
|
//! &[ 1., 1.],
|
||||||
//! &[ 2., 1.],
|
//! &[ 2., 1.],
|
||||||
//! &[ 3., 2.],
|
//! &[ 3., 2.],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
//! let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
||||||
//!
|
//!
|
||||||
//! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
|
//! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -174,8 +174,7 @@ impl<TY: Number + Ord + Unsigned> GaussianNBDistribution<TY> {
|
|||||||
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
||||||
/// * `x` - training data.
|
/// * `x` - training data.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
|
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
|
||||||
/// priors are adjusted according to the data.
|
|
||||||
pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>(
|
pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>(
|
||||||
x: &X,
|
x: &X,
|
||||||
y: &Y,
|
y: &Y,
|
||||||
@@ -185,15 +184,13 @@ impl<TY: Number + Ord + Unsigned> GaussianNBDistribution<TY> {
|
|||||||
let y_samples = y.shape();
|
let y_samples = y.shape();
|
||||||
if y_samples != n_samples {
|
if y_samples != n_samples {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x should equal size of y; |x|=[{}], |y|=[{}]",
|
"Size of x should equal size of y; |x|=[{n_samples}], |y|=[{y_samples}]"
|
||||||
n_samples, y_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if n_samples == 0 {
|
if n_samples == 0 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x and y should greater than 0; |x|=[{}]",
|
"Size of x and y should greater than 0; |x|=[{n_samples}]"
|
||||||
n_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
let (class_labels, indices) = y.unique_with_indices();
|
let (class_labels, indices) = y.unique_with_indices();
|
||||||
@@ -319,7 +316,7 @@ impl<TX: Number + RealNumber, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
|
|||||||
{
|
{
|
||||||
/// Fits GaussianNB with given data
|
/// Fits GaussianNB with given data
|
||||||
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
||||||
/// features.
|
/// features.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `parameters` - additional parameters like class priors.
|
/// * `parameters` - additional parameters like class priors.
|
||||||
pub fn fit(x: &X, y: &Y, parameters: GaussianNBParameters) -> Result<Self, Failed> {
|
pub fn fit(x: &X, y: &Y, parameters: GaussianNBParameters) -> Result<Self, Failed> {
|
||||||
@@ -330,6 +327,7 @@ impl<TX: Number + RealNumber, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Arr
|
|||||||
|
|
||||||
/// Estimates the class labels for the provided data.
|
/// Estimates the class labels for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with class estimates.
|
/// Returns a vector of size N with class estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
self.inner.as_ref().unwrap().predict(x)
|
self.inner.as_ref().unwrap().predict(x)
|
||||||
@@ -375,7 +373,6 @@ mod tests {
|
|||||||
fn search_parameters() {
|
fn search_parameters() {
|
||||||
let parameters = GaussianNBSearchParameters {
|
let parameters = GaussianNBSearchParameters {
|
||||||
priors: vec![Some(vec![1.]), Some(vec![2.])],
|
priors: vec![Some(vec![1.]), Some(vec![2.])],
|
||||||
..Default::default()
|
|
||||||
};
|
};
|
||||||
let mut iter = parameters.into_iter();
|
let mut iter = parameters.into_iter();
|
||||||
let next = iter.next().unwrap();
|
let next = iter.next().unwrap();
|
||||||
@@ -398,7 +395,8 @@ mod tests {
|
|||||||
&[1., 1.],
|
&[1., 1.],
|
||||||
&[2., 1.],
|
&[2., 1.],
|
||||||
&[3., 2.],
|
&[3., 2.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
||||||
|
|
||||||
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
|
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -438,7 +436,8 @@ mod tests {
|
|||||||
&[1., 1.],
|
&[1., 1.],
|
||||||
&[2., 1.],
|
&[2., 1.],
|
||||||
&[3., 2.],
|
&[3., 2.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
||||||
|
|
||||||
let priors = vec![0.3, 0.7];
|
let priors = vec![0.3, 0.7];
|
||||||
@@ -465,7 +464,8 @@ mod tests {
|
|||||||
&[1., 1.],
|
&[1., 1.],
|
||||||
&[2., 1.],
|
&[2., 1.],
|
||||||
&[3., 2.],
|
&[3., 2.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
|
||||||
|
|
||||||
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
|
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
+532
-20
@@ -89,33 +89,545 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: NBDistribution<TX,
|
|||||||
|
|
||||||
/// Estimates the class labels for the provided data.
|
/// Estimates the class labels for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with class estimates.
|
/// Returns a vector of size N with class estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
let y_classes = self.distribution.classes();
|
let y_classes = self.distribution.classes();
|
||||||
|
|
||||||
|
if y_classes.is_empty() {
|
||||||
|
return Err(Failed::predict("Failed to predict, no classes available"));
|
||||||
|
}
|
||||||
|
|
||||||
let (rows, _) = x.shape();
|
let (rows, _) = x.shape();
|
||||||
let predictions = (0..rows)
|
let mut predictions = Vec::with_capacity(rows);
|
||||||
.map(|row_index| {
|
let mut all_probs_nan = true;
|
||||||
let row = x.get_row(row_index);
|
|
||||||
let (prediction, _probability) = y_classes
|
for row_index in 0..rows {
|
||||||
.iter()
|
let row = x.get_row(row_index);
|
||||||
.enumerate()
|
let mut max_log_prob = f64::NEG_INFINITY;
|
||||||
.map(|(class_index, class)| {
|
let mut max_class = None;
|
||||||
(
|
|
||||||
class,
|
for (class_index, class) in y_classes.iter().enumerate() {
|
||||||
self.distribution.log_likelihood(class_index, &row)
|
let log_likelihood = self.distribution.log_likelihood(class_index, &row);
|
||||||
+ self.distribution.prior(class_index).ln(),
|
let log_prob = log_likelihood + self.distribution.prior(class_index).ln();
|
||||||
)
|
|
||||||
})
|
if !log_prob.is_nan() && log_prob > max_log_prob {
|
||||||
.max_by(|(_, p1), (_, p2)| p1.partial_cmp(p2).unwrap())
|
max_log_prob = log_prob;
|
||||||
.unwrap();
|
max_class = Some(*class);
|
||||||
*prediction
|
all_probs_nan = false;
|
||||||
})
|
}
|
||||||
.collect::<Vec<TY>>();
|
}
|
||||||
let y_hat = Y::from_vec_slice(&predictions);
|
|
||||||
Ok(y_hat)
|
predictions.push(max_class.unwrap_or(y_classes[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if all_probs_nan {
|
||||||
|
Err(Failed::predict(
|
||||||
|
"Failed to predict, all probabilities were NaN",
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(Y::from_vec_slice(&predictions))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub mod bernoulli;
|
pub mod bernoulli;
|
||||||
pub mod categorical;
|
pub mod categorical;
|
||||||
pub mod gaussian;
|
pub mod gaussian;
|
||||||
pub mod multinomial;
|
pub mod multinomial;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::linalg::basic::arrays::Array;
|
||||||
|
use crate::linalg::basic::matrix::DenseMatrix;
|
||||||
|
use num_traits::float::Float;
|
||||||
|
|
||||||
|
type Model<'d> = BaseNaiveBayes<i32, i32, DenseMatrix<i32>, Vec<i32>, TestDistribution<'d>>;
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
|
struct TestDistribution<'d>(&'d Vec<i32>);
|
||||||
|
|
||||||
|
impl NBDistribution<i32, i32> for TestDistribution<'_> {
|
||||||
|
fn prior(&self, _class_index: usize) -> f64 {
|
||||||
|
1.
|
||||||
|
}
|
||||||
|
|
||||||
|
fn log_likelihood<'a>(
|
||||||
|
&'a self,
|
||||||
|
class_index: usize,
|
||||||
|
_j: &'a Box<dyn ArrayView1<i32> + 'a>,
|
||||||
|
) -> f64 {
|
||||||
|
match self.0.get(class_index) {
|
||||||
|
&v @ 2 | &v @ 10 | &v @ 20 => v as f64,
|
||||||
|
_ => f64::nan(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn classes(&self) -> &Vec<i32> {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict() {
|
||||||
|
let matrix = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap();
|
||||||
|
|
||||||
|
let val = vec![];
|
||||||
|
match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) {
|
||||||
|
Ok(_) => panic!("Should return error in case of empty classes"),
|
||||||
|
Err(err) => assert_eq!(
|
||||||
|
err.to_string(),
|
||||||
|
"Predict failed: Failed to predict, no classes available"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
let val = vec![1, 2, 3];
|
||||||
|
match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) {
|
||||||
|
Ok(r) => assert_eq!(r, vec![2, 2, 2]),
|
||||||
|
Err(_) => panic!("Should success in normal case with NaNs"),
|
||||||
|
}
|
||||||
|
|
||||||
|
let val = vec![20, 2, 10];
|
||||||
|
match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) {
|
||||||
|
Ok(r) => assert_eq!(r, vec![20, 20, 20]),
|
||||||
|
Err(_) => panic!("Should success in normal case without NaNs"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A simple test distribution using float
|
||||||
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
|
struct TestDistributionAgain {
|
||||||
|
classes: Vec<u32>,
|
||||||
|
probs: Vec<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NBDistribution<f64, u32> for TestDistributionAgain {
|
||||||
|
fn classes(&self) -> &Vec<u32> {
|
||||||
|
&self.classes
|
||||||
|
}
|
||||||
|
fn prior(&self, class_index: usize) -> f64 {
|
||||||
|
self.probs[class_index]
|
||||||
|
}
|
||||||
|
fn log_likelihood<'a>(
|
||||||
|
&'a self,
|
||||||
|
class_index: usize,
|
||||||
|
_j: &'a Box<dyn ArrayView1<f64> + 'a>,
|
||||||
|
) -> f64 {
|
||||||
|
self.probs[class_index].ln()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type TestNB = BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, TestDistributionAgain>;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_empty_classes() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![],
|
||||||
|
probs: vec![],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
assert!(nb.predict(&x).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_single_class() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1],
|
||||||
|
probs: vec![1.0],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
let result = nb.predict(&x).unwrap();
|
||||||
|
assert_eq!(result, vec![1, 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_multiple_classes() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1, 2, 3],
|
||||||
|
probs: vec![0.2, 0.5, 0.3],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0]]).unwrap();
|
||||||
|
let result = nb.predict(&x).unwrap();
|
||||||
|
assert_eq!(result, vec![2, 2, 2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_with_nans() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1, 2],
|
||||||
|
probs: vec![f64::NAN, 0.5],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
let result = nb.predict(&x).unwrap();
|
||||||
|
assert_eq!(result, vec![2, 2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_all_nans() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1, 2],
|
||||||
|
probs: vec![f64::NAN, f64::NAN],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
assert!(nb.predict(&x).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_extreme_probabilities() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1, 2],
|
||||||
|
probs: vec![1e-300, 1e-301],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
let result = nb.predict(&x).unwrap();
|
||||||
|
assert_eq!(result, vec![1, 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_with_infinity() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1, 2, 3],
|
||||||
|
probs: vec![f64::INFINITY, 1.0, 2.0],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
let result = nb.predict(&x).unwrap();
|
||||||
|
assert_eq!(result, vec![1, 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_predict_with_negative_infinity() {
|
||||||
|
let dist = TestDistributionAgain {
|
||||||
|
classes: vec![1, 2, 3],
|
||||||
|
probs: vec![f64::NEG_INFINITY, 1.0, 2.0],
|
||||||
|
};
|
||||||
|
let nb = TestNB::fit(dist).unwrap();
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
|
||||||
|
let result = nb.predict(&x).unwrap();
|
||||||
|
assert_eq!(result, vec![3, 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_gaussian_naive_bayes_numerical_stability() {
|
||||||
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
|
struct GaussianTestDistribution {
|
||||||
|
classes: Vec<u32>,
|
||||||
|
means: Vec<Vec<f64>>,
|
||||||
|
variances: Vec<Vec<f64>>,
|
||||||
|
priors: Vec<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NBDistribution<f64, u32> for GaussianTestDistribution {
|
||||||
|
fn classes(&self) -> &Vec<u32> {
|
||||||
|
&self.classes
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prior(&self, class_index: usize) -> f64 {
|
||||||
|
self.priors[class_index]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn log_likelihood<'a>(
|
||||||
|
&'a self,
|
||||||
|
class_index: usize,
|
||||||
|
j: &'a Box<dyn ArrayView1<f64> + 'a>,
|
||||||
|
) -> f64 {
|
||||||
|
let means = &self.means[class_index];
|
||||||
|
let variances = &self.variances[class_index];
|
||||||
|
j.iterator(0)
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, &xi)| {
|
||||||
|
let mean = means[i];
|
||||||
|
let var = variances[i] + 1e-9; // Small smoothing for numerical stability
|
||||||
|
let coeff = -0.5 * (2.0 * std::f64::consts::PI * var).ln();
|
||||||
|
let exponent = -(xi - mean).powi(2) / (2.0 * var);
|
||||||
|
coeff + exponent
|
||||||
|
})
|
||||||
|
.sum()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn train_distribution(x: &DenseMatrix<f64>, y: &[u32]) -> GaussianTestDistribution {
|
||||||
|
let mut classes: Vec<u32> = y
|
||||||
|
.iter()
|
||||||
|
.cloned()
|
||||||
|
.collect::<std::collections::HashSet<u32>>()
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
classes.sort();
|
||||||
|
let n_classes = classes.len();
|
||||||
|
let n_features = x.shape().1;
|
||||||
|
|
||||||
|
let mut means = vec![vec![0.0; n_features]; n_classes];
|
||||||
|
let mut variances = vec![vec![0.0; n_features]; n_classes];
|
||||||
|
let mut class_counts = vec![0; n_classes];
|
||||||
|
|
||||||
|
// Calculate means and count samples per class
|
||||||
|
for (sample, &class) in x.row_iter().zip(y.iter()) {
|
||||||
|
let class_idx = classes.iter().position(|&c| c == class).unwrap();
|
||||||
|
class_counts[class_idx] += 1;
|
||||||
|
for (i, &value) in sample.iterator(0).enumerate() {
|
||||||
|
means[class_idx][i] += value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize means
|
||||||
|
for (class_idx, mean) in means.iter_mut().enumerate() {
|
||||||
|
for value in mean.iter_mut() {
|
||||||
|
*value /= class_counts[class_idx] as f64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate variances
|
||||||
|
for (sample, &class) in x.row_iter().zip(y.iter()) {
|
||||||
|
let class_idx = classes.iter().position(|&c| c == class).unwrap();
|
||||||
|
for (i, &value) in sample.iterator(0).enumerate() {
|
||||||
|
let diff = value - means[class_idx][i];
|
||||||
|
variances[class_idx][i] += diff * diff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize variances and add small epsilon to avoid zero variance
|
||||||
|
let epsilon = 1e-9;
|
||||||
|
for (class_idx, variance) in variances.iter_mut().enumerate() {
|
||||||
|
for value in variance.iter_mut() {
|
||||||
|
*value = *value / class_counts[class_idx] as f64 + epsilon;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate priors
|
||||||
|
let total_samples = y.len() as f64;
|
||||||
|
let priors: Vec<f64> = class_counts
|
||||||
|
.iter()
|
||||||
|
.map(|&count| count as f64 / total_samples)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
GaussianTestDistribution {
|
||||||
|
classes,
|
||||||
|
means,
|
||||||
|
variances,
|
||||||
|
priors,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type TestNBGaussian =
|
||||||
|
BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, GaussianTestDistribution>;
|
||||||
|
|
||||||
|
// Create a constant training dataset
|
||||||
|
let n_samples = 1000;
|
||||||
|
let n_features = 5;
|
||||||
|
let n_classes = 4;
|
||||||
|
|
||||||
|
let mut x_data = Vec::with_capacity(n_samples * n_features);
|
||||||
|
let mut y_data = Vec::with_capacity(n_samples);
|
||||||
|
|
||||||
|
for i in 0..n_samples {
|
||||||
|
for j in 0..n_features {
|
||||||
|
x_data.push((i * j) as f64 % 10.0);
|
||||||
|
}
|
||||||
|
y_data.push((i % n_classes) as u32);
|
||||||
|
}
|
||||||
|
|
||||||
|
let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
|
||||||
|
let y = y_data;
|
||||||
|
|
||||||
|
// Train the model
|
||||||
|
let dist = train_distribution(&x, &y);
|
||||||
|
let nb = TestNBGaussian::fit(dist).unwrap();
|
||||||
|
|
||||||
|
// Create constant test data
|
||||||
|
let n_test_samples = 100;
|
||||||
|
let mut test_x_data = Vec::with_capacity(n_test_samples * n_features);
|
||||||
|
for i in 0..n_test_samples {
|
||||||
|
for j in 0..n_features {
|
||||||
|
test_x_data.push((i * j * 2) as f64 % 15.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let test_x = DenseMatrix::new(n_test_samples, n_features, test_x_data, true).unwrap();
|
||||||
|
|
||||||
|
// Make predictions
|
||||||
|
let predictions = nb
|
||||||
|
.predict(&test_x)
|
||||||
|
.map_err(|e| format!("Prediction failed: {}", e))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Check numerical stability
|
||||||
|
assert_eq!(
|
||||||
|
predictions.len(),
|
||||||
|
n_test_samples,
|
||||||
|
"Number of predictions should match number of test samples"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check that all predictions are valid class labels
|
||||||
|
for &pred in predictions.iter() {
|
||||||
|
assert!(pred < n_classes as u32, "Predicted class should be valid");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check consistency of predictions
|
||||||
|
let repeated_predictions = nb
|
||||||
|
.predict(&test_x)
|
||||||
|
.map_err(|e| format!("Repeated prediction failed: {}", e))
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
predictions, repeated_predictions,
|
||||||
|
"Predictions should be consistent when repeated"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check extreme values
|
||||||
|
let extreme_x =
|
||||||
|
DenseMatrix::new(2, n_features, vec![f64::MAX; n_features * 2], true).unwrap();
|
||||||
|
let extreme_predictions = nb.predict(&extreme_x);
|
||||||
|
assert!(
|
||||||
|
extreme_predictions.is_err(),
|
||||||
|
"Extreme value input should result in an error"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
extreme_predictions.unwrap_err().to_string(),
|
||||||
|
"Predict failed: Failed to predict, all probabilities were NaN",
|
||||||
|
"Incorrect error message for extreme values"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check for NaN handling
|
||||||
|
let nan_x = DenseMatrix::new(2, n_features, vec![f64::NAN; n_features * 2], true).unwrap();
|
||||||
|
let nan_predictions = nb.predict(&nan_x);
|
||||||
|
assert!(
|
||||||
|
nan_predictions.is_err(),
|
||||||
|
"NaN input should result in an error"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check for very small values
|
||||||
|
let small_x =
|
||||||
|
DenseMatrix::new(2, n_features, vec![f64::MIN_POSITIVE; n_features * 2], true).unwrap();
|
||||||
|
let small_predictions = nb
|
||||||
|
.predict(&small_x)
|
||||||
|
.map_err(|e| format!("Small value prediction failed: {}", e))
|
||||||
|
.unwrap();
|
||||||
|
for &pred in small_predictions.iter() {
|
||||||
|
assert!(
|
||||||
|
pred < n_classes as u32,
|
||||||
|
"Predictions for very small values should be valid"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for values close to zero
|
||||||
|
let near_zero_x =
|
||||||
|
DenseMatrix::new(2, n_features, vec![1e-300; n_features * 2], true).unwrap();
|
||||||
|
let near_zero_predictions = nb
|
||||||
|
.predict(&near_zero_x)
|
||||||
|
.map_err(|e| format!("Near-zero value prediction failed: {}", e))
|
||||||
|
.unwrap();
|
||||||
|
for &pred in near_zero_predictions.iter() {
|
||||||
|
assert!(
|
||||||
|
pred < n_classes as u32,
|
||||||
|
"Predictions for near-zero values should be valid"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("All numerical stability checks passed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_gaussian_naive_bayes_numerical_stability_random_data() {
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct MySimpleRng {
|
||||||
|
state: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MySimpleRng {
|
||||||
|
fn new(seed: u64) -> Self {
|
||||||
|
MySimpleRng { state: seed }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the next u64 in the sequence.
|
||||||
|
fn next_u64(&mut self) -> u64 {
|
||||||
|
// LCG parameters; these are somewhat arbitrary but commonly used.
|
||||||
|
// Feel free to tweak the multiplier/adder etc.
|
||||||
|
self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
|
||||||
|
self.state
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get an f64 in the range [min, max).
|
||||||
|
fn next_f64(&mut self, min: f64, max: f64) -> f64 {
|
||||||
|
let fraction = (self.next_u64() as f64) / (u64::MAX as f64);
|
||||||
|
min + fraction * (max - min)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a usize in the range [min, max). This floors the floating result.
|
||||||
|
fn gen_range_usize(&mut self, min: usize, max: usize) -> usize {
|
||||||
|
let v = self.next_f64(min as f64, max as f64);
|
||||||
|
// Truncate into the integer range. Because of floating inexactness,
|
||||||
|
// ensure we also clamp.
|
||||||
|
let int_v = v.floor() as isize;
|
||||||
|
// simple clamp to avoid any float rounding out of range
|
||||||
|
let clamped = int_v.max(min as isize).min((max - 1) as isize);
|
||||||
|
clamped as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
use crate::naive_bayes::gaussian::GaussianNB;
|
||||||
|
// We will generate random data in a reproducible way (using a fixed seed).
|
||||||
|
// We will generate random data in a reproducible way:
|
||||||
|
let mut rng = MySimpleRng::new(42);
|
||||||
|
|
||||||
|
let n_samples = 1000;
|
||||||
|
let n_features = 5;
|
||||||
|
let n_classes = 4;
|
||||||
|
|
||||||
|
// Our feature matrix and label vector
|
||||||
|
let mut x_data = Vec::with_capacity(n_samples * n_features);
|
||||||
|
let mut y_data = Vec::with_capacity(n_samples);
|
||||||
|
|
||||||
|
// Fill x_data with random values and y_data with random class labels.
|
||||||
|
for _i in 0..n_samples {
|
||||||
|
for _j in 0..n_features {
|
||||||
|
// We’ll pick random values in [-10, 10).
|
||||||
|
x_data.push(rng.next_f64(-10.0, 10.0));
|
||||||
|
}
|
||||||
|
let class = rng.gen_range_usize(0, n_classes) as u32;
|
||||||
|
y_data.push(class);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create DenseMatrix from x_data
|
||||||
|
let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
|
||||||
|
|
||||||
|
// Train GaussianNB
|
||||||
|
let gnb = GaussianNB::fit(&x, &y_data, Default::default())
|
||||||
|
.expect("Fitting GaussianNB with random data failed.");
|
||||||
|
|
||||||
|
// Predict on the same training data to verify no numerical instability
|
||||||
|
let predictions = gnb.predict(&x).expect("Prediction on random data failed.");
|
||||||
|
|
||||||
|
// Basic sanity checks
|
||||||
|
assert_eq!(
|
||||||
|
predictions.len(),
|
||||||
|
n_samples,
|
||||||
|
"Prediction size must match n_samples"
|
||||||
|
);
|
||||||
|
for &pred_class in &predictions {
|
||||||
|
assert!(
|
||||||
|
(pred_class as usize) < n_classes,
|
||||||
|
"Predicted class {} is out of range [0..n_classes).",
|
||||||
|
pred_class
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If you want to compare with scikit-learn, you can do something like:
|
||||||
|
// println!("X = {:?}", &x);
|
||||||
|
// println!("Y = {:?}", &y_data);
|
||||||
|
// println!("predictions = {:?}", &predictions);
|
||||||
|
// and then in Python:
|
||||||
|
// import numpy as np
|
||||||
|
// from sklearn.naive_bayes import GaussianNB
|
||||||
|
// X = np.reshape(np.array(x), (1000, 5), order='F')
|
||||||
|
// Y = np.array(y)
|
||||||
|
// gnb = GaussianNB().fit(X, Y)
|
||||||
|
// preds = gnb.predict(X)
|
||||||
|
// expected = np.array(predictions)
|
||||||
|
// assert expected == preds
|
||||||
|
// They should match closely (or exactly) depending on floating rounding.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -20,13 +20,13 @@
|
|||||||
//! &[0, 2, 0, 0, 1, 0],
|
//! &[0, 2, 0, 0, 1, 0],
|
||||||
//! &[0, 1, 0, 1, 0, 0],
|
//! &[0, 1, 0, 1, 0, 0],
|
||||||
//! &[0, 1, 1, 0, 0, 1],
|
//! &[0, 1, 1, 0, 0, 1],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<u32> = vec![0, 0, 0, 1];
|
//! let y: Vec<u32> = vec![0, 0, 0, 1];
|
||||||
//! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
//! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
//!
|
//!
|
||||||
//! // Testing data point is:
|
//! // Testing data point is:
|
||||||
//! // Chinese Chinese Chinese Tokyo Japan
|
//! // Chinese Chinese Chinese Tokyo Japan
|
||||||
//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]);
|
//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap();
|
||||||
//! let y_hat = nb.predict(&x_test).unwrap();
|
//! let y_hat = nb.predict(&x_test).unwrap();
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
@@ -207,8 +207,7 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
|
|||||||
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
|
||||||
/// * `x` - training data.
|
/// * `x` - training data.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
|
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
|
||||||
/// priors are adjusted according to the data.
|
|
||||||
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
|
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
|
||||||
pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>(
|
pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>(
|
||||||
x: &X,
|
x: &X,
|
||||||
@@ -220,21 +219,18 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
|
|||||||
let y_samples = y.shape();
|
let y_samples = y.shape();
|
||||||
if y_samples != n_samples {
|
if y_samples != n_samples {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x should equal size of y; |x|=[{}], |y|=[{}]",
|
"Size of x should equal size of y; |x|=[{n_samples}], |y|=[{y_samples}]"
|
||||||
n_samples, y_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if n_samples == 0 {
|
if n_samples == 0 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x and y should greater than 0; |x|=[{}]",
|
"Size of x and y should greater than 0; |x|=[{n_samples}]"
|
||||||
n_samples
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
if alpha < 0f64 {
|
if alpha < 0f64 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Alpha should be greater than 0; |alpha|=[{}]",
|
"Alpha should be greater than 0; |alpha|=[{alpha}]"
|
||||||
alpha
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,8 +262,7 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
|
|||||||
feature_in_class_counter[class_index][idx] +=
|
feature_in_class_counter[class_index][idx] +=
|
||||||
row_i.to_usize().ok_or_else(|| {
|
row_i.to_usize().ok_or_else(|| {
|
||||||
Failed::fit(&format!(
|
Failed::fit(&format!(
|
||||||
"Elements of the matrix should be convertible to usize |found|=[{}]",
|
"Elements of the matrix should be convertible to usize |found|=[{row_i}]"
|
||||||
row_i
|
|
||||||
))
|
))
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
@@ -349,10 +344,10 @@ impl<TX: Number + Unsigned, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Array
|
|||||||
{
|
{
|
||||||
/// Fits MultinomialNB with given data
|
/// Fits MultinomialNB with given data
|
||||||
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
|
||||||
/// features.
|
/// features.
|
||||||
/// * `y` - vector with target values (classes) of length N.
|
/// * `y` - vector with target values (classes) of length N.
|
||||||
/// * `parameters` - additional parameters like class priors, alpha for smoothing and
|
/// * `parameters` - additional parameters like class priors, alpha for smoothing and
|
||||||
/// binarizing threshold.
|
/// binarizing threshold.
|
||||||
pub fn fit(x: &X, y: &Y, parameters: MultinomialNBParameters) -> Result<Self, Failed> {
|
pub fn fit(x: &X, y: &Y, parameters: MultinomialNBParameters) -> Result<Self, Failed> {
|
||||||
let distribution =
|
let distribution =
|
||||||
MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?;
|
MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?;
|
||||||
@@ -362,6 +357,7 @@ impl<TX: Number + Unsigned, TY: Number + Ord + Unsigned, X: Array2<TX>, Y: Array
|
|||||||
|
|
||||||
/// Estimates the class labels for the provided data.
|
/// Estimates the class labels for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with class estimates.
|
/// Returns a vector of size N with class estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
self.inner.as_ref().unwrap().predict(x)
|
self.inner.as_ref().unwrap().predict(x)
|
||||||
@@ -437,7 +433,8 @@ mod tests {
|
|||||||
&[0, 2, 0, 0, 1, 0],
|
&[0, 2, 0, 0, 1, 0],
|
||||||
&[0, 1, 0, 1, 0, 0],
|
&[0, 1, 0, 1, 0, 0],
|
||||||
&[0, 1, 1, 0, 0, 1],
|
&[0, 1, 1, 0, 0, 1],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 0, 1];
|
let y: Vec<u32> = vec![0, 0, 0, 1];
|
||||||
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
@@ -471,7 +468,7 @@ mod tests {
|
|||||||
|
|
||||||
// Testing data point is:
|
// Testing data point is:
|
||||||
// Chinese Chinese Chinese Tokyo Japan
|
// Chinese Chinese Chinese Tokyo Japan
|
||||||
let x_test = DenseMatrix::<u32>::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]);
|
let x_test = DenseMatrix::<u32>::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap();
|
||||||
let y_hat = mnb.predict(&x_test).unwrap();
|
let y_hat = mnb.predict(&x_test).unwrap();
|
||||||
|
|
||||||
assert_eq!(y_hat, &[0]);
|
assert_eq!(y_hat, &[0]);
|
||||||
@@ -499,7 +496,8 @@ mod tests {
|
|||||||
&[2, 0, 3, 3, 1, 2, 0, 2, 4, 1],
|
&[2, 0, 3, 3, 1, 2, 0, 2, 4, 1],
|
||||||
&[2, 4, 0, 4, 2, 4, 1, 3, 1, 4],
|
&[2, 4, 0, 4, 2, 4, 1, 3, 1, 4],
|
||||||
&[0, 2, 2, 3, 4, 0, 4, 4, 4, 4],
|
&[0, 2, 2, 3, 4, 0, 4, 4, 4, 4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2];
|
let y: Vec<u32> = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2];
|
||||||
let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
@@ -558,7 +556,8 @@ mod tests {
|
|||||||
&[0, 1, 0, 0, 1, 0],
|
&[0, 1, 0, 0, 1, 0],
|
||||||
&[0, 1, 0, 1, 0, 0],
|
&[0, 1, 0, 1, 0, 0],
|
||||||
&[0, 1, 1, 0, 0, 1],
|
&[0, 1, 1, 0, 0, 1],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![0, 0, 0, 1];
|
let y = vec![0, 0, 0, 1];
|
||||||
|
|
||||||
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
//! &[3., 4.],
|
//! &[3., 4.],
|
||||||
//! &[5., 6.],
|
//! &[5., 6.],
|
||||||
//! &[7., 8.],
|
//! &[7., 8.],
|
||||||
//! &[9., 10.]]);
|
//! &[9., 10.]]).unwrap();
|
||||||
//! let y = vec![2, 2, 2, 3, 3]; //your class labels
|
//! let y = vec![2, 2, 2, 3, 3]; //your class labels
|
||||||
//!
|
//!
|
||||||
//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
|
//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -236,8 +236,7 @@ impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec
|
|||||||
|
|
||||||
if x_n != y_n {
|
if x_n != y_n {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x should equal size of y; |x|=[{}], |y|=[{}]",
|
"Size of x should equal size of y; |x|=[{x_n}], |y|=[{y_n}]"
|
||||||
x_n, y_n
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -262,6 +261,7 @@ impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec
|
|||||||
|
|
||||||
/// Estimates the class labels for the provided data.
|
/// Estimates the class labels for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with class estimates.
|
/// Returns a vector of size N with class estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
let mut result = Y::zeros(x.shape().0);
|
let mut result = Y::zeros(x.shape().0);
|
||||||
@@ -312,7 +312,8 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn knn_fit_predict() {
|
fn knn_fit_predict() {
|
||||||
let x =
|
let x =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
|
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
|
||||||
|
.unwrap();
|
||||||
let y = vec![2, 2, 2, 3, 3];
|
let y = vec![2, 2, 2, 3, 3];
|
||||||
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
|
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
let y_hat = knn.predict(&x).unwrap();
|
let y_hat = knn.predict(&x).unwrap();
|
||||||
@@ -326,7 +327,7 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn knn_fit_predict_weighted() {
|
fn knn_fit_predict_weighted() {
|
||||||
let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]);
|
let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]).unwrap();
|
||||||
let y = vec![2, 2, 2, 3, 3];
|
let y = vec![2, 2, 2, 3, 3];
|
||||||
let knn = KNNClassifier::fit(
|
let knn = KNNClassifier::fit(
|
||||||
&x,
|
&x,
|
||||||
@@ -337,7 +338,9 @@ mod tests {
|
|||||||
.with_weight(KNNWeightFunction::Distance),
|
.with_weight(KNNWeightFunction::Distance),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let y_hat = knn.predict(&DenseMatrix::from_2d_array(&[&[4.1]])).unwrap();
|
let y_hat = knn
|
||||||
|
.predict(&DenseMatrix::from_2d_array(&[&[4.1]]).unwrap())
|
||||||
|
.unwrap();
|
||||||
assert_eq!(vec![3], y_hat);
|
assert_eq!(vec![3], y_hat);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -349,7 +352,8 @@ mod tests {
|
|||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
fn serde() {
|
fn serde() {
|
||||||
let x =
|
let x =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
|
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
|
||||||
|
.unwrap();
|
||||||
let y = vec![2, 2, 2, 3, 3];
|
let y = vec![2, 2, 2, 3, 3];
|
||||||
|
|
||||||
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
|
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -24,7 +24,7 @@
|
|||||||
//! &[2., 2.],
|
//! &[2., 2.],
|
||||||
//! &[3., 3.],
|
//! &[3., 3.],
|
||||||
//! &[4., 4.],
|
//! &[4., 4.],
|
||||||
//! &[5., 5.]]);
|
//! &[5., 5.]]).unwrap();
|
||||||
//! let y = vec![1., 2., 3., 4., 5.]; //your target values
|
//! let y = vec![1., 2., 3., 4., 5.]; //your target values
|
||||||
//!
|
//!
|
||||||
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
@@ -88,25 +88,21 @@ pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D:
|
|||||||
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
||||||
KNNRegressor<TX, TY, X, Y, D>
|
KNNRegressor<TX, TY, X, Y, D>
|
||||||
{
|
{
|
||||||
///
|
|
||||||
fn y(&self) -> &Y {
|
fn y(&self) -> &Y {
|
||||||
self.y.as_ref().unwrap()
|
self.y.as_ref().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
|
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
|
||||||
self.knn_algorithm
|
self.knn_algorithm
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.expect("Missing parameter: KNNAlgorithm")
|
.expect("Missing parameter: KNNAlgorithm")
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn weight(&self) -> &KNNWeightFunction {
|
fn weight(&self) -> &KNNWeightFunction {
|
||||||
self.weight.as_ref().expect("Missing parameter: weight")
|
self.weight.as_ref().expect("Missing parameter: weight")
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
///
|
|
||||||
fn k(&self) -> usize {
|
fn k(&self) -> usize {
|
||||||
self.k.unwrap()
|
self.k.unwrap()
|
||||||
}
|
}
|
||||||
@@ -224,8 +220,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
|
|
||||||
if x_n != y_n {
|
if x_n != y_n {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Size of x should equal size of y; |x|=[{}], |y|=[{}]",
|
"Size of x should equal size of y; |x|=[{x_n}], |y|=[{y_n}]"
|
||||||
x_n, y_n
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -251,6 +246,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
|
|
||||||
/// Predict the target for the provided data.
|
/// Predict the target for the provided data.
|
||||||
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
///
|
||||||
/// Returns a vector of size N with estimates.
|
/// Returns a vector of size N with estimates.
|
||||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||||
let mut result = Y::zeros(x.shape().0);
|
let mut result = Y::zeros(x.shape().0);
|
||||||
@@ -296,9 +292,10 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn knn_fit_predict_weighted() {
|
fn knn_fit_predict_weighted() {
|
||||||
let x =
|
let x =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
|
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
|
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
|
||||||
let y_exp = vec![1., 2., 3., 4., 5.];
|
let y_exp = [1., 2., 3., 4., 5.];
|
||||||
let knn = KNNRegressor::fit(
|
let knn = KNNRegressor::fit(
|
||||||
&x,
|
&x,
|
||||||
&y,
|
&y,
|
||||||
@@ -312,7 +309,7 @@ mod tests {
|
|||||||
let y_hat = knn.predict(&x).unwrap();
|
let y_hat = knn.predict(&x).unwrap();
|
||||||
assert_eq!(5, Vec::len(&y_hat));
|
assert_eq!(5, Vec::len(&y_hat));
|
||||||
for i in 0..y_hat.len() {
|
for i in 0..y_hat.len() {
|
||||||
assert!((y_hat[i] - y_exp[i]).abs() < std::f64::EPSILON);
|
assert!((y_hat[i] - y_exp[i]).abs() < f64::EPSILON);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -323,9 +320,10 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn knn_fit_predict_uniform() {
|
fn knn_fit_predict_uniform() {
|
||||||
let x =
|
let x =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
|
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
|
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
|
||||||
let y_exp = vec![2., 2., 3., 4., 4.];
|
let y_exp = [2., 2., 3., 4., 4.];
|
||||||
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
let y_hat = knn.predict(&x).unwrap();
|
let y_hat = knn.predict(&x).unwrap();
|
||||||
assert_eq!(5, Vec::len(&y_hat));
|
assert_eq!(5, Vec::len(&y_hat));
|
||||||
@@ -342,7 +340,8 @@ mod tests {
|
|||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
fn serde() {
|
fn serde() {
|
||||||
let x =
|
let x =
|
||||||
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
|
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]])
|
||||||
|
.unwrap();
|
||||||
let y = vec![1., 2., 3., 4., 5.];
|
let y = vec![1., 2., 3., 4., 5.];
|
||||||
|
|
||||||
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -49,20 +49,15 @@ pub type KNNAlgorithmName = crate::algorithm::neighbour::KNNAlgorithmName;
|
|||||||
|
|
||||||
/// Weight function that is used to determine estimated value.
|
/// Weight function that is used to determine estimated value.
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub enum KNNWeightFunction {
|
pub enum KNNWeightFunction {
|
||||||
/// All k nearest points are weighted equally
|
/// All k nearest points are weighted equally
|
||||||
|
#[default]
|
||||||
Uniform,
|
Uniform,
|
||||||
/// k nearest points are weighted by the inverse of their distance. Closer neighbors will have a greater influence than neighbors which are further away.
|
/// k nearest points are weighted by the inverse of their distance. Closer neighbors will have a greater influence than neighbors which are further away.
|
||||||
Distance,
|
Distance,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for KNNWeightFunction {
|
|
||||||
fn default() -> Self {
|
|
||||||
KNNWeightFunction::Uniform
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl KNNWeightFunction {
|
impl KNNWeightFunction {
|
||||||
fn calc_weights(&self, distances: Vec<f64>) -> std::vec::Vec<f64> {
|
fn calc_weights(&self, distances: Vec<f64>) -> std::vec::Vec<f64> {
|
||||||
match *self {
|
match *self {
|
||||||
|
|||||||
+26
-3
@@ -2,9 +2,13 @@
|
|||||||
//! Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
|
//! Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
|
||||||
//! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module.
|
//! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module.
|
||||||
|
|
||||||
|
use rand::rngs::SmallRng;
|
||||||
|
use rand::{Rng, SeedableRng};
|
||||||
|
|
||||||
use num_traits::Float;
|
use num_traits::Float;
|
||||||
|
|
||||||
use crate::numbers::basenum::Number;
|
use crate::numbers::basenum::Number;
|
||||||
|
use crate::rand_custom::get_rng_impl;
|
||||||
|
|
||||||
/// Defines real number
|
/// Defines real number
|
||||||
/// <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
/// <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
@@ -63,8 +67,12 @@ impl RealNumber for f64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn rand() -> f64 {
|
fn rand() -> f64 {
|
||||||
// TODO: to be implemented, see issue smartcore#214
|
let mut small_rng = get_rng_impl(None);
|
||||||
1.0
|
|
||||||
|
let mut rngs: Vec<SmallRng> = (0..3)
|
||||||
|
.map(|_| SmallRng::from_rng(&mut small_rng).unwrap())
|
||||||
|
.collect();
|
||||||
|
rngs[0].gen::<f64>()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn two() -> Self {
|
fn two() -> Self {
|
||||||
@@ -108,7 +116,12 @@ impl RealNumber for f32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn rand() -> f32 {
|
fn rand() -> f32 {
|
||||||
1.0
|
let mut small_rng = get_rng_impl(None);
|
||||||
|
|
||||||
|
let mut rngs: Vec<SmallRng> = (0..3)
|
||||||
|
.map(|_| SmallRng::from_rng(&mut small_rng).unwrap())
|
||||||
|
.collect();
|
||||||
|
rngs[0].gen::<f32>()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn two() -> Self {
|
fn two() -> Self {
|
||||||
@@ -149,4 +162,14 @@ mod tests {
|
|||||||
fn f64_from_string() {
|
fn f64_from_string() {
|
||||||
assert_eq!(f64::from_str("1.111111111").unwrap(), 1.111111111)
|
assert_eq!(f64::from_str("1.111111111").unwrap(), 1.111111111)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f64_rand() {
|
||||||
|
f64::rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f32_rand() {
|
||||||
|
f32::rand();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
// TODO: missing documentation
|
|
||||||
|
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
|
|
||||||
use crate::linalg::basic::arrays::Array1;
|
use crate::linalg::basic::arrays::Array1;
|
||||||
@@ -8,30 +6,27 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
|
|||||||
use crate::optimization::line_search::LineSearchMethod;
|
use crate::optimization::line_search::LineSearchMethod;
|
||||||
use crate::optimization::{DF, F};
|
use crate::optimization::{DF, F};
|
||||||
|
|
||||||
///
|
/// Gradient Descent optimization algorithm
|
||||||
pub struct GradientDescent {
|
pub struct GradientDescent {
|
||||||
///
|
/// Maximum number of iterations
|
||||||
pub max_iter: usize,
|
pub max_iter: usize,
|
||||||
///
|
/// Relative tolerance for the gradient norm
|
||||||
pub g_rtol: f64,
|
pub g_rtol: f64,
|
||||||
///
|
/// Absolute tolerance for the gradient norm
|
||||||
pub g_atol: f64,
|
pub g_atol: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl Default for GradientDescent {
|
impl Default for GradientDescent {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
GradientDescent {
|
GradientDescent {
|
||||||
max_iter: 10000,
|
max_iter: 10000,
|
||||||
g_rtol: std::f64::EPSILON.sqrt(),
|
g_rtol: f64::EPSILON.sqrt(),
|
||||||
g_atol: std::f64::EPSILON,
|
g_atol: f64::EPSILON,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl<T: FloatNumber> FirstOrderOptimizer<T> for GradientDescent {
|
impl<T: FloatNumber> FirstOrderOptimizer<T> for GradientDescent {
|
||||||
///
|
|
||||||
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
|
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
|
||||||
&self,
|
&self,
|
||||||
f: &'a F<'_, T, X>,
|
f: &'a F<'_, T, X>,
|
||||||
@@ -113,12 +108,13 @@ mod tests {
|
|||||||
g[1] = 200. * (x[1] - x[0].powf(2.));
|
g[1] = 200. * (x[1] - x[0].powf(2.));
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut ls: Backtracking<f64> = Default::default();
|
let ls: Backtracking<f64> = Backtracking::<f64> {
|
||||||
ls.order = FunctionOrder::THIRD;
|
order: FunctionOrder::THIRD,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
let optimizer: GradientDescent = Default::default();
|
let optimizer: GradientDescent = Default::default();
|
||||||
|
|
||||||
let result = optimizer.optimize(&f, &df, &x0, &ls);
|
let result = optimizer.optimize(&f, &df, &x0, &ls);
|
||||||
println!("{:?}", result);
|
|
||||||
|
|
||||||
assert!((result.f_x - 0.0).abs() < 1e-5);
|
assert!((result.f_x - 0.0).abs() < 1e-5);
|
||||||
assert!((result.x[0] - 1.0).abs() < 1e-2);
|
assert!((result.x[0] - 1.0).abs() < 1e-2);
|
||||||
|
|||||||
@@ -11,31 +11,29 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
|
|||||||
use crate::optimization::line_search::LineSearchMethod;
|
use crate::optimization::line_search::LineSearchMethod;
|
||||||
use crate::optimization::{DF, F};
|
use crate::optimization::{DF, F};
|
||||||
|
|
||||||
///
|
/// Limited-memory BFGS optimization algorithm
|
||||||
pub struct LBFGS {
|
pub struct LBFGS {
|
||||||
///
|
/// Maximum number of iterations
|
||||||
pub max_iter: usize,
|
pub max_iter: usize,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub g_rtol: f64,
|
pub g_rtol: f64,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub g_atol: f64,
|
pub g_atol: f64,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub x_atol: f64,
|
pub x_atol: f64,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub x_rtol: f64,
|
pub x_rtol: f64,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub f_abstol: f64,
|
pub f_abstol: f64,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub f_reltol: f64,
|
pub f_reltol: f64,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub successive_f_tol: usize,
|
pub successive_f_tol: usize,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub m: usize,
|
pub m: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl Default for LBFGS {
|
impl Default for LBFGS {
|
||||||
///
|
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
LBFGS {
|
LBFGS {
|
||||||
max_iter: 1000,
|
max_iter: 1000,
|
||||||
@@ -51,9 +49,7 @@ impl Default for LBFGS {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl LBFGS {
|
impl LBFGS {
|
||||||
///
|
|
||||||
fn two_loops<T: FloatNumber + RealNumber, X: Array1<T>>(&self, state: &mut LBFGSState<T, X>) {
|
fn two_loops<T: FloatNumber + RealNumber, X: Array1<T>>(&self, state: &mut LBFGSState<T, X>) {
|
||||||
let lower = state.iteration.max(self.m) - self.m;
|
let lower = state.iteration.max(self.m) - self.m;
|
||||||
let upper = state.iteration;
|
let upper = state.iteration;
|
||||||
@@ -95,7 +91,6 @@ impl LBFGS {
|
|||||||
state.s.mul_scalar_mut(-T::one());
|
state.s.mul_scalar_mut(-T::one());
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn init_state<T: FloatNumber + RealNumber, X: Array1<T>>(&self, x: &X) -> LBFGSState<T, X> {
|
fn init_state<T: FloatNumber + RealNumber, X: Array1<T>>(&self, x: &X) -> LBFGSState<T, X> {
|
||||||
LBFGSState {
|
LBFGSState {
|
||||||
x: x.clone(),
|
x: x.clone(),
|
||||||
@@ -119,7 +114,6 @@ impl LBFGS {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn update_state<'a, T: FloatNumber + RealNumber, X: Array1<T>, LS: LineSearchMethod<T>>(
|
fn update_state<'a, T: FloatNumber + RealNumber, X: Array1<T>, LS: LineSearchMethod<T>>(
|
||||||
&self,
|
&self,
|
||||||
f: &'a F<'_, T, X>,
|
f: &'a F<'_, T, X>,
|
||||||
@@ -161,7 +155,6 @@ impl LBFGS {
|
|||||||
df(&mut state.x_df, &state.x);
|
df(&mut state.x_df, &state.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
fn assess_convergence<T: FloatNumber, X: Array1<T>>(
|
fn assess_convergence<T: FloatNumber, X: Array1<T>>(
|
||||||
&self,
|
&self,
|
||||||
state: &mut LBFGSState<T, X>,
|
state: &mut LBFGSState<T, X>,
|
||||||
@@ -173,7 +166,7 @@ impl LBFGS {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if state.x.max_diff(&state.x_prev)
|
if state.x.max_diff(&state.x_prev)
|
||||||
<= T::from_f64(self.x_rtol * state.x.norm(std::f64::INFINITY)).unwrap()
|
<= T::from_f64(self.x_rtol * state.x.norm(f64::INFINITY)).unwrap()
|
||||||
{
|
{
|
||||||
x_converged = true;
|
x_converged = true;
|
||||||
}
|
}
|
||||||
@@ -188,17 +181,16 @@ impl LBFGS {
|
|||||||
state.counter_f_tol += 1;
|
state.counter_f_tol += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if state.x_df.norm(std::f64::INFINITY) <= self.g_atol {
|
if state.x_df.norm(f64::INFINITY) <= self.g_atol {
|
||||||
g_converged = true;
|
g_converged = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
g_converged || x_converged || state.counter_f_tol > self.successive_f_tol
|
g_converged || x_converged || state.counter_f_tol > self.successive_f_tol
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
fn update_hessian<T: FloatNumber, X: Array1<T>>(
|
||||||
fn update_hessian<'a, T: FloatNumber, X: Array1<T>>(
|
|
||||||
&self,
|
&self,
|
||||||
_: &'a DF<'_, X>,
|
_: &DF<'_, X>,
|
||||||
state: &mut LBFGSState<T, X>,
|
state: &mut LBFGSState<T, X>,
|
||||||
) {
|
) {
|
||||||
state.dg = state.x_df.sub(&state.x_df_prev);
|
state.dg = state.x_df.sub(&state.x_df_prev);
|
||||||
@@ -212,7 +204,6 @@ impl LBFGS {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct LBFGSState<T: FloatNumber, X: Array1<T>> {
|
struct LBFGSState<T: FloatNumber, X: Array1<T>> {
|
||||||
x: X,
|
x: X,
|
||||||
@@ -234,9 +225,7 @@ struct LBFGSState<T: FloatNumber, X: Array1<T>> {
|
|||||||
alpha: T,
|
alpha: T,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl<T: FloatNumber + RealNumber> FirstOrderOptimizer<T> for LBFGS {
|
impl<T: FloatNumber + RealNumber> FirstOrderOptimizer<T> for LBFGS {
|
||||||
///
|
|
||||||
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
|
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
|
||||||
&self,
|
&self,
|
||||||
f: &F<'_, T, X>,
|
f: &F<'_, T, X>,
|
||||||
@@ -248,7 +237,7 @@ impl<T: FloatNumber + RealNumber> FirstOrderOptimizer<T> for LBFGS {
|
|||||||
|
|
||||||
df(&mut state.x_df, x0);
|
df(&mut state.x_df, x0);
|
||||||
|
|
||||||
let g_converged = state.x_df.norm(std::f64::INFINITY) < self.g_atol;
|
let g_converged = state.x_df.norm(f64::INFINITY) < self.g_atol;
|
||||||
let mut converged = g_converged;
|
let mut converged = g_converged;
|
||||||
let stopped = false;
|
let stopped = false;
|
||||||
|
|
||||||
@@ -291,13 +280,15 @@ mod tests {
|
|||||||
g[0] = -2. * (1. - x[0]) - 400. * (x[1] - x[0].powf(2.)) * x[0];
|
g[0] = -2. * (1. - x[0]) - 400. * (x[1] - x[0].powf(2.)) * x[0];
|
||||||
g[1] = 200. * (x[1] - x[0].powf(2.));
|
g[1] = 200. * (x[1] - x[0].powf(2.));
|
||||||
};
|
};
|
||||||
let mut ls: Backtracking<f64> = Default::default();
|
let ls: Backtracking<f64> = Backtracking::<f64> {
|
||||||
ls.order = FunctionOrder::THIRD;
|
order: FunctionOrder::THIRD,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
let optimizer: LBFGS = Default::default();
|
let optimizer: LBFGS = Default::default();
|
||||||
|
|
||||||
let result = optimizer.optimize(&f, &df, &x0, &ls);
|
let result = optimizer.optimize(&f, &df, &x0, &ls);
|
||||||
|
|
||||||
assert!((result.f_x - 0.0).abs() < std::f64::EPSILON);
|
assert!((result.f_x - 0.0).abs() < f64::EPSILON);
|
||||||
assert!((result.x[0] - 1.0).abs() < 1e-8);
|
assert!((result.x[0] - 1.0).abs() < 1e-8);
|
||||||
assert!((result.x[1] - 1.0).abs() < 1e-8);
|
assert!((result.x[1] - 1.0).abs() < 1e-8);
|
||||||
assert!(result.iterations <= 24);
|
assert!(result.iterations <= 24);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
///
|
/// Gradient descent optimization algorithm
|
||||||
pub mod gradient_descent;
|
pub mod gradient_descent;
|
||||||
///
|
/// Limited-memory BFGS optimization algorithm
|
||||||
pub mod lbfgs;
|
pub mod lbfgs;
|
||||||
|
|
||||||
use std::clone::Clone;
|
use std::clone::Clone;
|
||||||
@@ -11,9 +11,9 @@ use crate::numbers::floatnum::FloatNumber;
|
|||||||
use crate::optimization::line_search::LineSearchMethod;
|
use crate::optimization::line_search::LineSearchMethod;
|
||||||
use crate::optimization::{DF, F};
|
use crate::optimization::{DF, F};
|
||||||
|
|
||||||
///
|
/// First-order optimization is a class of algorithms that use the first derivative of a function to find optimal solutions.
|
||||||
pub trait FirstOrderOptimizer<T: FloatNumber> {
|
pub trait FirstOrderOptimizer<T: FloatNumber> {
|
||||||
///
|
/// run first order optimization
|
||||||
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
|
fn optimize<'a, X: Array1<T>, LS: LineSearchMethod<T>>(
|
||||||
&self,
|
&self,
|
||||||
f: &F<'_, T, X>,
|
f: &F<'_, T, X>,
|
||||||
@@ -23,13 +23,13 @@ pub trait FirstOrderOptimizer<T: FloatNumber> {
|
|||||||
) -> OptimizerResult<T, X>;
|
) -> OptimizerResult<T, X>;
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
/// Result of optimization
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct OptimizerResult<T: FloatNumber, X: Array1<T>> {
|
pub struct OptimizerResult<T: FloatNumber, X: Array1<T>> {
|
||||||
///
|
/// Solution
|
||||||
pub x: X,
|
pub x: X,
|
||||||
///
|
/// f(x) value
|
||||||
pub f_x: T,
|
pub f_x: T,
|
||||||
///
|
/// number of iterations
|
||||||
pub iterations: usize,
|
pub iterations: usize,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
// TODO: missing documentation
|
|
||||||
|
|
||||||
use crate::optimization::FunctionOrder;
|
use crate::optimization::FunctionOrder;
|
||||||
use num_traits::Float;
|
use num_traits::Float;
|
||||||
|
|
||||||
///
|
/// Line search optimization.
|
||||||
pub trait LineSearchMethod<T: Float> {
|
pub trait LineSearchMethod<T: Float> {
|
||||||
///
|
/// Find alpha that satisfies strong Wolfe conditions.
|
||||||
fn search(
|
fn search(
|
||||||
&self,
|
&self,
|
||||||
f: &(dyn Fn(T) -> T),
|
f: &(dyn Fn(T) -> T),
|
||||||
@@ -16,32 +14,31 @@ pub trait LineSearchMethod<T: Float> {
|
|||||||
) -> LineSearchResult<T>;
|
) -> LineSearchResult<T>;
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
/// Line search result
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct LineSearchResult<T: Float> {
|
pub struct LineSearchResult<T: Float> {
|
||||||
///
|
/// Alpha value
|
||||||
pub alpha: T,
|
pub alpha: T,
|
||||||
///
|
/// f(alpha) value
|
||||||
pub f_x: T,
|
pub f_x: T,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
/// Backtracking line search method.
|
||||||
pub struct Backtracking<T: Float> {
|
pub struct Backtracking<T: Float> {
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub c1: T,
|
pub c1: T,
|
||||||
///
|
/// Maximum number of iterations for Backtracking single run
|
||||||
pub max_iterations: usize,
|
pub max_iterations: usize,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub max_infinity_iterations: usize,
|
pub max_infinity_iterations: usize,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub phi: T,
|
pub phi: T,
|
||||||
///
|
/// TODO: Add documentation
|
||||||
pub plo: T,
|
pub plo: T,
|
||||||
///
|
/// function order
|
||||||
pub order: FunctionOrder,
|
pub order: FunctionOrder,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl<T: Float> Default for Backtracking<T> {
|
impl<T: Float> Default for Backtracking<T> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Backtracking {
|
Backtracking {
|
||||||
@@ -55,9 +52,7 @@ impl<T: Float> Default for Backtracking<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
|
impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
|
||||||
///
|
|
||||||
fn search(
|
fn search(
|
||||||
&self,
|
&self,
|
||||||
f: &(dyn Fn(T) -> T),
|
f: &(dyn Fn(T) -> T),
|
||||||
|
|||||||
@@ -1,21 +1,19 @@
|
|||||||
// TODO: missing documentation
|
/// first order optimization algorithms
|
||||||
|
|
||||||
///
|
|
||||||
pub mod first_order;
|
pub mod first_order;
|
||||||
///
|
/// line search algorithms
|
||||||
pub mod line_search;
|
pub mod line_search;
|
||||||
|
|
||||||
///
|
/// Function f(x) = y
|
||||||
pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a;
|
pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a;
|
||||||
///
|
/// Function df(x)
|
||||||
pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
|
pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
|
||||||
|
|
||||||
///
|
/// Function order
|
||||||
#[allow(clippy::upper_case_acronyms)]
|
#[allow(clippy::upper_case_acronyms)]
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub enum FunctionOrder {
|
pub enum FunctionOrder {
|
||||||
///
|
/// Second order
|
||||||
SECOND,
|
SECOND,
|
||||||
///
|
/// Third order
|
||||||
THIRD,
|
THIRD,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
//! &[1.5, 2.0, 1.5, 4.0],
|
//! &[1.5, 2.0, 1.5, 4.0],
|
||||||
//! &[1.5, 1.0, 1.5, 5.0],
|
//! &[1.5, 1.0, 1.5, 5.0],
|
||||||
//! &[1.5, 2.0, 1.5, 6.0],
|
//! &[1.5, 2.0, 1.5, 6.0],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
|
//! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
|
||||||
//! // Infer number of categories from data and return a reusable encoder
|
//! // Infer number of categories from data and return a reusable encoder
|
||||||
//! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap();
|
//! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap();
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
//! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
|
//! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
|
||||||
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
|
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
|
||||||
//! ```
|
//! ```
|
||||||
use std::iter;
|
use std::iter::repeat_n;
|
||||||
|
|
||||||
use crate::error::Failed;
|
use crate::error::Failed;
|
||||||
use crate::linalg::basic::arrays::Array2;
|
use crate::linalg::basic::arrays::Array2;
|
||||||
@@ -75,11 +75,7 @@ fn find_new_idxs(num_params: usize, cat_sizes: &[usize], cat_idxs: &[usize]) ->
|
|||||||
let offset = (0..1).chain(offset_);
|
let offset = (0..1).chain(offset_);
|
||||||
|
|
||||||
let new_param_idxs: Vec<usize> = (0..num_params)
|
let new_param_idxs: Vec<usize> = (0..num_params)
|
||||||
.zip(
|
.zip(repeats.zip(offset).flat_map(|(r, o)| repeat_n(o, r)))
|
||||||
repeats
|
|
||||||
.zip(offset)
|
|
||||||
.flat_map(|(r, o)| iter::repeat(o).take(r)),
|
|
||||||
)
|
|
||||||
.map(|(idx, ofst)| idx + ofst)
|
.map(|(idx, ofst)| idx + ofst)
|
||||||
.collect();
|
.collect();
|
||||||
new_param_idxs
|
new_param_idxs
|
||||||
@@ -124,7 +120,7 @@ impl OneHotEncoder {
|
|||||||
let (nrows, _) = data.shape();
|
let (nrows, _) = data.shape();
|
||||||
|
|
||||||
// col buffer to avoid allocations
|
// col buffer to avoid allocations
|
||||||
let mut col_buf: Vec<T> = iter::repeat(T::zero()).take(nrows).collect();
|
let mut col_buf: Vec<T> = repeat_n(T::zero(), nrows).collect();
|
||||||
|
|
||||||
let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());
|
let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());
|
||||||
|
|
||||||
@@ -132,8 +128,7 @@ impl OneHotEncoder {
|
|||||||
data.copy_col_as_vec(idx, &mut col_buf);
|
data.copy_col_as_vec(idx, &mut col_buf);
|
||||||
if !validate_col_is_categorical(&col_buf) {
|
if !validate_col_is_categorical(&col_buf) {
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
"Column {} of data matrix containts non categorizable (integer) values",
|
"Column {idx} of data matrix containts non categorizable (integer) values"
|
||||||
idx
|
|
||||||
);
|
);
|
||||||
return Err(Failed::fit(&msg[..]));
|
return Err(Failed::fit(&msg[..]));
|
||||||
}
|
}
|
||||||
@@ -182,7 +177,7 @@ impl OneHotEncoder {
|
|||||||
match oh_vec {
|
match oh_vec {
|
||||||
None => {
|
None => {
|
||||||
// Since we support T types, bad value in a series causes in to be invalid
|
// Since we support T types, bad value in a series causes in to be invalid
|
||||||
let msg = format!("At least one value in column {} doesn't conform to category definition", old_cidx);
|
let msg = format!("At least one value in column {old_cidx} doesn't conform to category definition");
|
||||||
return Err(Failed::transform(&msg[..]));
|
return Err(Failed::transform(&msg[..]));
|
||||||
}
|
}
|
||||||
Some(v) => {
|
Some(v) => {
|
||||||
@@ -241,14 +236,16 @@ mod tests {
|
|||||||
&[2.0, 1.5, 4.0],
|
&[2.0, 1.5, 4.0],
|
||||||
&[1.0, 1.5, 5.0],
|
&[1.0, 1.5, 5.0],
|
||||||
&[2.0, 1.5, 6.0],
|
&[2.0, 1.5, 6.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let oh_enc = DenseMatrix::from_2d_array(&[
|
let oh_enc = DenseMatrix::from_2d_array(&[
|
||||||
&[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
|
&[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
|
||||||
&[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
|
&[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
|
||||||
&[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
|
&[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
|
||||||
&[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
|
&[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
(orig, oh_enc)
|
(orig, oh_enc)
|
||||||
}
|
}
|
||||||
@@ -260,14 +257,16 @@ mod tests {
|
|||||||
&[1.5, 2.0, 1.5, 4.0],
|
&[1.5, 2.0, 1.5, 4.0],
|
||||||
&[1.5, 1.0, 1.5, 5.0],
|
&[1.5, 1.0, 1.5, 5.0],
|
||||||
&[1.5, 2.0, 1.5, 6.0],
|
&[1.5, 2.0, 1.5, 6.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let oh_enc = DenseMatrix::from_2d_array(&[
|
let oh_enc = DenseMatrix::from_2d_array(&[
|
||||||
&[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
|
&[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
|
||||||
&[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
|
&[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
|
||||||
&[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
|
&[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
|
||||||
&[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
|
&[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
(orig, oh_enc)
|
(orig, oh_enc)
|
||||||
}
|
}
|
||||||
@@ -278,7 +277,7 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_encode_f64_series() {
|
fn hash_encode_f64_series() {
|
||||||
let series = vec![3.0, 1.0, 2.0, 1.0];
|
let series = [3.0, 1.0, 2.0, 1.0];
|
||||||
let hashable_series: Vec<CategoricalFloat> =
|
let hashable_series: Vec<CategoricalFloat> =
|
||||||
series.iter().map(|v| v.to_category()).collect();
|
series.iter().map(|v| v.to_category()).collect();
|
||||||
let enc = CategoryMapper::from_positional_category_vec(hashable_series);
|
let enc = CategoryMapper::from_positional_category_vec(hashable_series);
|
||||||
@@ -335,14 +334,11 @@ mod tests {
|
|||||||
&[2.0, 1.5, 4.0],
|
&[2.0, 1.5, 4.0],
|
||||||
&[1.0, 1.5, 5.0],
|
&[1.0, 1.5, 5.0],
|
||||||
&[2.0, 1.5, 6.0],
|
&[2.0, 1.5, 6.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let params = OneHotEncoderParams::from_cat_idx(&[1]);
|
let params = OneHotEncoderParams::from_cat_idx(&[1]);
|
||||||
match OneHotEncoder::fit(&m, params) {
|
let result = OneHotEncoder::fit(&m, params);
|
||||||
Err(_) => {
|
assert!(result.is_err());
|
||||||
assert!(true);
|
|
||||||
}
|
|
||||||
_ => assert!(false),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
//! vec![0.0, 0.0],
|
//! vec![0.0, 0.0],
|
||||||
//! vec![1.0, 1.0],
|
//! vec![1.0, 1.0],
|
||||||
//! vec![1.0, 1.0],
|
//! vec![1.0, 1.0],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let standard_scaler =
|
//! let standard_scaler =
|
||||||
//! numerical::StandardScaler::fit(&data, numerical::StandardScalerParameters::default())
|
//! numerical::StandardScaler::fit(&data, numerical::StandardScalerParameters::default())
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
//! vec![-1.0, -1.0],
|
//! vec![-1.0, -1.0],
|
||||||
//! vec![1.0, 1.0],
|
//! vec![1.0, 1.0],
|
||||||
//! vec![1.0, 1.0],
|
//! vec![1.0, 1.0],
|
||||||
//! ])
|
//! ]).unwrap()
|
||||||
//! );
|
//! );
|
||||||
//! ```
|
//! ```
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
@@ -172,18 +172,14 @@ where
|
|||||||
T: Number + RealNumber,
|
T: Number + RealNumber,
|
||||||
M: Array2<T>,
|
M: Array2<T>,
|
||||||
{
|
{
|
||||||
if let Some(output_matrix) = columns.first().cloned() {
|
columns.first().cloned().map(|output_matrix| {
|
||||||
return Some(
|
columns
|
||||||
columns
|
.iter()
|
||||||
.iter()
|
.skip(1)
|
||||||
.skip(1)
|
.fold(output_matrix, |current_matrix, new_colum| {
|
||||||
.fold(output_matrix, |current_matrix, new_colum| {
|
current_matrix.h_stack(new_colum)
|
||||||
current_matrix.h_stack(new_colum)
|
})
|
||||||
}),
|
})
|
||||||
);
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -197,15 +193,18 @@ mod tests {
|
|||||||
fn combine_three_columns() {
|
fn combine_three_columns() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
build_matrix_from_columns(vec![
|
build_matrix_from_columns(vec![
|
||||||
DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]),
|
DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]).unwrap(),
|
||||||
DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]),
|
DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]).unwrap(),
|
||||||
DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],])
|
DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]).unwrap()
|
||||||
]),
|
]),
|
||||||
Some(DenseMatrix::from_2d_vec(&vec![
|
Some(
|
||||||
vec![1.0, 2.0, 3.0],
|
DenseMatrix::from_2d_vec(&vec![
|
||||||
vec![1.0, 2.0, 3.0],
|
vec![1.0, 2.0, 3.0],
|
||||||
vec![1.0, 2.0, 3.0]
|
vec![1.0, 2.0, 3.0],
|
||||||
]))
|
vec![1.0, 2.0, 3.0]
|
||||||
|
])
|
||||||
|
.unwrap()
|
||||||
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,21 +286,24 @@ mod tests {
|
|||||||
/// sklearn.
|
/// sklearn.
|
||||||
#[test]
|
#[test]
|
||||||
fn fit_transform_random_values() {
|
fn fit_transform_random_values() {
|
||||||
let transformed_values =
|
let transformed_values = fit_transform_with_default_standard_scaler(
|
||||||
fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[
|
&DenseMatrix::from_2d_array(&[
|
||||||
&[0.1004222429, 0.2194113576, 0.9310663354, 0.3313593793],
|
&[0.1004222429, 0.2194113576, 0.9310663354, 0.3313593793],
|
||||||
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
|
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
|
||||||
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
|
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
|
||||||
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
|
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
|
||||||
]));
|
])
|
||||||
println!("{}", transformed_values);
|
.unwrap(),
|
||||||
|
);
|
||||||
|
println!("{transformed_values}");
|
||||||
assert!(transformed_values.approximate_eq(
|
assert!(transformed_values.approximate_eq(
|
||||||
&DenseMatrix::from_2d_array(&[
|
&DenseMatrix::from_2d_array(&[
|
||||||
&[-1.1154020653, -0.4031985330, 0.9284605204, -0.4271473866],
|
&[-1.1154020653, -0.4031985330, 0.9284605204, -0.4271473866],
|
||||||
&[-0.7615464283, -0.7076698384, -1.1075452562, 1.2632979631],
|
&[-0.7615464283, -0.7076698384, -1.1075452562, 1.2632979631],
|
||||||
&[0.4832504303, -0.6106747444, 1.0630075435, 0.5494084257],
|
&[0.4832504303, -0.6106747444, 1.0630075435, 0.5494084257],
|
||||||
&[1.3936980634, 1.7215431158, -0.8839228078, -1.3855590021],
|
&[1.3936980634, 1.7215431158, -0.8839228078, -1.3855590021],
|
||||||
]),
|
])
|
||||||
|
.unwrap(),
|
||||||
1.0
|
1.0
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
@@ -310,13 +312,10 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn fit_transform_with_zero_variance() {
|
fn fit_transform_with_zero_variance() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[
|
fit_transform_with_default_standard_scaler(
|
||||||
&[1.0],
|
&DenseMatrix::from_2d_array(&[&[1.0], &[1.0], &[1.0], &[1.0]]).unwrap()
|
||||||
&[1.0],
|
),
|
||||||
&[1.0],
|
DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]).unwrap(),
|
||||||
&[1.0]
|
|
||||||
])),
|
|
||||||
DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]),
|
|
||||||
"When scaling values with zero variance, zero is expected as return value"
|
"When scaling values with zero variance, zero is expected as return value"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -331,7 +330,8 @@ mod tests {
|
|||||||
&[1.0, 2.0, 5.0],
|
&[1.0, 2.0, 5.0],
|
||||||
&[1.0, 1.0, 1.0],
|
&[1.0, 1.0, 1.0],
|
||||||
&[1.0, 2.0, 5.0]
|
&[1.0, 2.0, 5.0]
|
||||||
]),
|
])
|
||||||
|
.unwrap(),
|
||||||
StandardScalerParameters::default(),
|
StandardScalerParameters::default(),
|
||||||
),
|
),
|
||||||
Ok(StandardScaler {
|
Ok(StandardScaler {
|
||||||
@@ -354,7 +354,8 @@ mod tests {
|
|||||||
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
|
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
|
||||||
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
|
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
|
||||||
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
|
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
|
||||||
]),
|
])
|
||||||
|
.unwrap(),
|
||||||
StandardScalerParameters::default(),
|
StandardScalerParameters::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -364,17 +365,18 @@ mod tests {
|
|||||||
vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625],
|
vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625],
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(
|
assert!(&DenseMatrix::<f64>::from_2d_vec(&vec![fitted_scaler.stds])
|
||||||
&DenseMatrix::<f64>::from_2d_vec(&vec![fitted_scaler.stds]).approximate_eq(
|
.unwrap()
|
||||||
|
.approximate_eq(
|
||||||
&DenseMatrix::from_2d_array(&[&[
|
&DenseMatrix::from_2d_array(&[&[
|
||||||
0.29426447500954,
|
0.29426447500954,
|
||||||
0.16758497615485,
|
0.16758497615485,
|
||||||
0.20820945786863,
|
0.20820945786863,
|
||||||
0.23329718831165
|
0.23329718831165
|
||||||
],]),
|
],])
|
||||||
|
.unwrap(),
|
||||||
0.00000000000001
|
0.00000000000001
|
||||||
)
|
))
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If `with_std` is set to `false` the values should not be
|
/// If `with_std` is set to `false` the values should not be
|
||||||
@@ -392,8 +394,9 @@ mod tests {
|
|||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
standard_scaler.transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]])),
|
standard_scaler
|
||||||
Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]]))
|
.transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]]).unwrap()),
|
||||||
|
Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]]).unwrap())
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -413,8 +416,8 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
standard_scaler
|
standard_scaler
|
||||||
.transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]])),
|
.transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]]).unwrap()),
|
||||||
Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]]))
|
Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]]).unwrap())
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -433,7 +436,8 @@ mod tests {
|
|||||||
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
|
&[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264],
|
||||||
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
|
&[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046],
|
||||||
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
|
&[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442],
|
||||||
]),
|
])
|
||||||
|
.unwrap(),
|
||||||
StandardScalerParameters::default(),
|
StandardScalerParameters::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -446,17 +450,18 @@ mod tests {
|
|||||||
vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625],
|
vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625],
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(
|
assert!(&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds])
|
||||||
&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds]).approximate_eq(
|
.unwrap()
|
||||||
|
.approximate_eq(
|
||||||
&DenseMatrix::from_2d_array(&[&[
|
&DenseMatrix::from_2d_array(&[&[
|
||||||
0.29426447500954,
|
0.29426447500954,
|
||||||
0.16758497615485,
|
0.16758497615485,
|
||||||
0.20820945786863,
|
0.20820945786863,
|
||||||
0.23329718831165
|
0.23329718831165
|
||||||
],]),
|
],])
|
||||||
|
.unwrap(),
|
||||||
0.00000000000001
|
0.00000000000001
|
||||||
)
|
))
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -206,7 +206,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn from_categories() {
|
fn from_categories() {
|
||||||
let fake_categories: Vec<usize> = vec![1, 2, 3, 4, 5, 3, 5, 3, 1, 2, 4];
|
let fake_categories: Vec<usize> = vec![1, 2, 3, 4, 5, 3, 5, 3, 1, 2, 4];
|
||||||
let it = fake_categories.iter().map(|&a| a);
|
let it = fake_categories.iter().copied();
|
||||||
let enc = CategoryMapper::<usize>::fit_to_iter(it);
|
let enc = CategoryMapper::<usize>::fit_to_iter(it);
|
||||||
let oh_vec: Vec<f64> = match enc.get_one_hot(&1) {
|
let oh_vec: Vec<f64> = match enc.get_one_hot(&1) {
|
||||||
None => panic!("Wrong categories"),
|
None => panic!("Wrong categories"),
|
||||||
@@ -218,8 +218,8 @@ mod tests {
|
|||||||
|
|
||||||
fn build_fake_str_enc<'a>() -> CategoryMapper<&'a str> {
|
fn build_fake_str_enc<'a>() -> CategoryMapper<&'a str> {
|
||||||
let fake_category_pos = vec!["background", "dog", "cat"];
|
let fake_category_pos = vec!["background", "dog", "cat"];
|
||||||
let enc = CategoryMapper::<&str>::from_positional_category_vec(fake_category_pos);
|
|
||||||
enc
|
CategoryMapper::<&str>::from_positional_category_vec(fake_category_pos)
|
||||||
}
|
}
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
@@ -275,7 +275,7 @@ mod tests {
|
|||||||
let lab = enc.invert_one_hot(res).unwrap();
|
let lab = enc.invert_one_hot(res).unwrap();
|
||||||
assert_eq!(lab, "dog");
|
assert_eq!(lab, "dog");
|
||||||
if let Err(e) = enc.invert_one_hot(vec![0.0, 0.0, 0.0]) {
|
if let Err(e) = enc.invert_one_hot(vec![0.0, 0.0, 0.0]) {
|
||||||
let pos_entries = format!("Expected a single positive entry, 0 entires found");
|
let pos_entries = "Expected a single positive entry, 0 entires found".to_string();
|
||||||
assert_eq!(e, Failed::transform(&pos_entries[..]));
|
assert_eq!(e, Failed::transform(&pos_entries[..]));
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
+9
-14
@@ -30,7 +30,7 @@ pub struct CSVDefinition<'a> {
|
|||||||
/// What seperates the fields in your csv-file?
|
/// What seperates the fields in your csv-file?
|
||||||
field_seperator: &'a str,
|
field_seperator: &'a str,
|
||||||
}
|
}
|
||||||
impl<'a> Default for CSVDefinition<'a> {
|
impl Default for CSVDefinition<'_> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
n_rows_header: 1,
|
n_rows_header: 1,
|
||||||
@@ -83,7 +83,7 @@ where
|
|||||||
Matrix: Array2<T>,
|
Matrix: Array2<T>,
|
||||||
{
|
{
|
||||||
let csv_text = read_string_from_source(source)?;
|
let csv_text = read_string_from_source(source)?;
|
||||||
let rows: Vec<Vec<T>> = extract_row_vectors_from_csv_text::<T, RowVector, Matrix>(
|
let rows: Vec<Vec<T>> = extract_row_vectors_from_csv_text(
|
||||||
&csv_text,
|
&csv_text,
|
||||||
&definition,
|
&definition,
|
||||||
detect_row_format(&csv_text, &definition)?,
|
detect_row_format(&csv_text, &definition)?,
|
||||||
@@ -103,12 +103,7 @@ where
|
|||||||
|
|
||||||
/// Given a string containing the contents of a csv file, extract its value
|
/// Given a string containing the contents of a csv file, extract its value
|
||||||
/// into row-vectors.
|
/// into row-vectors.
|
||||||
fn extract_row_vectors_from_csv_text<
|
fn extract_row_vectors_from_csv_text<'a, T: Number + RealNumber + std::str::FromStr>(
|
||||||
'a,
|
|
||||||
T: Number + RealNumber + std::str::FromStr,
|
|
||||||
RowVector: Array1<T>,
|
|
||||||
Matrix: Array2<T>,
|
|
||||||
>(
|
|
||||||
csv_text: &'a str,
|
csv_text: &'a str,
|
||||||
definition: &'a CSVDefinition<'_>,
|
definition: &'a CSVDefinition<'_>,
|
||||||
row_format: CSVRowFormat<'_>,
|
row_format: CSVRowFormat<'_>,
|
||||||
@@ -167,7 +162,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Ensure that a string containing a csv row conforms to a specified row format.
|
/// Ensure that a string containing a csv row conforms to a specified row format.
|
||||||
fn validate_csv_row<'a>(row: &'a str, row_format: &CSVRowFormat<'_>) -> Result<(), ReadingError> {
|
fn validate_csv_row(row: &str, row_format: &CSVRowFormat<'_>) -> Result<(), ReadingError> {
|
||||||
let actual_number_of_fields = row.split(row_format.field_seperator).count();
|
let actual_number_of_fields = row.split(row_format.field_seperator).count();
|
||||||
if row_format.n_fields == actual_number_of_fields {
|
if row_format.n_fields == actual_number_of_fields {
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -208,7 +203,7 @@ where
|
|||||||
match value_string.parse::<T>().ok() {
|
match value_string.parse::<T>().ok() {
|
||||||
Some(value) => Ok(value),
|
Some(value) => Ok(value),
|
||||||
None => Err(ReadingError::InvalidField {
|
None => Err(ReadingError::InvalidField {
|
||||||
msg: format!("Value '{}' could not be read.", value_string,),
|
msg: format!("Value '{value_string}' could not be read.",),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -243,7 +238,8 @@ mod tests {
|
|||||||
&[5.1, 3.5, 1.4, 0.2],
|
&[5.1, 3.5, 1.4, 0.2],
|
||||||
&[4.9, 3.0, 1.4, 0.2],
|
&[4.9, 3.0, 1.4, 0.2],
|
||||||
&[4.7, 3.2, 1.3, 0.2],
|
&[4.7, 3.2, 1.3, 0.2],
|
||||||
]))
|
])
|
||||||
|
.unwrap())
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
@@ -266,7 +262,7 @@ mod tests {
|
|||||||
&[5.1, 3.5, 1.4, 0.2],
|
&[5.1, 3.5, 1.4, 0.2],
|
||||||
&[4.9, 3.0, 1.4, 0.2],
|
&[4.9, 3.0, 1.4, 0.2],
|
||||||
&[4.7, 3.2, 1.3, 0.2],
|
&[4.7, 3.2, 1.3, 0.2],
|
||||||
]))
|
]).unwrap())
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
@@ -305,12 +301,11 @@ mod tests {
|
|||||||
}
|
}
|
||||||
mod extract_row_vectors_from_csv_text {
|
mod extract_row_vectors_from_csv_text {
|
||||||
use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat};
|
use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat};
|
||||||
use crate::linalg::basic::matrix::DenseMatrix;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn read_default_csv() {
|
fn read_default_csv() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
extract_row_vectors_from_csv_text::<f64, Vec<_>, DenseMatrix<_>>(
|
extract_row_vectors_from_csv_text::<f64>(
|
||||||
"column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0",
|
"column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0",
|
||||||
&CSVDefinition::default(),
|
&CSVDefinition::default(),
|
||||||
CSVRowFormat {
|
CSVRowFormat {
|
||||||
|
|||||||
+2
-2
@@ -56,7 +56,7 @@ pub struct Kernels;
|
|||||||
impl Kernels {
|
impl Kernels {
|
||||||
/// Return a default linear
|
/// Return a default linear
|
||||||
pub fn linear() -> LinearKernel {
|
pub fn linear() -> LinearKernel {
|
||||||
LinearKernel::default()
|
LinearKernel
|
||||||
}
|
}
|
||||||
/// Return a default RBF
|
/// Return a default RBF
|
||||||
pub fn rbf() -> RBFKernel {
|
pub fn rbf() -> RBFKernel {
|
||||||
@@ -292,7 +292,7 @@ mod tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.abs();
|
.abs();
|
||||||
|
|
||||||
assert!((4913f64 - result) < std::f64::EPSILON);
|
assert!((4913f64 - result).abs() < f64::EPSILON);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
|
|||||||
+71
-83
@@ -53,7 +53,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y = vec![ -1, -1, -1, -1, -1, -1, -1, -1,
|
//! let y = vec![ -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
//!
|
//!
|
||||||
@@ -322,19 +322,26 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
|
|||||||
let (n, _) = x.shape();
|
let (n, _) = x.shape();
|
||||||
let mut y_hat: Vec<TX> = Array1::zeros(n);
|
let mut y_hat: Vec<TX> = Array1::zeros(n);
|
||||||
|
|
||||||
|
let mut row = Vec::with_capacity(n);
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
let row_pred: TX =
|
row.clear();
|
||||||
self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n));
|
row.extend(x.get_row(i).iterator(0).copied());
|
||||||
|
let row_pred: TX = self.predict_for_row(&row);
|
||||||
y_hat.set(i, row_pred);
|
y_hat.set(i, row_pred);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(y_hat)
|
Ok(y_hat)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn predict_for_row(&self, x: Vec<TX>) -> TX {
|
fn predict_for_row(&self, x: &[TX]) -> TX {
|
||||||
let mut f = self.b.unwrap();
|
let mut f = self.b.unwrap();
|
||||||
|
|
||||||
|
let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
for i in 0..self.instances.as_ref().unwrap().len() {
|
for i in 0..self.instances.as_ref().unwrap().len() {
|
||||||
|
let xj: Vec<_> = self.instances.as_ref().unwrap()[i]
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_f64().unwrap())
|
||||||
|
.collect();
|
||||||
f += self.w.as_ref().unwrap()[i]
|
f += self.w.as_ref().unwrap()[i]
|
||||||
* TX::from(
|
* TX::from(
|
||||||
self.parameters
|
self.parameters
|
||||||
@@ -343,13 +350,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
|
|||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi, &xj)
|
||||||
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&self.instances.as_ref().unwrap()[i]
|
|
||||||
.iter()
|
|
||||||
.map(|e| e.to_f64().unwrap())
|
|
||||||
.collect(),
|
|
||||||
)
|
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -359,8 +360,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
impl<TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
||||||
for SVC<'a, TX, TY, X, Y>
|
for SVC<'_, TX, TY, X, Y>
|
||||||
{
|
{
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two()
|
if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two()
|
||||||
@@ -472,14 +473,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
let tol = self.parameters.tol;
|
let tol = self.parameters.tol;
|
||||||
let good_enough = TX::from_i32(1000).unwrap();
|
let good_enough = TX::from_i32(1000).unwrap();
|
||||||
|
|
||||||
|
let mut x = Vec::with_capacity(n);
|
||||||
for _ in 0..self.parameters.epoch {
|
for _ in 0..self.parameters.epoch {
|
||||||
for i in self.permutate(n) {
|
for i in self.permutate(n) {
|
||||||
self.process(
|
x.clear();
|
||||||
i,
|
x.extend(self.x.get_row(i).iterator(0).take(n).copied());
|
||||||
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
|
self.process(i, &x, *self.y.get(i), &mut cache);
|
||||||
*self.y.get(i),
|
|
||||||
&mut cache,
|
|
||||||
);
|
|
||||||
loop {
|
loop {
|
||||||
self.reprocess(tol, &mut cache);
|
self.reprocess(tol, &mut cache);
|
||||||
self.find_min_max_gradient();
|
self.find_min_max_gradient();
|
||||||
@@ -511,24 +510,17 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
let mut cp = 0;
|
let mut cp = 0;
|
||||||
let mut cn = 0;
|
let mut cn = 0;
|
||||||
|
|
||||||
|
let mut x = Vec::with_capacity(n);
|
||||||
for i in self.permutate(n) {
|
for i in self.permutate(n) {
|
||||||
|
x.clear();
|
||||||
|
x.extend(self.x.get_row(i).iterator(0).take(n).copied());
|
||||||
if *self.y.get(i) == TY::one() && cp < few {
|
if *self.y.get(i) == TY::one() && cp < few {
|
||||||
if self.process(
|
if self.process(i, &x, *self.y.get(i), cache) {
|
||||||
i,
|
|
||||||
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
|
|
||||||
*self.y.get(i),
|
|
||||||
cache,
|
|
||||||
) {
|
|
||||||
cp += 1;
|
cp += 1;
|
||||||
}
|
}
|
||||||
} else if *self.y.get(i) == TY::from(-1).unwrap()
|
} else if *self.y.get(i) == TY::from(-1).unwrap()
|
||||||
&& cn < few
|
&& cn < few
|
||||||
&& self.process(
|
&& self.process(i, &x, *self.y.get(i), cache)
|
||||||
i,
|
|
||||||
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
|
|
||||||
*self.y.get(i),
|
|
||||||
cache,
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
cn += 1;
|
cn += 1;
|
||||||
}
|
}
|
||||||
@@ -539,7 +531,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn process(&mut self, i: usize, x: Vec<TX>, y: TY, cache: &mut Cache<TX, TY, X, Y>) -> bool {
|
fn process(&mut self, i: usize, x: &[TX], y: TY, cache: &mut Cache<TX, TY, X, Y>) -> bool {
|
||||||
for j in 0..self.sv.len() {
|
for j in 0..self.sv.len() {
|
||||||
if self.sv[j].index == i {
|
if self.sv[j].index == i {
|
||||||
return true;
|
return true;
|
||||||
@@ -551,15 +543,14 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
let mut cache_values: Vec<((usize, usize), TX)> = Vec::new();
|
let mut cache_values: Vec<((usize, usize), TX)> = Vec::new();
|
||||||
|
|
||||||
for v in self.sv.iter() {
|
for v in self.sv.iter() {
|
||||||
|
let xi: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
|
let xj: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
let k = self
|
let k = self
|
||||||
.parameters
|
.parameters
|
||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi, &xj)
|
||||||
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
)
|
|
||||||
.unwrap();
|
.unwrap();
|
||||||
cache_values.push(((i, v.index), TX::from(k).unwrap()));
|
cache_values.push(((i, v.index), TX::from(k).unwrap()));
|
||||||
g -= v.alpha * k;
|
g -= v.alpha * k;
|
||||||
@@ -578,7 +569,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
cache.insert(v.0, v.1.to_f64().unwrap());
|
cache.insert(v.0, v.1.to_f64().unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
let x_f64 = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
let x_f64: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
let k_v = self
|
let k_v = self
|
||||||
.parameters
|
.parameters
|
||||||
.kernel
|
.kernel
|
||||||
@@ -701,8 +692,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
let km = sv1.k;
|
let km = sv1.k;
|
||||||
let gm = sv1.grad;
|
let gm = sv1.grad;
|
||||||
let mut best = 0f64;
|
let mut best = 0f64;
|
||||||
|
let xi: Vec<_> = sv1.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
for i in 0..self.sv.len() {
|
for i in 0..self.sv.len() {
|
||||||
let v = &self.sv[i];
|
let v = &self.sv[i];
|
||||||
|
let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
let z = v.grad - gm;
|
let z = v.grad - gm;
|
||||||
let k = cache.get(
|
let k = cache.get(
|
||||||
sv1,
|
sv1,
|
||||||
@@ -711,10 +704,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi, &xj)
|
||||||
&sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
)
|
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
let mut curv = km + v.k - 2f64 * k;
|
let mut curv = km + v.k - 2f64 * k;
|
||||||
@@ -732,6 +722,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let xi: Vec<_> = self.sv[idx_1]
|
||||||
|
.x
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_f64().unwrap())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
idx_2.map(|idx_2| {
|
idx_2.map(|idx_2| {
|
||||||
(
|
(
|
||||||
idx_1,
|
idx_1,
|
||||||
@@ -742,16 +738,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(
|
||||||
&self.sv[idx_1]
|
&xi,
|
||||||
.x
|
|
||||||
.iter()
|
|
||||||
.map(|e| e.to_f64().unwrap())
|
|
||||||
.collect(),
|
|
||||||
&self.sv[idx_2]
|
&self.sv[idx_2]
|
||||||
.x
|
.x
|
||||||
.iter()
|
.iter()
|
||||||
.map(|e| e.to_f64().unwrap())
|
.map(|e| e.to_f64().unwrap())
|
||||||
.collect(),
|
.collect::<Vec<_>>(),
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}),
|
}),
|
||||||
@@ -765,8 +757,11 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
let km = sv2.k;
|
let km = sv2.k;
|
||||||
let gm = sv2.grad;
|
let gm = sv2.grad;
|
||||||
let mut best = 0f64;
|
let mut best = 0f64;
|
||||||
|
|
||||||
|
let xi: Vec<_> = sv2.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
for i in 0..self.sv.len() {
|
for i in 0..self.sv.len() {
|
||||||
let v = &self.sv[i];
|
let v = &self.sv[i];
|
||||||
|
let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
let z = gm - v.grad;
|
let z = gm - v.grad;
|
||||||
let k = cache.get(
|
let k = cache.get(
|
||||||
sv2,
|
sv2,
|
||||||
@@ -775,10 +770,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi, &xj)
|
||||||
&sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
)
|
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
let mut curv = km + v.k - 2f64 * k;
|
let mut curv = km + v.k - 2f64 * k;
|
||||||
@@ -797,6 +789,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let xj: Vec<_> = self.sv[idx_2]
|
||||||
|
.x
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_f64().unwrap())
|
||||||
|
.collect();
|
||||||
|
|
||||||
idx_1.map(|idx_1| {
|
idx_1.map(|idx_1| {
|
||||||
(
|
(
|
||||||
idx_1,
|
idx_1,
|
||||||
@@ -811,12 +809,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.x
|
.x
|
||||||
.iter()
|
.iter()
|
||||||
.map(|e| e.to_f64().unwrap())
|
.map(|e| e.to_f64().unwrap())
|
||||||
.collect(),
|
.collect::<Vec<_>>(),
|
||||||
&self.sv[idx_2]
|
&xj,
|
||||||
.x
|
|
||||||
.iter()
|
|
||||||
.map(|e| e.to_f64().unwrap())
|
|
||||||
.collect(),
|
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}),
|
}),
|
||||||
@@ -835,12 +829,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.x
|
.x
|
||||||
.iter()
|
.iter()
|
||||||
.map(|e| e.to_f64().unwrap())
|
.map(|e| e.to_f64().unwrap())
|
||||||
.collect(),
|
.collect::<Vec<_>>(),
|
||||||
&self.sv[idx_2]
|
&self.sv[idx_2]
|
||||||
.x
|
.x
|
||||||
.iter()
|
.iter()
|
||||||
.map(|e| e.to_f64().unwrap())
|
.map(|e| e.to_f64().unwrap())
|
||||||
.collect(),
|
.collect::<Vec<_>>(),
|
||||||
)
|
)
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
)),
|
)),
|
||||||
@@ -895,7 +889,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
self.sv[v1].alpha -= step.to_f64().unwrap();
|
self.sv[v1].alpha -= step.to_f64().unwrap();
|
||||||
self.sv[v2].alpha += step.to_f64().unwrap();
|
self.sv[v2].alpha += step.to_f64().unwrap();
|
||||||
|
|
||||||
|
let xi_v1: Vec<_> = self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
|
let xi_v2: Vec<_> = self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
for i in 0..self.sv.len() {
|
for i in 0..self.sv.len() {
|
||||||
|
let xj: Vec<_> = self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
let k2 = cache.get(
|
let k2 = cache.get(
|
||||||
&self.sv[v2],
|
&self.sv[v2],
|
||||||
&self.sv[i],
|
&self.sv[i],
|
||||||
@@ -903,10 +900,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi_v2, &xj)
|
||||||
&self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
)
|
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
let k1 = cache.get(
|
let k1 = cache.get(
|
||||||
@@ -916,10 +910,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
|||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi_v1, &xj)
|
||||||
&self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
)
|
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1);
|
self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1);
|
||||||
@@ -966,7 +957,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<i32> = vec![
|
let y: Vec<i32> = vec![
|
||||||
-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
@@ -983,11 +975,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
let acc = accuracy(&y, &(y_hat.iter().map(|e| e.to_i32().unwrap()).collect()));
|
let acc = accuracy(&y, &(y_hat.iter().map(|e| e.to_i32().unwrap()).collect()));
|
||||||
|
|
||||||
assert!(
|
assert!(acc >= 0.9, "accuracy ({acc}) is not larger or equal to 0.9");
|
||||||
acc >= 0.9,
|
|
||||||
"accuracy ({}) is not larger or equal to 0.9",
|
|
||||||
acc
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -996,7 +984,8 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn svc_fit_decision_function() {
|
fn svc_fit_decision_function() {
|
||||||
let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]);
|
let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let x2 = DenseMatrix::from_2d_array(&[
|
let x2 = DenseMatrix::from_2d_array(&[
|
||||||
&[3.0, 3.0],
|
&[3.0, 3.0],
|
||||||
@@ -1005,7 +994,8 @@ mod tests {
|
|||||||
&[10.0, 10.0],
|
&[10.0, 10.0],
|
||||||
&[1.0, 1.0],
|
&[1.0, 1.0],
|
||||||
&[0.0, 0.0],
|
&[0.0, 0.0],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<i32> = vec![-1, -1, 1, 1];
|
let y: Vec<i32> = vec![-1, -1, 1, 1];
|
||||||
|
|
||||||
@@ -1058,7 +1048,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<i32> = vec![
|
let y: Vec<i32> = vec![
|
||||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
@@ -1076,11 +1067,7 @@ mod tests {
|
|||||||
|
|
||||||
let acc = accuracy(&y, &(y_hat.iter().map(|e| e.to_i32().unwrap()).collect()));
|
let acc = accuracy(&y, &(y_hat.iter().map(|e| e.to_i32().unwrap()).collect()));
|
||||||
|
|
||||||
assert!(
|
assert!(acc >= 0.9, "accuracy ({acc}) is not larger or equal to 0.9");
|
||||||
acc >= 0.9,
|
|
||||||
"accuracy ({}) is not larger or equal to 0.9",
|
|
||||||
acc
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
@@ -1111,7 +1098,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<i32> = vec![
|
let y: Vec<i32> = vec![
|
||||||
-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
@@ -1122,7 +1110,7 @@ mod tests {
|
|||||||
let svc = SVC::fit(&x, &y, &params).unwrap();
|
let svc = SVC::fit(&x, &y, &params).unwrap();
|
||||||
|
|
||||||
// serialization
|
// serialization
|
||||||
let deserialized_svc: SVC<f64, i32, _, _> =
|
let deserialized_svc: SVC<'_, f64, i32, _, _> =
|
||||||
serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap();
|
serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap();
|
||||||
|
|
||||||
assert_eq!(svc, deserialized_svc);
|
assert_eq!(svc, deserialized_svc);
|
||||||
|
|||||||
+16
-16
@@ -44,7 +44,7 @@
|
|||||||
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//!
|
//!
|
||||||
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
|
||||||
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
|
||||||
@@ -248,19 +248,20 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
|
|||||||
|
|
||||||
let mut y_hat: Vec<T> = Vec::<T>::zeros(n);
|
let mut y_hat: Vec<T> = Vec::<T>::zeros(n);
|
||||||
|
|
||||||
|
let mut x_i = Vec::with_capacity(n);
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
y_hat.set(
|
x_i.clear();
|
||||||
i,
|
x_i.extend(x.get_row(i).iterator(0).copied());
|
||||||
self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)),
|
y_hat.set(i, self.predict_for_row(&x_i));
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(y_hat)
|
Ok(y_hat)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn predict_for_row(&self, x: Vec<T>) -> T {
|
pub(crate) fn predict_for_row(&self, x: &[T]) -> T {
|
||||||
let mut f = self.b;
|
let mut f = self.b;
|
||||||
|
|
||||||
|
let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||||
for i in 0..self.instances.as_ref().unwrap().len() {
|
for i in 0..self.instances.as_ref().unwrap().len() {
|
||||||
f += self.w.as_ref().unwrap()[i]
|
f += self.w.as_ref().unwrap()[i]
|
||||||
* T::from(
|
* T::from(
|
||||||
@@ -270,10 +271,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
|
|||||||
.kernel
|
.kernel
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.apply(
|
.apply(&xi, &self.instances.as_ref().unwrap()[i])
|
||||||
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
|
||||||
&self.instances.as_ref().unwrap()[i],
|
|
||||||
)
|
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
@@ -283,8 +281,8 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> PartialEq
|
impl<T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> PartialEq
|
||||||
for SVR<'a, T, X, Y>
|
for SVR<'_, T, X, Y>
|
||||||
{
|
{
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
if (self.b - other.b).abs() > T::epsilon() * T::two()
|
if (self.b - other.b).abs() > T::epsilon() * T::two()
|
||||||
@@ -642,7 +640,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
@@ -662,7 +661,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let t = mean_squared_error(&y_hat, &y);
|
let t = mean_squared_error(&y_hat, &y);
|
||||||
println!("{:?}", t);
|
println!("{t:?}");
|
||||||
assert!(t < 2.5);
|
assert!(t < 2.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -690,7 +689,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
@@ -702,7 +702,7 @@ mod tests {
|
|||||||
|
|
||||||
let svr = SVR::fit(&x, &y, &params).unwrap();
|
let svr = SVR::fit(&x, &y, &params).unwrap();
|
||||||
|
|
||||||
let deserialized_svr: SVR<f64, DenseMatrix<f64>, _> =
|
let deserialized_svr: SVR<'_, f64, DenseMatrix<f64>, _> =
|
||||||
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
|
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
|
||||||
|
|
||||||
assert_eq!(svr, deserialized_svr);
|
assert_eq!(svr, deserialized_svr);
|
||||||
|
|||||||
@@ -48,7 +48,7 @@
|
|||||||
//! &[4.9, 2.4, 3.3, 1.0],
|
//! &[4.9, 2.4, 3.3, 1.0],
|
||||||
//! &[6.6, 2.9, 4.6, 1.3],
|
//! &[6.6, 2.9, 4.6, 1.3],
|
||||||
//! &[5.2, 2.7, 3.9, 1.4],
|
//! &[5.2, 2.7, 3.9, 1.4],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y = vec![ 0, 0, 0, 0, 0, 0, 0, 0,
|
//! let y = vec![ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
//! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
//!
|
//!
|
||||||
@@ -77,7 +77,9 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use crate::api::{Predictor, SupervisedEstimator};
|
use crate::api::{Predictor, SupervisedEstimator};
|
||||||
use crate::error::Failed;
|
use crate::error::Failed;
|
||||||
|
use crate::linalg::basic::arrays::MutArray;
|
||||||
use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1};
|
use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1};
|
||||||
|
use crate::linalg::basic::matrix::DenseMatrix;
|
||||||
use crate::numbers::basenum::Number;
|
use crate::numbers::basenum::Number;
|
||||||
use crate::rand_custom::get_rng_impl;
|
use crate::rand_custom::get_rng_impl;
|
||||||
|
|
||||||
@@ -116,6 +118,7 @@ pub struct DecisionTreeClassifier<
|
|||||||
num_classes: usize,
|
num_classes: usize,
|
||||||
classes: Vec<TY>,
|
classes: Vec<TY>,
|
||||||
depth: u16,
|
depth: u16,
|
||||||
|
num_features: usize,
|
||||||
_phantom_tx: PhantomData<TX>,
|
_phantom_tx: PhantomData<TX>,
|
||||||
_phantom_x: PhantomData<X>,
|
_phantom_x: PhantomData<X>,
|
||||||
_phantom_y: PhantomData<Y>,
|
_phantom_y: PhantomData<Y>,
|
||||||
@@ -137,16 +140,17 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
self.classes.as_ref()
|
self.classes.as_ref()
|
||||||
}
|
}
|
||||||
/// Get depth of tree
|
/// Get depth of tree
|
||||||
fn depth(&self) -> u16 {
|
pub fn depth(&self) -> u16 {
|
||||||
self.depth
|
self.depth
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The function to measure the quality of a split.
|
/// The function to measure the quality of a split.
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub enum SplitCriterion {
|
pub enum SplitCriterion {
|
||||||
/// [Gini index](../decision_tree_classifier/index.html)
|
/// [Gini index](../decision_tree_classifier/index.html)
|
||||||
|
#[default]
|
||||||
Gini,
|
Gini,
|
||||||
/// [Entropy](../decision_tree_classifier/index.html)
|
/// [Entropy](../decision_tree_classifier/index.html)
|
||||||
Entropy,
|
Entropy,
|
||||||
@@ -154,21 +158,17 @@ pub enum SplitCriterion {
|
|||||||
ClassificationError,
|
ClassificationError,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for SplitCriterion {
|
|
||||||
fn default() -> Self {
|
|
||||||
SplitCriterion::Gini
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct Node {
|
struct Node {
|
||||||
output: usize,
|
output: usize,
|
||||||
|
n_node_samples: usize,
|
||||||
split_feature: usize,
|
split_feature: usize,
|
||||||
split_value: Option<f64>,
|
split_value: Option<f64>,
|
||||||
split_score: Option<f64>,
|
split_score: Option<f64>,
|
||||||
true_child: Option<usize>,
|
true_child: Option<usize>,
|
||||||
false_child: Option<usize>,
|
false_child: Option<usize>,
|
||||||
|
impurity: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>> PartialEq
|
||||||
@@ -199,12 +199,12 @@ impl PartialEq for Node {
|
|||||||
self.output == other.output
|
self.output == other.output
|
||||||
&& self.split_feature == other.split_feature
|
&& self.split_feature == other.split_feature
|
||||||
&& match (self.split_value, other.split_value) {
|
&& match (self.split_value, other.split_value) {
|
||||||
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON,
|
(Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
|
||||||
(None, None) => true,
|
(None, None) => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
&& match (self.split_score, other.split_score) {
|
&& match (self.split_score, other.split_score) {
|
||||||
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON,
|
(Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
|
||||||
(None, None) => true,
|
(None, None) => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
@@ -405,14 +405,16 @@ impl Default for DecisionTreeClassifierSearchParameters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Node {
|
impl Node {
|
||||||
fn new(output: usize) -> Self {
|
fn new(output: usize, n_node_samples: usize) -> Self {
|
||||||
Node {
|
Node {
|
||||||
output,
|
output,
|
||||||
|
n_node_samples,
|
||||||
split_feature: 0,
|
split_feature: 0,
|
||||||
split_value: Option::None,
|
split_value: Option::None,
|
||||||
split_score: Option::None,
|
split_score: Option::None,
|
||||||
true_child: Option::None,
|
true_child: Option::None,
|
||||||
false_child: Option::None,
|
false_child: Option::None,
|
||||||
|
impurity: Option::None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -512,6 +514,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
num_classes: 0usize,
|
num_classes: 0usize,
|
||||||
classes: vec![],
|
classes: vec![],
|
||||||
depth: 0u16,
|
depth: 0u16,
|
||||||
|
num_features: 0usize,
|
||||||
_phantom_tx: PhantomData,
|
_phantom_tx: PhantomData,
|
||||||
_phantom_x: PhantomData,
|
_phantom_x: PhantomData,
|
||||||
_phantom_y: PhantomData,
|
_phantom_y: PhantomData,
|
||||||
@@ -543,6 +546,10 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
parameters: DecisionTreeClassifierParameters,
|
parameters: DecisionTreeClassifierParameters,
|
||||||
) -> Result<DecisionTreeClassifier<TX, TY, X, Y>, Failed> {
|
) -> Result<DecisionTreeClassifier<TX, TY, X, Y>, Failed> {
|
||||||
let (x_nrows, num_attributes) = x.shape();
|
let (x_nrows, num_attributes) = x.shape();
|
||||||
|
if x_nrows != y.shape() {
|
||||||
|
return Err(Failed::fit("Size of x should equal size of y"));
|
||||||
|
}
|
||||||
|
|
||||||
let samples = vec![1; x_nrows];
|
let samples = vec![1; x_nrows];
|
||||||
DecisionTreeClassifier::fit_weak_learner(x, y, samples, num_attributes, parameters)
|
DecisionTreeClassifier::fit_weak_learner(x, y, samples, num_attributes, parameters)
|
||||||
}
|
}
|
||||||
@@ -560,8 +567,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
let k = classes.len();
|
let k = classes.len();
|
||||||
if k < 2 {
|
if k < 2 {
|
||||||
return Err(Failed::fit(&format!(
|
return Err(Failed::fit(&format!(
|
||||||
"Incorrect number of classes: {}. Should be >= 2.",
|
"Incorrect number of classes: {k}. Should be >= 2."
|
||||||
k
|
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -580,7 +586,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
count[yi[i]] += samples[i];
|
count[yi[i]] += samples[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
let root = Node::new(which_max(&count));
|
let root = Node::new(which_max(&count), y_ncols);
|
||||||
change_nodes.push(root);
|
change_nodes.push(root);
|
||||||
let mut order: Vec<Vec<usize>> = Vec::new();
|
let mut order: Vec<Vec<usize>> = Vec::new();
|
||||||
|
|
||||||
@@ -595,6 +601,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
num_classes: k,
|
num_classes: k,
|
||||||
classes,
|
classes,
|
||||||
depth: 0u16,
|
depth: 0u16,
|
||||||
|
num_features: num_attributes,
|
||||||
_phantom_tx: PhantomData,
|
_phantom_tx: PhantomData,
|
||||||
_phantom_x: PhantomData,
|
_phantom_x: PhantomData,
|
||||||
_phantom_y: PhantomData,
|
_phantom_y: PhantomData,
|
||||||
@@ -608,7 +615,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
visitor_queue.push_back(visitor);
|
visitor_queue.push_back(visitor);
|
||||||
}
|
}
|
||||||
|
|
||||||
while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) {
|
while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) {
|
||||||
match visitor_queue.pop_front() {
|
match visitor_queue.pop_front() {
|
||||||
Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng),
|
Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng),
|
||||||
None => break,
|
None => break,
|
||||||
@@ -645,7 +652,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
if node.true_child.is_none() && node.false_child.is_none() {
|
if node.true_child.is_none() && node.false_child.is_none() {
|
||||||
result = node.output;
|
result = node.output;
|
||||||
} else if x.get((row, node.split_feature)).to_f64().unwrap()
|
} else if x.get((row, node.split_feature)).to_f64().unwrap()
|
||||||
<= node.split_value.unwrap_or(std::f64::NAN)
|
<= node.split_value.unwrap_or(f64::NAN)
|
||||||
{
|
{
|
||||||
queue.push_back(node.true_child.unwrap());
|
queue.push_back(node.true_child.unwrap());
|
||||||
} else {
|
} else {
|
||||||
@@ -680,16 +687,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_pure {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
let n = visitor.samples.iter().sum();
|
let n = visitor.samples.iter().sum();
|
||||||
|
|
||||||
if n <= self.parameters().min_samples_split {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut count = vec![0; self.num_classes];
|
let mut count = vec![0; self.num_classes];
|
||||||
let mut false_count = vec![0; self.num_classes];
|
let mut false_count = vec![0; self.num_classes];
|
||||||
for i in 0..n_rows {
|
for i in 0..n_rows {
|
||||||
@@ -698,7 +696,15 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let parent_impurity = impurity(&self.parameters().criterion, &count, n);
|
self.nodes[visitor.node].impurity = Some(impurity(&self.parameters().criterion, &count, n));
|
||||||
|
|
||||||
|
if is_pure {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if n <= self.parameters().min_samples_split {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
let mut variables = (0..n_attr).collect::<Vec<_>>();
|
let mut variables = (0..n_attr).collect::<Vec<_>>();
|
||||||
|
|
||||||
@@ -707,14 +713,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
}
|
}
|
||||||
|
|
||||||
for variable in variables.iter().take(mtry) {
|
for variable in variables.iter().take(mtry) {
|
||||||
self.find_best_split(
|
self.find_best_split(visitor, n, &count, &mut false_count, *variable);
|
||||||
visitor,
|
|
||||||
n,
|
|
||||||
&count,
|
|
||||||
&mut false_count,
|
|
||||||
parent_impurity,
|
|
||||||
*variable,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.nodes()[visitor.node].split_score.is_some()
|
self.nodes()[visitor.node].split_score.is_some()
|
||||||
@@ -726,7 +725,6 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
n: usize,
|
n: usize,
|
||||||
count: &[usize],
|
count: &[usize],
|
||||||
false_count: &mut [usize],
|
false_count: &mut [usize],
|
||||||
parent_impurity: f64,
|
|
||||||
j: usize,
|
j: usize,
|
||||||
) {
|
) {
|
||||||
let mut true_count = vec![0; self.num_classes];
|
let mut true_count = vec![0; self.num_classes];
|
||||||
@@ -762,6 +760,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
|
|
||||||
let true_label = which_max(&true_count);
|
let true_label = which_max(&true_count);
|
||||||
let false_label = which_max(false_count);
|
let false_label = which_max(false_count);
|
||||||
|
let parent_impurity = self.nodes()[visitor.node].impurity.unwrap();
|
||||||
let gain = parent_impurity
|
let gain = parent_impurity
|
||||||
- tc as f64 / n as f64
|
- tc as f64 / n as f64
|
||||||
* impurity(&self.parameters().criterion, &true_count, tc)
|
* impurity(&self.parameters().criterion, &true_count, tc)
|
||||||
@@ -806,9 +805,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
.get((i, self.nodes()[visitor.node].split_feature))
|
.get((i, self.nodes()[visitor.node].split_feature))
|
||||||
.to_f64()
|
.to_f64()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
<= self.nodes()[visitor.node]
|
<= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN)
|
||||||
.split_value
|
|
||||||
.unwrap_or(std::f64::NAN)
|
|
||||||
{
|
{
|
||||||
*true_sample = visitor.samples[i];
|
*true_sample = visitor.samples[i];
|
||||||
tc += *true_sample;
|
tc += *true_sample;
|
||||||
@@ -829,9 +826,9 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
|
|
||||||
let true_child_idx = self.nodes().len();
|
let true_child_idx = self.nodes().len();
|
||||||
|
|
||||||
self.nodes.push(Node::new(visitor.true_child_output));
|
self.nodes.push(Node::new(visitor.true_child_output, tc));
|
||||||
let false_child_idx = self.nodes().len();
|
let false_child_idx = self.nodes().len();
|
||||||
self.nodes.push(Node::new(visitor.false_child_output));
|
self.nodes.push(Node::new(visitor.false_child_output, fc));
|
||||||
self.nodes[visitor.node].true_child = Some(true_child_idx);
|
self.nodes[visitor.node].true_child = Some(true_child_idx);
|
||||||
self.nodes[visitor.node].false_child = Some(false_child_idx);
|
self.nodes[visitor.node].false_child = Some(false_child_idx);
|
||||||
|
|
||||||
@@ -865,11 +862,104 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
|
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compute feature importances for the fitted tree.
|
||||||
|
pub fn compute_feature_importances(&self, normalize: bool) -> Vec<f64> {
|
||||||
|
let mut importances = vec![0f64; self.num_features];
|
||||||
|
|
||||||
|
for node in self.nodes().iter() {
|
||||||
|
if node.true_child.is_none() && node.false_child.is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let left = &self.nodes()[node.true_child.unwrap()];
|
||||||
|
let right = &self.nodes()[node.false_child.unwrap()];
|
||||||
|
|
||||||
|
importances[node.split_feature] += node.n_node_samples as f64 * node.impurity.unwrap()
|
||||||
|
- left.n_node_samples as f64 * left.impurity.unwrap()
|
||||||
|
- right.n_node_samples as f64 * right.impurity.unwrap();
|
||||||
|
}
|
||||||
|
for item in importances.iter_mut() {
|
||||||
|
*item /= self.nodes()[0].n_node_samples as f64;
|
||||||
|
}
|
||||||
|
if normalize {
|
||||||
|
let sum = importances.iter().sum::<f64>();
|
||||||
|
for importance in importances.iter_mut() {
|
||||||
|
*importance /= sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
importances
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Predict class probabilities for the input samples.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `x` - The input samples as a matrix where each row is a sample and each column is a feature.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A `Result` containing a `DenseMatrix<f64>` where each row corresponds to a sample and each column
|
||||||
|
/// corresponds to a class. The values represent the probability of the sample belonging to each class.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if at least one row prediction process fails.
|
||||||
|
pub fn predict_proba(&self, x: &X) -> Result<DenseMatrix<f64>, Failed> {
|
||||||
|
let (n_samples, _) = x.shape();
|
||||||
|
let n_classes = self.classes().len();
|
||||||
|
let mut result = DenseMatrix::<f64>::zeros(n_samples, n_classes);
|
||||||
|
|
||||||
|
for i in 0..n_samples {
|
||||||
|
let probs = self.predict_proba_for_row(x, i)?;
|
||||||
|
for (j, &prob) in probs.iter().enumerate() {
|
||||||
|
result.set((i, j), prob);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Predict class probabilities for a single input sample.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `x` - The input matrix containing all samples.
|
||||||
|
/// * `row` - The index of the row in `x` for which to predict probabilities.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A vector of probabilities, one for each class, representing the probability
|
||||||
|
/// of the input sample belonging to each class.
|
||||||
|
fn predict_proba_for_row(&self, x: &X, row: usize) -> Result<Vec<f64>, Failed> {
|
||||||
|
let mut node = 0;
|
||||||
|
|
||||||
|
while let Some(current_node) = self.nodes().get(node) {
|
||||||
|
if current_node.true_child.is_none() && current_node.false_child.is_none() {
|
||||||
|
// Leaf node reached
|
||||||
|
let mut probs = vec![0.0; self.classes().len()];
|
||||||
|
probs[current_node.output] = 1.0;
|
||||||
|
return Ok(probs);
|
||||||
|
}
|
||||||
|
|
||||||
|
let split_feature = current_node.split_feature;
|
||||||
|
let split_value = current_node.split_value.unwrap_or(f64::NAN);
|
||||||
|
|
||||||
|
if x.get((row, split_feature)).to_f64().unwrap() <= split_value {
|
||||||
|
node = current_node.true_child.unwrap();
|
||||||
|
} else {
|
||||||
|
node = current_node.false_child.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should never happen if the tree is properly constructed
|
||||||
|
Err(Failed::predict("Nodes iteration did not reach leaf"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::linalg::basic::arrays::Array;
|
||||||
use crate::linalg::basic::matrix::DenseMatrix;
|
use crate::linalg::basic::matrix::DenseMatrix;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -901,19 +991,62 @@ mod tests {
|
|||||||
)]
|
)]
|
||||||
#[test]
|
#[test]
|
||||||
fn gini_impurity() {
|
fn gini_impurity() {
|
||||||
|
assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < f64::EPSILON);
|
||||||
assert!(
|
assert!(
|
||||||
(impurity(&SplitCriterion::Gini, &vec![7, 3], 10) - 0.42).abs() < std::f64::EPSILON
|
(impurity(&SplitCriterion::Entropy, &[7, 3], 10) - 0.8812908992306927).abs()
|
||||||
|
< f64::EPSILON
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
(impurity(&SplitCriterion::Entropy, &vec![7, 3], 10) - 0.8812908992306927).abs()
|
(impurity(&SplitCriterion::ClassificationError, &[7, 3], 10) - 0.3).abs()
|
||||||
< std::f64::EPSILON
|
< f64::EPSILON
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
(impurity(&SplitCriterion::ClassificationError, &vec![7, 3], 10) - 0.3).abs()
|
|
||||||
< std::f64::EPSILON
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn test_predict_proba() {
|
||||||
|
let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
||||||
|
&[5.1, 3.5, 1.4, 0.2],
|
||||||
|
&[4.9, 3.0, 1.4, 0.2],
|
||||||
|
&[4.7, 3.2, 1.3, 0.2],
|
||||||
|
&[4.6, 3.1, 1.5, 0.2],
|
||||||
|
&[5.0, 3.6, 1.4, 0.2],
|
||||||
|
&[7.0, 3.2, 4.7, 1.4],
|
||||||
|
&[6.4, 3.2, 4.5, 1.5],
|
||||||
|
&[6.9, 3.1, 4.9, 1.5],
|
||||||
|
&[5.5, 2.3, 4.0, 1.3],
|
||||||
|
&[6.5, 2.8, 4.6, 1.5],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let y: Vec<usize> = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
|
let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
let probabilities = tree.predict_proba(&x).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(probabilities.shape(), (10, 2));
|
||||||
|
|
||||||
|
for row in 0..10 {
|
||||||
|
let row_sum: f64 = probabilities.get_row(row).sum();
|
||||||
|
assert!(
|
||||||
|
(row_sum - 1.0).abs() < 1e-6,
|
||||||
|
"Row probabilities should sum to 1"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the first 5 samples have higher probability for class 0
|
||||||
|
for i in 0..5 {
|
||||||
|
assert!(probabilities.get((i, 0)) > probabilities.get((i, 1)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the last 5 samples have higher probability for class 1
|
||||||
|
for i in 5..10 {
|
||||||
|
assert!(probabilities.get((i, 1)) > probabilities.get((i, 0)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
wasm_bindgen_test::wasm_bindgen_test
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
@@ -942,7 +1075,8 @@ mod tests {
|
|||||||
&[4.9, 2.4, 3.3, 1.0],
|
&[4.9, 2.4, 3.3, 1.0],
|
||||||
&[6.6, 2.9, 4.6, 1.3],
|
&[6.6, 2.9, 4.6, 1.3],
|
||||||
&[5.2, 2.7, 3.9, 1.4],
|
&[5.2, 2.7, 3.9, 1.4],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -971,6 +1105,17 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_random_matrix_with_wrong_rownum() {
|
||||||
|
let x_rand: DenseMatrix<f64> = DenseMatrix::<f64>::rand(21, 200);
|
||||||
|
|
||||||
|
let y: Vec<u32> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
||||||
|
|
||||||
|
let fail = DecisionTreeClassifier::fit(&x_rand, &y, Default::default());
|
||||||
|
|
||||||
|
assert!(fail.is_err());
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
wasm_bindgen_test::wasm_bindgen_test
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
@@ -998,7 +1143,8 @@ mod tests {
|
|||||||
&[0., 0., 1., 1.],
|
&[0., 0., 1., 1.],
|
||||||
&[0., 0., 0., 0.],
|
&[0., 0., 0., 0.],
|
||||||
&[0., 0., 0., 1.],
|
&[0., 0., 0., 1.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<u32> = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
|
let y: Vec<u32> = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1009,6 +1155,43 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_compute_feature_importances() {
|
||||||
|
let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
|
||||||
|
&[1., 1., 1., 0.],
|
||||||
|
&[1., 1., 1., 0.],
|
||||||
|
&[1., 1., 1., 1.],
|
||||||
|
&[1., 1., 0., 0.],
|
||||||
|
&[1., 1., 0., 1.],
|
||||||
|
&[1., 0., 1., 0.],
|
||||||
|
&[1., 0., 1., 0.],
|
||||||
|
&[1., 0., 1., 1.],
|
||||||
|
&[1., 0., 0., 0.],
|
||||||
|
&[1., 0., 0., 1.],
|
||||||
|
&[0., 1., 1., 0.],
|
||||||
|
&[0., 1., 1., 0.],
|
||||||
|
&[0., 1., 1., 1.],
|
||||||
|
&[0., 1., 0., 0.],
|
||||||
|
&[0., 1., 0., 1.],
|
||||||
|
&[0., 0., 1., 0.],
|
||||||
|
&[0., 0., 1., 0.],
|
||||||
|
&[0., 0., 1., 1.],
|
||||||
|
&[0., 0., 0., 0.],
|
||||||
|
&[0., 0., 0., 1.],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let y: Vec<u32> = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
|
||||||
|
let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
tree.compute_feature_importances(false),
|
||||||
|
vec![0., 0., 0.21333333333333332, 0.26666666666666666]
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
tree.compute_feature_importances(true),
|
||||||
|
vec![0., 0., 0.4444444444444444, 0.5555555555555556]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
wasm_bindgen_test::wasm_bindgen_test
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
@@ -1037,7 +1220,8 @@ mod tests {
|
|||||||
&[0., 0., 1., 1.],
|
&[0., 0., 1., 1.],
|
||||||
&[0., 0., 0., 0.],
|
&[0., 0., 0., 0.],
|
||||||
&[0., 0., 0., 1.],
|
&[0., 0., 0., 1.],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
|
let y = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0];
|
||||||
|
|
||||||
let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
|
let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|||||||
@@ -18,7 +18,6 @@
|
|||||||
//! Example:
|
//! Example:
|
||||||
//!
|
//!
|
||||||
//! ```
|
//! ```
|
||||||
//! use rand::thread_rng;
|
|
||||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||||
//! use smartcore::tree::decision_tree_regressor::*;
|
//! use smartcore::tree::decision_tree_regressor::*;
|
||||||
//!
|
//!
|
||||||
@@ -40,7 +39,7 @@
|
|||||||
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
//! ]);
|
//! ]).unwrap();
|
||||||
//! let y: Vec<f64> = vec![
|
//! let y: Vec<f64> = vec![
|
||||||
//! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0,
|
//! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0,
|
||||||
//! 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9,
|
//! 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9,
|
||||||
@@ -312,15 +311,15 @@ impl Node {
|
|||||||
|
|
||||||
impl PartialEq for Node {
|
impl PartialEq for Node {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
(self.output - other.output).abs() < std::f64::EPSILON
|
(self.output - other.output).abs() < f64::EPSILON
|
||||||
&& self.split_feature == other.split_feature
|
&& self.split_feature == other.split_feature
|
||||||
&& match (self.split_value, other.split_value) {
|
&& match (self.split_value, other.split_value) {
|
||||||
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON,
|
(Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
|
||||||
(None, None) => true,
|
(None, None) => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
&& match (self.split_score, other.split_score) {
|
&& match (self.split_score, other.split_score) {
|
||||||
(Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON,
|
(Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
|
||||||
(None, None) => true,
|
(None, None) => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
@@ -422,6 +421,10 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
parameters: DecisionTreeRegressorParameters,
|
parameters: DecisionTreeRegressorParameters,
|
||||||
) -> Result<DecisionTreeRegressor<TX, TY, X, Y>, Failed> {
|
) -> Result<DecisionTreeRegressor<TX, TY, X, Y>, Failed> {
|
||||||
let (x_nrows, num_attributes) = x.shape();
|
let (x_nrows, num_attributes) = x.shape();
|
||||||
|
if x_nrows != y.shape() {
|
||||||
|
return Err(Failed::fit("Size of x should equal size of y"));
|
||||||
|
}
|
||||||
|
|
||||||
let samples = vec![1; x_nrows];
|
let samples = vec![1; x_nrows];
|
||||||
DecisionTreeRegressor::fit_weak_learner(x, y, samples, num_attributes, parameters)
|
DecisionTreeRegressor::fit_weak_learner(x, y, samples, num_attributes, parameters)
|
||||||
}
|
}
|
||||||
@@ -475,7 +478,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
visitor_queue.push_back(visitor);
|
visitor_queue.push_back(visitor);
|
||||||
}
|
}
|
||||||
|
|
||||||
while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) {
|
while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) {
|
||||||
match visitor_queue.pop_front() {
|
match visitor_queue.pop_front() {
|
||||||
Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng),
|
Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng),
|
||||||
None => break,
|
None => break,
|
||||||
@@ -512,7 +515,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
if node.true_child.is_none() && node.false_child.is_none() {
|
if node.true_child.is_none() && node.false_child.is_none() {
|
||||||
result = node.output;
|
result = node.output;
|
||||||
} else if x.get((row, node.split_feature)).to_f64().unwrap()
|
} else if x.get((row, node.split_feature)).to_f64().unwrap()
|
||||||
<= node.split_value.unwrap_or(std::f64::NAN)
|
<= node.split_value.unwrap_or(f64::NAN)
|
||||||
{
|
{
|
||||||
queue.push_back(node.true_child.unwrap());
|
queue.push_back(node.true_child.unwrap());
|
||||||
} else {
|
} else {
|
||||||
@@ -637,9 +640,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
|||||||
.get((i, self.nodes()[visitor.node].split_feature))
|
.get((i, self.nodes()[visitor.node].split_feature))
|
||||||
.to_f64()
|
.to_f64()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
<= self.nodes()[visitor.node]
|
<= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN)
|
||||||
.split_value
|
|
||||||
.unwrap_or(std::f64::NAN)
|
|
||||||
{
|
{
|
||||||
*true_sample = visitor.samples[i];
|
*true_sample = visitor.samples[i];
|
||||||
tc += *true_sample;
|
tc += *true_sample;
|
||||||
@@ -750,7 +751,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
@@ -764,7 +766,7 @@ mod tests {
|
|||||||
assert!((y_hat[i] - y[i]).abs() < 0.1);
|
assert!((y_hat[i] - y[i]).abs() < 0.1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let expected_y = vec![
|
let expected_y = [
|
||||||
87.3, 87.3, 87.3, 87.3, 98.9, 98.9, 98.9, 98.9, 98.9, 107.9, 107.9, 107.9, 114.85,
|
87.3, 87.3, 87.3, 87.3, 98.9, 98.9, 98.9, 98.9, 98.9, 107.9, 107.9, 107.9, 114.85,
|
||||||
114.85, 114.85, 114.85,
|
114.85, 114.85, 114.85,
|
||||||
];
|
];
|
||||||
@@ -785,7 +787,7 @@ mod tests {
|
|||||||
assert!((y_hat[i] - expected_y[i]).abs() < 0.1);
|
assert!((y_hat[i] - expected_y[i]).abs() < 0.1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let expected_y = vec![
|
let expected_y = [
|
||||||
83.0, 88.35, 88.35, 89.5, 97.15, 97.15, 99.5, 99.5, 101.2, 104.6, 109.6, 109.6, 113.4,
|
83.0, 88.35, 88.35, 89.5, 97.15, 97.15, 99.5, 99.5, 101.2, 104.6, 109.6, 109.6, 113.4,
|
||||||
113.4, 116.30, 116.30,
|
113.4, 116.30, 116.30,
|
||||||
];
|
];
|
||||||
@@ -831,7 +833,8 @@ mod tests {
|
|||||||
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
|
||||||
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
|
||||||
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
|
||||||
]);
|
])
|
||||||
|
.unwrap();
|
||||||
let y: Vec<f64> = vec![
|
let y: Vec<f64> = vec![
|
||||||
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
|
||||||
114.2, 115.7, 116.9,
|
114.2, 115.7, 116.9,
|
||||||
|
|||||||
Reference in New Issue
Block a user