3 Commits

Author SHA1 Message Date
morenol
76d1ef610d Update Cargo.toml (#299)
* Update Cargo.toml

* chore: fix clippy

* chore: bump actions

* chore: fix clippy

* chore: update target name

---------

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
2025-04-24 23:24:29 -04:00
Lorenzo
4092e24c2a Update README.md 2025-02-04 14:26:53 +00:00
Lorenzo
17dc9f3bbf Add ordered pairs for FastPair (#252)
* Add ordered_pairs method to FastPair
* add tests to fastpair
2025-01-28 00:48:08 +00:00
11 changed files with 132 additions and 34 deletions
+6 -16
View File
@@ -19,14 +19,13 @@ jobs:
{ os: "ubuntu", target: "i686-unknown-linux-gnu" }, { os: "ubuntu", target: "i686-unknown-linux-gnu" },
{ os: "ubuntu", target: "wasm32-unknown-unknown" }, { os: "ubuntu", target: "wasm32-unknown-unknown" },
{ os: "macos", target: "aarch64-apple-darwin" }, { os: "macos", target: "aarch64-apple-darwin" },
{ os: "ubuntu", target: "wasm32-wasi" },
] ]
env: env:
TZ: "/usr/share/zoneinfo/your/location" TZ: "/usr/share/zoneinfo/your/location"
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- name: Cache .cargo and target - name: Cache .cargo and target
uses: actions/cache@v2 uses: actions/cache@v4
with: with:
path: | path: |
~/.cargo ~/.cargo
@@ -36,16 +35,13 @@ jobs:
- name: Install Rust toolchain - name: Install Rust toolchain
uses: actions-rs/toolchain@v1 uses: actions-rs/toolchain@v1
with: with:
toolchain: 1.81 # 1.82 seems to break wasm32 tests https://github.com/rustwasm/wasm-bindgen/issues/4274 toolchain: stable
target: ${{ matrix.platform.target }} target: ${{ matrix.platform.target }}
profile: minimal profile: minimal
default: true default: true
- name: Install test runner for wasm - name: Install test runner for wasm
if: matrix.platform.target == 'wasm32-unknown-unknown' if: matrix.platform.target == 'wasm32-unknown-unknown'
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Install test runner for wasi
if: matrix.platform.target == 'wasm32-wasi'
run: curl https://wasmtime.dev/install.sh -sSf | bash
- name: Stable Build with all features - name: Stable Build with all features
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
@@ -65,13 +61,7 @@ jobs:
- name: Tests in WASM - name: Tests in WASM
if: matrix.platform.target == 'wasm32-unknown-unknown' if: matrix.platform.target == 'wasm32-unknown-unknown'
run: wasm-pack test --node -- --all-features run: wasm-pack test --node -- --all-features
- name: Tests in WASI
if: matrix.platform.target == 'wasm32-wasi'
run: |
export WASMTIME_HOME="$HOME/.wasmtime"
export PATH="$WASMTIME_HOME/bin:$PATH"
cargo install cargo-wasi && cargo wasi test
check_features: check_features:
runs-on: "${{ matrix.platform.os }}-latest" runs-on: "${{ matrix.platform.os }}-latest"
strategy: strategy:
@@ -81,9 +71,9 @@ jobs:
env: env:
TZ: "/usr/share/zoneinfo/your/location" TZ: "/usr/share/zoneinfo/your/location"
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- name: Cache .cargo and target - name: Cache .cargo and target
uses: actions/cache@v2 uses: actions/cache@v4
with: with:
path: | path: |
~/.cargo ~/.cargo
+2 -2
View File
@@ -12,9 +12,9 @@ jobs:
env: env:
TZ: "/usr/share/zoneinfo/your/location" TZ: "/usr/share/zoneinfo/your/location"
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v4
- name: Cache .cargo - name: Cache .cargo
uses: actions/cache@v2 uses: actions/cache@v4
with: with:
path: | path: |
~/.cargo ~/.cargo
+1 -1
View File
@@ -14,7 +14,7 @@ jobs:
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Cache .cargo and target - name: Cache .cargo and target
uses: actions/cache@v2 uses: actions/cache@v4
with: with:
path: | path: |
~/.cargo ~/.cargo
+1 -1
View File
@@ -2,7 +2,7 @@
name = "smartcore" name = "smartcore"
description = "Machine Learning in Rust." description = "Machine Learning in Rust."
homepage = "https://smartcorelib.org" homepage = "https://smartcorelib.org"
version = "0.4.0" version = "0.4.1"
authors = ["smartcore Developers"] authors = ["smartcore Developers"]
edition = "2021" edition = "2021"
license = "Apache-2.0" license = "Apache-2.0"
+1 -1
View File
@@ -18,4 +18,4 @@
----- -----
[![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml) [![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
To start getting familiar with the new smartcore v0.3 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md). To start getting familiar with the new smartcore v0.4 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
+114
View File
@@ -173,6 +173,21 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
} }
} }
/// Returns the stored pairwise dissimilarities sorted from the closest
/// pair to the furthest one.
///
/// # Panics
///
/// Panics if two entries cannot be ordered, i.e. when `partial_cmp`
/// returns `None` for them.
#[allow(dead_code)]
pub fn ordered_pairs(&self) -> std::vec::IntoIter<&PairwiseDistance<T>> {
    // possible improvement: expose `impl Iterator<Item = &PairwiseDistance<T>>`
    // rather than the concrete `std::vec::IntoIter` type
    let mut sorted: Vec<&PairwiseDistance<T>> = self.distances.values().collect();
    sorted.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());
    sorted.into_iter()
}
// //
// Compute distances from input to all other points in data-structure. // Compute distances from input to all other points in data-structure.
// input is the row index of the sample matrix // input is the row index of the sample matrix
@@ -588,4 +603,103 @@ mod tests_fastpair {
assert_eq!(closest, min_dissimilarity); assert_eq!(closest, min_dissimilarity);
} }
/// `ordered_pairs` must yield distances in non-decreasing order.
#[test]
fn fastpair_ordered_pairs() {
    let x = DenseMatrix::<f64>::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[4.7, 3.2, 1.3, 0.2],
        &[4.6, 3.1, 1.5, 0.2],
        &[5.0, 3.6, 1.4, 0.2],
        &[5.4, 3.9, 1.7, 0.4],
        &[4.9, 3.1, 1.5, 0.1],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
        &[6.9, 3.1, 4.9, 1.5],
        &[5.5, 2.3, 4.0, 1.3],
        &[6.5, 2.8, 4.6, 1.5],
        &[4.6, 3.4, 1.4, 0.3],
        &[5.0, 3.4, 1.5, 0.2],
        &[4.4, 2.9, 1.4, 0.2],
    ])
    .unwrap();
    let fastpair = FastPair::new(&x).unwrap();

    // Materialize the distances once so neighbours can be compared directly,
    // without a sentinel float standing in for "no previous value yet".
    let distances: Vec<f64> = fastpair
        .ordered_pairs()
        .map(|pair| pair.distance.unwrap())
        .collect();

    // Guard against the ordering check passing vacuously on an empty result.
    assert!(!distances.is_empty());
    // Every distance must be at least as large as the one before it.
    assert!(distances.windows(2).all(|w| w[1] >= w[0]));
}
/// Building a `FastPair` from a 0x0 matrix must fail with the
/// "min number of rows" error.
#[test]
fn test_empty_set() {
    let empty_matrix = DenseMatrix::<f64>::zeros(0, 0);
    let outcome = FastPair::new(&empty_matrix);
    let expected = Failed::because(FailedError::FindFailed, "min number of rows should be 3");

    assert!(outcome.is_err());
    assert_eq!(outcome.err(), Some(expected));
}
/// A matrix holding a single row is rejected with the
/// "min number of rows" error.
#[test]
fn test_single_point() {
    let single_point = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap();
    let outcome = FastPair::new(&single_point);
    let expected = Failed::because(FailedError::FindFailed, "min number of rows should be 3");

    assert!(outcome.is_err());
    assert_eq!(outcome.err(), Some(expected));
}
/// Two rows are still below the minimum, so construction must fail with the
/// "min number of rows" error.
#[test]
fn test_two_points() {
    let two_points = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
    let outcome = FastPair::new(&two_points);
    let expected = Failed::because(FailedError::FindFailed, "min number of rows should be 3");

    assert!(outcome.is_err());
    assert_eq!(outcome.err(), Some(expected));
}
/// Three coincident points are accepted, and the closest pair they produce
/// has a distance of zero.
#[test]
fn test_three_identical_points() {
    let rows: [&[f64; 2]; 3] = [&[1.0, 1.0], &[1.0, 1.0], &[1.0, 1.0]];
    let identical_points = DenseMatrix::from_2d_array(&rows).unwrap();

    let built = FastPair::new(&identical_points);
    assert!(built.is_ok());

    let nearest = built.unwrap().closest_pair();
    assert_eq!(nearest.distance, Some(0.0));
}
/// A matrix with four rows builds successfully and the result can be
/// unwrapped without panicking.
#[test]
fn test_result_unwrapping() {
    let rows: [&[f64; 2]; 4] = [&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0], &[7.0, 8.0]];
    let valid_matrix = DenseMatrix::from_2d_array(&rows).unwrap();

    let built = FastPair::new(&valid_matrix);
    assert!(built.is_ok());

    // Unwrapping a successful construction must not panic.
    let _fastpair = built.unwrap();
}
} }
+1
View File
@@ -663,6 +663,7 @@ mod tests {
#[test] #[test]
fn test_instantiate_err_view3() { fn test_instantiate_err_view3() {
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
#[allow(clippy::reversed_empty_ranges)]
let v = DenseMatrixView::new(&x, 0..3, 4..3); let v = DenseMatrixView::new(&x, 0..3, 4..3);
assert!(v.is_err()); assert!(v.is_err());
} }
+1 -2
View File
@@ -257,8 +257,7 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features. /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data. /// * `x` - training data.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
/// priors are adjusted according to the data.
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
/// * `binarize` - Threshold for binarizing. /// * `binarize` - Threshold for binarizing.
fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>( fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>(
+1 -2
View File
@@ -174,8 +174,7 @@ impl<TY: Number + Ord + Unsigned> GaussianNBDistribution<TY> {
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features. /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data. /// * `x` - training data.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
/// priors are adjusted according to the data.
pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>( pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>(
x: &X, x: &X,
y: &Y, y: &Y,
+1 -2
View File
@@ -207,8 +207,7 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
/// Fits the distribution to a NxM matrix where N is number of samples and M is number of features. /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data. /// * `x` - training data.
/// * `y` - vector with target values (classes) of length N. /// * `y` - vector with target values (classes) of length N.
/// * `priors` - Optional vector with prior probabilities of the classes. If not defined, /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
/// priors are adjusted according to the data.
/// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>( pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>(
x: &X, x: &X,
+3 -7
View File
@@ -24,7 +24,7 @@
//! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0] //! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0] //! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
//! ``` //! ```
use std::iter; use std::iter::repeat_n;
use crate::error::Failed; use crate::error::Failed;
use crate::linalg::basic::arrays::Array2; use crate::linalg::basic::arrays::Array2;
@@ -75,11 +75,7 @@ fn find_new_idxs(num_params: usize, cat_sizes: &[usize], cat_idxs: &[usize]) ->
let offset = (0..1).chain(offset_); let offset = (0..1).chain(offset_);
let new_param_idxs: Vec<usize> = (0..num_params) let new_param_idxs: Vec<usize> = (0..num_params)
.zip( .zip(repeats.zip(offset).flat_map(|(r, o)| repeat_n(o, r)))
repeats
.zip(offset)
.flat_map(|(r, o)| iter::repeat(o).take(r)),
)
.map(|(idx, ofst)| idx + ofst) .map(|(idx, ofst)| idx + ofst)
.collect(); .collect();
new_param_idxs new_param_idxs
@@ -124,7 +120,7 @@ impl OneHotEncoder {
let (nrows, _) = data.shape(); let (nrows, _) = data.shape();
// col buffer to avoid allocations // col buffer to avoid allocations
let mut col_buf: Vec<T> = iter::repeat(T::zero()).take(nrows).collect(); let mut col_buf: Vec<T> = repeat_n(T::zero(), nrows).collect();
let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len()); let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());