Add another pairwise distance algorithm

add tests to fastpair
fix test
2025-01-28 00:30:57 +00:00 · 2025-01-28 00:20:29 +00:00 · 2025-01-27 23:43:42 +00:00 · 2025-01-27 23:34:45 +00:00 · 2025-01-27 23:28:58 +00:00 · 2023-03-24 12:06:54 +09:00
12 changed files with 256 additions and 19 deletions
@@ -19,13 +19,14 @@ jobs:
            { os: "ubuntu", target: "i686-unknown-linux-gnu" },
            { os: "ubuntu", target: "wasm32-unknown-unknown" },
            { os: "macos", target: "aarch64-apple-darwin" },
            { os: "ubuntu", target: "wasm32-wasi" },
          ]
    env:
      TZ: "/usr/share/zoneinfo/your/location"
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
      - name: Cache .cargo and target
-        uses: actions/cache@v4
+        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo
@@ -35,13 +36,16 @@ jobs:
      - name: Install Rust toolchain
        uses: actions-rs/toolchain@v1
        with:
-          toolchain: stable
+          toolchain: 1.81 # 1.82 seems to break wasm32 tests https://github.com/rustwasm/wasm-bindgen/issues/4274
          target: ${{ matrix.platform.target }}
          profile: minimal
          default: true
      - name: Install test runner for wasm
        if: matrix.platform.target == 'wasm32-unknown-unknown'
        run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
      - name: Install test runner for wasi
        if: matrix.platform.target == 'wasm32-wasi'
        run: curl https://wasmtime.dev/install.sh -sSf | bash
      - name: Stable Build with all features
        uses: actions-rs/cargo@v1
        with:
@@ -61,7 +65,13 @@ jobs:
      - name: Tests in WASM
        if: matrix.platform.target == 'wasm32-unknown-unknown'
        run: wasm-pack test --node -- --all-features
-  
+      - name: Tests in WASI
        if: matrix.platform.target == 'wasm32-wasi'
        run: |
          export WASMTIME_HOME="$HOME/.wasmtime"
          export PATH="$WASMTIME_HOME/bin:$PATH"
          cargo install cargo-wasi && cargo wasi test
  check_features:
    runs-on: "${{ matrix.platform.os }}-latest"
    strategy:
@@ -71,9 +81,9 @@ jobs:
    env:
      TZ: "/usr/share/zoneinfo/your/location"
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
      - name: Cache .cargo and target
-        uses: actions/cache@v4
+        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo
@@ -12,9 +12,9 @@ jobs:
    env:
      TZ: "/usr/share/zoneinfo/your/location"
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v2
      - name: Cache .cargo
-        uses: actions/cache@v4
+        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo
@@ -14,7 +14,7 @@ jobs:
    steps:
      - uses: actions/checkout@v2
      - name: Cache .cargo and target
-        uses: actions/cache@v4
+        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo
@@ -2,7 +2,7 @@
 name = "smartcore"
 description = "Machine Learning in Rust."
 homepage = "https://smartcorelib.org"
-version = "0.4.1"
+version = "0.4.0"
 authors = ["smartcore Developers"]
 edition = "2021"
 license = "Apache-2.0"
@@ -18,4 +18,4 @@
 -----
 [![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
-To start getting familiar with the new smartcore v0.4 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
+To start getting familiar with the new smartcore v0.3 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
@@ -0,0 +1,219 @@
 //! This module provides `EppsteinDCP`, a data structure for tracking the closest pair in a
 //! set of points, following the "conga line" idea behind Eppstein's FastPair; one example
 //! use is hierarchical clustering.[2][3][5]
 //!
 //! ## Purpose
 //!
 //! FastPair allows quick retrieval of the nearest neighbor for each data point by maintaining
 //! a "conga line" of closest pairs. Each point retains a link to its known nearest neighbor,
 //! and updates in the data structure propagate accordingly. This can be leveraged in
 //! agglomerative clustering steps, where merging or insertion of new points must be reflected
 //! in nearest-neighbor relationships.
 //!
 //! ## Example
 //!
 //! ```
 //! use smartcore::metrics::distance::PairwiseDistance;
 //! use smartcore::linalg::basic::matrix::DenseMatrix;
 //! use smartcore::algorithm::neighbour::fastpair::FastPair;
 //!
 //! let x = DenseMatrix::from_2d_array(&[
 //!     &[5.1, 3.5, 1.4, 0.2],
 //!     &[4.9, 3.0, 1.4, 0.2],
 //!     &[4.7, 3.2, 1.3, 0.2],
 //!     &[4.6, 3.1, 1.5, 0.2],
 //!     &[5.0, 3.6, 1.4, 0.2],
 //!     &[5.4, 3.9, 1.7, 0.4],
 //! ]).unwrap();
 //!
 //! let fastpair = FastPair::new(&x).unwrap();
 //! let closest = fastpair.closest_pair();
 //! println!("Closest pair: {:?}", closest);
 //! ```
 use std::collections::HashMap;
 use num::Bounded;
 use crate::error::{Failed, FailedError};
 use crate::linalg::basic::arrays::{Array, Array1, Array2};
 use crate::metrics::distance::euclidian::Euclidian;
 use crate::metrics::distance::PairwiseDistance;
 use crate::numbers::floatnum::FloatNumber;
 use crate::numbers::realnum::RealNumber;
 /// Eppstein dynamic closest-pair structure.
 ///
 /// `M` is any matrix type implementing `Array2<T>`; its rows are the points
 /// between which distances are computed.
 #[derive(Debug)]
 pub struct EppsteinDCP<'a, T: RealNumber + FloatNumber, M: Array2<T>> {
    // Borrowed sample matrix; rows are read through `get_row` when a distance is computed.
    samples: &'a M,
    // Keyed by row index: the neighbour currently recorded for that row and the
    // distance to it.
    neighbors: HashMap<usize, PairwiseDistance<T>>,
 }
 impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> EppsteinDCP<'a, T, M> {
    /// Creates a new `EppsteinDCP` over the rows of `m` and links them into an
    /// initial conga line.
    ///
    /// # Errors
    ///
    /// Returns a `FailedError::FindFailed` error when `m` has fewer than 3 rows.
    pub fn new(m: &'a M) -> Result<Self, Failed> {
        let n = m.shape().0;
        if n < 3 {
            return Err(Failed::because(
                FailedError::FindFailed,
                "min number of rows should be 3",
            ));
        }
        let mut this = Self {
            samples: m,
            neighbors: HashMap::with_capacity(n),
        };
        this.initialize();
        Ok(this)
    }
    /// Builds the initial "conga line" of neighbours, akin to Eppstein's technique.
    ///
    /// Starting from row 0, the current row is linked to the nearest row that has
    /// not been visited yet, and that row becomes the current one. Whichever end
    /// of the globally closest pair is visited first still has the other end among
    /// its candidates, so the distance it records cannot exceed the closest-pair
    /// distance. This is what makes `closest_pair` correct. The last visited row
    /// has no candidates left and stays unlinked with the maximum distance.
    fn initialize(&mut self) {
        let n = self.samples.shape().0;
        if n < 2 {
            return;
        }
        self.neighbors.clear();
        // Rows not yet placed on the line; row 0 is the starting point.
        let mut remaining: Vec<usize> = (1..n).collect();
        let mut current = 0usize;
        while !remaining.is_empty() {
            let mut best_pos = 0usize;
            let mut best_d = <T as Bounded>::max_value();
            for (pos, &candidate) in remaining.iter().enumerate() {
                let d = self.compute_dist(current, candidate);
                if d < best_d {
                    best_d = d;
                    best_pos = pos;
                }
            }
            // Order of `remaining` is irrelevant, so the O(1) removal is fine.
            let next = remaining.swap_remove(best_pos);
            self.neighbors.insert(
                current,
                PairwiseDistance {
                    node: current,
                    neighbour: Some(next),
                    distance: Some(best_d),
                },
            );
            current = next;
        }
        // Tail of the line: nothing left to link to.
        self.neighbors.insert(
            current,
            PairwiseDistance {
                node: current,
                neighbour: None,
                distance: Some(<T as Bounded>::max_value()),
            },
        );
    }
    /// Inserts (or re-links) row `row_idx` into the structure.
    ///
    /// The row is linked to the nearest row already indexed, never to itself.
    /// If the new row is closer to that neighbour than the neighbour's current
    /// link, the neighbour is re-linked to `row_idx` as well. Ties between
    /// equally distant candidates go to the lower row index, so the result does
    /// not depend on hash-map iteration order.
    ///
    /// `row_idx` is passed to `get_row` on the sample matrix, so it presumably has
    /// to be a valid row index of that matrix; behaviour for out-of-range indices
    /// depends on the matrix implementation.
    pub fn insert(&mut self, row_idx: usize) {
        let mut best_neighbor: Option<usize> = None;
        let mut best_d = <T as Bounded>::max_value();
        for &i in self.neighbors.keys() {
            // A row already indexed must not become its own neighbour at distance 0.
            if i == row_idx {
                continue;
            }
            let d = self.compute_dist(i, row_idx);
            let tie_on_lower_index = d == best_d && best_neighbor.map_or(false, |b| i < b);
            if d < best_d || tie_on_lower_index {
                best_d = d;
                best_neighbor = Some(i);
            }
        }
        self.neighbors.insert(
            row_idx,
            PairwiseDistance {
                node: row_idx,
                neighbour: best_neighbor,
                distance: Some(best_d),
            },
        );
        // The distance is symmetric, so `best_d` is also the distance from the
        // chosen neighbour back to `row_idx`; re-link it if that is an improvement.
        if let Some(kn) = best_neighbor {
            if let Some(entry) = self.neighbors.get_mut(&kn) {
                if entry.distance.map_or(true, |current| best_d < current) {
                    entry.neighbour = Some(row_idx);
                    entry.distance = Some(best_d);
                }
            }
        }
    }
    /// Returns the recorded pair with the smallest distance, for use e.g. in the
    /// merge step of hierarchical clustering.
    ///
    /// Entries without a linked neighbour are ignored. Ties are resolved in favour
    /// of the lower `node` index. Returns `None` when no linked entry exists.
    pub fn closest_pair(&self) -> Option<PairwiseDistance<T>> {
        let mut best: Option<(T, &PairwiseDistance<T>)> = None;
        for pd in self.neighbors.values() {
            let d = match (pd.neighbour, pd.distance) {
                (Some(_), Some(d)) => d,
                _ => continue,
            };
            let better = match best {
                None => true,
                Some((best_d, best_pd)) => d < best_d || (d == best_d && pd.node < best_pd.node),
            };
            if better {
                best = Some((d, pd));
            }
        }
        best.map(|(_, pd)| pd.clone())
    }
    /// Squared Euclidean distance between rows `i` and `j` of the sample matrix
    /// (sum of squared coordinate differences; no square root is taken).
    fn compute_dist(&self, i: usize, j: usize) -> T {
        let row_i = self.samples.get_row(i);
        let row_j = self.samples.get_row(j);
        row_i
            .iterator(0)
            .zip(row_j.iterator(0))
            .map(|(a, b)| (*a - *b) * (*a - *b))
            .sum()
    }
 }
 /// Unit tests for `EppsteinDCP`: a smoke test and a cross-check against `FastPair`.
 #[cfg(test)]
 mod tests_eppstein {
    use super::*;
    use crate::linalg::basic::matrix::DenseMatrix;
    /// Smoke test: build the structure over three 2-d points, call `insert`,
    /// and check that a closest pair is reported.
    #[test]
    fn test_eppstein() {
        let matrix =
            DenseMatrix::from_2d_array(&[&vec![1.0, 2.0], &vec![2.0, 2.0], &vec![5.0, 3.0]])
                .unwrap();
        let mut dcp = EppsteinDCP::new(&matrix).unwrap();
        // NOTE(review): row 2 is already indexed by `new`, so this exercises
        // `insert` on an existing row rather than on a new point.
        dcp.insert(2);
        let cp = dcp.closest_pair();
        // Only checks that some pair is returned, not which one.
        assert!(cp.is_some());
    }
    /// Cross-check: `EppsteinDCP` and `FastPair` should report the same closest
    /// pair (node, neighbour and distance) on the same data.
    #[test]
    fn compare_fastpair_eppstein() {
        use crate::algorithm::neighbour::fastpair::FastPair;
        // NOTE(review): `EppsteinDCP` is already in scope through `use super::*`;
        // this import names the same type by its full path.
        use crate::algorithm::neighbour::eppstein::EppsteinDCP;
        // Create a static example matrix
        let x = DenseMatrix::from_2d_array(&[
            &[5.1, 3.5, 1.4, 0.2],
            &[4.9, 3.0, 1.4, 0.2],
            &[4.7, 3.2, 1.3, 0.2],
            &[4.6, 3.1, 1.5, 0.2],
            &[5.0, 3.6, 1.4, 0.2],
            &[5.4, 3.9, 1.7, 0.4],
            &[4.6, 3.4, 1.4, 0.3],
            &[5.0, 3.4, 1.5, 0.2],
            &[4.4, 2.9, 1.4, 0.2],
            &[4.9, 3.1, 1.5, 0.1],
        ])
        .unwrap();
        // Build FastPair
        let fastpair = FastPair::new(&x).unwrap();
        let pair_fastpair = fastpair.closest_pair();
        // Build EppsteinDCP
        let eppstein = EppsteinDCP::new(&x).unwrap();
        let pair_eppstein = eppstein.closest_pair();
        // Compare the results
        // NOTE(review): these equalities assume both structures report the pair
        // with the same orientation (which end is `node`) — confirm against FastPair.
        assert_eq!(pair_fastpair.node, pair_eppstein.as_ref().unwrap().node);
        assert_eq!(
            pair_fastpair.neighbour.unwrap(),
            pair_eppstein.as_ref().unwrap().neighbour.unwrap()
        );
        // Use a small epsilon for floating-point comparison
        let epsilon = 1e-9;
        let diff: f64 =
            pair_fastpair.distance.unwrap() - pair_eppstein.as_ref().unwrap().distance.unwrap();
        assert!(diff.abs() < epsilon);
        println!("FastPair result: {:?}", pair_fastpair);
        println!("EppsteinDCP result: {:?}", pair_eppstein);
    }
 }
@@ -41,7 +41,9 @@ use serde::{Deserialize, Serialize};
 pub(crate) mod bbd_tree;
 /// tree data structure for fast nearest neighbor search
 pub mod cover_tree;
-/// fastpair closest neighbour algorithm
+/// eppstein pairwise closest neighbour algorithm
 pub mod eppstein;
 /// fastpair pairwise closest neighbour algorithm
 pub mod fastpair;
 /// very simple algorithm that sequentially checks each element of the list until a match is found or the whole list has been searched.
 pub mod linear_search;
@@ -663,7 +663,6 @@ mod tests {
    #[test]
    fn test_instantiate_err_view3() {
        let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap();
        #[allow(clippy::reversed_empty_ranges)]
        let v = DenseMatrixView::new(&x, 0..3, 4..3);
        assert!(v.is_err());
    }
@@ -257,7 +257,8 @@ impl<TY: Number + Ord + Unsigned> BernoulliNBDistribution<TY> {
    /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data.
    /// * `y` - vector with target values (classes) of length N.
-    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
+    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    ///     priors are adjusted according to the data.
    /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
    /// * `binarize` - Threshold for binarizing.
    fn fit<TX: Number + PartialOrd, X: Array2<TX>, Y: Array1<TY>>(
@@ -174,7 +174,8 @@ impl<TY: Number + Ord + Unsigned> GaussianNBDistribution<TY> {
    /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data.
    /// * `y` - vector with target values (classes) of length N.
-    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
+    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    ///     priors are adjusted according to the data.
    pub fn fit<TX: Number + RealNumber, X: Array2<TX>, Y: Array1<TY>>(
        x: &X,
        y: &Y,
@@ -207,7 +207,8 @@ impl<TY: Number + Ord + Unsigned> MultinomialNBDistribution<TY> {
    /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data.
    /// * `y` - vector with target values (classes) of length N.
-    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, priors are adjusted according to the data.
+    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    ///     priors are adjusted according to the data.
    /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
    pub fn fit<TX: Number + Unsigned, X: Array2<TX>, Y: Array1<TY>>(
        x: &X,
@@ -24,7 +24,7 @@
 //! //    &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
 //! //    &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
 //! ```
-use std::iter::repeat_n;
+use std::iter;
 use crate::error::Failed;
 use crate::linalg::basic::arrays::Array2;
@@ -75,7 +75,11 @@ fn find_new_idxs(num_params: usize, cat_sizes: &[usize], cat_idxs: &[usize]) ->
    let offset = (0..1).chain(offset_);
    let new_param_idxs: Vec<usize> = (0..num_params)
-        .zip(repeats.zip(offset).flat_map(|(r, o)| repeat_n(o, r)))
+        .zip(
            repeats
                .zip(offset)
                .flat_map(|(r, o)| iter::repeat(o).take(r)),
        )
        .map(|(idx, ofst)| idx + ofst)
        .collect();
    new_param_idxs
@@ -120,7 +124,7 @@ impl OneHotEncoder {
                let (nrows, _) = data.shape();
                // col buffer to avoid allocations
-                let mut col_buf: Vec<T> = repeat_n(T::zero(), nrows).collect();
+                let mut col_buf: Vec<T> = iter::repeat(T::zero()).take(nrows).collect();
                let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());
Author	SHA1	Message	Date
Lorenzo Mec-iS	a62c293244	Add another pairwise distance algorithm	2025-01-28 00:30:57 +00:00
Lorenzo Mec-iS	39f87aa5c2	add tests to fastpair	2025-01-28 00:20:29 +00:00
Lorenzo Mec-iS	8cc02cdd48	fix test	2025-01-27 23:43:42 +00:00
Lorenzo Mec-iS	d60ba63862	Merge branch 'main' of github.com:smartcorelib/smartcore into march-2023-improvements	2025-01-27 23:34:45 +00:00
Lorenzo	5dd5c2f0d0	Merge branch 'development' into march-2023-improvements	2025-01-27 23:28:58 +00:00
Lorenzo (Mec-iS)	074cfaf14f	rustfmt	2023-03-24 12:06:54 +09:00
Lorenzo	393cf15534	Merge branch 'development' into march-2023-improvements	2023-03-24 12:05:06 +09:00
Lorenzo (Mec-iS)	80c406b37d	Merge branch 'development' of github.com:smartcorelib/smartcore into march-2023-improvements	2023-03-21 17:38:35 +09:00
Lorenzo (Mec-iS)	0e1bf6ce7f	Add ordered_pairs method to FastPair	2023-03-21 14:46:33 +09:00
Lorenzo (Mec-iS)	0c9c70f8d2	Merge	2022-11-09 12:05:17 +00:00
morenol	62de25b2ae	Handle kernel serialization (#232 ) * Handle kernel serialization * Do not use typetag in WASM * enable tests for serialization * Update serde feature deps Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> Co-authored-by: Lorenzo <tunedconsulting@gmail.com>	2022-11-08 11:29:56 -05:00
morenol	7d87451333	Fixes for release (#237 ) * Fixes for release * add new test * Remove change applied in development branch * Only add dependency for wasm32 * Update ci.yml Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> Co-authored-by: Lorenzo <tunedconsulting@gmail.com>	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	265fd558e7	make work cargo build --target wasm32-unknown-unknown	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	e25e2aea2b	update CHANGELOG	2022-11-08 11:29:56 -05:00
Lorenzo	2f6dd1325e	update comment	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	b0dece9476	use getrandom/js	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	c507d976be	Update CHANGELOG	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	fa54d5ee86	Remove unused tests flags	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	459d558d48	minor fixes to doc	2022-11-08 11:29:56 -05:00
Lorenzo	1b7dda30a2	minor fix	2022-11-08 11:29:56 -05:00
Lorenzo	c1bd1df5f6	minor fix	2022-11-08 11:29:56 -05:00
Lorenzo	cf751f05aa	minor fix	2022-11-08 11:29:56 -05:00
Lorenzo	63ed89aadd	minor fix	2022-11-08 11:29:56 -05:00
Lorenzo	890e9d644c	minor fix	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	af0a740394	Fix std_rand feature	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	616e38c282	cleanup	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	a449fdd4ea	fmt	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	669f87f812	Use getrandom as default (for no-std feature)	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	6d529b34d2	Add static analyzer to doc	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	3ec9e4f0db	Exclude datasets test for wasm/wasi	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	527477dea7	minor fixes	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	5b517c5048	minor fix	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	2df0795be9	Release 0.3	2022-11-08 11:29:56 -05:00
Lorenzo	0dc97a4e9b	Create DEVELOPERS.md	2022-11-08 11:29:56 -05:00
Lorenzo	6c0fd37222	Update README.md	2022-11-08 11:29:56 -05:00
Lorenzo	d8d0fb6903	Update README.md	2022-11-08 11:29:56 -05:00
morenol	8d07efd921	Use Box in SVM and remove lifetimes (#228 ) * Do not change external API Authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
morenol	ba27dd2a55	Fix CI (#227 ) * Update ci.yml Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Lorenzo	ed9769f651	Implement CSV reader with new traits (#209 )	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	b427e5d8b1	Improve options conditionals	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	fabe362755	Implement Display for NaiveBayes	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	ee6b6a53d6	cargo clippy	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	19f3a2fcc0	Fix signature of metrics tests	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	e09c4ba724	Add kernels' parameters to public interface	2022-11-08 11:29:56 -05:00
Lorenzo	6624732a65	Fix svr tests (#222 )	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	1cbde3ba22	Refactor modules structure in src/svm	2022-11-08 11:29:56 -05:00
Lorenzo (Mec-iS)	551a6e34a5	clean up svm	2022-11-08 11:29:56 -05:00
Lorenzo	c45bab491a	Support Wasi as target (#216 ) * Improve features * Add wasm32-wasi as a target * Update .github/workflows/ci.yml Co-authored-by: morenol <22335041+morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Lorenzo	7f35dc54e4	Disambiguate distances. Implement Fastpair. (#220 )	2022-11-08 11:29:56 -05:00
morenol	8f1a7dfd79	build: fix compilation without default features (#218 ) * build: fix compilation with optional features * Remove unused config from Cargo.toml * Fix cache keys Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Lorenzo	712c478af6	Improve features (#215 )	2022-11-08 11:29:56 -05:00
Lorenzo	4d36b7f34f	Fix metrics::auc (#212 ) * Fix metrics::auc	2022-11-08 11:29:56 -05:00
Lorenzo	a16927aa16	Port ensemble. Add Display to naive_bayes (#208 )	2022-11-08 11:29:56 -05:00
Lorenzo	d91f4f7ce4	Update README.md	2022-11-08 11:29:56 -05:00
Lorenzo	a7fa0585eb	Merge potential next release v0.4 (#187 ) Breaking Changes * First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters in dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>	2022-11-08 11:29:56 -05:00
RJ Nowling	a32eb66a6a	Dataset doc cleanup (#205 ) * Update iris.rs * Update mod.rs * Update digits.rs	2022-11-08 11:29:56 -05:00
Lorenzo	f605f6e075	Update README.md	2022-11-08 11:29:56 -05:00
Lorenzo	3b1aaaadf7	Update README.md	2022-11-08 11:29:56 -05:00
Lorenzo	d015b12402	Update CONTRIBUTING.md	2022-11-08 11:29:56 -05:00
morenol	d5200074c2	fix: fix issue with iterator for svc search (#182 )	2022-11-08 11:29:56 -05:00
morenol	473cdfc44d	refactor: Try to follow similar pattern to other APIs (#180 ) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
morenol	ad2e6c2900	feat: expose hyper tuning module in model_selection (#179 ) * feat: expose hyper tuning module in model_selection * Move to a folder Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Lorenzo	9ea3133c27	Update CONTRIBUTING.md	2022-11-08 11:29:56 -05:00
Lorenzo	e4c47c7540	Add contribution guidelines (#178 )	2022-11-08 11:29:56 -05:00
Montana Low	f4fd4d2239	make default params available to serde (#167 ) * add seed param to search params * make default params available to serde * lints * create defaults for enums * lint	2022-11-08 11:29:56 -05:00
Montana Low	05dfffad5c	add seed param to search params (#168 )	2022-11-08 11:29:56 -05:00
morenol	a37b552a7d	Lmm/add seeds in more algorithms (#164 ) * Provide better output in flaky tests * feat: add seed parameter to multiple algorithms * Update changelog Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Montana Low	55e1158581	Complete grid search params (#166 ) * grid search draft * hyperparam search for linear estimators * grid search for ensembles * support grid search for more algos * grid search for unsupervised algos * minor cleanup	2022-11-08 11:29:56 -05:00
morenol	cfa824d7db	Provide better output in flaky tests (#163 )	2022-11-08 11:29:56 -05:00
morenol	bb5b437a32	feat: allocate first and then proceed to create matrix from Vec of Ro… (#159 ) * feat: allocate first and then proceed to create matrix from Vec of RowVectors	2022-11-08 11:29:56 -05:00
morenol	851533dfa7	Make rand_distr optional (#161 )	2022-11-08 11:29:56 -05:00
Lorenzo	0d996edafe	Update LICENSE	2022-11-08 11:29:56 -05:00
morenol	f291b71f4a	fix: fix compilation warnings when running only with default features (#160 ) * fix: fix compilation warnings when running only with default features Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Tim Toebrock	2d75c2c405	Implement a generic read_csv method (#147 ) * feat: Add interface to build `Matrix` from rows. * feat: Add option to derive `RealNumber` from string. To construct a `Matrix` from csv, and therefore from string, I need to be able to deserialize a generic `RealNumber` from string. * feat: Implement `Matrix::read_csv`.	2022-11-08 11:29:56 -05:00
Montana Low	1f2597be74	grid search (#154 ) * grid search draft * hyperparam search for linear estimators	2022-11-08 11:29:56 -05:00
Montana Low	0f442e96c0	Handle multiclass precision/recall (#152 ) * handle multiclass precision/recall	2022-11-08 11:29:56 -05:00
dependabot[bot]	44e4be23a6	Update criterion requirement from 0.3 to 0.4 (#150 ) * Update criterion requirement from 0.3 to 0.4 Updates the requirements on [criterion](https://github.com/bheisler/criterion.rs) to permit the latest version. - [Release notes](https://github.com/bheisler/criterion.rs/releases) - [Changelog](https://github.com/bheisler/criterion.rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/bheisler/criterion.rs/compare/0.3.0...0.4.0) --- updated-dependencies: - dependency-name: criterion dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> * fix criterion Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
Christos Katsakioris	01f753f86d	Add serde for StandardScaler (#148 ) * Derive `serde::Serialize` and `serde::Deserialize` for `StandardScaler`. * Add relevant unit test. Signed-off-by: Christos Katsakioris <ckatsak@gmail.com> Signed-off-by: Christos Katsakioris <ckatsak@gmail.com>	2022-11-08 11:29:56 -05:00
Tim Toebrock	df766eaf79	Implementation of Standard scaler (#143 ) * docs: Fix typo in doc for categorical transformer. * feat: Add option to take a column from Matrix. I created the method `Matrix::take_column` that uses the `Matrix::take`-interface to extract a single column from a matrix. I need that feature in the implementation of `StandardScaler`. * feat: Add `StandardScaler`. Authored-by: titoeb <timtoebrock@googlemail.com>	2022-11-08 11:29:56 -05:00
Lorenzo	09d9205696	Add example for FastPair (#144 ) * Add example * Move to top * Add imports to example * Fix imports	2022-11-08 11:29:56 -05:00
Lorenzo	dc7f01db4a	Implement fastpair (#142 ) * initial fastpair implementation * FastPair initial implementation * implement fastpair * Add random test * Add bench for fastpair * Refactor with constructor for FastPair * Add serialization for PairwiseDistance * Add fp_bench feature for fastpair bench	2022-11-08 11:29:56 -05:00
Chris McComb	eb4b49d552	Added additional doctest and fixed indices (#141 )	2022-11-08 11:29:56 -05:00
morenol	98e3465e7b	Fix clippy warnings (#139 ) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
ferrouille	ea39024fd2	Add SVC::decision_function (#135 )	2022-11-08 11:29:56 -05:00
dependabot[bot]	4e94feb872	Update nalgebra requirement from 0.23.0 to 0.31.0 (#128 ) Updates the requirements on [nalgebra](https://github.com/dimforge/nalgebra) to permit the latest version. - [Release notes](https://github.com/dimforge/nalgebra/releases) - [Changelog](https://github.com/dimforge/nalgebra/blob/dev/CHANGELOG.md) - [Commits](https://github.com/dimforge/nalgebra/compare/v0.23.0...v0.31.0) --- updated-dependencies: - dependency-name: nalgebra dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2022-11-08 11:29:56 -05:00
dependabot-preview[bot]	fa802d2d3f	build(deps): update nalgebra requirement from 0.23.0 to 0.26.2 (#98 ) * build(deps): update nalgebra requirement from 0.23.0 to 0.26.2 Updates the requirements on [nalgebra](https://github.com/dimforge/nalgebra) to permit the latest version. - [Release notes](https://github.com/dimforge/nalgebra/releases) - [Changelog](https://github.com/dimforge/nalgebra/blob/dev/CHANGELOG.md) - [Commits](https://github.com/dimforge/nalgebra/compare/v0.23.0...v0.26.2) Signed-off-by: dependabot-preview[bot] <support@dependabot.com> * fix: updates for nalgebre * test: explicitly call pow_mut from BaseVector since now it conflicts with nalgebra implementation * Don't be strict with dependencies Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>	2022-11-08 11:29:56 -05:00