diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index c09dfa7..15b3906 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -26,6 +26,17 @@ Take a look to the conventions established by existing code:
* Every module should provide comprehensive tests at the end, in its `mod tests {}` sub-module. These tests can be flagged or not with configuration flags to allow WebAssembly target.
* Run `cargo doc --no-deps --open` and read the generated documentation in the browser to be sure that your changes reflects in the documentation and new code is documented.
+#### Digging Deeper
+* A nice overview of the codebase is provided by a [static analyzer](https://mozilla.github.io/rust-code-analysis/metrics.html):
+```
+$ cargo install rust-code-analysis-cli
+# print metrics for every module
+$ rust-code-analysis-cli -m -O json -o . -p src/ --pr
+# print the full AST for a module
+$ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 -d > ast.txt
+```
+* Find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `fn main()` that uses `smartcore` (see the sketch below) and then point `twiggy` at the resulting file.
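+
+For example, a minimal throwaway binary along these lines gives `twiggy` something to analyze. This is a sketch based on the KNN example in `src/lib.rs`; adapt the calls if the API has changed:
+```rust
+use smartcore::linalg::basic::matrix::DenseMatrix;
+use smartcore::metrics::accuracy;
+use smartcore::neighbors::knn_classifier::KNNClassifier;
+
+fn main() {
+    // a tiny labeled dataset, just enough to pull smartcore's KNN code into the binary
+    let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.]]);
+    let y = vec![0, 0, 1, 1];
+    let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
+    let y_hat = knn.predict(&x).unwrap();
+    println!("accuracy: {}", accuracy(&y, &y_hat));
+}
+```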
+
## Issue Report Process
1. Go to the project's issues.
diff --git a/.github/DEVELOPERS.md b/.github/DEVELOPERS.md
index 87c2506..b3a647b 100644
--- a/.github/DEVELOPERS.md
+++ b/.github/DEVELOPERS.md
@@ -1,4 +1,7 @@
-# Smartcore: Introduction to modules
+# smartcore: Introduction to modules
+
+An important source of information:
+* [Rust API guidelines](https://rust-lang.github.io/api-guidelines/about.html)
## Walkthrough: traits system and basic structures
diff --git a/.gitignore b/.gitignore
index 9c0651c..0983a15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,4 +26,6 @@ src.dot
out.svg
FlameGraph/
-out.stacks
\ No newline at end of file
+out.stacks
+*.json
+*.txt
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9dda10..d105432 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,22 +4,29 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [Unreleased]
+## [0.3.0] - 2022-11-09
## Added
-- Seeds to multiple algorithims that depend on random number generation.
-- Added feature `js` to use WASM in browser
-- Drop `nalgebra-bindings` feature
-- Complete refactoring with *extensive API changes* that includes:
+- WARNING: Breaking changes!
+- Complete refactoring with **extensive API changes** that includes:
* moving to a new traits system, less structs more traits
* adapting all the modules to the new traits system
- * moving towards Rust 2021, in particular the use of `dyn` and `as_ref`
- * reorganization of the code base, trying to eliminate duplicates
+ * moving to Rust 2021, use of object-safe traits and `as_ref`
+ * reorganization of the code base, eliminating duplicates
+- Implemented `readers` (requires the "serde" feature) for reading/writing CSV files, extensible to other formats
+- Default features are now Wasm-/WASI-first
-## BREAKING CHANGE
-- Added a new parameter to `train_test_split` to define the seed.
+## Changed
+- WARNING: Breaking changes!
+- Seeds for multiple algorithms that depend on random number generation
+- Added a new parameter to `train_test_split` to define the seed
+- Changed use of the "serde" feature
-## [0.2.1] - 2022-05-10
+## Dropped
+- WARNING: Breaking changes!
+- Dropped the `nalgebra-bindings` feature; `ndarray` is the only supported external linear-algebra library
+
+## [0.2.1] - 2021-05-10
## Added
- L2 regularization penalty to the Logistic Regression
diff --git a/Cargo.toml b/Cargo.toml
index 0a23083..4fb260b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
[package]
name = "smartcore"
-description = "The most advanced machine learning library in rust."
+description = "Machine Learning in Rust."
homepage = "https://smartcorelib.org"
-version = "0.4.0"
-authors = ["SmartCore Developers"]
+version = "0.3.0"
+authors = ["smartcore Developers"]
edition = "2021"
license = "Apache-2.0"
documentation = "https://docs.rs/smartcore"
@@ -11,6 +11,13 @@ repository = "https://github.com/smartcorelib/smartcore"
readme = "README.md"
keywords = ["machine-learning", "statistical", "ai", "optimization", "linear-algebra"]
categories = ["science"]
+exclude = [
+ ".github",
+ ".gitignore",
+ "smartcore.iml",
+ "smartcore.svg",
+ "tests/"
+]
[dependencies]
approx = "0.5.1"
@@ -19,32 +26,31 @@ ndarray = { version = "0.15", optional = true }
num-traits = "0.2.12"
num = "0.4"
rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
+getrandom = "0.2"
rand_distr = { version = "0.4", optional = true }
serde = { version = "1", features = ["derive"], optional = true }
[features]
-default = ["serde", "datasets"]
+default = []
serde = ["dep:serde"]
ndarray-bindings = ["dep:ndarray"]
-datasets = ["dep:rand_distr", "std"]
-std = ["rand/std_rng", "rand/std"]
-# wasm32 only
+datasets = ["dep:rand_distr", "std_rand", "serde"]
+std_rand = ["rand/std_rng", "rand/std"]
+# used by wasm32-unknown-unknown for in-browser usage
js = ["getrandom/js"]
[target.'cfg(target_arch = "wasm32")'.dependencies]
getrandom = { version = "0.2", optional = true }
-[dev-dependencies]
-itertools = "*"
-criterion = { version = "0.4", default-features = false }
-serde_json = "1.0"
-bincode = "1.3.1"
-
[target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies]
wasm-bindgen-test = "0.3"
+[dev-dependencies]
+itertools = "*"
+serde_json = "1.0"
+bincode = "1.3.1"
+
[workspace]
-resolver = "2"
[profile.test]
debug = 1
diff --git a/LICENSE b/LICENSE
index 3cd5786..9448cee 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.
- Copyright 2019-present at SmartCore developers (smartcorelib.org)
+ Copyright 2019-present at smartcore developers (smartcorelib.org)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
diff --git a/README.md b/README.md
index fd6f481..758a461 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-
+
@@ -18,4 +18,4 @@
-----
[](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
-To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
+To start getting familiar with the new smartcore v0.5 API, a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter) is now available. Please see the instructions there; contributions are welcome, see [CONTRIBUTING](.github/CONTRIBUTING.md).
diff --git a/smartcore.svg b/smartcore.svg
index 3e4c68d..eaffd58 100644
--- a/smartcore.svg
+++ b/smartcore.svg
@@ -76,5 +76,5 @@
y="81.876823"
x="91.861809"
id="tspan842"
- sodipodi:role="line">SmartCore
+ sodipodi:role="line">smartcore
diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs
index db062f9..011a9cc 100644
--- a/src/algorithm/neighbour/cover_tree.rs
+++ b/src/algorithm/neighbour/cover_tree.rs
@@ -64,7 +64,7 @@ struct Node {
max_dist: f64,
parent_dist: f64,
children: Vec,
- scale: i64,
+ _scale: i64,
}
#[derive(Debug)]
@@ -84,7 +84,7 @@ impl> CoverTree {
max_dist: 0f64,
parent_dist: 0f64,
children: Vec::new(),
- scale: 0,
+ _scale: 0,
};
let mut tree = CoverTree {
base,
@@ -245,7 +245,7 @@ impl> CoverTree {
max_dist: 0f64,
parent_dist: 0f64,
children: Vec::new(),
- scale: 100,
+ _scale: 100,
}
}
@@ -306,7 +306,7 @@ impl> CoverTree {
max_dist: 0f64,
parent_dist: 0f64,
children,
- scale: 100,
+ _scale: 100,
}
} else {
let mut far: Vec = Vec::new();
@@ -375,7 +375,7 @@ impl> CoverTree {
max_dist: self.max(consumed_set),
parent_dist: 0f64,
children,
- scale: (top_scale - max_scale),
+ _scale: (top_scale - max_scale),
}
}
}
diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs
index 9322d65..18f8308 100644
--- a/src/cluster/kmeans.rs
+++ b/src/cluster/kmeans.rs
@@ -11,7 +11,7 @@
//! these re-calculated centroids becoming the new centers of their respective clusters. Next all instances of the training set are re-assigned to their closest cluster again.
//! This iterative process continues until convergence is achieved and the clusters are considered settled.
//!
-//! Initial choice of K data points is very important and has big effect on performance of the algorithm. SmartCore uses k-means++ algorithm to initialize cluster centers.
+//! Initial choice of K data points is very important and has a big effect on performance of the algorithm. `smartcore` uses the k-means++ algorithm to initialize cluster centers.
//!
//! Example:
//!
@@ -74,7 +74,7 @@ pub struct KMeans, Y: Array1> {
k: usize,
_y: Vec,
size: Vec,
- distortion: f64,
+ _distortion: f64,
centroids: Vec>,
_phantom_tx: PhantomData,
_phantom_ty: PhantomData,
@@ -313,7 +313,7 @@ impl, Y: Array1> KMeans
k: parameters.k,
_y: y,
size,
- distortion,
+ _distortion: distortion,
centroids,
_phantom_tx: PhantomData,
_phantom_ty: PhantomData,
@@ -470,7 +470,7 @@ mod tests {
wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
- fn fit_predict_iris() {
+ fn fit_predict() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2],
diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs
index 5b32d02..855b288 100644
--- a/src/dataset/mod.rs
+++ b/src/dataset/mod.rs
@@ -1,6 +1,6 @@
//! Datasets
//!
-//! In this module you will find small datasets that are used in SmartCore mostly for demonstration purposes.
+//! In this module you will find small datasets that are used in `smartcore` mostly for demonstration purposes.
pub mod boston;
pub mod breast_cancer;
pub mod diabetes;
diff --git a/src/ensemble/mod.rs b/src/ensemble/mod.rs
index 1ddf4b4..8cebd5c 100644
--- a/src/ensemble/mod.rs
+++ b/src/ensemble/mod.rs
@@ -7,7 +7,7 @@
//! set and then aggregate their individual predictions to form a final prediction. In classification setting the overall prediction is the most commonly
//! occurring majority class among the individual predictions.
//!
-//! In SmartCore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html).
+//! In `smartcore` you will find an implementation of RandomForest - a popular averaging algorithm based on randomized [decision trees](../tree/index.html).
//! Random forests provide an improvement over bagged trees by way of a small tweak that decorrelates the trees. As in bagging, we build a number of
//! decision trees on bootstrapped training samples. But when building these decision trees, each time a split in a tree is considered,
//! a random sample of _m_ predictors is chosen as split candidates from the full set of _p_ predictors.
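+//!
+//! A minimal usage sketch (parameters left at their defaults):
+//! ```
+//! use smartcore::ensemble::random_forest_classifier::RandomForestClassifier;
+//! use smartcore::linalg::basic::matrix::DenseMatrix;
+//!
+//! // four toy samples with two features each
+//! let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.]]);
+//! let y = vec![0, 0, 1, 1];
+//! let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap();
+//! let y_hat = forest.predict(&x).unwrap(); // predicted class labels
+//! ```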
diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs
index d01acef..8ea174b 100644
--- a/src/ensemble/random_forest_classifier.rs
+++ b/src/ensemble/random_forest_classifier.rs
@@ -104,7 +104,6 @@ pub struct RandomForestClassifier<
X: Array2,
Y: Array1,
> {
- parameters: Option,
trees: Option>>,
classes: Option>,
samples: Option>>,
@@ -198,7 +197,6 @@ impl, Y:
{
fn new() -> Self {
Self {
- parameters: Option::None,
trees: Option::None,
classes: Option::None,
samples: Option::None,
@@ -501,7 +499,6 @@ impl, Y: Array1,
Y: Array1,
> {
- parameters: Option,
trees: Option>>,
samples: Option>>,
}
@@ -177,7 +176,6 @@ impl, Y: Array1
{
fn new() -> Self {
Self {
- parameters: Option::None,
trees: Option::None,
samples: Option::None,
}
@@ -434,7 +432,6 @@ impl, Y: Array1
}
Ok(RandomForestRegressor {
- parameters: Some(parameters),
trees: Some(trees),
samples: maybe_all_samples,
})
diff --git a/src/lib.rs b/src/lib.rs
index a955de2..03bfc03 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,25 +8,38 @@
#![warn(missing_docs)]
#![warn(rustdoc::missing_doc_code_examples)]
-//! # SmartCore
+//! # smartcore
//!
-//! Welcome to SmartCore, machine learning in Rust!
+//! Welcome to `smartcore`, machine learning in Rust!
//!
-//! SmartCore features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
+//! `smartcore` features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
//! as well as tools for model selection and model evaluation.
//!
-//! SmartCore provides its own traits system that extends Rust standard library, to deal with linear algebra and common
+//! `smartcore` provides its own traits system that extends Rust standard library, to deal with linear algebra and common
//! computational models. Its API is designed using well recognizable patterns. Extra features (like support for [ndarray](https://docs.rs/ndarray)
//! structures) is available via optional features.
//!
//! ## Getting Started
//!
-//! To start using SmartCore simply add the following to your Cargo.toml file:
+//! To start using the latest stable version of `smartcore`, simply add the following to your `Cargo.toml` file:
+//! ```ignore
+//! [dependencies]
+//! smartcore = "*"
+//! ```
+//!
+//! To use the `smartcore` development version with the latest unstable additions:
//! ```ignore
//! [dependencies]
//! smartcore = { git = "https://github.com/smartcorelib/smartcore", branch = "development" }
//! ```
//!
+//! Different features can be added to the base library; for example, to add the sample datasets:
+//! ```ignore
+//! [dependencies]
+//! smartcore = { git = "https://github.com/smartcorelib/smartcore", features = ["datasets"] }
+//! ```
+//! Check `smartcore`'s `Cargo.toml` for available features.
+//!
//! ## Using Jupyter
//! For quick introduction, Jupyter Notebooks are available [here](https://github.com/smartcorelib/smartcore-jupyter/tree/main/notebooks).
//! You can set up a local environment to run Rust notebooks using [EVCXR](https://github.com/google/evcxr)
@@ -37,7 +50,7 @@
//! For example, you can use this code to fit a [K Nearest Neighbors classifier](neighbors/knn_classifier/index.html) to a dataset that is defined as standard Rust vector:
//!
//! ```
-//! // DenseMatrix defenition
+//! // DenseMatrix definition
//! use smartcore::linalg::basic::matrix::DenseMatrix;
//! // KNNClassifier
//! use smartcore::neighbors::knn_classifier::*;
@@ -62,7 +75,9 @@
//! ```
//!
//! ## Overview
-//! All machine learning algorithms in SmartCore are grouped into these broad categories:
+//!
+//! ### Supported algorithms
+//! All machine learning algorithms are grouped into these broad categories:
//! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data.
//! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
//! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables
@@ -71,11 +86,14 @@
//! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression
//! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem
//! * [SVM](svm/index.html), support vector machines
+//!
+//! ### Linear Algebra traits system
+//! For an introduction to `smartcore`'s traits system, see [this notebook](https://github.com/smartcorelib/smartcore-jupyter/blob/5523993c53c6ec1fd72eea130ef4e7883121c1ea/notebooks/01-A-little-bit-about-numbers.ipynb) and the short sketch below.
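+//!
+//! As a minimal sketch of the idea, algorithms are written against traits such as `Array2`,
+//! so any conforming backing structure can be plugged in (`DenseMatrix` is just one implementor):
+//! ```
+//! use smartcore::linalg::basic::arrays::{Array, Array2};
+//! use smartcore::linalg::basic::matrix::DenseMatrix;
+//!
+//! // a 2x2 matrix built from nested slices
+//! let m = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.]]);
+//! assert_eq!(m.shape(), (2, 2)); // shape() comes from the Array trait
+//! ```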
/// Foundamental numbers traits
pub mod numbers;
-/// Various algorithms and helper methods that are used elsewhere in SmartCore
+/// Various algorithms and helper methods that are used elsewhere in `smartcore`
pub mod algorithm;
pub mod api;
@@ -89,7 +107,7 @@ pub mod decomposition;
/// Ensemble methods, including Random Forest classifier and regressor
pub mod ensemble;
pub mod error;
-/// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms
+/// Diverse collection of linear algebra abstractions and methods that power `smartcore` algorithms
pub mod linalg;
/// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables.
pub mod linear;
@@ -105,7 +123,8 @@ pub mod neighbors;
pub mod optimization;
/// Preprocessing utilities
pub mod preprocessing;
-/// Reading in data from serialized foramts
+/// Reading in data from serialized formats
+#[cfg(feature = "serde")]
pub mod readers;
/// Support Vector Machines
pub mod svm;
diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs
index 1f7d540..a5c7699 100644
--- a/src/linear/linear_regression.rs
+++ b/src/linear/linear_regression.rs
@@ -12,7 +12,7 @@
//! \\[\hat{\beta} = (X^TX)^{-1}X^Ty \\]
//!
//! the \\((X^TX)^{-1}\\) term is both computationally expensive and numerically unstable. An alternative approach is to use a matrix decomposition to avoid this operation.
-//! SmartCore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
+//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
//! The QR decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly,
//! but does not work for all data matrices. Unlike the QR decomposition, all matrices have an SVD decomposition.
//!
@@ -113,7 +113,6 @@ pub struct LinearRegression<
> {
coefficients: Option,
intercept: Option,
- solver: LinearRegressionSolverName,
_phantom_ty: PhantomData,
_phantom_y: PhantomData,
}
@@ -210,7 +209,6 @@ impl<
Self {
coefficients: Option::None,
intercept: Option::None,
- solver: LinearRegressionParameters::default().solver,
_phantom_ty: PhantomData,
_phantom_y: PhantomData,
}
@@ -276,7 +274,6 @@ impl<
Ok(LinearRegression {
intercept: Some(*w.get((num_attributes, 0))),
coefficients: Some(weights),
- solver: parameters.solver,
_phantom_ty: PhantomData,
_phantom_y: PhantomData,
})
diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs
index 7dd269c..8bf65bf 100644
--- a/src/linear/logistic_regression.rs
+++ b/src/linear/logistic_regression.rs
@@ -5,7 +5,7 @@
//!
//! \\[ Pr(y=1) \approx \frac{e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}}{1 + e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}} \\]
//!
-//! SmartCore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\)
+//! `smartcore` uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\)
//!
//! Example:
//!
diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs
index 914afc2..6bd5595 100644
--- a/src/linear/ridge_regression.rs
+++ b/src/linear/ridge_regression.rs
@@ -12,7 +12,7 @@
//! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates.
//! However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero.
//!
-//! SmartCore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
+//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
//! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly,
//! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition.
//!
@@ -197,7 +197,6 @@ pub struct RidgeRegression<
> {
coefficients: Option,
intercept: Option,
- solver: Option,
_phantom_ty: PhantomData,
_phantom_y: PhantomData,
}
@@ -259,7 +258,6 @@ impl<
Self {
coefficients: Option::None,
intercept: Option::None,
- solver: Option::None,
_phantom_ty: PhantomData,
_phantom_y: PhantomData,
}
@@ -367,7 +365,6 @@ impl<
Ok(RidgeRegression {
intercept: Some(b),
coefficients: Some(w),
- solver: Some(parameters.solver),
_phantom_ty: PhantomData,
_phantom_y: PhantomData,
})
diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs
index ecaf646..0a7ddf4 100644
--- a/src/metrics/auc.rs
+++ b/src/metrics/auc.rs
@@ -2,7 +2,7 @@
//! Computes the area under the receiver operating characteristic (ROC) curve that is equal to the probability that a classifier will rank a
//! randomly chosen positive instance higher than a randomly chosen negative one.
//!
-//! SmartCore calculates ROC AUC from Wilcoxon or Mann-Whitney U test.
+//! `smartcore` calculates ROC AUC from Wilcoxon or Mann-Whitney U test.
//!
//! Example:
//! ```
diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs
index 06d44a1..c7e1be3 100644
--- a/src/metrics/mod.rs
+++ b/src/metrics/mod.rs
@@ -4,7 +4,7 @@
//! In a feedback loop you build your model first, then you get feedback from metrics, improve it and repeat until your model achieve desirable performance.
//! Evaluation metrics helps to explain the performance of a model and compare models based on an objective criterion.
//!
-//! Choosing the right metric is crucial while evaluating machine learning models. In SmartCore you will find metrics for these classes of ML models:
+//! Choosing the right metric is crucial while evaluating machine learning models. In `smartcore` you will find metrics for these classes of ML models:
//!
//! * [Classification metrics](struct.ClassificationMetrics.html)
//! * [Regression metrics](struct.RegressionMetrics.html)
diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs
index b8e4e7f..222b9d7 100644
--- a/src/model_selection/mod.rs
+++ b/src/model_selection/mod.rs
@@ -7,7 +7,7 @@
//! Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for
//! the data.
//!
-//! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
+//! In `smartcore` a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
//!
//! ```
//! use smartcore::linalg::basic::matrix::DenseMatrix;
diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs
index 67d094a..882ac55 100644
--- a/src/neighbors/knn_classifier.rs
+++ b/src/neighbors/knn_classifier.rs
@@ -1,6 +1,6 @@
//! # K Nearest Neighbors Classifier
//!
-//! SmartCore relies on 2 backend algorithms to speedup KNN queries:
+//! `smartcore` relies on 2 backend algorithms to speedup KNN queries:
//! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
//! * [`CoverTree`](../../algorithm/neighbour/cover_tree/index.html)
//!
diff --git a/src/numbers/realnum.rs b/src/numbers/realnum.rs
index 8c60e47..f4d9aec 100644
--- a/src/numbers/realnum.rs
+++ b/src/numbers/realnum.rs
@@ -1,5 +1,5 @@
//! # Real Number
-//! Most algorithms in SmartCore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
+//! Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
//! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module.
use num_traits::Float;
diff --git a/src/rand_custom.rs b/src/rand_custom.rs
index 15f9e73..b22390e 100644
--- a/src/rand_custom.rs
+++ b/src/rand_custom.rs
@@ -1,19 +1,23 @@
-#[cfg(not(feature = "std"))]
-pub(crate) use rand::rngs::SmallRng as RngImpl;
-#[cfg(feature = "std")]
-pub(crate) use rand::rngs::StdRng as RngImpl;
+#[cfg(not(feature = "std_rand"))]
+pub use rand::rngs::SmallRng as RngImpl;
+#[cfg(feature = "std_rand")]
+pub use rand::rngs::StdRng as RngImpl;
use rand::SeedableRng;
-pub(crate) fn get_rng_impl(seed: Option) -> RngImpl {
+/// Custom switch for random functions
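+///
+/// A minimal usage sketch (assuming this module stays exported as `rand_custom`):
+/// ```ignore
+/// use rand::Rng;
+/// use smartcore::rand_custom::get_rng_impl;
+///
+/// // the same seed yields the same sequence, whichever RNG backend was compiled in
+/// let mut rng = get_rng_impl(Some(42));
+/// let value: u64 = rng.gen();
+/// ```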
+pub fn get_rng_impl(seed: Option<u64>) -> RngImpl {
match seed {
Some(seed) => RngImpl::seed_from_u64(seed),
None => {
cfg_if::cfg_if! {
- if #[cfg(feature = "std")] {
+ if #[cfg(feature = "std_rand")] {
use rand::RngCore;
RngImpl::seed_from_u64(rand::thread_rng().next_u64())
} else {
- panic!("seed number needed for non-std build");
+ // no std_rand feature: seed from the getrandom entropy source
+ let mut buf = [0u8; 8];
+ getrandom::getrandom(&mut buf).unwrap();
+ RngImpl::seed_from_u64(u64::from_le_bytes(buf))
}
}
}
diff --git a/src/svm/mod.rs b/src/svm/mod.rs
index a30fe87..ef0f003 100644
--- a/src/svm/mod.rs
+++ b/src/svm/mod.rs
@@ -9,7 +9,7 @@
//! SVM is memory efficient since it uses only a subset of training data to find a decision boundary. This subset is called support vectors.
//!
//! In SVM distance between a data point and the support vectors is defined by the kernel function.
-//! SmartCore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel.
+//! `smartcore` supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel.
//! Building a new kernel requires a good mathematical understanding of the [Mercer theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem)
//! that gives necessary and sufficient condition for a function to be a kernel function.
//!
diff --git a/src/svm/svc.rs b/src/svm/svc.rs
index 9cb140d..74998f5 100644
--- a/src/svm/svc.rs
+++ b/src/svm/svc.rs
@@ -20,7 +20,7 @@
//!
//! Where \\( m \\) is a number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is a decision boundary.
//!
-//! To solve this optimization problem, SmartCore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
+//! To solve this optimization problem, `smartcore` uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
//! The optimizer reaches accuracies similar to that of a real SVM after performing two passes through the training examples. You can choose the number of passes
//! through the data that the algorithm takes by changing the `epoch` parameter of the classifier.
//!
@@ -934,8 +934,7 @@ mod tests {
use super::*;
use crate::linalg::basic::matrix::DenseMatrix;
use crate::metrics::accuracy;
- #[cfg(feature = "serde")]
- use crate::svm::*;
+ use crate::svm::Kernels;
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
diff --git a/src/svm/svr.rs b/src/svm/svr.rs
index 7a39a56..8d49525 100644
--- a/src/svm/svr.rs
+++ b/src/svm/svr.rs
@@ -596,7 +596,6 @@ mod tests {
use super::*;
use crate::linalg::basic::matrix::DenseMatrix;
use crate::metrics::mean_squared_error;
- #[cfg(feature = "serde")]
use crate::svm::Kernels;
// #[test]
@@ -617,7 +616,6 @@ mod tests {
// assert!(iter.next().is_none());
// }
- //TODO: had to disable this test as it runs for too long
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test::wasm_bindgen_test
diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs
index 6341ab4..cbce14e 100644
--- a/src/tree/decision_tree_classifier.rs
+++ b/src/tree/decision_tree_classifier.rs
@@ -163,7 +163,6 @@ impl Default for SplitCriterion {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
struct Node {
- index: usize,
output: usize,
split_feature: usize,
split_value: Option,
@@ -406,9 +405,8 @@ impl Default for DecisionTreeClassifierSearchParameters {
}
impl Node {
- fn new(index: usize, output: usize) -> Self {
+ fn new(output: usize) -> Self {
Node {
- index,
output,
split_feature: 0,
split_value: Option::None,
@@ -582,7 +580,7 @@ impl, Y: Array1>
count[yi[i]] += samples[i];
}
- let root = Node::new(0, which_max(&count));
+ let root = Node::new(which_max(&count));
change_nodes.push(root);
let mut order: Vec> = Vec::new();
@@ -831,11 +829,9 @@ impl, Y: Array1>
let true_child_idx = self.nodes().len();
- self.nodes
- .push(Node::new(true_child_idx, visitor.true_child_output));
+ self.nodes.push(Node::new(visitor.true_child_output));
let false_child_idx = self.nodes().len();
- self.nodes
- .push(Node::new(false_child_idx, visitor.false_child_output));
+ self.nodes.push(Node::new(visitor.false_child_output));
self.nodes[visitor.node].true_child = Some(true_child_idx);
self.nodes[visitor.node].false_child = Some(false_child_idx);
@@ -923,6 +919,7 @@ mod tests {
wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
+ #[cfg(feature = "datasets")]
fn fit_predict_iris() {
let x: DenseMatrix = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs
index 12ea978..0146cbc 100644
--- a/src/tree/decision_tree_regressor.rs
+++ b/src/tree/decision_tree_regressor.rs
@@ -11,7 +11,7 @@
//!
//! where \\(\hat{y}_{Rk}\\) is the mean response for the training observations withing region _k_.
//!
-//! SmartCore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space
+//! `smartcore` uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space
//! one predictor at a time. At each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better
//! tree in some future step.
//!
@@ -128,7 +128,6 @@ impl, Y: Array1>
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
struct Node {
- index: usize,
output: f64,
split_feature: usize,
split_value: Option,
@@ -299,9 +298,8 @@ impl Default for DecisionTreeRegressorSearchParameters {
}
impl Node {
- fn new(index: usize, output: f64) -> Self {
+ fn new(output: f64) -> Self {
Node {
- index,
output,
split_feature: 0,
split_value: Option::None,
@@ -450,7 +448,7 @@ impl, Y: Array1>
sum += *sample_i as f64 * y_m.get(i).to_f64().unwrap();
}
- let root = Node::new(0, sum / (n as f64));
+ let root = Node::new(sum / (n as f64));
nodes.push(root);
let mut order: Vec> = Vec::new();
@@ -662,11 +660,9 @@ impl, Y: Array1>
let true_child_idx = self.nodes().len();
- self.nodes
- .push(Node::new(true_child_idx, visitor.true_child_output));
+ self.nodes.push(Node::new(visitor.true_child_output));
let false_child_idx = self.nodes().len();
- self.nodes
- .push(Node::new(false_child_idx, visitor.false_child_output));
+ self.nodes.push(Node::new(visitor.false_child_output));
self.nodes[visitor.node].true_child = Some(true_child_idx);
self.nodes[visitor.node].false_child = Some(false_child_idx);
diff --git a/src/tree/mod.rs b/src/tree/mod.rs
index 700dc76..340b0a8 100644
--- a/src/tree/mod.rs
+++ b/src/tree/mod.rs
@@ -9,7 +9,7 @@
//! Decision trees suffer from high variance and often does not deliver best prediction accuracy when compared to other supervised learning approaches, such as linear and logistic regression.
//! Hence some techniques such as [Random Forests](../ensemble/index.html) use more than one decision tree to improve performance of the algorithm.
//!
-//! SmartCore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees.
+//! `smartcore` uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees.
//!
//! ## References:
//!