Release 0.3
This commit is contained in:
committed by
morenol
parent
0dc97a4e9b
commit
2df0795be9
@@ -1,4 +1,7 @@
|
||||
# Smartcore: Introduction to modules
|
||||
# smartcore: Introduction to modules
|
||||
|
||||
Important source of information:
|
||||
* [Rust API guidelines](https://rust-lang.github.io/api-guidelines/about.html)
|
||||
|
||||
## Walkthrough: traits system and basic structures
|
||||
|
||||
|
||||
+4
-3
@@ -4,13 +4,14 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [0.3] - 2022-11
|
||||
|
||||
## Added
|
||||
- WARNING: Breaking changes!
|
||||
- Seeds to multiple algorithms that depend on random number generation.
|
||||
- Added feature `js` to use WASM in browser
|
||||
- Drop `nalgebra-bindings` feature
|
||||
- Complete refactoring with *extensive API changes* that includes:
|
||||
- Complete refactoring with **extensive API changes** that includes:
|
||||
* moving to a new traits system, less structs more traits
|
||||
* adapting all the modules to the new traits system
|
||||
* moving towards Rust 2021, in particular the use of `dyn` and `as_ref`
|
||||
@@ -19,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
## BREAKING CHANGE
|
||||
- Added a new parameter to `train_test_split` to define the seed.
|
||||
|
||||
## [0.2.1] - 2022-05-10
|
||||
## [0.2.1] - 2021-05-10
|
||||
|
||||
## Added
|
||||
- L2 regularization penalty to the Logistic Regression
|
||||
|
||||
+11
-5
@@ -1,9 +1,9 @@
|
||||
[package]
|
||||
name = "smartcore"
|
||||
description = "The most advanced machine learning library in rust."
|
||||
description = "Machine Learning in Rust."
|
||||
homepage = "https://smartcorelib.org"
|
||||
version = "0.4.0"
|
||||
authors = ["SmartCore Developers"]
|
||||
version = "0.3.0"
|
||||
authors = ["smartcore Developers"]
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
documentation = "https://docs.rs/smartcore"
|
||||
@@ -11,6 +11,12 @@ repository = "https://github.com/smartcorelib/smartcore"
|
||||
readme = "README.md"
|
||||
keywords = ["machine-learning", "statistical", "ai", "optimization", "linear-algebra"]
|
||||
categories = ["science"]
|
||||
exclude = [
|
||||
".github",
|
||||
".gitignore",
|
||||
"smartcore.iml",
|
||||
"smartcore.svg",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
approx = "0.5.1"
|
||||
@@ -23,10 +29,10 @@ rand_distr = { version = "0.4", optional = true }
|
||||
serde = { version = "1", features = ["derive"], optional = true }
|
||||
|
||||
[features]
|
||||
default = ["serde", "datasets"]
|
||||
default = []
|
||||
serde = ["dep:serde"]
|
||||
ndarray-bindings = ["dep:ndarray"]
|
||||
datasets = ["dep:rand_distr", "std"]
|
||||
datasets = ["dep:rand_distr", "std", "serde"]
|
||||
std = ["rand/std_rng", "rand/std"]
|
||||
# wasm32 only
|
||||
js = ["getrandom/js"]
|
||||
|
||||
@@ -186,7 +186,7 @@
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2019-present at SmartCore developers (smartcorelib.org)
|
||||
Copyright 2019-present at smartcore developers (smartcorelib.org)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
<p align="center">
|
||||
<a href="https://smartcorelib.org">
|
||||
<img src="smartcore.svg" width="450" alt="SmartCore">
|
||||
<img src="smartcore.svg" width="450" alt="smartcore">
|
||||
</a>
|
||||
</p>
|
||||
<p align = "center">
|
||||
@@ -18,4 +18,4 @@
|
||||
-----
|
||||
[](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
|
||||
|
||||
To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
|
||||
To start getting familiar with the new smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
|
||||
|
||||
+1
-1
@@ -76,5 +76,5 @@
|
||||
y="81.876823"
|
||||
x="91.861809"
|
||||
id="tspan842"
|
||||
sodipodi:role="line">SmartCore</tspan></text>
|
||||
sodipodi:role="line">smartcore</tspan></text>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.5 KiB |
@@ -64,7 +64,7 @@ struct Node {
|
||||
max_dist: f64,
|
||||
parent_dist: f64,
|
||||
children: Vec<Node>,
|
||||
scale: i64,
|
||||
_scale: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -84,7 +84,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
||||
max_dist: 0f64,
|
||||
parent_dist: 0f64,
|
||||
children: Vec::new(),
|
||||
scale: 0,
|
||||
_scale: 0,
|
||||
};
|
||||
let mut tree = CoverTree {
|
||||
base,
|
||||
@@ -245,7 +245,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
||||
max_dist: 0f64,
|
||||
parent_dist: 0f64,
|
||||
children: Vec::new(),
|
||||
scale: 100,
|
||||
_scale: 100,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -306,7 +306,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
||||
max_dist: 0f64,
|
||||
parent_dist: 0f64,
|
||||
children,
|
||||
scale: 100,
|
||||
_scale: 100,
|
||||
}
|
||||
} else {
|
||||
let mut far: Vec<DistanceSet> = Vec::new();
|
||||
@@ -375,7 +375,7 @@ impl<T: Debug + PartialEq, D: Distance<T>> CoverTree<T, D> {
|
||||
max_dist: self.max(consumed_set),
|
||||
parent_dist: 0f64,
|
||||
children,
|
||||
scale: (top_scale - max_scale),
|
||||
_scale: (top_scale - max_scale),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//! these re-calculated centroids becoming the new centers of their respective clusters. Next all instances of the training set are re-assigned to their closest cluster again.
|
||||
//! This iterative process continues until convergence is achieved and the clusters are considered settled.
|
||||
//!
|
||||
//! Initial choice of K data points is very important and has big effect on performance of the algorithm. SmartCore uses k-means++ algorithm to initialize cluster centers.
|
||||
//! Initial choice of K data points is very important and has big effect on performance of the algorithm. smartcore uses k-means++ algorithm to initialize cluster centers.
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
@@ -74,7 +74,7 @@ pub struct KMeans<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> {
|
||||
k: usize,
|
||||
_y: Vec<usize>,
|
||||
size: Vec<usize>,
|
||||
distortion: f64,
|
||||
_distortion: f64,
|
||||
centroids: Vec<Vec<f64>>,
|
||||
_phantom_tx: PhantomData<TX>,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
@@ -313,7 +313,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
|
||||
k: parameters.k,
|
||||
_y: y,
|
||||
size,
|
||||
distortion,
|
||||
_distortion: distortion,
|
||||
centroids,
|
||||
_phantom_tx: PhantomData,
|
||||
_phantom_ty: PhantomData,
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
//! Datasets
|
||||
//!
|
||||
//! In this module you will find small datasets that are used in SmartCore mostly for demonstration purposes.
|
||||
//! In this module you will find small datasets that are used in smartcore mostly for demonstration purposes.
|
||||
pub mod boston;
|
||||
pub mod breast_cancer;
|
||||
pub mod diabetes;
|
||||
|
||||
+1
-1
@@ -7,7 +7,7 @@
|
||||
//! set and then aggregate their individual predictions to form a final prediction. In classification setting the overall prediction is the most commonly
|
||||
//! occurring majority class among the individual predictions.
|
||||
//!
|
||||
//! In SmartCore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html).
|
||||
//! In smartcore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html).
|
||||
//! Random forests provide an improvement over bagged trees by way of a small tweak that decorrelates the trees. As in bagging, we build a number of
|
||||
//! decision trees on bootstrapped training samples. But when building these decision trees, each time a split in a tree is considered,
|
||||
//! a random sample of _m_ predictors is chosen as split candidates from the full set of _p_ predictors.
|
||||
|
||||
@@ -104,7 +104,6 @@ pub struct RandomForestClassifier<
|
||||
X: Array2<TX>,
|
||||
Y: Array1<TY>,
|
||||
> {
|
||||
parameters: Option<RandomForestClassifierParameters>,
|
||||
trees: Option<Vec<DecisionTreeClassifier<TX, TY, X, Y>>>,
|
||||
classes: Option<Vec<TY>>,
|
||||
samples: Option<Vec<Vec<bool>>>,
|
||||
@@ -198,7 +197,6 @@ impl<TX: Number + FloatNumber + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y:
|
||||
{
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
parameters: Option::None,
|
||||
trees: Option::None,
|
||||
classes: Option::None,
|
||||
samples: Option::None,
|
||||
@@ -501,7 +499,6 @@ impl<TX: FloatNumber + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY
|
||||
}
|
||||
|
||||
Ok(RandomForestClassifier {
|
||||
parameters: Some(parameters),
|
||||
trees: Some(trees),
|
||||
classes: Some(classes),
|
||||
samples: maybe_all_samples,
|
||||
|
||||
@@ -98,7 +98,6 @@ pub struct RandomForestRegressor<
|
||||
X: Array2<TX>,
|
||||
Y: Array1<TY>,
|
||||
> {
|
||||
parameters: Option<RandomForestRegressorParameters>,
|
||||
trees: Option<Vec<DecisionTreeRegressor<TX, TY, X, Y>>>,
|
||||
samples: Option<Vec<Vec<bool>>>,
|
||||
}
|
||||
@@ -177,7 +176,6 @@ impl<TX: Number + FloatNumber + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1
|
||||
{
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
parameters: Option::None,
|
||||
trees: Option::None,
|
||||
samples: Option::None,
|
||||
}
|
||||
@@ -434,7 +432,6 @@ impl<TX: Number + FloatNumber + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1
|
||||
}
|
||||
|
||||
Ok(RandomForestRegressor {
|
||||
parameters: Some(parameters),
|
||||
trees: Some(trees),
|
||||
samples: maybe_all_samples,
|
||||
})
|
||||
|
||||
+29
-10
@@ -8,25 +8,38 @@
|
||||
#![warn(missing_docs)]
|
||||
#![warn(rustdoc::missing_doc_code_examples)]
|
||||
|
||||
//! # SmartCore
|
||||
//! # smartcore
|
||||
//!
|
||||
//! Welcome to SmartCore, machine learning in Rust!
|
||||
//! Welcome to smartcore, machine learning in Rust!
|
||||
//!
|
||||
//! SmartCore features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
|
||||
//! `smartcore` features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
|
||||
//! as well as tools for model selection and model evaluation.
|
||||
//!
|
||||
//! SmartCore provides its own traits system that extends Rust standard library, to deal with linear algebra and common
|
||||
//! `smartcore` provides its own traits system that extends Rust standard library, to deal with linear algebra and common
|
||||
//! computational models. Its API is designed using well recognizable patterns. Extra features (like support for [ndarray](https://docs.rs/ndarray)
|
||||
//! structures) are available via optional features.
|
||||
//!
|
||||
//! ## Getting Started
|
||||
//!
|
||||
//! To start using SmartCore simply add the following to your Cargo.toml file:
|
||||
//! To start using `smartcore` latest stable version simply add the following to your `Cargo.toml` file:
|
||||
//! ```ignore
|
||||
//! [dependencies]
|
||||
//! smartcore = "*"
|
||||
//! ```
|
||||
//!
|
||||
//! To start using smartcore development version with latest unstable additions:
|
||||
//! ```ignore
|
||||
//! [dependencies]
|
||||
//! smartcore = { git = "https://github.com/smartcorelib/smartcore", branch = "development" }
|
||||
//! ```
|
||||
//!
|
||||
//! There are different features that can be added to the base library, for example to add sample datasets:
|
||||
//! ```ignore
|
||||
//! [dependencies]
|
||||
//! smartcore = { git = "https://github.com/smartcorelib/smartcore", features = ["datasets"] }
|
||||
//! ```
|
||||
//! Check `smartcore`'s `Cargo.toml` for available features.
|
||||
//!
|
||||
//! ## Using Jupyter
|
||||
//! For quick introduction, Jupyter Notebooks are available [here](https://github.com/smartcorelib/smartcore-jupyter/tree/main/notebooks).
|
||||
//! You can set up a local environment to run Rust notebooks using [EVCXR](https://github.com/google/evcxr)
|
||||
@@ -37,7 +50,7 @@
|
||||
//! For example, you can use this code to fit a [K Nearest Neighbors classifier](neighbors/knn_classifier/index.html) to a dataset that is defined as standard Rust vector:
|
||||
//!
|
||||
//! ```
|
||||
//! // DenseMatrix defenition
|
||||
//! // DenseMatrix definition
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! // KNNClassifier
|
||||
//! use smartcore::neighbors::knn_classifier::*;
|
||||
@@ -62,7 +75,9 @@
|
||||
//! ```
|
||||
//!
|
||||
//! ## Overview
|
||||
//! All machine learning algorithms in SmartCore are grouped into these broad categories:
|
||||
//!
|
||||
//! ### Supported algorithms
|
||||
//! All machine learning algorithms are grouped into these broad categories:
|
||||
//! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data.
|
||||
//! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
|
||||
//! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables
|
||||
@@ -71,11 +86,14 @@
|
||||
//! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression
|
||||
//! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem
|
||||
//! * [SVM](svm/index.html), support vector machines
|
||||
//!
|
||||
//! ### Linear Algebra traits system
|
||||
//! For an introduction to `smartcore`'s traits system see [this notebook](https://github.com/smartcorelib/smartcore-jupyter/blob/5523993c53c6ec1fd72eea130ef4e7883121c1ea/notebooks/01-A-little-bit-about-numbers.ipynb)
|
||||
|
||||
/// Fundamental numbers traits
|
||||
pub mod numbers;
|
||||
|
||||
/// Various algorithms and helper methods that are used elsewhere in SmartCore
|
||||
/// Various algorithms and helper methods that are used elsewhere in smartcore
|
||||
pub mod algorithm;
|
||||
pub mod api;
|
||||
|
||||
@@ -89,7 +107,7 @@ pub mod decomposition;
|
||||
/// Ensemble methods, including Random Forest classifier and regressor
|
||||
pub mod ensemble;
|
||||
pub mod error;
|
||||
/// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms
|
||||
/// Diverse collection of linear algebra abstractions and methods that power smartcore algorithms
|
||||
pub mod linalg;
|
||||
/// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables.
|
||||
pub mod linear;
|
||||
@@ -105,7 +123,8 @@ pub mod neighbors;
|
||||
pub mod optimization;
|
||||
/// Preprocessing utilities
|
||||
pub mod preprocessing;
|
||||
/// Reading in data from serialized foramts
|
||||
/// Reading in data from serialized formats
|
||||
#[cfg(feature = "serde")]
|
||||
pub mod readers;
|
||||
/// Support Vector Machines
|
||||
pub mod svm;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
//! \\[\hat{\beta} = (X^TX)^{-1}X^Ty \\]
|
||||
//!
|
||||
//! the \\((X^TX)^{-1}\\) term is both computationally expensive and numerically unstable. An alternative approach is to use a matrix decomposition to avoid this operation.
|
||||
//! SmartCore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
|
||||
//! smartcore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
|
||||
//! The QR decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly,
|
||||
//! but does not work for all data matrices. Unlike the QR decomposition, all matrices have an SVD decomposition.
|
||||
//!
|
||||
@@ -113,7 +113,6 @@ pub struct LinearRegression<
|
||||
> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<TX>,
|
||||
solver: LinearRegressionSolverName,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
@@ -210,7 +209,6 @@ impl<
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
solver: LinearRegressionParameters::default().solver,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
@@ -276,7 +274,6 @@ impl<
|
||||
Ok(LinearRegression {
|
||||
intercept: Some(*w.get((num_attributes, 0))),
|
||||
coefficients: Some(weights),
|
||||
solver: parameters.solver,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
//!
|
||||
//! \\[ Pr(y=1) \approx \frac{e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}}{1 + e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}} \\]
|
||||
//!
|
||||
//! SmartCore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\)
|
||||
//! smartcore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\)
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
//! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates.
|
||||
//! However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero.
|
||||
//!
|
||||
//! SmartCore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
|
||||
//! smartcore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
|
||||
//! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly,
|
||||
//! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition.
|
||||
//!
|
||||
@@ -197,7 +197,6 @@ pub struct RidgeRegression<
|
||||
> {
|
||||
coefficients: Option<X>,
|
||||
intercept: Option<TX>,
|
||||
solver: Option<RidgeRegressionSolverName>,
|
||||
_phantom_ty: PhantomData<TY>,
|
||||
_phantom_y: PhantomData<Y>,
|
||||
}
|
||||
@@ -259,7 +258,6 @@ impl<
|
||||
Self {
|
||||
coefficients: Option::None,
|
||||
intercept: Option::None,
|
||||
solver: Option::None,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
}
|
||||
@@ -367,7 +365,6 @@ impl<
|
||||
Ok(RidgeRegression {
|
||||
intercept: Some(b),
|
||||
coefficients: Some(w),
|
||||
solver: Some(parameters.solver),
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
|
||||
+1
-1
@@ -2,7 +2,7 @@
|
||||
//! Computes the area under the receiver operating characteristic (ROC) curve that is equal to the probability that a classifier will rank a
|
||||
//! randomly chosen positive instance higher than a randomly chosen negative one.
|
||||
//!
|
||||
//! SmartCore calculates ROC AUC from Wilcoxon or Mann-Whitney U test.
|
||||
//! smartcore calculates ROC AUC from Wilcoxon or Mann-Whitney U test.
|
||||
//!
|
||||
//! Example:
|
||||
//! ```
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@
|
||||
//! In a feedback loop you build your model first, then you get feedback from metrics, improve it and repeat until your model achieves desirable performance.
|
||||
//! Evaluation metrics help to explain the performance of a model and compare models based on an objective criterion.
|
||||
//!
|
||||
//! Choosing the right metric is crucial while evaluating machine learning models. In SmartCore you will find metrics for these classes of ML models:
|
||||
//! Choosing the right metric is crucial while evaluating machine learning models. In smartcore you will find metrics for these classes of ML models:
|
||||
//!
|
||||
//! * [Classification metrics](struct.ClassificationMetrics.html)
|
||||
//! * [Regression metrics](struct.RegressionMetrics.html)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//! Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for
|
||||
//! the data.
|
||||
//!
|
||||
//! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
|
||||
//! In smartcore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! # K Nearest Neighbors Classifier
|
||||
//!
|
||||
//! SmartCore relies on 2 backend algorithms to speedup KNN queries:
|
||||
//! smartcore relies on 2 backend algorithms to speedup KNN queries:
|
||||
//! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
|
||||
//! * [`CoverTree`](../../algorithm/neighbour/cover_tree/index.html)
|
||||
//!
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//! # Real Number
|
||||
//! Most algorithms in SmartCore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
|
||||
//! Most algorithms in smartcore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
|
||||
//! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module.
|
||||
|
||||
use num_traits::Float;
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@
|
||||
//! SVM is memory efficient since it uses only a subset of training data to find a decision boundary. This subset is called support vectors.
|
||||
//!
|
||||
//! In SVM distance between a data point and the support vectors is defined by the kernel function.
|
||||
//! SmartCore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel.
|
||||
//! smartcore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel.
|
||||
//! Building a new kernel requires a good mathematical understanding of the [Mercer theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem)
|
||||
//! that gives necessary and sufficient condition for a function to be a kernel function.
|
||||
//!
|
||||
|
||||
+2
-3
@@ -20,7 +20,7 @@
|
||||
//!
|
||||
//! Where \\( m \\) is a number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is a decision boundary.
|
||||
//!
|
||||
//! To solve this optimization problem, SmartCore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
|
||||
//! To solve this optimization problem, smartcore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
|
||||
//! The optimizer reaches accuracies similar to that of a real SVM after performing two passes through the training examples. You can choose the number of passes
|
||||
//! through the data that the algorithm takes by changing the `epoch` parameter of the classifier.
|
||||
//!
|
||||
@@ -934,8 +934,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::metrics::accuracy;
|
||||
#[cfg(feature = "serde")]
|
||||
use crate::svm::*;
|
||||
use crate::svm::Kernels;
|
||||
|
||||
#[cfg_attr(
|
||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||
|
||||
@@ -596,7 +596,6 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::metrics::mean_squared_error;
|
||||
#[cfg(feature = "serde")]
|
||||
use crate::svm::Kernels;
|
||||
|
||||
// #[test]
|
||||
@@ -617,7 +616,6 @@ mod tests {
|
||||
// assert!(iter.next().is_none());
|
||||
// }
|
||||
|
||||
//TODO: had to disable this test as it runs for too long
|
||||
#[cfg_attr(
|
||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||
wasm_bindgen_test::wasm_bindgen_test
|
||||
|
||||
@@ -163,7 +163,6 @@ impl Default for SplitCriterion {
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
struct Node {
|
||||
index: usize,
|
||||
output: usize,
|
||||
split_feature: usize,
|
||||
split_value: Option<f64>,
|
||||
@@ -406,9 +405,8 @@ impl Default for DecisionTreeClassifierSearchParameters {
|
||||
}
|
||||
|
||||
impl Node {
|
||||
fn new(index: usize, output: usize) -> Self {
|
||||
fn new(output: usize) -> Self {
|
||||
Node {
|
||||
index,
|
||||
output,
|
||||
split_feature: 0,
|
||||
split_value: Option::None,
|
||||
@@ -582,7 +580,7 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
count[yi[i]] += samples[i];
|
||||
}
|
||||
|
||||
let root = Node::new(0, which_max(&count));
|
||||
let root = Node::new(which_max(&count));
|
||||
change_nodes.push(root);
|
||||
let mut order: Vec<Vec<usize>> = Vec::new();
|
||||
|
||||
@@ -831,11 +829,9 @@ impl<TX: Number + PartialOrd, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>>
|
||||
|
||||
let true_child_idx = self.nodes().len();
|
||||
|
||||
self.nodes
|
||||
.push(Node::new(true_child_idx, visitor.true_child_output));
|
||||
self.nodes.push(Node::new(visitor.true_child_output));
|
||||
let false_child_idx = self.nodes().len();
|
||||
self.nodes
|
||||
.push(Node::new(false_child_idx, visitor.false_child_output));
|
||||
self.nodes.push(Node::new(visitor.false_child_output));
|
||||
self.nodes[visitor.node].true_child = Some(true_child_idx);
|
||||
self.nodes[visitor.node].false_child = Some(false_child_idx);
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//!
|
||||
//! where \\(\hat{y}_{Rk}\\) is the mean response for the training observations within region _k_.
|
||||
//!
|
||||
//! SmartCore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space
|
||||
//! smartcore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space
|
||||
//! one predictor at a time. At each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better
|
||||
//! tree in some future step.
|
||||
//!
|
||||
@@ -128,7 +128,6 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
struct Node {
|
||||
index: usize,
|
||||
output: f64,
|
||||
split_feature: usize,
|
||||
split_value: Option<f64>,
|
||||
@@ -299,9 +298,8 @@ impl Default for DecisionTreeRegressorSearchParameters {
|
||||
}
|
||||
|
||||
impl Node {
|
||||
fn new(index: usize, output: f64) -> Self {
|
||||
fn new(output: f64) -> Self {
|
||||
Node {
|
||||
index,
|
||||
output,
|
||||
split_feature: 0,
|
||||
split_value: Option::None,
|
||||
@@ -450,7 +448,7 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
||||
sum += *sample_i as f64 * y_m.get(i).to_f64().unwrap();
|
||||
}
|
||||
|
||||
let root = Node::new(0, sum / (n as f64));
|
||||
let root = Node::new(sum / (n as f64));
|
||||
nodes.push(root);
|
||||
let mut order: Vec<Vec<usize>> = Vec::new();
|
||||
|
||||
@@ -662,11 +660,9 @@ impl<TX: Number + PartialOrd, TY: Number, X: Array2<TX>, Y: Array1<TY>>
|
||||
|
||||
let true_child_idx = self.nodes().len();
|
||||
|
||||
self.nodes
|
||||
.push(Node::new(true_child_idx, visitor.true_child_output));
|
||||
self.nodes.push(Node::new(visitor.true_child_output));
|
||||
let false_child_idx = self.nodes().len();
|
||||
self.nodes
|
||||
.push(Node::new(false_child_idx, visitor.false_child_output));
|
||||
self.nodes.push(Node::new(visitor.false_child_output));
|
||||
|
||||
self.nodes[visitor.node].true_child = Some(true_child_idx);
|
||||
self.nodes[visitor.node].false_child = Some(false_child_idx);
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@
|
||||
//! Decision trees suffer from high variance and often do not deliver the best prediction accuracy when compared to other supervised learning approaches, such as linear and logistic regression.
|
||||
//! Hence some techniques such as [Random Forests](../ensemble/index.html) use more than one decision tree to improve performance of the algorithm.
|
||||
//!
|
||||
//! SmartCore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees.
|
||||
//! smartcore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees.
|
||||
//!
|
||||
//! ## References:
|
||||
//!
|
||||
|
||||
Reference in New Issue
Block a user