fix: code cleanup, documentation
@@ -13,6 +13,7 @@ Cargo.lock
 .idea
 .project
 .vscode
+smartcore.code-workspace
 
 # OS
 .DS_Store
@@ -1,12 +1,12 @@
 [package]
 name = "smartcore"
 version = "0.1.0"
-authors = ["Vlad Orlov"]
+authors = ["SmartCore Developers"]
 edition = "2018"
 
 [dependencies]
 ndarray = "0.13"
-nalgebra = "0.21.1"
+nalgebra = "0.22.0"
 num-traits = "0.2.12"
 num = "0.3.0"
 rand = "0.7.3"
@@ -66,7 +66,7 @@ impl<T: Debug, F: FloatExt, D: Distance<T, F>> CoverTree<T, F, D> {
         }
     }
 
-    pub fn new_node(&mut self, parent: Option<NodeId>, data: T) -> NodeId {
+    fn new_node(&mut self, parent: Option<NodeId>, data: T) -> NodeId {
         let next_index = self.nodes.len();
         let node_id = NodeId { index: next_index };
         self.nodes.push(Node {
@@ -300,7 +300,7 @@ impl<T: Debug, F: FloatExt, D: Distance<T, F>> CoverTree<T, F, D> {
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
-pub struct NodeId {
+struct NodeId {
     index: usize,
 }
 
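Context for the two hunks above: `new_node` and `NodeId` implement the index-based arena idiom, where nodes live in a flat `Vec` and a `NodeId` is just an index into it, sidestepping ownership cycles in tree structures; making both private keeps that invariant inside the module. A minimal self-contained sketch of the idiom (simplified, hypothetical types, not the crate's actual definitions):

```rust
// Arena-backed tree: nodes are stored in a Vec, ids are plain indices.
struct Node<T> {
    data: T,
    parent: Option<NodeId>,
    children: Vec<NodeId>,
}

#[derive(Debug, Clone, Copy, PartialEq)]
struct NodeId {
    index: usize,
}

struct Tree<T> {
    nodes: Vec<Node<T>>,
}

impl<T> Tree<T> {
    // Mirrors new_node above: push the node, hand back its index.
    fn new_node(&mut self, parent: Option<NodeId>, data: T) -> NodeId {
        let node_id = NodeId { index: self.nodes.len() };
        self.nodes.push(Node { data, parent, children: Vec::new() });
        if let Some(p) = parent {
            self.nodes[p.index].children.push(node_id);
        }
        node_id
    }
}
```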
@@ -1,3 +1,3 @@
-pub mod bbd_tree;
+pub(crate) mod bbd_tree;
 pub mod cover_tree;
 pub mod linear_search;
@@ -1,18 +0,0 @@
-use ndarray::ScalarOperand;
-use num_traits::{FromPrimitive, Num, One, ToPrimitive, Zero};
-use std::fmt::Debug;
-use std::hash::Hash;
-
-pub trait AnyNumber: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
-
-pub trait Nominal:
-    PartialEq + Zero + One + Eq + Hash + ToPrimitive + FromPrimitive + Debug + 'static + Clone
-{
-}
-
-impl<T> AnyNumber for T where T: Num + ScalarOperand + ToPrimitive + FromPrimitive {}
-
-impl<T> Nominal for T where
-    T: PartialEq + Zero + One + Eq + Hash + ToPrimitive + Debug + FromPrimitive + 'static + Clone
-{
-}
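The deleted module relied on the blanket-implementation pattern to emulate trait aliases: declare a bound-only trait, then implement it for every `T` that satisfies the bounds. A minimal standalone sketch of the idiom (hypothetical names, unrelated to SmartCore's types):

```rust
use std::fmt::Debug;

// A bound-only "trait alias" with no methods of its own.
trait Printable: Debug + Clone {}

// Blanket impl: every type meeting the bounds gets Printable for free.
impl<T> Printable for T where T: Debug + Clone {}

fn show<P: Printable>(value: &P) {
    println!("{:?}", value.clone());
}

fn main() {
    show(&42);      // i32 is Debug + Clone, hence Printable
    show(&"hello"); // so is &str
}
```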
@@ -1,12 +1,87 @@
+#![warn(missing_docs)]
+#![warn(missing_doc_code_examples)]
+
+//! # SmartCore
+//!
+//! Welcome to SmartCore, the most complete machine learning library for Rust!
+//!
+//! In SmartCore you will find implementations of these ML algorithms:
+//! * Regression: Linear Regression (OLS), Decision Tree Regressor, Random Forest Regressor
+//! * Classification: Logistic Regression, Decision Tree Classifier, Random Forest Classifier, K Nearest Neighbors (KNN)
+//! * Clustering: K-Means
+//! * Matrix decomposition: PCA, LU, QR, SVD, EVD
+//! * Distance metrics: Euclidean, Minkowski, Manhattan, Hamming, Mahalanobis
+//! * Evaluation metrics: Accuracy, AUC, Recall, Precision, F1, Mean Absolute Error, Mean Squared Error, R2
+//!
+//! Most of the algorithms implemented in SmartCore operate on n-dimensional arrays. While you can use Rust vectors with all functions defined in this library,
+//! we recommend one of the popular linear algebra libraries available in Rust. At the moment we support these packages:
+//! * [ndarray](https://docs.rs/ndarray)
+//! * [nalgebra](https://docs.rs/nalgebra/)
+//!
+//! ## Getting Started
+//!
+//! To start using SmartCore, simply add the following to your Cargo.toml file:
+//! ```ignore
+//! [dependencies]
+//! smartcore = "0.1.0"
+//! ```
+//!
+//! All ML algorithms in SmartCore are grouped into these generic categories:
+//! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data.
+//! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
+//! * [Linear Models](linear/index.html), regression and classification methods where the output is assumed to have a linear relation to the explanatory variables.
+//! * [Ensemble Models](ensemble/index.html), a variety of regression and classification ensemble models.
+//! * [Tree-based Models](tree/index.html), classification and regression trees.
+//! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression.
+//!
+//! Each category is assigned to a separate module.
+//!
+//! For example, the KNN classifier is defined in [smartcore::neighbors::knn](neighbors/knn/index.html). To train and run it using standard Rust vectors you can
+//! run this code:
+//!
+//! ```
+//! // DenseMatrix definition
+//! use smartcore::linalg::naive::dense_matrix::*;
+//! // KNNClassifier
+//! use smartcore::neighbors::knn::*;
+//! // Various distance metrics
+//! use smartcore::math::distance::*;
+//!
+//! // Turn Rust vectors with samples into a matrix
+//! let x = DenseMatrix::from_array(&[
+//!     &[1., 2.],
+//!     &[3., 4.],
+//!     &[5., 6.],
+//!     &[7., 8.],
+//!     &[9., 10.]]);
+//! // Our classes are defined as a vector
+//! let y = vec![2., 2., 2., 3., 3.];
+//!
+//! // Train the classifier
+//! let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default());
+//!
+//! // Predict classes
+//! let y_hat = knn.predict(&x);
+//! ```
+
+/// Various algorithms and helper methods that are used elsewhere in SmartCore
 pub mod algorithm;
+/// Algorithms for clustering of unlabeled data
 pub mod cluster;
 pub mod common;
+/// Matrix decomposition algorithms
 pub mod decomposition;
+/// Ensemble methods, including Random Forest classifier and regressor
 pub mod ensemble;
+/// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms
 pub mod linalg;
+/// Supervised classification and regression models that assume a linear relationship between dependent and explanatory variables.
 pub mod linear;
+/// Multitude of helper methods and classes, including definitions of distance metrics
 pub mod math;
+/// Functions for assessing prediction error.
 pub mod metrics;
+/// Supervised neighbors-based learning methods
 pub mod neighbors;
-pub mod optimization;
+pub(crate) mod optimization;
+/// Supervised tree-based learning methods
 pub mod tree;
@@ -6,16 +6,29 @@ use crate::linalg::Matrix;
 use crate::math::num::FloatExt;
 
 #[derive(Serialize, Deserialize, Debug)]
-pub enum LinearRegressionSolver {
+pub enum LinearRegressionSolverName {
     QR,
     SVD,
 }
 
+#[derive(Serialize, Deserialize, Debug)]
+pub struct LinearRegressionParameters {
+    solver: LinearRegressionSolverName,
+}
+
 #[derive(Serialize, Deserialize, Debug)]
 pub struct LinearRegression<T: FloatExt, M: Matrix<T>> {
     coefficients: M,
     intercept: T,
-    solver: LinearRegressionSolver,
+    solver: LinearRegressionSolverName,
 }
 
+impl Default for LinearRegressionParameters {
+    fn default() -> Self {
+        LinearRegressionParameters {
+            solver: LinearRegressionSolverName::SVD
+        }
+    }
+}
+
 impl<T: FloatExt, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
@@ -26,7 +39,7 @@ impl<T: FloatExt, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
 }
 
 impl<T: FloatExt, M: Matrix<T>> LinearRegression<T, M> {
-    pub fn fit(x: &M, y: &M::RowVector, solver: LinearRegressionSolver) -> LinearRegression<T, M> {
+    pub fn fit(x: &M, y: &M::RowVector, parameters: LinearRegressionParameters) -> LinearRegression<T, M> {
         let y_m = M::from_row_vector(y.clone());
         let b = y_m.transpose();
         let (x_nrows, num_attributes) = x.shape();
@@ -38,9 +51,9 @@ impl<T: FloatExt, M: Matrix<T>> LinearRegression<T, M> {
 
         let a = x.v_stack(&M::ones(x_nrows, 1));
 
-        let w = match solver {
-            LinearRegressionSolver::QR => a.qr_solve_mut(b),
-            LinearRegressionSolver::SVD => a.svd_solve_mut(b),
+        let w = match parameters.solver {
+            LinearRegressionSolverName::QR => a.qr_solve_mut(b),
+            LinearRegressionSolverName::SVD => a.svd_solve_mut(b),
         };
 
         let wights = w.slice(0..num_attributes, 0..1);
@@ -48,7 +61,7 @@ impl<T: FloatExt, M: Matrix<T>> LinearRegression<T, M> {
         LinearRegression {
             intercept: w.get(num_attributes, 0),
             coefficients: wights,
-            solver: solver,
+            solver: parameters.solver,
         }
     }
 
@@ -90,9 +103,9 @@ mod tests {
             114.2, 115.7, 116.9,
         ]);
 
-        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR).predict(&x);
+        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionParameters{solver: LinearRegressionSolverName::QR}).predict(&x);
 
-        let y_hat_svd = LinearRegression::fit(&x, &y, LinearRegressionSolver::SVD).predict(&x);
+        let y_hat_svd = LinearRegression::fit(&x, &y, Default::default()).predict(&x);
 
         assert!(y
             .iter()
@@ -130,9 +143,9 @@ mod tests {
             114.2, 115.7, 116.9,
         ];
 
-        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR).predict(&x);
+        let y_hat_qr = LinearRegression::fit(&x, &y, LinearRegressionParameters{solver: LinearRegressionSolverName::QR}).predict(&x);
 
-        let y_hat_svd = LinearRegression::fit(&x, &y, LinearRegressionSolver::SVD).predict(&x);
+        let y_hat_svd = LinearRegression::fit(&x, &y, Default::default()).predict(&x);
 
         assert!(y
             .iter()
@@ -170,7 +183,7 @@ mod tests {
             114.2, 115.7, 116.9,
         ];
 
-        let lr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR);
+        let lr = LinearRegression::fit(&x, &y, Default::default());
 
         let deserialized_lr: LinearRegression<f64, DenseMatrix<f64>> =
             serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
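Taken together, callers migrate from passing a solver directly to passing a parameters struct. A minimal sketch of the new call sites, mirroring the updated tests above (data values are illustrative; note that `LinearRegressionParameters.solver` is private as written, so code outside the crate would rely on `Default::default()` unless a public constructor is added):

```rust
use smartcore::linalg::naive::dense_matrix::*;
use smartcore::linear::linear_regression::*;

// Illustrative data: three samples, two features.
let x = DenseMatrix::from_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
let y = vec![1., 2., 3.];

// Before: LinearRegression::fit(&x, &y, LinearRegressionSolver::QR)
// After: solver selection lives in the parameters struct
// (crate-internal construction, as in the tests):
let lr_qr = LinearRegression::fit(
    &x,
    &y,
    LinearRegressionParameters { solver: LinearRegressionSolverName::QR },
);

// Default::default() selects the SVD solver.
let lr_svd = LinearRegression::fit(&x, &y, Default::default());
let y_hat = lr_svd.predict(&x);
```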
@@ -1,2 +1,2 @@
 pub mod distance;
-pub mod num;
+pub(crate) mod num;
@@ -7,44 +7,60 @@ use crate::math::distance::Distance;
 use crate::math::num::FloatExt;
 
-#[derive(Serialize, Deserialize, Debug)]
-pub struct KNNClassifier<T: FloatExt, D: Distance<Vec<T>, T>> {
-    classes: Vec<T>,
-    y: Vec<usize>,
-    knn_algorithm: KNNAlgorithmV<T, D>,
-    k: usize,
-}
-
 pub enum KNNAlgorithmName {
     LinearSearch,
     CoverTree,
 }
 
-#[derive(Serialize, Deserialize, Debug)]
-pub enum KNNAlgorithmV<T: FloatExt, D: Distance<Vec<T>, T>> {
+pub struct KNNClassifierParameters {
+    pub algorithm: KNNAlgorithmName,
+    pub k: usize
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct KNNClassifier<T: FloatExt, D: Distance<Vec<T>, T>> {
+    classes: Vec<T>,
+    y: Vec<usize>,
+    knn_algorithm: KNNAlgorithm<T, D>,
+    k: usize,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+enum KNNAlgorithm<T: FloatExt, D: Distance<Vec<T>, T>> {
     LinearSearch(LinearKNNSearch<Vec<T>, T, D>),
     CoverTree(CoverTree<Vec<T>, T, D>),
 }
 
+impl Default for KNNClassifierParameters {
+    fn default() -> Self {
+        KNNClassifierParameters {
+            algorithm: KNNAlgorithmName::CoverTree,
+            k: 3
+        }
+    }
+}
+
 impl KNNAlgorithmName {
     fn fit<T: FloatExt, D: Distance<Vec<T>, T>>(
         &self,
         data: Vec<Vec<T>>,
         distance: D,
-    ) -> KNNAlgorithmV<T, D> {
+    ) -> KNNAlgorithm<T, D> {
         match *self {
             KNNAlgorithmName::LinearSearch => {
-                KNNAlgorithmV::LinearSearch(LinearKNNSearch::new(data, distance))
+                KNNAlgorithm::LinearSearch(LinearKNNSearch::new(data, distance))
             }
-            KNNAlgorithmName::CoverTree => KNNAlgorithmV::CoverTree(CoverTree::new(data, distance)),
+            KNNAlgorithmName::CoverTree => KNNAlgorithm::CoverTree(CoverTree::new(data, distance)),
         }
     }
 }
 
-impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNAlgorithmV<T, D> {
+impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNAlgorithm<T, D> {
     fn find(&self, from: &Vec<T>, k: usize) -> Vec<usize> {
         match *self {
-            KNNAlgorithmV::LinearSearch(ref linear) => linear.find(from, k),
-            KNNAlgorithmV::CoverTree(ref cover) => cover.find(from, k),
+            KNNAlgorithm::LinearSearch(ref linear) => linear.find(from, k),
+            KNNAlgorithm::CoverTree(ref cover) => cover.find(from, k),
         }
     }
 }
@@ -76,9 +92,8 @@ impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
     pub fn fit<M: Matrix<T>>(
         x: &M,
         y: &M::RowVector,
-        k: usize,
         distance: D,
-        algorithm: KNNAlgorithmName,
+        parameters: KNNClassifierParameters
     ) -> KNNClassifier<T, D> {
         let y_m = M::from_row_vector(y.clone());
 
@@ -103,13 +118,13 @@ impl<T: FloatExt, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
             )
         );
 
-        assert!(k > 1, format!("k should be > 1, k=[{}]", k));
+        assert!(parameters.k > 1, format!("k should be > 1, k=[{}]", parameters.k));
 
         KNNClassifier {
            classes: classes,
            y: yi,
-           k: k,
-           knn_algorithm: algorithm.fit(data, distance),
+           k: parameters.k,
+           knn_algorithm: parameters.algorithm.fit(data, distance),
        }
    }
 
@@ -153,9 +168,8 @@ mod tests {
        let knn = KNNClassifier::fit(
            &x,
            &y,
-           3,
            Distances::euclidian(),
-           KNNAlgorithmName::LinearSearch,
+           KNNClassifierParameters{k: 3, algorithm: KNNAlgorithmName::LinearSearch}
        );
        let r = knn.predict(&x);
        assert_eq!(5, Vec::len(&r));
@@ -169,10 +183,9 @@ mod tests {
 
        let knn = KNNClassifier::fit(
            &x,
-           &y,
-           3,
+           &y,
            Distances::euclidian(),
-           KNNAlgorithmName::CoverTree,
+           Default::default()
        );
 
        let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap();
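The same pattern applies here: `k` and the search algorithm move out of `fit`'s argument list and into `KNNClassifierParameters`, whose `Default` is a cover tree with `k = 3`. A minimal sketch of the new call site, mirroring the updated tests (data values are illustrative):

```rust
use smartcore::linalg::naive::dense_matrix::*;
use smartcore::math::distance::*;
use smartcore::neighbors::knn::*;

let x = DenseMatrix::from_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y = vec![2., 2., 2., 3., 3.];

// Explicit parameters: linear search with k = 3.
let knn = KNNClassifier::fit(
    &x,
    &y,
    Distances::euclidian(),
    KNNClassifierParameters { k: 3, algorithm: KNNAlgorithmName::LinearSearch },
);

// Or rely on the defaults (cover tree, k = 3).
let knn_default = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default());

let y_hat = knn.predict(&x);
```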
@@ -5,8 +5,7 @@ pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a;
 pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
 
 #[derive(Debug, PartialEq)]
-pub enum FunctionOrder {
-    FIRST,
+pub enum FunctionOrder {
     SECOND,
     THIRD,
 }