Merge potential next release v0.4 (#187) Breaking Changes
* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamic Box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide by the use of 'dyn' in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters into a dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
@@ -30,11 +30,16 @@ use crate::dataset::deserialize_data;
|
||||
use crate::dataset::Dataset;
|
||||
|
||||
/// Get dataset
|
||||
pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
pub fn load_dataset() -> Dataset<f32, u32> {
|
||||
let (x, y, num_samples, num_features) =
|
||||
match deserialize_data(std::include_bytes!("breast_cancer.xy")) {
|
||||
Err(why) => panic!("Can't deserialize breast_cancer.xy. {}", why),
|
||||
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
||||
Ok((x, y, num_samples, num_features)) => (
|
||||
x,
|
||||
y.into_iter().map(|x| x as u32).collect(),
|
||||
num_samples,
|
||||
num_features,
|
||||
),
|
||||
};
|
||||
|
||||
Dataset {
|
||||
@@ -66,18 +71,17 @@ pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
use super::super::*;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
fn refresh_cancer_dataset() {
|
||||
// run this test to generate breast_cancer.xy file.
|
||||
let dataset = load_dataset();
|
||||
assert!(serialize_data(&dataset, "breast_cancer.xy").is_ok());
|
||||
}
|
||||
// TODO: implement serialization
|
||||
// #[test]
|
||||
// #[ignore]
|
||||
// #[cfg(not(target_arch = "wasm32"))]
|
||||
// fn refresh_cancer_dataset() {
|
||||
// // run this test to generate breast_cancer.xy file.
|
||||
// let dataset = load_dataset();
|
||||
// assert!(serialize_data(&dataset, "breast_cancer.xy").is_ok());
|
||||
// }
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
|
||||
+16
-12
@@ -23,11 +23,16 @@ use crate::dataset::deserialize_data;
|
||||
use crate::dataset::Dataset;
|
||||
|
||||
/// Get dataset
|
||||
pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
pub fn load_dataset() -> Dataset<f32, u32> {
|
||||
let (x, y, num_samples, num_features) =
|
||||
match deserialize_data(std::include_bytes!("diabetes.xy")) {
|
||||
Err(why) => panic!("Can't deserialize diabetes.xy. {}", why),
|
||||
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
||||
Ok((x, y, num_samples, num_features)) => (
|
||||
x,
|
||||
y.into_iter().map(|x| x as u32).collect(),
|
||||
num_samples,
|
||||
num_features,
|
||||
),
|
||||
};
|
||||
|
||||
Dataset {
|
||||
@@ -50,18 +55,17 @@ pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
use super::super::*;
|
||||
use super::*;
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn refresh_diabetes_dataset() {
|
||||
// run this test to generate diabetes.xy file.
|
||||
let dataset = load_dataset();
|
||||
assert!(serialize_data(&dataset, "diabetes.xy").is_ok());
|
||||
}
|
||||
// TODO: fix serialization
|
||||
// #[cfg(not(target_arch = "wasm32"))]
|
||||
// #[test]
|
||||
// #[ignore]
|
||||
// fn refresh_diabetes_dataset() {
|
||||
// // run this test to generate diabetes.xy file.
|
||||
// let dataset = load_dataset();
|
||||
// assert!(serialize_data(&dataset, "diabetes.xy").is_ok());
|
||||
// }
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
|
||||
@@ -48,7 +48,7 @@ pub fn make_blobs(
|
||||
}
|
||||
|
||||
/// Make a large circle containing a smaller circle in 2d.
|
||||
pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32, f32> {
|
||||
pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32, u32> {
|
||||
if !(0.0..1.0).contains(&factor) {
|
||||
panic!("'factor' has to be between 0 and 1.");
|
||||
}
|
||||
@@ -79,7 +79,7 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32,
|
||||
|
||||
Dataset {
|
||||
data: x,
|
||||
target: y,
|
||||
target: y.into_iter().map(|x| x as u32).collect(),
|
||||
num_samples,
|
||||
num_features: 2,
|
||||
feature_names: (0..2).map(|n| n.to_string()).collect(),
|
||||
@@ -89,7 +89,7 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32,
|
||||
}
|
||||
|
||||
/// Make two interleaving half circles in 2d
|
||||
pub fn make_moons(num_samples: usize, noise: f32) -> Dataset<f32, f32> {
|
||||
pub fn make_moons(num_samples: usize, noise: f32) -> Dataset<f32, u32> {
|
||||
let num_samples_out = num_samples / 2;
|
||||
let num_samples_in = num_samples - num_samples_out;
|
||||
|
||||
@@ -116,7 +116,7 @@ pub fn make_moons(num_samples: usize, noise: f32) -> Dataset<f32, f32> {
|
||||
|
||||
Dataset {
|
||||
data: x,
|
||||
target: y,
|
||||
target: y.into_iter().map(|x| x as u32).collect(),
|
||||
num_samples,
|
||||
num_features: 2,
|
||||
feature_names: (0..2).map(|n| n.to_string()).collect(),
|
||||
|
||||
+22
-15
@@ -19,11 +19,17 @@ use crate::dataset::deserialize_data;
|
||||
use crate::dataset::Dataset;
|
||||
|
||||
/// Get dataset
|
||||
pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
let (x, y, num_samples, num_features) = match deserialize_data(std::include_bytes!("iris.xy")) {
|
||||
Err(why) => panic!("Can't deserialize iris.xy. {}", why),
|
||||
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
||||
};
|
||||
pub fn load_dataset() -> Dataset<f32, u32> {
|
||||
let (x, y, num_samples, num_features): (Vec<f32>, Vec<u32>, usize, usize) =
|
||||
match deserialize_data(std::include_bytes!("iris.xy")) {
|
||||
Err(why) => panic!("Can't deserialize iris.xy. {}", why),
|
||||
Ok((x, y, num_samples, num_features)) => (
|
||||
x,
|
||||
y.into_iter().map(|x| x as u32).collect(),
|
||||
num_samples,
|
||||
num_features,
|
||||
),
|
||||
};
|
||||
|
||||
Dataset {
|
||||
data: x,
|
||||
@@ -50,18 +56,19 @@ pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
use super::super::*;
|
||||
// #[cfg(not(target_arch = "wasm32"))]
|
||||
// use super::super::*;
|
||||
use super::*;
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn refresh_iris_dataset() {
|
||||
// run this test to generate iris.xy file.
|
||||
let dataset = load_dataset();
|
||||
assert!(serialize_data(&dataset, "iris.xy").is_ok());
|
||||
}
|
||||
// TODO: fix serialization
|
||||
// #[cfg(not(target_arch = "wasm32"))]
|
||||
// #[test]
|
||||
// #[ignore]
|
||||
// fn refresh_iris_dataset() {
|
||||
// // run this test to generate iris.xy file.
|
||||
// let dataset = load_dataset();
|
||||
// assert!(serialize_data(&dataset, "iris.xy").is_ok());
|
||||
// }
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
|
||||
+2
-2
@@ -9,7 +9,7 @@ pub mod generator;
|
||||
pub mod iris;
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
use crate::math::num::RealNumber;
|
||||
use crate::numbers::{basenum::Number, realnum::RealNumber};
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
@@ -55,7 +55,7 @@ impl<X, Y> Dataset<X, Y> {
|
||||
// Running this in wasm throws: operation not supported on this platform.
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn serialize_data<X: RealNumber, Y: RealNumber>(
|
||||
pub(crate) fn serialize_data<X: Number + RealNumber, Y: RealNumber>(
|
||||
dataset: &Dataset<X, Y>,
|
||||
filename: &str,
|
||||
) -> Result<(), io::Error> {
|
||||
|
||||
Reference in New Issue
Block a user