From 26b72b67f4e63946af0c4b26840312fa24dc96ab Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Thu, 3 Nov 2022 12:30:43 +0000 Subject: [PATCH 01/36] Add kernels' parameters to public interface --- src/svm/mod.rs | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 48e5907..46898c9 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -126,7 +126,12 @@ impl<'a> Default for RBFKernel<'a> { #[allow(dead_code)] impl<'a> RBFKernel<'a> { - fn with_gamma(mut self, gamma: f64) -> Self { + /// assign gamma parameter to kernel (required) + /// ```rust + /// use smartcore::svm::RBFKernel; + /// let knl = RBFKernel::default().with_gamma(0.7); + /// ``` + pub fn with_gamma(mut self, gamma: f64) -> Self { self.gamma = Some(gamma); self } @@ -158,19 +163,32 @@ impl<'a> Default for PolynomialKernel<'a> { #[allow(dead_code)] impl<'a> PolynomialKernel<'a> { - fn with_params(mut self, degree: f64, gamma: f64, coef0: f64) -> Self { + /// set parameters for kernel + /// ```rust + /// use smartcore::svm::PolynomialKernel; + /// let knl = PolynomialKernel::default().with_params(3.0, 0.7, 1.0); + /// ``` + pub fn with_params(mut self, degree: f64, gamma: f64, coef0: f64) -> Self { self.degree = Some(degree); self.gamma = Some(gamma); self.coef0 = Some(coef0); self } - - fn with_gamma(mut self, gamma: f64) -> Self { + /// set gamma parameter for kernel + /// ```rust + /// use smartcore::svm::PolynomialKernel; + /// let knl = PolynomialKernel::default().with_gamma(0.7); + /// ``` + pub fn with_gamma(mut self, gamma: f64) -> Self { self.gamma = Some(gamma); self } - - fn with_degree(self, degree: f64, n_features: usize) -> Self { + /// set degree parameter for kernel + /// ```rust + /// use smartcore::svm::PolynomialKernel; + /// let knl = PolynomialKernel::default().with_degree(3.0, 100); + /// ``` + pub fn with_degree(self, degree: f64, n_features: usize) -> Self { self.with_params(degree, 
1f64, 1f64 / n_features as f64) } } @@ -198,12 +216,22 @@ impl<'a> Default for SigmoidKernel<'a> { #[allow(dead_code)] impl<'a> SigmoidKernel<'a> { - fn with_params(mut self, gamma: f64, coef0: f64) -> Self { + /// set parameters for kernel + /// ```rust + /// use smartcore::svm::SigmoidKernel; + /// let knl = SigmoidKernel::default().with_params(0.7, 1.0); + /// ``` + pub fn with_params(mut self, gamma: f64, coef0: f64) -> Self { self.gamma = Some(gamma); self.coef0 = Some(coef0); self } - fn with_gamma(mut self, gamma: f64) -> Self { + /// set gamma parameter for kernel + /// ```rust + /// use smartcore::svm::SigmoidKernel; + /// let knl = SigmoidKernel::default().with_gamma(0.7); + /// ``` + pub fn with_gamma(mut self, gamma: f64) -> Self { self.gamma = Some(gamma); self } From e50b4e86375d438555651efc77b31783e0063212 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Thu, 3 Nov 2022 13:40:54 +0000 Subject: [PATCH 02/36] Fix signature of metrics tests --- src/metrics/accuracy.rs | 8 ++++---- src/metrics/f1.rs | 4 ++-- src/metrics/mean_absolute_error.rs | 4 ++-- src/metrics/mean_squared_error.rs | 4 ++-- src/metrics/precision.rs | 12 ++++++------ src/metrics/r2.rs | 2 +- src/metrics/recall.rs | 12 ++++++------ 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/metrics/accuracy.rs b/src/metrics/accuracy.rs index 1279614..498efea 100644 --- a/src/metrics/accuracy.rs +++ b/src/metrics/accuracy.rs @@ -12,7 +12,7 @@ //! let y_pred: Vec = vec![0., 2., 1., 3.]; //! let y_true: Vec = vec![0., 1., 2., 3.]; //! -//! let score: f64 = Accuracy::new().get_score(&y_pred, &y_true); +//! let score: f64 = Accuracy::new().get_score( &y_true, &y_pred); //! ``` //! With integers: //! ``` @@ -21,7 +21,7 @@ //! let y_pred: Vec = vec![0, 2, 1, 3]; //! let y_true: Vec = vec![0, 1, 2, 3]; //! -//! let score: f64 = Accuracy::new().get_score(&y_pred, &y_true); +//! let score: f64 = Accuracy::new().get_score( &y_true, &y_pred); //! ``` //! //! 
@@ -92,7 +92,7 @@ mod tests { let y_pred: Vec = vec![0., 2., 1., 3.]; let y_true: Vec = vec![0., 1., 2., 3.]; - let score1: f64 = Accuracy::::new().get_score(&y_pred, &y_true); + let score1: f64 = Accuracy::::new().get_score( &y_true, &y_pred); let score2: f64 = Accuracy::::new().get_score(&y_true, &y_true); assert!((score1 - 0.5).abs() < 1e-8); @@ -108,7 +108,7 @@ mod tests { let y_pred: Vec = vec![0, 2, 1, 3]; let y_true: Vec = vec![0, 1, 2, 3]; - let score1: f64 = Accuracy::::new().get_score(&y_pred, &y_true); + let score1: f64 = Accuracy::::new().get_score( &y_true, &y_pred); let score2: f64 = Accuracy::::new().get_score(&y_true, &y_true); assert_eq!(score1, 0.5); diff --git a/src/metrics/f1.rs b/src/metrics/f1.rs index fd41019..f60d81b 100644 --- a/src/metrics/f1.rs +++ b/src/metrics/f1.rs @@ -15,7 +15,7 @@ //! let y_true: Vec = vec![0., 1., 1., 0., 1., 0.]; //! //! let beta = 1.0; // beta default is equal 1.0 anyway -//! let score: f64 = F1::new_with(beta).get_score(&y_pred, &y_true); +//! let score: f64 = F1::new_with(beta).get_score( &y_true, &y_pred); //! ``` //! //! @@ -92,7 +92,7 @@ mod tests { let y_true: Vec = vec![0., 1., 1., 0., 1., 0.]; let beta = 1.0; - let score1: f64 = F1::new_with(beta).get_score(&y_pred, &y_true); + let score1: f64 = F1::new_with(beta).get_score( &y_true, &y_pred); let score2: f64 = F1::new_with(beta).get_score(&y_true, &y_true); println!("{:?}", score1); diff --git a/src/metrics/mean_absolute_error.rs b/src/metrics/mean_absolute_error.rs index 36e5f48..66ffcb4 100644 --- a/src/metrics/mean_absolute_error.rs +++ b/src/metrics/mean_absolute_error.rs @@ -14,7 +14,7 @@ //! let y_pred: Vec = vec![3., -0.5, 2., 7.]; //! let y_true: Vec = vec![2.5, 0.0, 2., 8.]; //! -//! let mse: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true); +//! let mse: f64 = MeanAbsoluteError::new().get_score( &y_true, &y_pred); //! ``` //! //! 
@@ -85,7 +85,7 @@ mod tests { let y_true: Vec = vec![3., -0.5, 2., 7.]; let y_pred: Vec = vec![2.5, 0.0, 2., 8.]; - let score1: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true); + let score1: f64 = MeanAbsoluteError::new().get_score( &y_true, &y_pred); let score2: f64 = MeanAbsoluteError::new().get_score(&y_true, &y_true); assert!((score1 - 0.5).abs() < 1e-8); diff --git a/src/metrics/mean_squared_error.rs b/src/metrics/mean_squared_error.rs index 7443857..f19e89c 100644 --- a/src/metrics/mean_squared_error.rs +++ b/src/metrics/mean_squared_error.rs @@ -14,7 +14,7 @@ //! let y_pred: Vec = vec![3., -0.5, 2., 7.]; //! let y_true: Vec = vec![2.5, 0.0, 2., 8.]; //! -//! let mse: f64 = MeanSquareError::new().get_score(&y_pred, &y_true); +//! let mse: f64 = MeanSquareError::new().get_score( &y_true, &y_pred); //! ``` //! //! @@ -85,7 +85,7 @@ mod tests { let y_true: Vec = vec![3., -0.5, 2., 7.]; let y_pred: Vec = vec![2.5, 0.0, 2., 8.]; - let score1: f64 = MeanSquareError::new().get_score(&y_pred, &y_true); + let score1: f64 = MeanSquareError::new().get_score( &y_true, &y_pred); let score2: f64 = MeanSquareError::new().get_score(&y_true, &y_true); assert!((score1 - 0.375).abs() < 1e-8); diff --git a/src/metrics/precision.rs b/src/metrics/precision.rs index a6fcef1..dd09740 100644 --- a/src/metrics/precision.rs +++ b/src/metrics/precision.rs @@ -14,7 +14,7 @@ //! let y_pred: Vec = vec![0., 1., 1., 0.]; //! let y_true: Vec = vec![0., 0., 1., 1.]; //! -//! let score: f64 = Precision::new().get_score(&y_pred, &y_true); +//! let score: f64 = Precision::new().get_score(&y_true, &y_pred); //! ``` //! //! 
@@ -104,17 +104,17 @@ mod tests { let y_true: Vec = vec![0., 1., 1., 0.]; let y_pred: Vec = vec![0., 0., 1., 1.]; - let score1: f64 = Precision::new().get_score(&y_pred, &y_true); + let score1: f64 = Precision::new().get_score(&y_true, &y_pred); let score2: f64 = Precision::new().get_score(&y_pred, &y_pred); assert!((score1 - 0.5).abs() < 1e-8); assert!((score2 - 1.0).abs() < 1e-8); - let y_pred: Vec = vec![0., 0., 1., 1., 1., 1.]; let y_true: Vec = vec![0., 1., 1., 0., 1., 0.]; + let y_pred: Vec = vec![0., 0., 1., 1., 1., 1.]; - let score3: f64 = Precision::new().get_score(&y_pred, &y_true); - assert!((score3 - 0.5).abs() < 1e-8); + let score3: f64 = Precision::new().get_score(&y_true, &y_pred); + assert!((score3 - 0.6666666666).abs() < 1e-8); } #[cfg_attr( @@ -126,7 +126,7 @@ mod tests { let y_true: Vec = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.]; let y_pred: Vec = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.]; - let score1: f64 = Precision::new().get_score(&y_pred, &y_true); + let score1: f64 = Precision::new().get_score(&y_true, &y_pred); let score2: f64 = Precision::new().get_score(&y_pred, &y_pred); assert!((score1 - 0.333333333).abs() < 1e-8); diff --git a/src/metrics/r2.rs b/src/metrics/r2.rs index 6581abe..448e6d6 100644 --- a/src/metrics/r2.rs +++ b/src/metrics/r2.rs @@ -14,7 +14,7 @@ //! let y_pred: Vec = vec![3., -0.5, 2., 7.]; //! let y_true: Vec = vec![2.5, 0.0, 2., 8.]; //! -//! let mse: f64 = MeanAbsoluteError::new().get_score(&y_pred, &y_true); +//! let mse: f64 = MeanAbsoluteError::new().get_score( &y_true, &y_pred); //! ``` //! //! diff --git a/src/metrics/recall.rs b/src/metrics/recall.rs index 04a779a..252759e 100644 --- a/src/metrics/recall.rs +++ b/src/metrics/recall.rs @@ -14,7 +14,7 @@ //! let y_pred: Vec = vec![0., 1., 1., 0.]; //! let y_true: Vec = vec![0., 0., 1., 1.]; //! -//! let score: f64 = Recall::new().get_score(&y_pred, &y_true); +//! let score: f64 = Recall::new().get_score( &y_true, &y_pred); //! ``` //! //! 
@@ -105,17 +105,17 @@ mod tests { let y_true: Vec = vec![0., 1., 1., 0.]; let y_pred: Vec = vec![0., 0., 1., 1.]; - let score1: f64 = Recall::new().get_score(&y_pred, &y_true); + let score1: f64 = Recall::new().get_score(&y_true, &y_pred); let score2: f64 = Recall::new().get_score(&y_pred, &y_pred); assert!((score1 - 0.5).abs() < 1e-8); assert!((score2 - 1.0).abs() < 1e-8); - let y_pred: Vec = vec![0., 0., 1., 1., 1., 1.]; let y_true: Vec = vec![0., 1., 1., 0., 1., 0.]; + let y_pred: Vec = vec![0., 0., 1., 1., 1., 1.]; - let score3: f64 = Recall::new().get_score(&y_pred, &y_true); - assert!((score3 - 0.6666666666666666).abs() < 1e-8); + let score3: f64 = Recall::new().get_score(&y_true, &y_pred); + assert!((score3 - 0.5).abs() < 1e-8); } #[cfg_attr( @@ -127,7 +127,7 @@ mod tests { let y_true: Vec = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.]; let y_pred: Vec = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.]; - let score1: f64 = Recall::new().get_score(&y_pred, &y_true); + let score1: f64 = Recall::new().get_score( &y_true, &y_pred); let score2: f64 = Recall::new().get_score(&y_pred, &y_pred); assert!((score1 - 0.333333333).abs() < 1e-8); From d298709040df1a427fec03bc1fc21eadce20bb44 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Thu, 3 Nov 2022 13:44:27 +0000 Subject: [PATCH 03/36] cargo clippy --- src/metrics/accuracy.rs | 4 ++-- src/metrics/f1.rs | 2 +- src/metrics/mean_absolute_error.rs | 2 +- src/metrics/mean_squared_error.rs | 2 +- src/metrics/recall.rs | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/metrics/accuracy.rs b/src/metrics/accuracy.rs index 498efea..f449d1e 100644 --- a/src/metrics/accuracy.rs +++ b/src/metrics/accuracy.rs @@ -92,7 +92,7 @@ mod tests { let y_pred: Vec = vec![0., 2., 1., 3.]; let y_true: Vec = vec![0., 1., 2., 3.]; - let score1: f64 = Accuracy::::new().get_score( &y_true, &y_pred); + let score1: f64 = Accuracy::::new().get_score(&y_true, &y_pred); let score2: f64 = Accuracy::::new().get_score(&y_true, 
&y_true); assert!((score1 - 0.5).abs() < 1e-8); @@ -108,7 +108,7 @@ mod tests { let y_pred: Vec = vec![0, 2, 1, 3]; let y_true: Vec = vec![0, 1, 2, 3]; - let score1: f64 = Accuracy::::new().get_score( &y_true, &y_pred); + let score1: f64 = Accuracy::::new().get_score(&y_true, &y_pred); let score2: f64 = Accuracy::::new().get_score(&y_true, &y_true); assert_eq!(score1, 0.5); diff --git a/src/metrics/f1.rs b/src/metrics/f1.rs index f60d81b..437863a 100644 --- a/src/metrics/f1.rs +++ b/src/metrics/f1.rs @@ -92,7 +92,7 @@ mod tests { let y_true: Vec = vec![0., 1., 1., 0., 1., 0.]; let beta = 1.0; - let score1: f64 = F1::new_with(beta).get_score( &y_true, &y_pred); + let score1: f64 = F1::new_with(beta).get_score(&y_true, &y_pred); let score2: f64 = F1::new_with(beta).get_score(&y_true, &y_true); println!("{:?}", score1); diff --git a/src/metrics/mean_absolute_error.rs b/src/metrics/mean_absolute_error.rs index 66ffcb4..5b4000f 100644 --- a/src/metrics/mean_absolute_error.rs +++ b/src/metrics/mean_absolute_error.rs @@ -85,7 +85,7 @@ mod tests { let y_true: Vec = vec![3., -0.5, 2., 7.]; let y_pred: Vec = vec![2.5, 0.0, 2., 8.]; - let score1: f64 = MeanAbsoluteError::new().get_score( &y_true, &y_pred); + let score1: f64 = MeanAbsoluteError::new().get_score(&y_true, &y_pred); let score2: f64 = MeanAbsoluteError::new().get_score(&y_true, &y_true); assert!((score1 - 0.5).abs() < 1e-8); diff --git a/src/metrics/mean_squared_error.rs b/src/metrics/mean_squared_error.rs index f19e89c..ef78fad 100644 --- a/src/metrics/mean_squared_error.rs +++ b/src/metrics/mean_squared_error.rs @@ -85,7 +85,7 @@ mod tests { let y_true: Vec = vec![3., -0.5, 2., 7.]; let y_pred: Vec = vec![2.5, 0.0, 2., 8.]; - let score1: f64 = MeanSquareError::new().get_score( &y_true, &y_pred); + let score1: f64 = MeanSquareError::new().get_score(&y_true, &y_pred); let score2: f64 = MeanSquareError::new().get_score(&y_true, &y_true); assert!((score1 - 0.375).abs() < 1e-8); diff --git a/src/metrics/recall.rs 
b/src/metrics/recall.rs index 252759e..ab76d97 100644 --- a/src/metrics/recall.rs +++ b/src/metrics/recall.rs @@ -127,7 +127,7 @@ mod tests { let y_true: Vec = vec![0., 0., 0., 1., 1., 1., 2., 2., 2.]; let y_pred: Vec = vec![0., 1., 2., 0., 1., 2., 0., 1., 2.]; - let score1: f64 = Recall::new().get_score( &y_true, &y_pred); + let score1: f64 = Recall::new().get_score(&y_true, &y_pred); let score2: f64 = Recall::new().get_score(&y_pred, &y_pred); assert!((score1 - 0.333333333).abs() < 1e-8); From ba70bb941fe5ed90cbe968eea4991876fd687b01 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Thu, 3 Nov 2022 14:18:56 +0000 Subject: [PATCH 04/36] Implement Display for NaiveBayes --- src/naive_bayes/bernoulli.rs | 17 +++++++++++++++++ src/naive_bayes/categorical.rs | 13 +++++++++++++ src/naive_bayes/gaussian.rs | 16 ++++++++++++++++ src/naive_bayes/multinomial.rs | 16 ++++++++++++++++ 4 files changed, 62 insertions(+) diff --git a/src/naive_bayes/bernoulli.rs b/src/naive_bayes/bernoulli.rs index 02bf330..27731b2 100644 --- a/src/naive_bayes/bernoulli.rs +++ b/src/naive_bayes/bernoulli.rs @@ -364,6 +364,20 @@ pub struct BernoulliNB< binarize: Option, } +impl, Y: Array1> + fmt::Display for BernoulliNB +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "BernoulliNB:\ninner: {:?}\nbinarize: {:?}", + self.inner.as_ref().unwrap(), + self.binarize.as_ref().unwrap() + )?; + Ok(()) + } +} + impl, Y: Array1> SupervisedEstimator> for BernoulliNB { @@ -594,6 +608,9 @@ mod tests { ] ); + // test Display + println!("{}", &bnb); + let distribution = bnb.inner.clone().unwrap().distribution; assert_eq!( diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs index f2ae4a8..970f799 100644 --- a/src/naive_bayes/categorical.rs +++ b/src/naive_bayes/categorical.rs @@ -139,6 +139,17 @@ impl NBDistribution for CategoricalNBDistribution } } +impl, Y: Array1> fmt::Display for CategoricalNB { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + writeln!( + f, + "CategoricalNB:\ninner: {:?}", + self.inner.as_ref().unwrap() + )?; + Ok(()) + } +} + impl CategoricalNBDistribution { /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features. /// * `x` - training data. @@ -539,6 +550,8 @@ mod tests { let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); let y_hat = cnb.predict(&x).unwrap(); assert_eq!(y_hat, vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1]); + + println!("{}", &cnb); } #[cfg_attr( diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index f23ffdb..a9c1d4f 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -271,6 +271,19 @@ pub struct GaussianNB< inner: Option>>, } +impl< + TX: Number + RealNumber + RealNumber, + TY: Number + Ord + Unsigned, + X: Array2, + Y: Array1, + > fmt::Display for GaussianNB +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "GaussianNB:\ninner: {:?}", self.inner.as_ref().unwrap())?; + Ok(()) + } +} + impl< TX: Number + RealNumber + RealNumber, TY: Number + Ord + Unsigned, @@ -433,6 +446,9 @@ mod tests { let gnb = GaussianNB::fit(&x, &y, parameters).unwrap(); assert_eq!(gnb.class_priors(), &priors); + + // test display for GNB + println!("{}", &gnb); } #[cfg_attr( diff --git a/src/naive_bayes/multinomial.rs b/src/naive_bayes/multinomial.rs index f3305ac..4191106 100644 --- a/src/naive_bayes/multinomial.rs +++ b/src/naive_bayes/multinomial.rs @@ -309,6 +309,19 @@ pub struct MultinomialNB< inner: Option>>, } +impl, Y: Array1> fmt::Display + for MultinomialNB +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "MultinomialNB:\ninner: {:?}", + self.inner.as_ref().unwrap() + )?; + Ok(()) + } +} + impl, Y: Array1> SupervisedEstimator for MultinomialNB { @@ -500,6 +513,9 @@ mod tests { ] ); + // test display + println!("{}", &nb); + let y_hat = nb.predict(&x).unwrap(); let distribution = 
nb.inner.clone().unwrap().distribution; From b66afa9222d332ec6652ed03bacc509b25eb7175 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Thu, 3 Nov 2022 14:58:05 +0000 Subject: [PATCH 05/36] Improve options conditionals --- src/linear/logistic_regression.rs | 9 +++------ src/tree/decision_tree_classifier.rs | 2 +- src/tree/decision_tree_regressor.rs | 4 ++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 6b706dd..7dd269c 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -518,12 +518,9 @@ impl, Y: for (i, y_hat_i) in y_hat.iterator(0).enumerate().take(n) { result.set( i, - self.classes()[if RealNumber::sigmoid(*y_hat_i + intercept) > RealNumber::half() - { - 1 - } else { - 0 - }], + self.classes()[usize::from( + RealNumber::sigmoid(*y_hat_i + intercept) > RealNumber::half(), + )], ); } } else { diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 043d79b..6341ab4 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -673,7 +673,7 @@ impl, Y: Array1> let mut is_pure = true; for i in 0..n_rows { if visitor.samples[i] > 0 { - if label == Option::None { + if label.is_none() { label = Option::Some(visitor.y[i]); } else if visitor.y[i] != label.unwrap() { is_pure = false; diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 397040b..12ea978 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -511,7 +511,7 @@ impl, Y: Array1> match queue.pop_front() { Some(node_id) => { let node = &self.nodes()[node_id]; - if node.true_child == None && node.false_child == None { + if node.true_child.is_none() && node.false_child.is_none() { result = node.output; } else if x.get((row, node.split_feature)).to_f64().unwrap() <= node.split_value.unwrap_or(std::f64::NAN) @@ -557,7 +557,7 @@ impl, 
Y: Array1> self.find_best_split(visitor, n, sum, parent_gain, *variable); } - self.nodes()[visitor.node].split_score != Option::None + self.nodes()[visitor.node].split_score.is_some() } fn find_best_split( From d592b628beffdb3a929df2f738ad1e2fe11b34e4 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Thu, 3 Nov 2022 15:49:00 +0000 Subject: [PATCH 06/36] Implement CSV reader with new traits (#209) --- .github/ISSUE_TEMPLATE.md | 1 + src/lib.rs | 4 +- src/readers/csv.rs | 77 +++++++++++++++++++++------------------ src/readers/error.rs | 7 ++++ src/svm/mod.rs | 3 +- 5 files changed, 54 insertions(+), 38 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4fee515..1177761 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,5 +1,6 @@ ### I'm submitting a - [ ] bug report. +- [ ] improvement. - [ ] feature request. ### Current Behaviour: diff --git a/src/lib.rs b/src/lib.rs index 11c5b38..a955de2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,8 +105,8 @@ pub mod neighbors; pub mod optimization; /// Preprocessing utilities pub mod preprocessing; -// /// Reading in Data. -// pub mod readers; +/// Reading in data from serialized foramts +pub mod readers; /// Support Vector Machines pub mod svm; /// Supervised tree-based learning methods diff --git a/src/readers/csv.rs b/src/readers/csv.rs index e80d99b..0b2c18c 100644 --- a/src/readers/csv.rs +++ b/src/readers/csv.rs @@ -1,23 +1,24 @@ //! This module contains utitilities to read-in matrices from csv files. -//! ``` +//! ```rust //! use smartcore::readers::csv; -//! use smartcore::linalg::naive::dense_matrix::DenseMatrix; -//! use crate::smartcore::linalg::BaseMatrix; +//! use smartcore::linalg::basic::matrix::DenseMatrix; //! use std::fs; //! //! fs::write("identity.csv", "header\n1.0,0.0\n0.0,1.0"); -//! assert_eq!( -//! csv::matrix_from_csv_source::, DenseMatrix<_>>( -//! fs::File::open("identity.csv").unwrap(), -//! csv::CSVDefinition::default() -//! ) -//! 
.unwrap(), -//! DenseMatrix::from_row_vectors(vec![vec![1.0, 0.0], vec![0.0, 1.0]]).unwrap() -//! ); +//! +//! let mtx = csv::matrix_from_csv_source::, DenseMatrix<_>>( +//! fs::File::open("identity.csv").unwrap(), +//! csv::CSVDefinition::default() +//! ) +//! .unwrap(); +//! println!("{}", &mtx); +//! //! fs::remove_file("identity.csv"); //! ``` -use crate::linalg::{BaseMatrix, BaseVector}; -use crate::math::num::RealNumber; + +use crate::linalg::basic::arrays::{Array1, Array2}; +use crate::numbers::basenum::Number; +use crate::numbers::realnum::RealNumber; use crate::readers::ReadingError; use std::io::Read; @@ -77,35 +78,41 @@ pub fn matrix_from_csv_source( definition: CSVDefinition<'_>, ) -> Result where - T: RealNumber, - RowVector: BaseVector, - Matrix: BaseMatrix, + T: Number + RealNumber + std::str::FromStr, + RowVector: Array1, + Matrix: Array2, { let csv_text = read_string_from_source(source)?; - let rows = extract_row_vectors_from_csv_text::( + let rows: Vec> = extract_row_vectors_from_csv_text::( &csv_text, &definition, detect_row_format(&csv_text, &definition)?, )?; + let nrows = rows.len(); + let ncols = rows[0].len(); - match Matrix::from_row_vectors(rows) { - Some(matrix) => Ok(matrix), - None => Err(ReadingError::NoRowsProvided), + // TODO: try to return ReadingError + let array2 = Matrix::from_iterator(rows.into_iter().flatten(), nrows, ncols, 0); + + if array2.shape() != (nrows, ncols) { + Err(ReadingError::ShapesDoNotMatch { msg: String::new() }) + } else { + Ok(array2) } } /// Given a string containing the contents of a csv file, extract its value /// into row-vectors. 
-fn extract_row_vectors_from_csv_text<'a, T, RowVector, Matrix>( +fn extract_row_vectors_from_csv_text< + 'a, + T: Number + RealNumber + std::str::FromStr, + RowVector: Array1, + Matrix: Array2, +>( csv_text: &'a str, definition: &'a CSVDefinition<'_>, row_format: CSVRowFormat<'_>, -) -> Result, ReadingError> -where - T: RealNumber, - RowVector: BaseVector, - Matrix: BaseMatrix, -{ +) -> Result>, ReadingError> { csv_text .lines() .skip(definition.n_rows_header) @@ -132,12 +139,12 @@ fn extract_vector_from_csv_line( row_format: &CSVRowFormat<'_>, ) -> Result where - T: RealNumber, - RowVector: BaseVector, + T: Number + RealNumber + std::str::FromStr, + RowVector: Array1, { validate_csv_row(line, row_format)?; - let extracted_fields = extract_fields_from_csv_row(line, row_format)?; - Ok(BaseVector::from_array(&extracted_fields[..])) + let extracted_fields: Vec = extract_fields_from_csv_row(line, row_format)?; + Ok(Array1::from_vec_slice(&extracted_fields[..])) } /// Extract the fields from a string containing the row of a csv file. @@ -146,7 +153,7 @@ fn extract_fields_from_csv_row( row_format: &CSVRowFormat<'_>, ) -> Result, ReadingError> where - T: RealNumber, + T: Number + RealNumber + std::str::FromStr, { row.split(row_format.field_seperator) .enumerate() @@ -192,7 +199,7 @@ fn enrich_reading_error( /// Extract the value from a single csv field. fn extract_value_from_csv_field(value_string: &str) -> Result where - T: RealNumber, + T: Number + RealNumber + std::str::FromStr, { // By default, `FromStr::Err` does not implement `Debug`. 
// Restricting it in the library leads to many breaking @@ -210,7 +217,7 @@ where mod tests { mod matrix_from_csv_source { use super::super::{read_string_from_source, CSVDefinition, ReadingError}; - use crate::linalg::naive::dense_matrix::DenseMatrix; + use crate::linalg::basic::matrix::DenseMatrix; use crate::readers::{csv::matrix_from_csv_source, io_testing}; #[test] @@ -298,7 +305,7 @@ mod tests { } mod extract_row_vectors_from_csv_text { use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat}; - use crate::linalg::naive::dense_matrix::DenseMatrix; + use crate::linalg::basic::matrix::DenseMatrix; #[test] fn read_default_csv() { diff --git a/src/readers/error.rs b/src/readers/error.rs index 16e910d..047092a 100644 --- a/src/readers/error.rs +++ b/src/readers/error.rs @@ -24,6 +24,12 @@ pub enum ReadingError { /// and where it happened. msg: String, }, + /// Shape after deserialization is wrong + ShapesDoNotMatch { + /// More details about what row could not be read + /// and where it happened. + msg: String, + }, } impl From for ReadingError { fn from(io_error: std::io::Error) -> Self { @@ -39,6 +45,7 @@ impl ReadingError { ReadingError::InvalidField { msg } => Some(msg), ReadingError::InvalidRow { msg } => Some(msg), ReadingError::CouldNotReadFileSystem { msg } => Some(msg), + ReadingError::ShapesDoNotMatch { msg } => Some(msg), ReadingError::NoRowsProvided => None, } } diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 46898c9..febfead 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -23,9 +23,10 @@ //! //! 
/// search parameters -pub mod search; pub mod svc; pub mod svr; +// /// search parameters space +// pub mod search; use core::fmt::Debug; use std::marker::PhantomData; From 35fe68e024a69ca9f3d63244caeccc740ffede44 Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Thu, 3 Nov 2022 13:48:16 -0500 Subject: [PATCH 07/36] Fix CI (#227) * Update ci.yml Co-authored-by: Luis Moreno --- Cargo.toml | 2 -- src/readers/io_testing.rs | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c5cb4fd..0a23083 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,11 +49,9 @@ resolver = "2" [profile.test] debug = 1 opt-level = 3 -split-debuginfo = "unpacked" [profile.release] strip = true -debug = 1 lto = true codegen-units = 1 overflow-checks = true diff --git a/src/readers/io_testing.rs b/src/readers/io_testing.rs index 1376a5d..cb0b4b0 100644 --- a/src/readers/io_testing.rs +++ b/src/readers/io_testing.rs @@ -107,6 +107,7 @@ mod test { use std::fs; use std::io::Read; use std::path; + #[cfg(not(target_arch = "wasm32"))] #[test] fn test_temporary_text_file() { let path_of_temporary_file; @@ -126,7 +127,7 @@ mod test { // should have been cleaned up. 
assert!(!path::Path::new(&path_of_temporary_file).exists()) } - + #[cfg(not(target_arch = "wasm32"))] #[test] fn test_string_to_file() { let path_of_test_file = "test.file"; From 425c3c1d0b125a6254812d82fbf892d3cd64928b Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Fri, 4 Nov 2022 17:08:30 -0500 Subject: [PATCH 08/36] Use Box in SVM and remove lifetimes (#228) * Do not change external API Authored-by: Luis Moreno --- src/svm/mod.rs | 83 +++++++++++++++++--------------------------------- src/svm/svc.rs | 46 ++++++++++++---------------- src/svm/svr.rs | 32 +++++++++---------- 3 files changed, 64 insertions(+), 97 deletions(-) diff --git a/src/svm/mod.rs b/src/svm/mod.rs index febfead..a30fe87 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -29,7 +29,6 @@ pub mod svr; // pub mod search; use core::fmt::Debug; -use std::marker::PhantomData; #[cfg(feature = "serde")] use serde::ser::{SerializeStruct, Serializer}; @@ -41,22 +40,22 @@ use crate::linalg::basic::arrays::{Array1, ArrayView1}; /// Defines a kernel function. /// This is a object-safe trait. 
-pub trait Kernel<'a> { +pub trait Kernel { #[allow(clippy::ptr_arg)] /// Apply kernel function to x_i and x_j fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result; /// Return a serializable name - fn name(&self) -> &'a str; + fn name(&self) -> &'static str; } -impl<'a> Debug for dyn Kernel<'_> + 'a { +impl Debug for dyn Kernel { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "Kernel") } } #[cfg(feature = "serde")] -impl<'a> Serialize for dyn Kernel<'_> + 'a { +impl Serialize for dyn Kernel { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -72,21 +71,21 @@ impl<'a> Serialize for dyn Kernel<'_> + 'a { #[derive(Debug, Clone)] pub struct Kernels {} -impl<'a> Kernels { +impl Kernels { /// Return a default linear - pub fn linear() -> LinearKernel<'a> { + pub fn linear() -> LinearKernel { LinearKernel::default() } /// Return a default RBF - pub fn rbf() -> RBFKernel<'a> { + pub fn rbf() -> RBFKernel { RBFKernel::default() } /// Return a default polynomial - pub fn polynomial() -> PolynomialKernel<'a> { + pub fn polynomial() -> PolynomialKernel { PolynomialKernel::default() } /// Return a default sigmoid - pub fn sigmoid() -> SigmoidKernel<'a> { + pub fn sigmoid() -> SigmoidKernel { SigmoidKernel::default() } } @@ -94,39 +93,19 @@ impl<'a> Kernels { /// Linear Kernel #[allow(clippy::derive_partial_eq_without_eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq)] -pub struct LinearKernel<'a> { - phantom: PhantomData<&'a ()>, -} - -impl<'a> Default for LinearKernel<'a> { - fn default() -> Self { - Self { - phantom: PhantomData, - } - } -} +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct LinearKernel; /// Radial basis function (Gaussian) kernel #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq)] -pub struct RBFKernel<'a> { +#[derive(Debug, Default, Clone, PartialEq)] +pub struct RBFKernel { /// kernel coefficient pub 
gamma: Option, - phantom: PhantomData<&'a ()>, -} - -impl<'a> Default for RBFKernel<'a> { - fn default() -> Self { - Self { - gamma: Option::None, - phantom: PhantomData, - } - } } #[allow(dead_code)] -impl<'a> RBFKernel<'a> { +impl RBFKernel { /// assign gamma parameter to kernel (required) /// ```rust /// use smartcore::svm::RBFKernel; @@ -141,29 +120,26 @@ impl<'a> RBFKernel<'a> { /// Polynomial kernel #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq)] -pub struct PolynomialKernel<'a> { +pub struct PolynomialKernel { /// degree of the polynomial pub degree: Option, /// kernel coefficient pub gamma: Option, /// independent term in kernel function pub coef0: Option, - phantom: PhantomData<&'a ()>, } -impl<'a> Default for PolynomialKernel<'a> { +impl Default for PolynomialKernel { fn default() -> Self { Self { gamma: Option::None, degree: Option::None, coef0: Some(1f64), - phantom: PhantomData, } } } -#[allow(dead_code)] -impl<'a> PolynomialKernel<'a> { +impl PolynomialKernel { /// set parameters for kernel /// ```rust /// use smartcore::svm::PolynomialKernel; @@ -197,26 +173,23 @@ impl<'a> PolynomialKernel<'a> { /// Sigmoid (hyperbolic tangent) kernel #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq)] -pub struct SigmoidKernel<'a> { +pub struct SigmoidKernel { /// kernel coefficient pub gamma: Option, /// independent term in kernel function pub coef0: Option, - phantom: PhantomData<&'a ()>, } -impl<'a> Default for SigmoidKernel<'a> { +impl Default for SigmoidKernel { fn default() -> Self { Self { gamma: Option::None, coef0: Some(1f64), - phantom: PhantomData, } } } -#[allow(dead_code)] -impl<'a> SigmoidKernel<'a> { +impl SigmoidKernel { /// set parameters for kernel /// ```rust /// use smartcore::svm::SigmoidKernel; @@ -238,16 +211,16 @@ impl<'a> SigmoidKernel<'a> { } } -impl<'a> Kernel<'a> for LinearKernel<'a> { +impl Kernel for LinearKernel { fn apply(&self, x_i: &Vec, 
x_j: &Vec) -> Result { Ok(x_i.dot(x_j)) } - fn name(&self) -> &'a str { + fn name(&self) -> &'static str { "Linear" } } -impl<'a> Kernel<'a> for RBFKernel<'a> { +impl Kernel for RBFKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { if self.gamma.is_none() { return Err(Failed::because( @@ -258,12 +231,12 @@ impl<'a> Kernel<'a> for RBFKernel<'a> { let v_diff = x_i.sub(x_j); Ok((-self.gamma.unwrap() * v_diff.mul(&v_diff).sum()).exp()) } - fn name(&self) -> &'a str { + fn name(&self) -> &'static str { "RBF" } } -impl<'a> Kernel<'a> for PolynomialKernel<'a> { +impl Kernel for PolynomialKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { if self.gamma.is_none() || self.coef0.is_none() || self.degree.is_none() { return Err(Failed::because( @@ -274,12 +247,12 @@ impl<'a> Kernel<'a> for PolynomialKernel<'a> { let dot = x_i.dot(x_j); Ok((self.gamma.unwrap() * dot + self.coef0.unwrap()).powf(self.degree.unwrap())) } - fn name(&self) -> &'a str { + fn name(&self) -> &'static str { "Polynomial" } } -impl<'a> Kernel<'a> for SigmoidKernel<'a> { +impl Kernel for SigmoidKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { if self.gamma.is_none() || self.coef0.is_none() { return Err(Failed::because( @@ -290,7 +263,7 @@ impl<'a> Kernel<'a> for SigmoidKernel<'a> { let dot = x_i.dot(x_j); Ok(self.gamma.unwrap() * dot + self.coef0.unwrap().tanh()) } - fn name(&self) -> &'a str { + fn name(&self) -> &'static str { "Sigmoid" } } diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 716f521..9cb140d 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -58,7 +58,7 @@ //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! //! let knl = Kernels::linear(); -//! let params = &SVCParameters::default().with_c(200.0).with_kernel(&knl); +//! let params = &SVCParameters::default().with_c(200.0).with_kernel(knl); //! let svc = SVC::fit(&x, &y, params).unwrap(); //! //! 
let y_hat = svc.predict(&x).unwrap(); @@ -91,15 +91,9 @@ use crate::rand_custom::get_rng_impl; use crate::svm::Kernel; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone)] +#[derive(Debug)] /// SVC Parameters -pub struct SVCParameters< - 'a, - TX: Number + RealNumber, - TY: Number + Ord, - X: Array2, - Y: Array1, -> { +pub struct SVCParameters, Y: Array1> { /// Number of epochs. pub epoch: usize, /// Regularization parameter. @@ -108,7 +102,7 @@ pub struct SVCParameters< pub tol: TX, #[cfg_attr(feature = "serde", serde(skip_deserializing))] /// The kernel function. - pub kernel: Option<&'a dyn Kernel<'a>>, + pub kernel: Option>, /// Unused parameter. m: PhantomData<(X, Y, TY)>, /// Controls the pseudo random number generation for shuffling the data for probability estimates @@ -129,7 +123,7 @@ pub struct SVC<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: classes: Option>, instances: Option>>, #[cfg_attr(feature = "serde", serde(skip))] - parameters: Option<&'a SVCParameters<'a, TX, TY, X, Y>>, + parameters: Option<&'a SVCParameters>, w: Option>, b: Option, phantomdata: PhantomData<(X, Y)>, @@ -155,7 +149,7 @@ struct Cache, Y: Array1 struct Optimizer<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1> { x: &'a X, y: &'a Y, - parameters: &'a SVCParameters<'a, TX, TY, X, Y>, + parameters: &'a SVCParameters, svmin: usize, svmax: usize, gmin: TX, @@ -165,8 +159,8 @@ struct Optimizer<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y recalculate_minmax_grad: bool, } -impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1> - SVCParameters<'a, TX, TY, X, Y> +impl, Y: Array1> + SVCParameters { /// Number of epochs. pub fn with_epoch(mut self, epoch: usize) -> Self { @@ -184,8 +178,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 self } /// The kernel function. 
- pub fn with_kernel(mut self, kernel: &'a (dyn Kernel<'a>)) -> Self { - self.kernel = Some(kernel); + pub fn with_kernel(mut self, kernel: K) -> Self { + self.kernel = Some(Box::new(kernel)); self } @@ -196,8 +190,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } -impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1> Default - for SVCParameters<'a, TX, TY, X, Y> +impl, Y: Array1> Default + for SVCParameters { fn default() -> Self { SVCParameters { @@ -212,7 +206,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1> - SupervisedEstimatorBorrow<'a, X, Y, SVCParameters<'a, TX, TY, X, Y>> for SVC<'a, TX, TY, X, Y> + SupervisedEstimatorBorrow<'a, X, Y, SVCParameters> for SVC<'a, TX, TY, X, Y> { fn new() -> Self { Self { @@ -227,7 +221,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 fn fit( x: &'a X, y: &'a Y, - parameters: &'a SVCParameters<'a, TX, TY, X, Y>, + parameters: &'a SVCParameters, ) -> Result { SVC::fit(x, y, parameters) } @@ -251,7 +245,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array pub fn fit( x: &'a X, y: &'a Y, - parameters: &'a SVCParameters<'a, TX, TY, X, Y>, + parameters: &'a SVCParameters, ) -> Result, Failed> { let (n, _) = x.shape(); @@ -447,7 +441,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 fn new( x: &'a X, y: &'a Y, - parameters: &'a SVCParameters<'a, TX, TY, X, Y>, + parameters: &'a SVCParameters, ) -> Optimizer<'a, TX, TY, X, Y> { let (n, _) = x.shape(); @@ -979,7 +973,7 @@ mod tests { let knl = Kernels::linear(); let params = SVCParameters::default() .with_c(200.0) - .with_kernel(&knl) + .with_kernel(knl) .with_seed(Some(100)); let y_hat = SVC::fit(&x, &y, ¶ms) @@ -1018,7 +1012,7 @@ mod tests { &y, &SVCParameters::default() .with_c(200.0) - .with_kernel(&Kernels::linear()), + 
.with_kernel(Kernels::linear()), ) .and_then(|lr| lr.decision_function(&x2)) .unwrap(); @@ -1073,7 +1067,7 @@ mod tests { &y, &SVCParameters::default() .with_c(1.0) - .with_kernel(&Kernels::rbf().with_gamma(0.7)), + .with_kernel(Kernels::rbf().with_gamma(0.7)), ) .and_then(|lr| lr.predict(&x)) .unwrap(); @@ -1122,7 +1116,7 @@ mod tests { ]; let knl = Kernels::linear(); - let params = SVCParameters::default().with_kernel(&knl); + let params = SVCParameters::default().with_kernel(knl); let svc = SVC::fit(&x, &y, ¶ms).unwrap(); // serialization diff --git a/src/svm/svr.rs b/src/svm/svr.rs index cf35bde..7a39a56 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -50,7 +50,7 @@ //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; //! //! let knl = Kernels::linear(); -//! let params = &SVRParameters::default().with_eps(2.0).with_c(10.0).with_kernel(&knl); +//! let params = &SVRParameters::default().with_eps(2.0).with_c(10.0).with_kernel(knl); //! // let svr = SVR::fit(&x, &y, params).unwrap(); //! //! // let y_hat = svr.predict(&x).unwrap(); @@ -83,9 +83,9 @@ use crate::numbers::floatnum::FloatNumber; use crate::svm::Kernel; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone)] +#[derive(Debug)] /// SVR Parameters -pub struct SVRParameters<'a, T: Number + FloatNumber + PartialOrd> { +pub struct SVRParameters { /// Epsilon in the epsilon-SVR model. pub eps: T, /// Regularization parameter. @@ -94,7 +94,7 @@ pub struct SVRParameters<'a, T: Number + FloatNumber + PartialOrd> { pub tol: T, #[cfg_attr(feature = "serde", serde(skip_deserializing))] /// The kernel function. 
- pub kernel: Option<&'a dyn Kernel<'a>>, + pub kernel: Option>, } #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -103,7 +103,7 @@ pub struct SVRParameters<'a, T: Number + FloatNumber + PartialOrd> { pub struct SVR<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> { instances: Option>>, #[cfg_attr(feature = "serde", serde(skip_deserializing))] - parameters: Option<&'a SVRParameters<'a, T>>, + parameters: Option<&'a SVRParameters>, w: Option>, b: T, phantom: PhantomData<(X, Y)>, @@ -123,7 +123,7 @@ struct SupportVector { struct Optimizer<'a, T: Number + FloatNumber + PartialOrd> { tol: T, c: T, - parameters: Option<&'a SVRParameters<'a, T>>, + parameters: Option<&'a SVRParameters>, svmin: usize, svmax: usize, gmin: T, @@ -140,7 +140,7 @@ struct Cache { data: Vec>>>, } -impl<'a, T: Number + FloatNumber + PartialOrd> SVRParameters<'a, T> { +impl SVRParameters { /// Epsilon in the epsilon-SVR model. pub fn with_eps(mut self, eps: T) -> Self { self.eps = eps; @@ -157,13 +157,13 @@ impl<'a, T: Number + FloatNumber + PartialOrd> SVRParameters<'a, T> { self } /// The kernel function. 
- pub fn with_kernel(mut self, kernel: &'a (dyn Kernel<'a>)) -> Self { - self.kernel = Some(kernel); + pub fn with_kernel(mut self, kernel: K) -> Self { + self.kernel = Some(Box::new(kernel)); self } } -impl<'a, T: Number + FloatNumber + PartialOrd> Default for SVRParameters<'a, T> { +impl Default for SVRParameters { fn default() -> Self { SVRParameters { eps: T::from_f64(0.1).unwrap(), @@ -175,7 +175,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd> Default for SVRParameters<'a, T> } impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> - SupervisedEstimatorBorrow<'a, X, Y, SVRParameters<'a, T>> for SVR<'a, T, X, Y> + SupervisedEstimatorBorrow<'a, X, Y, SVRParameters> for SVR<'a, T, X, Y> { fn new() -> Self { Self { @@ -186,7 +186,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> phantom: PhantomData, } } - fn fit(x: &'a X, y: &'a Y, parameters: &'a SVRParameters<'a, T>) -> Result { + fn fit(x: &'a X, y: &'a Y, parameters: &'a SVRParameters) -> Result { SVR::fit(x, y, parameters) } } @@ -208,7 +208,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' pub fn fit( x: &'a X, y: &'a Y, - parameters: &'a SVRParameters<'a, T>, + parameters: &'a SVRParameters, ) -> Result, Failed> { let (n, _) = x.shape(); @@ -324,7 +324,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd> Optimizer<'a, T> { fn new, Y: Array1>( x: &'a X, y: &'a Y, - parameters: &'a SVRParameters<'a, T>, + parameters: &'a SVRParameters, ) -> Optimizer<'a, T> { let (n, _) = x.shape(); @@ -655,7 +655,7 @@ mod tests { &SVRParameters::default() .with_eps(2.0) .with_c(10.0) - .with_kernel(&knl), + .with_kernel(knl), ) .and_then(|lr| lr.predict(&x)) .unwrap(); @@ -697,7 +697,7 @@ mod tests { ]; let knl = Kernels::rbf().with_gamma(0.7); - let params = SVRParameters::default().with_kernel(&knl); + let params = SVRParameters::default().with_kernel(knl); let svr = SVR::fit(&x, &y, ¶ms).unwrap(); From ab18f127a0a33b31dcbe2ede5fbe45453bf455bf Mon Sep 
17 00:00:00 2001 From: Lorenzo Date: Fri, 4 Nov 2022 22:11:54 +0000 Subject: [PATCH 09/36] Update README.md --- README.md | 45 +++------------------------------------------ 1 file changed, 3 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 3822f63..c2f6c7a 100644 --- a/README.md +++ b/README.md @@ -12,49 +12,10 @@ -----

-The Most Advanced Machine Learning Library In Rust. +Machine Learning in Rust

----- +[![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml) -To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, your feedback is valuable for the future of the library. - -## Developers -Contributions welcome, please start from [CONTRIBUTING and other relevant files](.github/CONTRIBUTING.md). - -### Walkthrough: traits system and basic structures - -#### numbers -The library is founded on basic traits provided by `num-traits`. Basic traits are in `src/numbers`. These traits are used to define all the procedures in the library to make everything safer and provide constraints to what implementations can handle. - -#### linalg -`numbers` are made at use in linear algebra structures in the **`src/linalg/basic`** module. These sub-modules define the traits used all over the code base. - -* *arrays*: In particular data structures like `Array`, `Array1` (1-dimensional), `Array2` (matrix, 2-D); plus their "views" traits. Views are used to provide no-footprint access to data, they have composed traits to allow writing (mutable traits: `MutArray`, `ArrayViewMut`, ...). -* *matrix*: This provides the main entrypoint to matrices operations and currently the only structure provided in the shape of `struct DenseMatrix`. A matrix can be instantiated and automatically make available all the traits in "arrays" (sparse matrices implementation will be provided). -* *vector*: Convenience traits are implemented for `std::Vec` to allow extensive reuse. - -These are all traits and by definition they do not allow instantiation. For instantiable structures see implementation like `DenseMatrix` with relative constructor. 
- -#### linalg/traits -The traits in `src/linalg/traits` are closely linked to Linear Algebra's theoretical framework. These traits are used to specify characteristics and constraints for types accepted by various algorithms. For example these allow to define if a matrix is `QRDecomposable` and/or `SVDDecomposable`. See docstring for referencese to theoretical framework. - -As above these are all traits and by definition they do not allow instantiation. They are mostly used to provide constraints for implementations. For example, the implementation for Linear Regression requires the input data `X` to be in `smartcore`'s trait system `Array2 + QRDecomposable + SVDDecomposable`, a 2-D matrix that is both QR and SVD decomposable; that is what the provided strucure `linalg::arrays::matrix::DenseMatrix` happens to be: `impl QRDecomposable for DenseMatrix {};impl SVDDecomposable for DenseMatrix {}`. - -#### metrics -Implementations for metrics (classification, regression, cluster, ...) and distance measure (Euclidean, Hamming, Manhattan, ...). For example: `Accuracy`, `F1`, `AUC`, `Precision`, `R2`. As everything else in the code base, these implementations reuse `numbers` and `linalg` traits and structures. - -These are collected in structures like `pub struct ClassificationMetrics {}` that implements `metrics::Metrics`, these are groups of functions (classification, regression, cluster, ...) that provide instantiation for the structures. Each of those instantiation can be passed around using the relative function, like `pub fn accuracy>(y_true: &V, y_pred: &V) -> T`. 
This provides a mechanism for metrics to be passed to higher interfaces like the `cross_validate`: -```rust -let results = - cross_validate( - BiasedEstimator::fit, // custom estimator - &x, &y, // input data - NoParameters {}, // extra parameters - cv, // type of cross validator - &accuracy // **metrics function** <-------- - ).unwrap(); -``` - - -TODO: complete for all modules +To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md). From d3a496419daa35f1c7835a2c1c4a2f06fb9131c1 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Fri, 4 Nov 2022 22:17:55 +0000 Subject: [PATCH 10/36] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c2f6c7a..fd6f481 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@

- User guide | API | Examples + User guide | API | Notebooks

From aab3817c58c6cc92a981f6a55f394d450c592be2 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Fri, 4 Nov 2022 22:23:36 +0000 Subject: [PATCH 11/36] Create DEVELOPERS.md --- .github/DEVELOPERS.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/DEVELOPERS.md diff --git a/.github/DEVELOPERS.md b/.github/DEVELOPERS.md new file mode 100644 index 0000000..87c2506 --- /dev/null +++ b/.github/DEVELOPERS.md @@ -0,0 +1,40 @@ +# Smartcore: Introduction to modules + +## Walkthrough: traits system and basic structures + +#### numbers +The library is founded on basic traits provided by `num-traits`. Basic traits are in `src/numbers`. These traits are used to define all the procedures in the library to make everything safer and provide constraints to what implementations can handle. + +#### linalg +`numbers` are made at use in linear algebra structures in the **`src/linalg/basic`** module. These sub-modules define the traits used all over the code base. + +* *arrays*: In particular data structures like `Array`, `Array1` (1-dimensional), `Array2` (matrix, 2-D); plus their "views" traits. Views are used to provide no-footprint access to data, they have composed traits to allow writing (mutable traits: `MutArray`, `ArrayViewMut`, ...). +* *matrix*: This provides the main entrypoint to matrices operations and currently the only structure provided in the shape of `struct DenseMatrix`. A matrix can be instantiated and automatically make available all the traits in "arrays" (sparse matrices implementation will be provided). +* *vector*: Convenience traits are implemented for `std::Vec` to allow extensive reuse. + +These are all traits and by definition they do not allow instantiation. For instantiable structures see implementation like `DenseMatrix` with relative constructor. + +#### linalg/traits +The traits in `src/linalg/traits` are closely linked to Linear Algebra's theoretical framework. 
These traits are used to specify characteristics and constraints for types accepted by various algorithms. For example these allow to define if a matrix is `QRDecomposable` and/or `SVDDecomposable`. See docstring for references to theoretical framework. + +As above these are all traits and by definition they do not allow instantiation. They are mostly used to provide constraints for implementations. For example, the implementation for Linear Regression requires the input data `X` to be in `smartcore`'s trait system `Array2 + QRDecomposable + SVDDecomposable`, a 2-D matrix that is both QR and SVD decomposable; that is what the provided structure `linalg::arrays::matrix::DenseMatrix` happens to be: `impl QRDecomposable for DenseMatrix {};impl SVDDecomposable for DenseMatrix {}`. + +#### metrics +Implementations for metrics (classification, regression, cluster, ...) and distance measure (Euclidean, Hamming, Manhattan, ...). For example: `Accuracy`, `F1`, `AUC`, `Precision`, `R2`. As everything else in the code base, these implementations reuse `numbers` and `linalg` traits and structures. + +These are collected in structures like `pub struct ClassificationMetrics {}` that implements `metrics::Metrics`, these are groups of functions (classification, regression, cluster, ...) that provide instantiation for the structures. Each of those instantiation can be passed around using the relative function, like `pub fn accuracy>(y_true: &V, y_pred: &V) -> T`. This provides a mechanism for metrics to be passed to higher interfaces like the `cross_validate`: +```rust +let results = + cross_validate( + BiasedEstimator::new(), // custom estimator + &x, &y, // input data + NoParameters {}, // extra parameters + cv, // type of cross validator + &accuracy // **metrics function** <-------- + ).unwrap(); +``` + +TODO: complete for all modules + +## Notebooks +Proceed to the [**notebooks**](https://github.com/smartcorelib/smartcore-jupyter/) to see these modules in action. 
From 23b36997309e1cdd7ed3a82bfa7ec9b2b6ee0cdf Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 7 Nov 2022 12:48:44 +0000 Subject: [PATCH 12/36] Release 0.3 --- .github/DEVELOPERS.md | 5 ++- CHANGELOG.md | 7 +++-- Cargo.toml | 16 +++++++--- LICENSE | 2 +- README.md | 4 +-- smartcore.svg | 2 +- src/algorithm/neighbour/cover_tree.rs | 10 +++--- src/cluster/kmeans.rs | 6 ++-- src/dataset/mod.rs | 2 +- src/ensemble/mod.rs | 2 +- src/ensemble/random_forest_classifier.rs | 3 -- src/ensemble/random_forest_regressor.rs | 3 -- src/lib.rs | 39 ++++++++++++++++++------ src/linear/linear_regression.rs | 5 +-- src/linear/logistic_regression.rs | 2 +- src/linear/ridge_regression.rs | 5 +-- src/metrics/auc.rs | 2 +- src/metrics/mod.rs | 2 +- src/model_selection/mod.rs | 2 +- src/neighbors/knn_classifier.rs | 2 +- src/numbers/realnum.rs | 2 +- src/svm/mod.rs | 2 +- src/svm/svc.rs | 5 ++- src/svm/svr.rs | 2 -- src/tree/decision_tree_classifier.rs | 12 +++----- src/tree/decision_tree_regressor.rs | 14 +++------ src/tree/mod.rs | 2 +- 27 files changed, 83 insertions(+), 77 deletions(-) diff --git a/.github/DEVELOPERS.md b/.github/DEVELOPERS.md index 87c2506..b3a647b 100644 --- a/.github/DEVELOPERS.md +++ b/.github/DEVELOPERS.md @@ -1,4 +1,7 @@ -# Smartcore: Introduction to modules +# smartcore: Introduction to modules + +Important source of information: +* [Rust API guidelines](https://rust-lang.github.io/api-guidelines/about.html) ## Walkthrough: traits system and basic structures diff --git a/CHANGELOG.md b/CHANGELOG.md index a9dda10..6052e07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.3] - 2022-11 ## Added +- WARNING: Breaking changes! 
- Seeds to multiple algorithims that depend on random number generation. - Added feature `js` to use WASM in browser - Drop `nalgebra-bindings` feature -- Complete refactoring with *extensive API changes* that includes: +- Complete refactoring with **extensive API changes** that includes: * moving to a new traits system, less structs more traits * adapting all the modules to the new traits system * moving towards Rust 2021, in particular the use of `dyn` and `as_ref` @@ -19,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## BREAKING CHANGE - Added a new parameter to `train_test_split` to define the seed. -## [0.2.1] - 2022-05-10 +## [0.2.1] - 2021-05-10 ## Added - L2 regularization penalty to the Logistic Regression diff --git a/Cargo.toml b/Cargo.toml index 0a23083..0c3adda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "smartcore" -description = "The most advanced machine learning library in rust." +description = "Machine Learning in Rust." 
homepage = "https://smartcorelib.org" -version = "0.4.0" -authors = ["SmartCore Developers"] +version = "0.3.0" +authors = ["smartcore Developers"] edition = "2021" license = "Apache-2.0" documentation = "https://docs.rs/smartcore" @@ -11,6 +11,12 @@ repository = "https://github.com/smartcorelib/smartcore" readme = "README.md" keywords = ["machine-learning", "statistical", "ai", "optimization", "linear-algebra"] categories = ["science"] +exclude = [ + ".github", + ".gitignore", + "smartcore.iml", + "smartcore.svg", +] [dependencies] approx = "0.5.1" @@ -23,10 +29,10 @@ rand_distr = { version = "0.4", optional = true } serde = { version = "1", features = ["derive"], optional = true } [features] -default = ["serde", "datasets"] +default = [] serde = ["dep:serde"] ndarray-bindings = ["dep:ndarray"] -datasets = ["dep:rand_distr", "std"] +datasets = ["dep:rand_distr", "std", "serde"] std = ["rand/std_rng", "rand/std"] # wasm32 only js = ["getrandom/js"] diff --git a/LICENSE b/LICENSE index 3cd5786..9448cee 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2019-present at SmartCore developers (smartcorelib.org) + Copyright 2019-present at smartcore developers (smartcorelib.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index fd6f481..758a461 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

- SmartCore + smartcore

@@ -18,4 +18,4 @@ ----- [![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml) -To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md). +To start getting familiar with the new smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md). diff --git a/smartcore.svg b/smartcore.svg index 3e4c68d..eaffd58 100644 --- a/smartcore.svg +++ b/smartcore.svg @@ -76,5 +76,5 @@ y="81.876823" x="91.861809" id="tspan842" - sodipodi:role="line">SmartCore + sodipodi:role="line">smartcore diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index db062f9..011a9cc 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -64,7 +64,7 @@ struct Node { max_dist: f64, parent_dist: f64, children: Vec, - scale: i64, + _scale: i64, } #[derive(Debug)] @@ -84,7 +84,7 @@ impl> CoverTree { max_dist: 0f64, parent_dist: 0f64, children: Vec::new(), - scale: 0, + _scale: 0, }; let mut tree = CoverTree { base, @@ -245,7 +245,7 @@ impl> CoverTree { max_dist: 0f64, parent_dist: 0f64, children: Vec::new(), - scale: 100, + _scale: 100, } } @@ -306,7 +306,7 @@ impl> CoverTree { max_dist: 0f64, parent_dist: 0f64, children, - scale: 100, + _scale: 100, } } else { let mut far: Vec = Vec::new(); @@ -375,7 +375,7 @@ impl> CoverTree { max_dist: self.max(consumed_set), parent_dist: 0f64, children, - scale: (top_scale - max_scale), + _scale: (top_scale - max_scale), } } } diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 
9322d65..4384ddb 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -11,7 +11,7 @@ //! these re-calculated centroids becoming the new centers of their respective clusters. Next all instances of the training set are re-assigned to their closest cluster again. //! This iterative process continues until convergence is achieved and the clusters are considered settled. //! -//! Initial choice of K data points is very important and has big effect on performance of the algorithm. SmartCore uses k-means++ algorithm to initialize cluster centers. +//! Initial choice of K data points is very important and has big effect on performance of the algorithm. smartcore uses k-means++ algorithm to initialize cluster centers. //! //! Example: //! @@ -74,7 +74,7 @@ pub struct KMeans, Y: Array1> { k: usize, _y: Vec, size: Vec, - distortion: f64, + _distortion: f64, centroids: Vec>, _phantom_tx: PhantomData, _phantom_ty: PhantomData, @@ -313,7 +313,7 @@ impl, Y: Array1> KMeans k: parameters.k, _y: y, size, - distortion, + _distortion: distortion, centroids, _phantom_tx: PhantomData, _phantom_ty: PhantomData, diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index 5b32d02..ac48bf8 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -1,6 +1,6 @@ //! Datasets //! -//! In this module you will find small datasets that are used in SmartCore mostly for demonstration purposes. +//! In this module you will find small datasets that are used in smartcore mostly for demonstration purposes. pub mod boston; pub mod breast_cancer; pub mod diabetes; diff --git a/src/ensemble/mod.rs b/src/ensemble/mod.rs index 1ddf4b4..161df96 100644 --- a/src/ensemble/mod.rs +++ b/src/ensemble/mod.rs @@ -7,7 +7,7 @@ //! set and then aggregate their individual predictions to form a final prediction. In classification setting the overall prediction is the most commonly //! occurring majority class among the individual predictions. //! -//! 
In SmartCore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html). +//! In smartcore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html). //! Random forests provide an improvement over bagged trees by way of a small tweak that decorrelates the trees. As in bagging, we build a number of //! decision trees on bootstrapped training samples. But when building these decision trees, each time a split in a tree is considered, //! a random sample of _m_ predictors is chosen as split candidates from the full set of _p_ predictors. diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index d01acef..3db103b 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -104,7 +104,6 @@ pub struct RandomForestClassifier< X: Array2, Y: Array1, > { - parameters: Option, trees: Option>>, classes: Option>, samples: Option>>, @@ -198,7 +197,6 @@ impl, Y: { fn new() -> Self { Self { - parameters: Option::None, trees: Option::None, classes: Option::None, samples: Option::None, @@ -501,7 +499,6 @@ impl, Y: Array1, Y: Array1, > { - parameters: Option, trees: Option>>, samples: Option>>, } @@ -177,7 +176,6 @@ impl, Y: Array1 { fn new() -> Self { Self { - parameters: Option::None, trees: Option::None, samples: Option::None, } @@ -434,7 +432,6 @@ impl, Y: Array1 } Ok(RandomForestRegressor { - parameters: Some(parameters), trees: Some(trees), samples: maybe_all_samples, }) diff --git a/src/lib.rs b/src/lib.rs index a955de2..8746dbf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,25 +8,38 @@ #![warn(missing_docs)] #![warn(rustdoc::missing_doc_code_examples)] -//! # SmartCore +//! # smartcore //! -//! Welcome to SmartCore, machine learning in Rust! +//! Welcome to smartcore, machine learning in Rust! //! -//! 
SmartCore features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN, +//! `smartcore` features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN, //! as well as tools for model selection and model evaluation. //! -//! SmartCore provides its own traits system that extends Rust standard library, to deal with linear algebra and common +//! `smartcore` provides its own traits system that extends Rust standard library, to deal with linear algebra and common //! computational models. Its API is designed using well recognizable patterns. Extra features (like support for [ndarray](https://docs.rs/ndarray) //! structures) is available via optional features. //! //! ## Getting Started //! -//! To start using SmartCore simply add the following to your Cargo.toml file: +//! To start using `smartcore` latest stable version simply add the following to your `Cargo.toml` file: +//! ```ignore +//! [dependencies] +//! smartcore = "*" +//! ``` +//! +//! To start using smartcore development version with latest unstable additions: //! ```ignore //! [dependencies] //! smartcore = { git = "https://github.com/smartcorelib/smartcore", branch = "development" } //! ``` //! +//! There are different features that can be added to the base library, for example to add sample datasets: +//! ```ignore +//! [dependencies] +//! smartcore = { git = "https://github.com/smartcorelib/smartcore", features = ["datasets"] } +//! ``` +//! Check `smartcore`'s `Cargo.toml` for available features. +//! //! ## Using Jupyter //! For quick introduction, Jupyter Notebooks are available [here](https://github.com/smartcorelib/smartcore-jupyter/tree/main/notebooks). //! You can set up a local environment to run Rust notebooks using [EVCXR](https://github.com/google/evcxr) @@ -37,7 +50,7 @@ //! 
For example, you can use this code to fit a [K Nearest Neighbors classifier](neighbors/knn_classifier/index.html) to a dataset that is defined as standard Rust vector: //! //! ``` -//! // DenseMatrix defenition +//! // DenseMatrix definition //! use smartcore::linalg::basic::matrix::DenseMatrix; //! // KNNClassifier //! use smartcore::neighbors::knn_classifier::*; @@ -62,7 +75,9 @@ //! ``` //! //! ## Overview -//! All machine learning algorithms in SmartCore are grouped into these broad categories: +//! +//! ### Supported algorithms +//! All machine learning algorithms are grouped into these broad categories: //! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data. //! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition. //! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables @@ -71,11 +86,14 @@ //! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression //! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem //! * [SVM](svm/index.html), support vector machines +//! +//! ### Linear Algebra traits system +//! 
For an introduction to `smartcore`'s traits system see [this notebook](https://github.com/smartcorelib/smartcore-jupyter/blob/5523993c53c6ec1fd72eea130ef4e7883121c1ea/notebooks/01-A-little-bit-about-numbers.ipynb) /// Foundamental numbers traits pub mod numbers; -/// Various algorithms and helper methods that are used elsewhere in SmartCore +/// Various algorithms and helper methods that are used elsewhere in smartcore pub mod algorithm; pub mod api; @@ -89,7 +107,7 @@ pub mod decomposition; /// Ensemble methods, including Random Forest classifier and regressor pub mod ensemble; pub mod error; -/// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms +/// Diverse collection of linear algebra abstractions and methods that power smartcore algorithms pub mod linalg; /// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables. pub mod linear; @@ -105,7 +123,8 @@ pub mod neighbors; pub mod optimization; /// Preprocessing utilities pub mod preprocessing; -/// Reading in data from serialized foramts +/// Reading in data from serialized formats +#[cfg(feature = "serde")] pub mod readers; /// Support Vector Machines pub mod svm; diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index 1f7d540..7f6dfad 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -12,7 +12,7 @@ //! \\[\hat{\beta} = (X^TX)^{-1}X^Ty \\] //! //! the \\((X^TX)^{-1}\\) term is both computationally expensive and numerically unstable. An alternative approach is to use a matrix decomposition to avoid this operation. -//! SmartCore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! smartcore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). //! 
The QR decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, //! but does not work for all data matrices. Unlike the QR decomposition, all matrices have an SVD decomposition. //! @@ -113,7 +113,6 @@ pub struct LinearRegression< > { coefficients: Option, intercept: Option, - solver: LinearRegressionSolverName, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -210,7 +209,6 @@ impl< Self { coefficients: Option::None, intercept: Option::None, - solver: LinearRegressionParameters::default().solver, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -276,7 +274,6 @@ impl< Ok(LinearRegression { intercept: Some(*w.get((num_attributes, 0))), coefficients: Some(weights), - solver: parameters.solver, _phantom_ty: PhantomData, _phantom_y: PhantomData, }) diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 7dd269c..e8c08d8 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -5,7 +5,7 @@ //! //! \\[ Pr(y=1) \approx \frac{e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}}{1 + e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}} \\] //! -//! SmartCore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\) +//! smartcore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\) //! //! Example: //! diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index 914afc2..e03948d 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -12,7 +12,7 @@ //! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates. //! 
However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero. //! -//! SmartCore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! smartcore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). //! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, //! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition. //! @@ -197,7 +197,6 @@ pub struct RidgeRegression< > { coefficients: Option, intercept: Option, - solver: Option, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -259,7 +258,6 @@ impl< Self { coefficients: Option::None, intercept: Option::None, - solver: Option::None, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -367,7 +365,6 @@ impl< Ok(RidgeRegression { intercept: Some(b), coefficients: Some(w), - solver: Some(parameters.solver), _phantom_ty: PhantomData, _phantom_y: PhantomData, }) diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs index ecaf646..5848fbc 100644 --- a/src/metrics/auc.rs +++ b/src/metrics/auc.rs @@ -2,7 +2,7 @@ //! Computes the area under the receiver operating characteristic (ROC) curve that is equal to the probability that a classifier will rank a //! randomly chosen positive instance higher than a randomly chosen negative one. //! -//! SmartCore calculates ROC AUC from Wilcoxon or Mann-Whitney U test. +//! smartcore calculates ROC AUC from Wilcoxon or Mann-Whitney U test. //! //! Example: //! ``` diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 06d44a1..40086af 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -4,7 +4,7 @@ //! 
In a feedback loop you build your model first, then you get feedback from metrics, improve it and repeat until your model achieve desirable performance. //! Evaluation metrics helps to explain the performance of a model and compare models based on an objective criterion. //! -//! Choosing the right metric is crucial while evaluating machine learning models. In SmartCore you will find metrics for these classes of ML models: +//! Choosing the right metric is crucial while evaluating machine learning models. In smartcore you will find metrics for these classes of ML models: //! //! * [Classification metrics](struct.ClassificationMetrics.html) //! * [Regression metrics](struct.RegressionMetrics.html) diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index b8e4e7f..b712d67 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -7,7 +7,7 @@ //! Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for //! the data. //! -//! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function. +//! In smartcore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function. //! //! ``` //! use smartcore::linalg::basic::matrix::DenseMatrix; diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index 67d094a..d13dce6 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -1,6 +1,6 @@ //! # K Nearest Neighbors Classifier //! -//! SmartCore relies on 2 backend algorithms to speedup KNN queries: +//! smartcore relies on 2 backend algorithms to speedup KNN queries: //! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html) //! * [`CoverTree`](../../algorithm/neighbour/cover_tree/index.html) //! 
diff --git a/src/numbers/realnum.rs b/src/numbers/realnum.rs index 8c60e47..cb5336a 100644 --- a/src/numbers/realnum.rs +++ b/src/numbers/realnum.rs @@ -1,5 +1,5 @@ //! # Real Number -//! Most algorithms in SmartCore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ. +//! Most algorithms in smartcore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ. //! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module. use num_traits::Float; diff --git a/src/svm/mod.rs b/src/svm/mod.rs index a30fe87..92b3ab4 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -9,7 +9,7 @@ //! SVM is memory efficient since it uses only a subset of training data to find a decision boundary. This subset is called support vectors. //! //! In SVM distance between a data point and the support vectors is defined by the kernel function. -//! SmartCore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel. +//! smartcore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel. //! Building a new kernel requires a good mathematical understanding of the [Mercer theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem) //! that gives necessary and sufficient condition for a function to be a kernel function. //! diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 9cb140d..c886ba1 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -20,7 +20,7 @@ //! //! Where \\( m \\) is a number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is a decision boundary. //! -//! 
To solve this optimization problem, SmartCore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm). +//! To solve this optimization problem, smartcore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm). //! The optimizer reaches accuracies similar to that of a real SVM after performing two passes through the training examples. You can choose the number of passes //! through the data that the algorithm takes by changing the `epoch` parameter of the classifier. //! @@ -934,8 +934,7 @@ mod tests { use super::*; use crate::linalg::basic::matrix::DenseMatrix; use crate::metrics::accuracy; - #[cfg(feature = "serde")] - use crate::svm::*; + use crate::svm::Kernels; #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 7a39a56..8d49525 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -596,7 +596,6 @@ mod tests { use super::*; use crate::linalg::basic::matrix::DenseMatrix; use crate::metrics::mean_squared_error; - #[cfg(feature = "serde")] use crate::svm::Kernels; // #[test] @@ -617,7 +616,6 @@ mod tests { // assert!(iter.next().is_none()); // } - //TODO: had to disable this test as it runs for too long #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 6341ab4..a7b0228 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -163,7 +163,6 @@ impl Default for SplitCriterion { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] struct Node { - index: usize, output: usize, split_feature: usize, split_value: Option, @@ -406,9 +405,8 @@ impl Default for DecisionTreeClassifierSearchParameters { } impl Node { - fn new(index: usize, output: usize) -> Self { + fn new(output: usize) -> Self { Node { - index, output, split_feature: 0, split_value: Option::None, 
@@ -582,7 +580,7 @@ impl, Y: Array1> count[yi[i]] += samples[i]; } - let root = Node::new(0, which_max(&count)); + let root = Node::new(which_max(&count)); change_nodes.push(root); let mut order: Vec> = Vec::new(); @@ -831,11 +829,9 @@ impl, Y: Array1> let true_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(true_child_idx, visitor.true_child_output)); + self.nodes.push(Node::new(visitor.true_child_output)); let false_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(false_child_idx, visitor.false_child_output)); + self.nodes.push(Node::new(visitor.false_child_output)); self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx); diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 12ea978..cb6eb4f 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -11,7 +11,7 @@ //! //! where \\(\hat{y}_{Rk}\\) is the mean response for the training observations withing region _k_. //! -//! SmartCore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space +//! smartcore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space //! one predictor at a time. At each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better //! tree in some future step. //! 
@@ -128,7 +128,6 @@ impl, Y: Array1> #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] struct Node { - index: usize, output: f64, split_feature: usize, split_value: Option, @@ -299,9 +298,8 @@ impl Default for DecisionTreeRegressorSearchParameters { } impl Node { - fn new(index: usize, output: f64) -> Self { + fn new(output: f64) -> Self { Node { - index, output, split_feature: 0, split_value: Option::None, @@ -450,7 +448,7 @@ impl, Y: Array1> sum += *sample_i as f64 * y_m.get(i).to_f64().unwrap(); } - let root = Node::new(0, sum / (n as f64)); + let root = Node::new(sum / (n as f64)); nodes.push(root); let mut order: Vec> = Vec::new(); @@ -662,11 +660,9 @@ impl, Y: Array1> let true_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(true_child_idx, visitor.true_child_output)); + self.nodes.push(Node::new(visitor.true_child_output)); let false_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(false_child_idx, visitor.false_child_output)); + self.nodes.push(Node::new(visitor.false_child_output)); self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx); diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 700dc76..a1b82c8 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -9,7 +9,7 @@ //! Decision trees suffer from high variance and often does not deliver best prediction accuracy when compared to other supervised learning approaches, such as linear and logistic regression. //! Hence some techniques such as [Random Forests](../ensemble/index.html) use more than one decision tree to improve performance of the algorithm. //! -//! SmartCore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees. +//! 
smartcore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees. //! //! ## References: //! From 0ec89402e8ff5eb97d05c5d5ab6c0ca650342e7d Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 7 Nov 2022 12:50:32 +0000 Subject: [PATCH 13/36] minor fix --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 8746dbf..b06d668 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ //! # smartcore //! -//! Welcome to smartcore, machine learning in Rust! +//! Welcome to `smartcore`, machine learning in Rust! //! //! `smartcore` features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN, //! as well as tools for model selection and model evaluation. From cc91e31a0efd8c4d6ba8b75e8348b30963d9c3b7 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 7 Nov 2022 13:00:51 +0000 Subject: [PATCH 14/36] minor fixes --- src/cluster/kmeans.rs | 2 +- src/dataset/mod.rs | 2 +- src/lib.rs | 4 ++-- src/numbers/realnum.rs | 2 +- src/tree/mod.rs | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 4384ddb..c542ae2 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -11,7 +11,7 @@ //! these re-calculated centroids becoming the new centers of their respective clusters. Next all instances of the training set are re-assigned to their closest cluster again. //! This iterative process continues until convergence is achieved and the clusters are considered settled. //! -//! Initial choice of K data points is very important and has big effect on performance of the algorithm. smartcore uses k-means++ algorithm to initialize cluster centers. +//! 
Initial choice of K data points is very important and has big effect on performance of the algorithm. `smartcore` uses k-means++ algorithm to initialize cluster centers. //! //! Example: //! diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index ac48bf8..855b288 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -1,6 +1,6 @@ //! Datasets //! -//! In this module you will find small datasets that are used in smartcore mostly for demonstration purposes. +//! In this module you will find small datasets that are used in `smartcore` mostly for demonstration purposes. pub mod boston; pub mod breast_cancer; pub mod diabetes; diff --git a/src/lib.rs b/src/lib.rs index b06d668..03bfc03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,7 +75,7 @@ //! ``` //! //! ## Overview -//! +//! //! ### Supported algorithms //! All machine learning algorithms are grouped into these broad categories: //! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data. @@ -86,7 +86,7 @@ //! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression //! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem //! * [SVM](svm/index.html), support vector machines -//! +//! //! ### Linear Algebra traits system //! For an introduction to `smartcore`'s traits system see [this notebook](https://github.com/smartcorelib/smartcore-jupyter/blob/5523993c53c6ec1fd72eea130ef4e7883121c1ea/notebooks/01-A-little-bit-about-numbers.ipynb) diff --git a/src/numbers/realnum.rs b/src/numbers/realnum.rs index cb5336a..f4d9aec 100644 --- a/src/numbers/realnum.rs +++ b/src/numbers/realnum.rs @@ -1,5 +1,5 @@ //! # Real Number -//! Most algorithms in smartcore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ. +//! 
Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ. //! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module. use num_traits::Float; diff --git a/src/tree/mod.rs b/src/tree/mod.rs index a1b82c8..340b0a8 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -9,7 +9,7 @@ //! Decision trees suffer from high variance and often does not deliver best prediction accuracy when compared to other supervised learning approaches, such as linear and logistic regression. //! Hence some techniques such as [Random Forests](../ensemble/index.html) use more than one decision tree to improve performance of the algorithm. //! -//! smartcore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees. +//! `smartcore` uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees. //! //! ## References: //! 
From 3ac6598951600d28c2576230a92f3408abfd7812 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 7 Nov 2022 13:56:29 +0000 Subject: [PATCH 15/36] Exclude datasets test for wasm/wasi --- src/cluster/kmeans.rs | 1 + src/ensemble/random_forest_classifier.rs | 1 + src/tree/decision_tree_classifier.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index c542ae2..144f8c5 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -469,6 +469,7 @@ mod tests { all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test )] + #[cfg(feature = "datasets")] #[test] fn fit_predict_iris() { let x = DenseMatrix::from_2d_array(&[ diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index 3db103b..ca06e2f 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -634,6 +634,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] + #[cfg(feature = "datasets")] fn fit_predict_iris() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index a7b0228..cbce14e 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -919,6 +919,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] + #[cfg(feature = "datasets")] fn fit_predict_iris() { let x: DenseMatrix = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], From bf7b714126b47808947b61f449f3872bc15dcfab Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 7 Nov 2022 18:16:13 +0000 Subject: [PATCH 16/36] Add static analyzer to doc --- .github/CONTRIBUTING.md | 5 +++++ .gitignore | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c09dfa7..48bce72 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -25,6 
+25,11 @@ Take a look to the conventions established by existing code: * Every module should provide a Rust doctest, a brief test embedded with the documentation that explains how to use the procedure implemented. * Every module should provide comprehensive tests at the end, in its `mod tests {}` sub-module. These tests can be flagged or not with configuration flags to allow WebAssembly target. * Run `cargo doc --no-deps --open` and read the generated documentation in the browser to be sure that your changes reflects in the documentation and new code is documented. +* a nice overview of the codebase is given by [static analyzer](https://mozilla.github.io/rust-code-analysis/metrics.html): +``` +$ cargo install rust-code-analysis-cli +$ rust-code-analysis-cli -m -O json -o . -p src/ --pr +``` ## Issue Report Process diff --git a/.gitignore b/.gitignore index 9c0651c..e2976f7 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ src.dot out.svg FlameGraph/ -out.stacks \ No newline at end of file +out.stacks +*.json \ No newline at end of file From 8e6e5f9e68d33ff906acd1f37f9eb38e5e33c7a5 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 11:47:31 +0000 Subject: [PATCH 17/36] Use getrandom as default (for no-std feature) --- .github/CONTRIBUTING.md | 6 ++++++ .gitignore | 3 ++- Cargo.toml | 20 +++++++++----------- src/rand_custom.rs | 7 ++++++- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 48bce72..15b3906 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -25,11 +25,17 @@ Take a look to the conventions established by existing code: * Every module should provide a Rust doctest, a brief test embedded with the documentation that explains how to use the procedure implemented. * Every module should provide comprehensive tests at the end, in its `mod tests {}` sub-module. 
These tests can be flagged or not with configuration flags to allow WebAssembly target. * Run `cargo doc --no-deps --open` and read the generated documentation in the browser to be sure that your changes reflects in the documentation and new code is documented. + +#### digging deeper * a nice overview of the codebase is given by [static analyzer](https://mozilla.github.io/rust-code-analysis/metrics.html): ``` $ cargo install rust-code-analysis-cli +// print metrics for every module $ rust-code-analysis-cli -m -O json -o . -p src/ --pr +// print full AST for a module +$ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 -d > ast.txt ``` +* find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This need a compiled binary so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file. ## Issue Report Process diff --git a/.gitignore b/.gitignore index e2976f7..0983a15 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,5 @@ out.svg FlameGraph/ out.stacks -*.json \ No newline at end of file +*.json +*.txt \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 0c3adda..0dc84ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ exclude = [ ".gitignore", "smartcore.iml", "smartcore.svg", + "tests/" ] [dependencies] @@ -25,6 +26,7 @@ ndarray = { version = "0.15", optional = true } num-traits = "0.2.12" num = "0.4" rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } +getrandom = { version = "*", features = ["js"] } rand_distr = { version = "0.4", optional = true } serde = { version = "1", features = ["derive"], optional = true } @@ -32,25 +34,21 @@ serde = { version = "1", features = ["derive"], optional = true } default = [] serde = ["dep:serde"] ndarray-bindings = ["dep:ndarray"] -datasets = ["dep:rand_distr", "std", "serde"] -std = ["rand/std_rng", "rand/std"] -# wasm32 only -js = 
["getrandom/js"] +datasets = ["dep:rand_distr", "std_rand", "serde"] +std_rand = ["rand/std_rng", "rand/std"] [target.'cfg(target_arch = "wasm32")'.dependencies] getrandom = { version = "0.2", optional = true } -[dev-dependencies] -itertools = "*" -criterion = { version = "0.4", default-features = false } -serde_json = "1.0" -bincode = "1.3.1" - [target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies] wasm-bindgen-test = "0.3" +[dev-dependencies] +itertools = "*" +serde_json = "1.0" +bincode = "1.3.1" + [workspace] -resolver = "2" [profile.test] debug = 1 diff --git a/src/rand_custom.rs b/src/rand_custom.rs index 15f9e73..d06c344 100644 --- a/src/rand_custom.rs +++ b/src/rand_custom.rs @@ -1,5 +1,7 @@ #[cfg(not(feature = "std"))] pub(crate) use rand::rngs::SmallRng as RngImpl; +#[cfg(not(feature = "std"))] +use getrandom; #[cfg(feature = "std")] pub(crate) use rand::rngs::StdRng as RngImpl; use rand::SeedableRng; @@ -13,7 +15,10 @@ pub(crate) fn get_rng_impl(seed: Option) -> RngImpl { use rand::RngCore; RngImpl::seed_from_u64(rand::thread_rng().next_u64()) } else { - panic!("seed number needed for non-std build"); + // non-std build, use getrandom + let mut buf = [0u8; 64]; + getrandom::getrandom(&mut buf).unwrap(); + RngImpl::seed_from_u64(buf[0] as u64) } } } From 2fa454ea94c4eaf021571ed69b37e188ff75756b Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 11:48:14 +0000 Subject: [PATCH 18/36] fmt --- src/rand_custom.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rand_custom.rs b/src/rand_custom.rs index d06c344..7b4a3a5 100644 --- a/src/rand_custom.rs +++ b/src/rand_custom.rs @@ -1,7 +1,7 @@ #[cfg(not(feature = "std"))] -pub(crate) use rand::rngs::SmallRng as RngImpl; -#[cfg(not(feature = "std"))] use getrandom; +#[cfg(not(feature = "std"))] +pub(crate) use rand::rngs::SmallRng as RngImpl; #[cfg(feature = "std")] pub(crate) use rand::rngs::StdRng as RngImpl; use rand::SeedableRng; From 
c1af60cafb37fb799ea8457384fdf05aed924c8b Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 11:55:32 +0000 Subject: [PATCH 19/36] cleanup --- src/rand_custom.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rand_custom.rs b/src/rand_custom.rs index 7b4a3a5..2156ab0 100644 --- a/src/rand_custom.rs +++ b/src/rand_custom.rs @@ -1,6 +1,4 @@ #[cfg(not(feature = "std"))] -use getrandom; -#[cfg(not(feature = "std"))] pub(crate) use rand::rngs::SmallRng as RngImpl; #[cfg(feature = "std")] pub(crate) use rand::rngs::StdRng as RngImpl; From 3c4a807be8a9ce998b8bbf81501c96fe322d9be4 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 12:04:39 +0000 Subject: [PATCH 20/36] Fix std_rand feature --- src/rand_custom.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rand_custom.rs b/src/rand_custom.rs index 2156ab0..b22390e 100644 --- a/src/rand_custom.rs +++ b/src/rand_custom.rs @@ -1,19 +1,20 @@ -#[cfg(not(feature = "std"))] -pub(crate) use rand::rngs::SmallRng as RngImpl; -#[cfg(feature = "std")] -pub(crate) use rand::rngs::StdRng as RngImpl; +#[cfg(not(feature = "std_rand"))] +pub use rand::rngs::SmallRng as RngImpl; +#[cfg(feature = "std_rand")] +pub use rand::rngs::StdRng as RngImpl; use rand::SeedableRng; -pub(crate) fn get_rng_impl(seed: Option) -> RngImpl { +/// Custom switch for random fuctions +pub fn get_rng_impl(seed: Option) -> RngImpl { match seed { Some(seed) => RngImpl::seed_from_u64(seed), None => { cfg_if::cfg_if! 
{ - if #[cfg(feature = "std")] { + if #[cfg(feature = "std_rand")] { use rand::RngCore; RngImpl::seed_from_u64(rand::thread_rng().next_u64()) } else { - // non-std build, use getrandom + // no std_random feature build, use getrandom let mut buf = [0u8; 64]; getrandom::getrandom(&mut buf).unwrap(); RngImpl::seed_from_u64(buf[0] as u64) From b4206c4b08f31a28c9b81264c45a89eb5d4762a2 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 12:15:10 +0000 Subject: [PATCH 21/36] minor fix --- src/ensemble/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ensemble/mod.rs b/src/ensemble/mod.rs index 161df96..8cebd5c 100644 --- a/src/ensemble/mod.rs +++ b/src/ensemble/mod.rs @@ -7,7 +7,7 @@ //! set and then aggregate their individual predictions to form a final prediction. In classification setting the overall prediction is the most commonly //! occurring majority class among the individual predictions. //! -//! In smartcore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html). +//! In `smartcore` you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html). //! Random forests provide an improvement over bagged trees by way of a small tweak that decorrelates the trees. As in bagging, we build a number of //! decision trees on bootstrapped training samples. But when building these decision trees, each time a split in a tree is considered, //! a random sample of _m_ predictors is chosen as split candidates from the full set of _p_ predictors. 
From a60fdaf235be3a8447b5c436f7669d94bc140bc7 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 12:17:04 +0000 Subject: [PATCH 22/36] minor fix --- src/linear/linear_regression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index 7f6dfad..a5c7699 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -12,7 +12,7 @@ //! \\[\hat{\beta} = (X^TX)^{-1}X^Ty \\] //! //! the \\((X^TX)^{-1}\\) term is both computationally expensive and numerically unstable. An alternative approach is to use a matrix decomposition to avoid this operation. -//! smartcore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). //! The QR decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, //! but does not work for all data matrices. Unlike the QR decomposition, all matrices have an SVD decomposition. //! From 78bf75b5d8fc3a8cf044071896991bdf012fc128 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 12:17:32 +0000 Subject: [PATCH 23/36] minor fix --- src/linear/logistic_regression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index e8c08d8..8bf65bf 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -5,7 +5,7 @@ //! //! \\[ Pr(y=1) \approx \frac{e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}}{1 + e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}} \\] //! -//! smartcore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\) +//! 
`smartcore` uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\) //! //! Example: //! From b71c7b49cb59d18d9ad4a97370832a4f96c9f82e Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 12:18:03 +0000 Subject: [PATCH 24/36] minor fix --- src/linear/ridge_regression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index e03948d..6bd5595 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -12,7 +12,7 @@ //! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates. //! However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero. //! -//! smartcore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). //! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, //! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition. //! From a4097fce152ece11f6bb4f1ce7b4636f54cfcdd6 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 12:18:35 +0000 Subject: [PATCH 25/36] minor fix --- src/metrics/auc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs index 5848fbc..0a7ddf4 100644 --- a/src/metrics/auc.rs +++ b/src/metrics/auc.rs @@ -2,7 +2,7 @@ //! 
Computes the area under the receiver operating characteristic (ROC) curve that is equal to the probability that a classifier will rank a //! randomly chosen positive instance higher than a randomly chosen negative one. //! -//! smartcore calculates ROC AUC from Wilcoxon or Mann-Whitney U test. +//! `smartcore` calculates ROC AUC from Wilcoxon or Mann-Whitney U test. //! //! Example: //! ``` From 6c6f92697fe30cf73a7c50ba598ba53e76cc2002 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 12:21:34 +0000 Subject: [PATCH 26/36] minor fixes to doc --- src/metrics/mod.rs | 2 +- src/model_selection/mod.rs | 2 +- src/neighbors/knn_classifier.rs | 2 +- src/svm/mod.rs | 2 +- src/svm/svc.rs | 2 +- src/tree/decision_tree_regressor.rs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 40086af..c7e1be3 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -4,7 +4,7 @@ //! In a feedback loop you build your model first, then you get feedback from metrics, improve it and repeat until your model achieve desirable performance. //! Evaluation metrics helps to explain the performance of a model and compare models based on an objective criterion. //! -//! Choosing the right metric is crucial while evaluating machine learning models. In smartcore you will find metrics for these classes of ML models: +//! Choosing the right metric is crucial while evaluating machine learning models. In `smartcore` you will find metrics for these classes of ML models: //! //! * [Classification metrics](struct.ClassificationMetrics.html) //! * [Regression metrics](struct.RegressionMetrics.html) diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index b712d67..222b9d7 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -7,7 +7,7 @@ //! 
Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for //! the data. //! -//! In smartcore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function. +//! In `smartcore` a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function. //! //! ``` //! use smartcore::linalg::basic::matrix::DenseMatrix; diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index d13dce6..882ac55 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -1,6 +1,6 @@ //! # K Nearest Neighbors Classifier //! -//! smartcore relies on 2 backend algorithms to speedup KNN queries: +//! `smartcore` relies on 2 backend algorithms to speedup KNN queries: //! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html) //! * [`CoverTree`](../../algorithm/neighbour/cover_tree/index.html) //! diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 92b3ab4..ef0f003 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -9,7 +9,7 @@ //! SVM is memory efficient since it uses only a subset of training data to find a decision boundary. This subset is called support vectors. //! //! In SVM distance between a data point and the support vectors is defined by the kernel function. -//! smartcore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel. +//! `smartcore` supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel. //! Building a new kernel requires a good mathematical understanding of the [Mercer theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem) //! 
that gives necessary and sufficient condition for a function to be a kernel function. //! diff --git a/src/svm/svc.rs b/src/svm/svc.rs index c886ba1..74998f5 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -20,7 +20,7 @@ //! //! Where \\( m \\) is a number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is a decision boundary. //! -//! To solve this optimization problem, smartcore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm). +//! To solve this optimization problem, `smartcore` uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm). //! The optimizer reaches accuracies similar to that of a real SVM after performing two passes through the training examples. You can choose the number of passes //! through the data that the algorithm takes by changing the `epoch` parameter of the classifier. //! diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index cb6eb4f..0146cbc 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -11,7 +11,7 @@ //! //! where \\(\hat{y}_{Rk}\\) is the mean response for the training observations withing region _k_. //! -//! smartcore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space +//! `smartcore` uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space //! one predictor at a time. At each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better //! tree in some future step. //! 
From 98b18c4dae40c63ae8cb5cc6dc4adedc8dbeb9e3 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 13:53:50 +0000 Subject: [PATCH 27/36] Remove unused tests flags --- src/cluster/kmeans.rs | 3 +-- src/ensemble/random_forest_classifier.rs | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 144f8c5..18f8308 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -469,9 +469,8 @@ mod tests { all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test )] - #[cfg(feature = "datasets")] #[test] - fn fit_predict_iris() { + fn fit_predict() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index ca06e2f..8ea174b 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -634,8 +634,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] - #[cfg(feature = "datasets")] - fn fit_predict_iris() { + fn fit_predict() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], From dad0d01f6df55160d8c8d857169f4522aae945ce Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 13:59:49 +0000 Subject: [PATCH 28/36] Update CHANGELOG --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6052e07..06d6d79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Added - WARNING: Breaking changes! - Seeds to multiple algorithims that depend on random number generation. 
-- Added feature `js` to use WASM in browser - Drop `nalgebra-bindings` feature - Complete refactoring with **extensive API changes** that includes: * moving to a new traits system, less structs more traits * adapting all the modules to the new traits system - * moving towards Rust 2021, in particular the use of `dyn` and `as_ref` + * moving to Rust 2021, in particular the use of `dyn` and `as_ref` * reorganization of the code base, trying to eliminate duplicates +- usage of `serde` is now optional, use the `serde` feature +- default feature is now Wasm-/Wasi-first for minimal binary size ## BREAKING CHANGE - Added a new parameter to `train_test_split` to define the seed. From 48f1d6b74d6b11d3a13abb4235c8e77c2103f6ff Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 14:19:40 +0000 Subject: [PATCH 29/36] use getrandom/js --- Cargo.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0dc84ff..8ec0e97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ ndarray = { version = "0.15", optional = true } num-traits = "0.2.12" num = "0.4" rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } -getrandom = { version = "*", features = ["js"] } +getrandom = "*" rand_distr = { version = "0.4", optional = true } serde = { version = "1", features = ["derive"], optional = true } @@ -36,6 +36,8 @@ serde = ["dep:serde"] ndarray-bindings = ["dep:ndarray"] datasets = ["dep:rand_distr", "std_rand", "serde"] std_rand = ["rand/std_rng", "rand/std"] +# used by wasm32-unknown-unknown +js = ["getrandom/js"] [target.'cfg(target_arch = "wasm32")'.dependencies] getrandom = { version = "0.2", optional = true } From c934f6b6cf89b1e09af33b7a4599c8abbbfec67f Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 14:23:13 +0000 Subject: [PATCH 30/36] update comment --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 
8ec0e97..4fb260b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ serde = ["dep:serde"] ndarray-bindings = ["dep:ndarray"] datasets = ["dep:rand_distr", "std_rand", "serde"] std_rand = ["rand/std_rng", "rand/std"] -# used by wasm32-unknown-unknown +# used by wasm32-unknown-unknown for in-browser usage js = ["getrandom/js"] [target.'cfg(target_arch = "wasm32")'.dependencies] From 6c03e6e0b344188bec8b5b04cd818469649f42e1 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 15:17:31 +0000 Subject: [PATCH 31/36] update CHANGELOG --- CHANGELOG.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06d6d79..d105432 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,22 +4,27 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.3] - 2022-11 +## [0.3.0] - 2022-11-09 ## Added - WARNING: Breaking changes! -- Seeds to multiple algorithims that depend on random number generation. 
-- Drop `nalgebra-bindings` feature - Complete refactoring with **extensive API changes** that includes: * moving to a new traits system, less structs more traits * adapting all the modules to the new traits system - * moving to Rust 2021, in particular the use of `dyn` and `as_ref` - * reorganization of the code base, trying to eliminate duplicates + * moving to Rust 2021, use of object-safe traits and `as_ref` + * reorganization of the code base, eliminate duplicates +- implements `readers` (needs "serde" feature) for read/write CSV file, extensible to other formats +- default feature is now Wasm-/Wasi-first -## BREAKING CHANGE -- Added a new parameter to `train_test_split` to define the seed. +## Changed +- WARNING: Breaking changes! +- Seeds to multiple algorithms that depend on random number generation +- Added a new parameter to `train_test_split` to define the seed +- changed use of "serde" feature + +## Dropped +- WARNING: Breaking changes! 
+- Drop `nalgebra-bindings` feature, only `ndarray` as supported library ## [0.2.1] - 2021-05-10 From 161d2499178329fe924e28f8d3439e5a87ab6379 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Tue, 8 Nov 2022 15:22:34 +0000 Subject: [PATCH 32/36] Release 0.3 (#235) --- .github/CONTRIBUTING.md | 11 +++++++ .github/DEVELOPERS.md | 5 ++- .gitignore | 4 ++- CHANGELOG.md | 27 ++++++++++------ Cargo.toml | 34 ++++++++++++--------- LICENSE | 2 +- README.md | 4 +-- smartcore.svg | 2 +- src/algorithm/neighbour/cover_tree.rs | 10 +++--- src/cluster/kmeans.rs | 8 ++--- src/dataset/mod.rs | 2 +- src/ensemble/mod.rs | 2 +- src/ensemble/random_forest_classifier.rs | 5 +-- src/ensemble/random_forest_regressor.rs | 3 -- src/lib.rs | 39 ++++++++++++++++++------ src/linear/linear_regression.rs | 5 +-- src/linear/logistic_regression.rs | 2 +- src/linear/ridge_regression.rs | 5 +-- src/metrics/auc.rs | 2 +- src/metrics/mod.rs | 2 +- src/model_selection/mod.rs | 2 +- src/neighbors/knn_classifier.rs | 2 +- src/numbers/realnum.rs | 2 +- src/rand_custom.rs | 18 ++++++----- src/svm/mod.rs | 2 +- src/svm/svc.rs | 5 ++- src/svm/svr.rs | 2 -- src/tree/decision_tree_classifier.rs | 13 +++----- src/tree/decision_tree_regressor.rs | 14 +++------ src/tree/mod.rs | 2 +- 30 files changed, 133 insertions(+), 103 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c09dfa7..15b3906 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -26,6 +26,17 @@ Take a look to the conventions established by existing code: * Every module should provide comprehensive tests at the end, in its `mod tests {}` sub-module. These tests can be flagged or not with configuration flags to allow WebAssembly target. * Run `cargo doc --no-deps --open` and read the generated documentation in the browser to be sure that your changes reflects in the documentation and new code is documented. 
+#### digging deeper +* a nice overview of the codebase is given by [static analyzer](https://mozilla.github.io/rust-code-analysis/metrics.html): +``` +$ cargo install rust-code-analysis-cli +// print metrics for every module +$ rust-code-analysis-cli -m -O json -o . -p src/ --pr +// print full AST for a module +$ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 -d > ast.txt +``` +* find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file. + ## Issue Report Process 1. Go to the project's issues. diff --git a/.github/DEVELOPERS.md b/.github/DEVELOPERS.md index 87c2506..b3a647b 100644 --- a/.github/DEVELOPERS.md +++ b/.github/DEVELOPERS.md @@ -1,4 +1,7 @@ -# Smartcore: Introduction to modules +# smartcore: Introduction to modules + +Important source of information: +* [Rust API guidelines](https://rust-lang.github.io/api-guidelines/about.html) ## Walkthrough: traits system and basic structures diff --git a/.gitignore b/.gitignore index 9c0651c..0983a15 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,6 @@ src.dot out.svg FlameGraph/ -out.stacks \ No newline at end of file +out.stacks +*.json +*.txt \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a9dda10..d105432 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,22 +4,29 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.3.0] - 2022-11-09 ## Added -- Seeds to multiple algorithims that depend on random number generation. 
-- Added feature `js` to use WASM in browser -- Drop `nalgebra-bindings` feature -- Complete refactoring with *extensive API changes* that includes: +- WARNING: Breaking changes! +- Complete refactoring with **extensive API changes** that includes: * moving to a new traits system, less structs more traits * adapting all the modules to the new traits system - * moving towards Rust 2021, in particular the use of `dyn` and `as_ref` - * reorganization of the code base, trying to eliminate duplicates + * moving to Rust 2021, use of object-safe traits and `as_ref` + * reorganization of the code base, eliminate duplicates +- implements `readers` (needs "serde" feature) for read/write CSV file, extensible to other formats +- default feature is now Wasm-/Wasi-first -## BREAKING CHANGE -- Added a new parameter to `train_test_split` to define the seed. +## Changed +- WARNING: Breaking changes! +- Seeds to multiple algorithms that depend on random number generation +- Added a new parameter to `train_test_split` to define the seed +- changed use of "serde" feature -## [0.2.1] - 2022-05-10 +## Dropped +- WARNING: Breaking changes! +- Drop `nalgebra-bindings` feature, only `ndarray` as supported library + +## [0.2.1] - 2021-05-10 ## Added - L2 regularization penalty to the Logistic Regression diff --git a/Cargo.toml b/Cargo.toml index 0a23083..4fb260b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "smartcore" -description = "The most advanced machine learning library in rust." +description = "Machine Learning in Rust." 
homepage = "https://smartcorelib.org" -version = "0.4.0" -authors = ["SmartCore Developers"] +version = "0.3.0" +authors = ["smartcore Developers"] edition = "2021" license = "Apache-2.0" documentation = "https://docs.rs/smartcore" @@ -11,6 +11,13 @@ repository = "https://github.com/smartcorelib/smartcore" readme = "README.md" keywords = ["machine-learning", "statistical", "ai", "optimization", "linear-algebra"] categories = ["science"] +exclude = [ + ".github", + ".gitignore", + "smartcore.iml", + "smartcore.svg", + "tests/" +] [dependencies] approx = "0.5.1" @@ -19,32 +26,31 @@ ndarray = { version = "0.15", optional = true } num-traits = "0.2.12" num = "0.4" rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } +getrandom = "*" rand_distr = { version = "0.4", optional = true } serde = { version = "1", features = ["derive"], optional = true } [features] -default = ["serde", "datasets"] +default = [] serde = ["dep:serde"] ndarray-bindings = ["dep:ndarray"] -datasets = ["dep:rand_distr", "std"] -std = ["rand/std_rng", "rand/std"] -# wasm32 only +datasets = ["dep:rand_distr", "std_rand", "serde"] +std_rand = ["rand/std_rng", "rand/std"] +# used by wasm32-unknown-unknown for in-browser usage js = ["getrandom/js"] [target.'cfg(target_arch = "wasm32")'.dependencies] getrandom = { version = "0.2", optional = true } -[dev-dependencies] -itertools = "*" -criterion = { version = "0.4", default-features = false } -serde_json = "1.0" -bincode = "1.3.1" - [target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies] wasm-bindgen-test = "0.3" +[dev-dependencies] +itertools = "*" +serde_json = "1.0" +bincode = "1.3.1" + [workspace] -resolver = "2" [profile.test] debug = 1 diff --git a/LICENSE b/LICENSE index 3cd5786..9448cee 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. 
- Copyright 2019-present at SmartCore developers (smartcorelib.org) + Copyright 2019-present at smartcore developers (smartcorelib.org) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index fd6f481..758a461 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

- SmartCore + smartcore

@@ -18,4 +18,4 @@ ----- [![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml) -To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md). +To start getting familiar with the new smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md). diff --git a/smartcore.svg b/smartcore.svg index 3e4c68d..eaffd58 100644 --- a/smartcore.svg +++ b/smartcore.svg @@ -76,5 +76,5 @@ y="81.876823" x="91.861809" id="tspan842" - sodipodi:role="line">SmartCore + sodipodi:role="line">smartcore diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index db062f9..011a9cc 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -64,7 +64,7 @@ struct Node { max_dist: f64, parent_dist: f64, children: Vec, - scale: i64, + _scale: i64, } #[derive(Debug)] @@ -84,7 +84,7 @@ impl> CoverTree { max_dist: 0f64, parent_dist: 0f64, children: Vec::new(), - scale: 0, + _scale: 0, }; let mut tree = CoverTree { base, @@ -245,7 +245,7 @@ impl> CoverTree { max_dist: 0f64, parent_dist: 0f64, children: Vec::new(), - scale: 100, + _scale: 100, } } @@ -306,7 +306,7 @@ impl> CoverTree { max_dist: 0f64, parent_dist: 0f64, children, - scale: 100, + _scale: 100, } } else { let mut far: Vec = Vec::new(); @@ -375,7 +375,7 @@ impl> CoverTree { max_dist: self.max(consumed_set), parent_dist: 0f64, children, - scale: (top_scale - max_scale), + _scale: (top_scale - max_scale), } } } diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 
9322d65..18f8308 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -11,7 +11,7 @@ //! these re-calculated centroids becoming the new centers of their respective clusters. Next all instances of the training set are re-assigned to their closest cluster again. //! This iterative process continues until convergence is achieved and the clusters are considered settled. //! -//! Initial choice of K data points is very important and has big effect on performance of the algorithm. SmartCore uses k-means++ algorithm to initialize cluster centers. +//! Initial choice of K data points is very important and has big effect on performance of the algorithm. `smartcore` uses k-means++ algorithm to initialize cluster centers. //! //! Example: //! @@ -74,7 +74,7 @@ pub struct KMeans, Y: Array1> { k: usize, _y: Vec, size: Vec, - distortion: f64, + _distortion: f64, centroids: Vec>, _phantom_tx: PhantomData, _phantom_ty: PhantomData, @@ -313,7 +313,7 @@ impl, Y: Array1> KMeans k: parameters.k, _y: y, size, - distortion, + _distortion: distortion, centroids, _phantom_tx: PhantomData, _phantom_ty: PhantomData, @@ -470,7 +470,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] - fn fit_predict_iris() { + fn fit_predict() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index 5b32d02..855b288 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -1,6 +1,6 @@ //! Datasets //! -//! In this module you will find small datasets that are used in SmartCore mostly for demonstration purposes. +//! In this module you will find small datasets that are used in `smartcore` mostly for demonstration purposes. pub mod boston; pub mod breast_cancer; pub mod diabetes; diff --git a/src/ensemble/mod.rs b/src/ensemble/mod.rs index 1ddf4b4..8cebd5c 100644 --- a/src/ensemble/mod.rs +++ b/src/ensemble/mod.rs @@ -7,7 +7,7 @@ //! 
set and then aggregate their individual predictions to form a final prediction. In classification setting the overall prediction is the most commonly //! occurring majority class among the individual predictions. //! -//! In SmartCore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html). +//! In `smartcore` you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html). //! Random forests provide an improvement over bagged trees by way of a small tweak that decorrelates the trees. As in bagging, we build a number of //! decision trees on bootstrapped training samples. But when building these decision trees, each time a split in a tree is considered, //! a random sample of _m_ predictors is chosen as split candidates from the full set of _p_ predictors. diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index d01acef..8ea174b 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -104,7 +104,6 @@ pub struct RandomForestClassifier< X: Array2, Y: Array1, > { - parameters: Option, trees: Option>>, classes: Option>, samples: Option>>, @@ -198,7 +197,6 @@ impl, Y: { fn new() -> Self { Self { - parameters: Option::None, trees: Option::None, classes: Option::None, samples: Option::None, @@ -501,7 +499,6 @@ impl, Y: Array1, Y: Array1, > { - parameters: Option, trees: Option>>, samples: Option>>, } @@ -177,7 +176,6 @@ impl, Y: Array1 { fn new() -> Self { Self { - parameters: Option::None, trees: Option::None, samples: Option::None, } @@ -434,7 +432,6 @@ impl, Y: Array1 } Ok(RandomForestRegressor { - parameters: Some(parameters), trees: Some(trees), samples: maybe_all_samples, }) diff --git a/src/lib.rs b/src/lib.rs index a955de2..03bfc03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,25 +8,38 @@ #![warn(missing_docs)] 
#![warn(rustdoc::missing_doc_code_examples)] -//! # SmartCore +//! # smartcore //! -//! Welcome to SmartCore, machine learning in Rust! +//! Welcome to `smartcore`, machine learning in Rust! //! -//! SmartCore features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN, +//! `smartcore` features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN, //! as well as tools for model selection and model evaluation. //! -//! SmartCore provides its own traits system that extends Rust standard library, to deal with linear algebra and common +//! `smartcore` provides its own traits system that extends Rust standard library, to deal with linear algebra and common //! computational models. Its API is designed using well recognizable patterns. Extra features (like support for [ndarray](https://docs.rs/ndarray) //! structures) is available via optional features. //! //! ## Getting Started //! -//! To start using SmartCore simply add the following to your Cargo.toml file: +//! To start using `smartcore` latest stable version simply add the following to your `Cargo.toml` file: +//! ```ignore +//! [dependencies] +//! smartcore = "*" +//! ``` +//! +//! To start using smartcore development version with latest unstable additions: //! ```ignore //! [dependencies] //! smartcore = { git = "https://github.com/smartcorelib/smartcore", branch = "development" } //! ``` //! +//! There are different features that can be added to the base library, for example to add sample datasets: +//! ```ignore +//! [dependencies] +//! smartcore = { git = "https://github.com/smartcorelib/smartcore", features = ["datasets"] } +//! ``` +//! Check `smartcore`'s `Cargo.toml` for available features. +//! //! ## Using Jupyter //! 
For quick introduction, Jupyter Notebooks are available [here](https://github.com/smartcorelib/smartcore-jupyter/tree/main/notebooks). //! You can set up a local environment to run Rust notebooks using [EVCXR](https://github.com/google/evcxr) @@ -37,7 +50,7 @@ //! For example, you can use this code to fit a [K Nearest Neighbors classifier](neighbors/knn_classifier/index.html) to a dataset that is defined as standard Rust vector: //! //! ``` -//! // DenseMatrix defenition +//! // DenseMatrix definition //! use smartcore::linalg::basic::matrix::DenseMatrix; //! // KNNClassifier //! use smartcore::neighbors::knn_classifier::*; @@ -62,7 +75,9 @@ //! ``` //! //! ## Overview -//! All machine learning algorithms in SmartCore are grouped into these broad categories: +//! +//! ### Supported algorithms +//! All machine learning algorithms are grouped into these broad categories: //! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data. //! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition. //! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables @@ -71,11 +86,14 @@ //! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression //! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem //! * [SVM](svm/index.html), support vector machines +//! +//! ### Linear Algebra traits system +//! 
For an introduction to `smartcore`'s traits system see [this notebook](https://github.com/smartcorelib/smartcore-jupyter/blob/5523993c53c6ec1fd72eea130ef4e7883121c1ea/notebooks/01-A-little-bit-about-numbers.ipynb) /// Foundamental numbers traits pub mod numbers; -/// Various algorithms and helper methods that are used elsewhere in SmartCore +/// Various algorithms and helper methods that are used elsewhere in smartcore pub mod algorithm; pub mod api; @@ -89,7 +107,7 @@ pub mod decomposition; /// Ensemble methods, including Random Forest classifier and regressor pub mod ensemble; pub mod error; -/// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms +/// Diverse collection of linear algebra abstractions and methods that power smartcore algorithms pub mod linalg; /// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables. pub mod linear; @@ -105,7 +123,8 @@ pub mod neighbors; pub mod optimization; /// Preprocessing utilities pub mod preprocessing; -/// Reading in data from serialized foramts +/// Reading in data from serialized formats +#[cfg(feature = "serde")] pub mod readers; /// Support Vector Machines pub mod svm; diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index 1f7d540..a5c7699 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -12,7 +12,7 @@ //! \\[\hat{\beta} = (X^TX)^{-1}X^Ty \\] //! //! the \\((X^TX)^{-1}\\) term is both computationally expensive and numerically unstable. An alternative approach is to use a matrix decomposition to avoid this operation. -//! SmartCore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). //! 
The QR decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, //! but does not work for all data matrices. Unlike the QR decomposition, all matrices have an SVD decomposition. //! @@ -113,7 +113,6 @@ pub struct LinearRegression< > { coefficients: Option, intercept: Option, - solver: LinearRegressionSolverName, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -210,7 +209,6 @@ impl< Self { coefficients: Option::None, intercept: Option::None, - solver: LinearRegressionParameters::default().solver, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -276,7 +274,6 @@ impl< Ok(LinearRegression { intercept: Some(*w.get((num_attributes, 0))), coefficients: Some(weights), - solver: parameters.solver, _phantom_ty: PhantomData, _phantom_y: PhantomData, }) diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 7dd269c..8bf65bf 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -5,7 +5,7 @@ //! //! \\[ Pr(y=1) \approx \frac{e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}}{1 + e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}} \\] //! -//! SmartCore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\) +//! `smartcore` uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\) //! //! Example: //! diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index 914afc2..6bd5595 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -12,7 +12,7 @@ //! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates. //! 
However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero. //! -//! SmartCore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). +//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\). //! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly, //! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition. //! @@ -197,7 +197,6 @@ pub struct RidgeRegression< > { coefficients: Option, intercept: Option, - solver: Option, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -259,7 +258,6 @@ impl< Self { coefficients: Option::None, intercept: Option::None, - solver: Option::None, _phantom_ty: PhantomData, _phantom_y: PhantomData, } @@ -367,7 +365,6 @@ impl< Ok(RidgeRegression { intercept: Some(b), coefficients: Some(w), - solver: Some(parameters.solver), _phantom_ty: PhantomData, _phantom_y: PhantomData, }) diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs index ecaf646..0a7ddf4 100644 --- a/src/metrics/auc.rs +++ b/src/metrics/auc.rs @@ -2,7 +2,7 @@ //! Computes the area under the receiver operating characteristic (ROC) curve that is equal to the probability that a classifier will rank a //! randomly chosen positive instance higher than a randomly chosen negative one. //! -//! SmartCore calculates ROC AUC from Wilcoxon or Mann-Whitney U test. +//! `smartcore` calculates ROC AUC from Wilcoxon or Mann-Whitney U test. //! //! Example: //! ``` diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 06d44a1..c7e1be3 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -4,7 +4,7 @@ //! 
In a feedback loop you build your model first, then you get feedback from metrics, improve it and repeat until your model achieve desirable performance. //! Evaluation metrics helps to explain the performance of a model and compare models based on an objective criterion. //! -//! Choosing the right metric is crucial while evaluating machine learning models. In SmartCore you will find metrics for these classes of ML models: +//! Choosing the right metric is crucial while evaluating machine learning models. In `smartcore` you will find metrics for these classes of ML models: //! //! * [Classification metrics](struct.ClassificationMetrics.html) //! * [Regression metrics](struct.RegressionMetrics.html) diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index b8e4e7f..222b9d7 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -7,7 +7,7 @@ //! Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for //! the data. //! -//! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function. +//! In `smartcore` a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function. //! //! ``` //! use smartcore::linalg::basic::matrix::DenseMatrix; diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index 67d094a..882ac55 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -1,6 +1,6 @@ //! # K Nearest Neighbors Classifier //! -//! SmartCore relies on 2 backend algorithms to speedup KNN queries: +//! `smartcore` relies on 2 backend algorithms to speedup KNN queries: //! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html) //! * [`CoverTree`](../../algorithm/neighbour/cover_tree/index.html) //! 
diff --git a/src/numbers/realnum.rs b/src/numbers/realnum.rs index 8c60e47..f4d9aec 100644 --- a/src/numbers/realnum.rs +++ b/src/numbers/realnum.rs @@ -1,5 +1,5 @@ //! # Real Number -//! Most algorithms in SmartCore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ. +//! Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ. //! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module. use num_traits::Float; diff --git a/src/rand_custom.rs b/src/rand_custom.rs index 15f9e73..b22390e 100644 --- a/src/rand_custom.rs +++ b/src/rand_custom.rs @@ -1,19 +1,23 @@ -#[cfg(not(feature = "std"))] -pub(crate) use rand::rngs::SmallRng as RngImpl; -#[cfg(feature = "std")] -pub(crate) use rand::rngs::StdRng as RngImpl; +#[cfg(not(feature = "std_rand"))] +pub use rand::rngs::SmallRng as RngImpl; +#[cfg(feature = "std_rand")] +pub use rand::rngs::StdRng as RngImpl; use rand::SeedableRng; -pub(crate) fn get_rng_impl(seed: Option) -> RngImpl { +/// Custom switch for random fuctions +pub fn get_rng_impl(seed: Option) -> RngImpl { match seed { Some(seed) => RngImpl::seed_from_u64(seed), None => { cfg_if::cfg_if! { - if #[cfg(feature = "std")] { + if #[cfg(feature = "std_rand")] { use rand::RngCore; RngImpl::seed_from_u64(rand::thread_rng().next_u64()) } else { - panic!("seed number needed for non-std build"); + // no std_random feature build, use getrandom + let mut buf = [0u8; 64]; + getrandom::getrandom(&mut buf).unwrap(); + RngImpl::seed_from_u64(buf[0] as u64) } } } diff --git a/src/svm/mod.rs b/src/svm/mod.rs index a30fe87..ef0f003 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -9,7 +9,7 @@ //! SVM is memory efficient since it uses only a subset of training data to find a decision boundary. 
This subset is called support vectors. //! //! In SVM distance between a data point and the support vectors is defined by the kernel function. -//! SmartCore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel. +//! `smartcore` supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel. //! Building a new kernel requires a good mathematical understanding of the [Mercer theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem) //! that gives necessary and sufficient condition for a function to be a kernel function. //! diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 9cb140d..74998f5 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -20,7 +20,7 @@ //! //! Where \\( m \\) is a number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is a decision boundary. //! -//! To solve this optimization problem, SmartCore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm). +//! To solve this optimization problem, `smartcore` uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm). //! The optimizer reaches accuracies similar to that of a real SVM after performing two passes through the training examples. You can choose the number of passes //! through the data that the algorithm takes by changing the `epoch` parameter of the classifier. //! 
@@ -934,8 +934,7 @@ mod tests { use super::*; use crate::linalg::basic::matrix::DenseMatrix; use crate::metrics::accuracy; - #[cfg(feature = "serde")] - use crate::svm::*; + use crate::svm::Kernels; #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 7a39a56..8d49525 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -596,7 +596,6 @@ mod tests { use super::*; use crate::linalg::basic::matrix::DenseMatrix; use crate::metrics::mean_squared_error; - #[cfg(feature = "serde")] use crate::svm::Kernels; // #[test] @@ -617,7 +616,6 @@ mod tests { // assert!(iter.next().is_none()); // } - //TODO: had to disable this test as it runs for too long #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 6341ab4..cbce14e 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -163,7 +163,6 @@ impl Default for SplitCriterion { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] struct Node { - index: usize, output: usize, split_feature: usize, split_value: Option, @@ -406,9 +405,8 @@ impl Default for DecisionTreeClassifierSearchParameters { } impl Node { - fn new(index: usize, output: usize) -> Self { + fn new(output: usize) -> Self { Node { - index, output, split_feature: 0, split_value: Option::None, @@ -582,7 +580,7 @@ impl, Y: Array1> count[yi[i]] += samples[i]; } - let root = Node::new(0, which_max(&count)); + let root = Node::new(which_max(&count)); change_nodes.push(root); let mut order: Vec> = Vec::new(); @@ -831,11 +829,9 @@ impl, Y: Array1> let true_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(true_child_idx, visitor.true_child_output)); + self.nodes.push(Node::new(visitor.true_child_output)); let false_child_idx = self.nodes().len(); - self.nodes - 
.push(Node::new(false_child_idx, visitor.false_child_output)); + self.nodes.push(Node::new(visitor.false_child_output)); self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx); @@ -923,6 +919,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] + #[cfg(feature = "datasets")] fn fit_predict_iris() { let x: DenseMatrix = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 12ea978..0146cbc 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -11,7 +11,7 @@ //! //! where \\(\hat{y}_{Rk}\\) is the mean response for the training observations withing region _k_. //! -//! SmartCore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space +//! `smartcore` uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space //! one predictor at a time. At each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better //! tree in some future step. //! 
@@ -128,7 +128,6 @@ impl, Y: Array1> #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] struct Node { - index: usize, output: f64, split_feature: usize, split_value: Option, @@ -299,9 +298,8 @@ impl Default for DecisionTreeRegressorSearchParameters { } impl Node { - fn new(index: usize, output: f64) -> Self { + fn new(output: f64) -> Self { Node { - index, output, split_feature: 0, split_value: Option::None, @@ -450,7 +448,7 @@ impl, Y: Array1> sum += *sample_i as f64 * y_m.get(i).to_f64().unwrap(); } - let root = Node::new(0, sum / (n as f64)); + let root = Node::new(sum / (n as f64)); nodes.push(root); let mut order: Vec> = Vec::new(); @@ -662,11 +660,9 @@ impl, Y: Array1> let true_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(true_child_idx, visitor.true_child_output)); + self.nodes.push(Node::new(visitor.true_child_output)); let false_child_idx = self.nodes().len(); - self.nodes - .push(Node::new(false_child_idx, visitor.false_child_output)); + self.nodes.push(Node::new(visitor.false_child_output)); self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx); diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 700dc76..340b0a8 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -9,7 +9,7 @@ //! Decision trees suffer from high variance and often does not deliver best prediction accuracy when compared to other supervised learning approaches, such as linear and logistic regression. //! Hence some techniques such as [Random Forests](../ensemble/index.html) use more than one decision tree to improve performance of the algorithm. //! -//! SmartCore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees. +//! 
`smartcore` uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees. //! //! ## References: //! From c683073b143fdf7f104612d38489158140b4eca4 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 8 Nov 2022 15:35:04 +0000 Subject: [PATCH 33/36] make work cargo build --target wasm32-unknown-unknown --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4fb260b..42faefa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ std_rand = ["rand/std_rng", "rand/std"] js = ["getrandom/js"] [target.'cfg(target_arch = "wasm32")'.dependencies] -getrandom = { version = "0.2", optional = true } +getrandom = { version = "*", features = ["js"] } [target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies] wasm-bindgen-test = "0.3" From 9eaae9ef35d86e187cd8ffda8d41b096ec724595 Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Tue, 8 Nov 2022 11:07:14 -0500 Subject: [PATCH 34/36] Fixes for release (#237) * Fixes for release * add new test * Remove change applied in development branch * Only add dependency for wasm32 * Update ci.yml Co-authored-by: Luis Moreno Co-authored-by: Lorenzo --- .github/workflows/ci.yml | 7 ++++++- Cargo.toml | 3 +-- src/rand_custom.rs | 14 +++++++++++--- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2cd825..89b3b37 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,11 +46,16 @@ jobs: - name: Install test runner for wasi if: matrix.platform.target == 'wasm32-wasi' run: curl https://wasmtime.dev/install.sh -sSf | bash - - name: Stable Build + - name: Stable Build with all features uses: actions-rs/cargo@v1 with: command: build args: --all-features --target ${{ matrix.platform.target }} + - name: Stable Build 
without features + uses: actions-rs/cargo@v1 + with: + command: build + args: --target ${{ matrix.platform.target }} - name: Tests if: matrix.platform.target == 'x86_64-unknown-linux-gnu' || matrix.platform.target == 'x86_64-pc-windows-msvc' || matrix.platform.target == 'aarch64-apple-darwin' uses: actions-rs/cargo@v1 diff --git a/Cargo.toml b/Cargo.toml index 42faefa..63c9389 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,6 @@ ndarray = { version = "0.15", optional = true } num-traits = "0.2.12" num = "0.4" rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } -getrandom = "*" rand_distr = { version = "0.4", optional = true } serde = { version = "1", features = ["derive"], optional = true } @@ -40,7 +39,7 @@ std_rand = ["rand/std_rng", "rand/std"] js = ["getrandom/js"] [target.'cfg(target_arch = "wasm32")'.dependencies] -getrandom = { version = "*", features = ["js"] } +getrandom = { version = "*", optional = true } [target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies] wasm-bindgen-test = "0.3" diff --git a/src/rand_custom.rs b/src/rand_custom.rs index b22390e..936ec9e 100644 --- a/src/rand_custom.rs +++ b/src/rand_custom.rs @@ -15,9 +15,17 @@ pub fn get_rng_impl(seed: Option) -> RngImpl { RngImpl::seed_from_u64(rand::thread_rng().next_u64()) } else { // no std_random feature build, use getrandom - let mut buf = [0u8; 64]; - getrandom::getrandom(&mut buf).unwrap(); - RngImpl::seed_from_u64(buf[0] as u64) + #[cfg(feature = "js")] + { + let mut buf = [0u8; 64]; + getrandom::getrandom(&mut buf).unwrap(); + RngImpl::seed_from_u64(buf[0] as u64) + } + #[cfg(not(feature = "js"))] + { + // Using 0 as default seed + RngImpl::seed_from_u64(0) + } } } } From 8efb959b3cff5b6700c630f03d4fa857e3c6729c Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Tue, 8 Nov 2022 11:18:05 -0500 Subject: [PATCH 35/36] Handle kernel serialization (#232) * Handle kernel serialization * Do 
not use typetag in WASM * enable tests for serialization * Update serde feature deps Co-authored-by: Luis Moreno Co-authored-by: Lorenzo --- Cargo.toml | 5 ++++- src/svm/mod.rs | 46 ++++++++++------------------------------------ src/svm/svc.rs | 12 ++++++++---- src/svm/svr.rs | 17 ++++++++--------- 4 files changed, 30 insertions(+), 50 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 63c9389..b13a1e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,9 +29,12 @@ rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } rand_distr = { version = "0.4", optional = true } serde = { version = "1", features = ["derive"], optional = true } +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +typetag = { version = "0.2", optional = true } + [features] default = [] -serde = ["dep:serde"] +serde = ["dep:serde", "dep:typetag"] ndarray-bindings = ["dep:ndarray"] datasets = ["dep:rand_distr", "std_rand", "serde"] std_rand = ["rand/std_rng", "rand/std"] diff --git a/src/svm/mod.rs b/src/svm/mod.rs index ef0f003..b2bd79c 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -30,8 +30,6 @@ pub mod svr; use core::fmt::Debug; -#[cfg(feature = "serde")] -use serde::ser::{SerializeStruct, Serializer}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -40,36 +38,20 @@ use crate::linalg::basic::arrays::{Array1, ArrayView1}; /// Defines a kernel function. /// This is a object-safe trait. 
-pub trait Kernel { +#[cfg_attr( + all(feature = "serde", not(target_arch = "wasm32")), + typetag::serde(tag = "type") +)] +pub trait Kernel: Debug { #[allow(clippy::ptr_arg)] /// Apply kernel function to x_i and x_j fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result; - /// Return a serializable name - fn name(&self) -> &'static str; -} - -impl Debug for dyn Kernel { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "Kernel") - } -} - -#[cfg(feature = "serde")] -impl Serialize for dyn Kernel { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut s = serializer.serialize_struct("Kernel", 1)?; - s.serialize_field("type", &self.name())?; - s.end() - } } /// Pre-defined kernel functions #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] -pub struct Kernels {} +pub struct Kernels; impl Kernels { /// Return a default linear @@ -211,15 +193,14 @@ impl SigmoidKernel { } } +#[cfg_attr(all(feature = "serde", not(target_arch = "wasm32")), typetag::serde)] impl Kernel for LinearKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { Ok(x_i.dot(x_j)) } - fn name(&self) -> &'static str { - "Linear" - } } +#[cfg_attr(all(feature = "serde", not(target_arch = "wasm32")), typetag::serde)] impl Kernel for RBFKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { if self.gamma.is_none() { @@ -231,11 +212,9 @@ impl Kernel for RBFKernel { let v_diff = x_i.sub(x_j); Ok((-self.gamma.unwrap() * v_diff.mul(&v_diff).sum()).exp()) } - fn name(&self) -> &'static str { - "RBF" - } } +#[cfg_attr(all(feature = "serde", not(target_arch = "wasm32")), typetag::serde)] impl Kernel for PolynomialKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { if self.gamma.is_none() || self.coef0.is_none() || self.degree.is_none() { @@ -247,11 +226,9 @@ impl Kernel for PolynomialKernel { let dot = x_i.dot(x_j); Ok((self.gamma.unwrap() * dot + self.coef0.unwrap()).powf(self.degree.unwrap())) } - fn 
name(&self) -> &'static str { - "Polynomial" - } } +#[cfg_attr(all(feature = "serde", not(target_arch = "wasm32")), typetag::serde)] impl Kernel for SigmoidKernel { fn apply(&self, x_i: &Vec, x_j: &Vec) -> Result { if self.gamma.is_none() || self.coef0.is_none() { @@ -263,9 +240,6 @@ impl Kernel for SigmoidKernel { let dot = x_i.dot(x_j); Ok(self.gamma.unwrap() * dot + self.coef0.unwrap().tanh()) } - fn name(&self) -> &'static str { - "Sigmoid" - } } #[cfg(test)] diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 74998f5..8cd5d5b 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -100,8 +100,11 @@ pub struct SVCParameters>, /// Unused parameter. m: PhantomData<(X, Y, TY)>, @@ -1085,7 +1088,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] - #[cfg(feature = "serde")] + #[cfg(all(feature = "serde", not(target_arch = "wasm32")))] fn svc_serde() { let x = DenseMatrix::from_2d_array(&[ &[5.1, 3.5, 1.4, 0.2], @@ -1119,8 +1122,9 @@ mod tests { let svc = SVC::fit(&x, &y, ¶ms).unwrap(); // serialization - let serialized_svc = &serde_json::to_string(&svc).unwrap(); + let deserialized_svc: SVC = + serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap(); - println!("{:?}", serialized_svc); + assert_eq!(svc, deserialized_svc); } } diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 8d49525..bf53e72 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -92,8 +92,11 @@ pub struct SVRParameters { pub c: T, /// Tolerance for stopping criterion. pub tol: T, - #[cfg_attr(feature = "serde", serde(skip_deserializing))] /// The kernel function. 
+ #[cfg_attr( + all(feature = "serde", target_arch = "wasm32"), + serde(skip_serializing, skip_deserializing) + )] pub kernel: Option>, } @@ -668,7 +671,7 @@ mod tests { wasm_bindgen_test::wasm_bindgen_test )] #[test] - #[cfg(feature = "serde")] + #[cfg(all(feature = "serde", not(target_arch = "wasm32")))] fn svr_serde() { let x = DenseMatrix::from_2d_array(&[ &[234.289, 235.6, 159.0, 107.608, 1947., 60.323], @@ -699,13 +702,9 @@ mod tests { let svr = SVR::fit(&x, &y, ¶ms).unwrap(); - let serialized = &serde_json::to_string(&svr).unwrap(); + let deserialized_svr: SVR, _> = + serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap(); - println!("{}", &serialized); - - // let deserialized_svr: SVR, LinearKernel> = - // serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap(); - - // assert_eq!(svr, deserialized_svr); + assert_eq!(svr, deserialized_svr); } } From 3126ee87d3272eebd2ed783835235c5e0fa69008 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Wed, 9 Nov 2022 12:03:03 +0000 Subject: [PATCH 36/36] Pin deps version --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b13a1e3..5da2fe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,13 +42,13 @@ std_rand = ["rand/std_rng", "rand/std"] js = ["getrandom/js"] [target.'cfg(target_arch = "wasm32")'.dependencies] -getrandom = { version = "*", optional = true } +getrandom = { version = "0.2.8", optional = true } [target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies] wasm-bindgen-test = "0.3" [dev-dependencies] -itertools = "*" +itertools = "0.10.5" serde_json = "1.0" bincode = "1.3.1"