Merge potential next release v0.4 (#187) Breaking Changes

* First draft of the new n-dimensional arrays + NB use case
* Improves default implementation of multiple Array methods
* Refactors tree methods
* Adds matrix decomposition routines
* Adds matrix decomposition methods to ndarray and nalgebra bindings
* Refactoring + linear regression now uses array2
* Ridge & Linear regression
* LBFGS optimizer & logistic regression
* LBFGS optimizer & logistic regression
* Changes linear methods, metrics and model selection methods to new n-dimensional arrays
* Switches KNN and clustering algorithms to new n-d array layer
* Refactors distance metrics
* Optimizes knn and clustering methods
* Refactors metrics module
* Switches decomposition methods to n-dimensional arrays
* Linalg refactoring - cleanup rng merge (#172)
* Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure.
* Exclude AUC metrics. Needs reimplementation
* Improve developers walkthrough

New traits system in place at `src/numbers` and `src/linalg`
Co-authored-by: Lorenzo <tunedconsulting@gmail.com>

* Provide SupervisedEstimator with a constructor to avoid explicit dynamic box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021
* Implement getters to use as_ref() in src/neighbors
* Implement getters to use as_ref() in src/naive_bayes
* Implement getters to use as_ref() in src/linear
* Add Clone to src/naive_bayes
* Change signature for cross_validate and other model_selection functions to abide by the use of 'dyn' in Rust 2021
* Implement ndarray-bindings. Remove FloatNumber from implementations
* Drop nalgebra-bindings support (as decided in conf-call to go for ndarray)
* Remove benches. Benches will have their own repo at smartcore-benches
* Implement SVC
* Implement SVC serialization. Move search parameters in dedicated module
* Implement SVR. Definitely too slow
* Fix compilation issues for wasm (#202)

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
* Fix tests (#203)

* Port linalg/traits/stats.rs
* Improve methods naming
* Improve Display for DenseMatrix

Co-authored-by: Montana Low <montanalow@users.noreply.github.com>
Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
Lorenzo
2022-10-31 10:44:57 +00:00
committed by morenol
parent a32eb66a6a
commit a7fa0585eb
110 changed files with 10327 additions and 9107 deletions
+121 -72
View File
@@ -12,9 +12,9 @@
//! To fit the model to a 5 x 2 matrix with 5 training samples, 2 features per sample:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::linalg::basic::matrix::DenseMatrix;
//! use smartcore::neighbors::knn_classifier::*;
//! use smartcore::math::distance::*;
//! use smartcore::metrics::distance::*;
//!
//! //your explanatory variables. Each row is a training sample with 2 numerical features
//! let x = DenseMatrix::from_2d_array(&[
@@ -23,7 +23,7 @@
//! &[5., 6.],
//! &[7., 8.],
//! &[9., 10.]]);
//! let y = vec![2., 2., 2., 3., 3.]; //your class labels
//! let y = vec![2, 2, 2, 3, 3]; //your class labels
//!
//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = knn.predict(&x).unwrap();
@@ -39,16 +39,16 @@ use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::{row_iter, Matrix};
use crate::math::distance::euclidian::Euclidian;
use crate::math::distance::{Distance, Distances};
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::{Array1, Array2};
use crate::metrics::distance::euclidian::Euclidian;
use crate::metrics::distance::{Distance, Distances};
use crate::neighbors::KNNWeightFunction;
use crate::numbers::basenum::Number;
/// `KNNClassifier` parameters. Use `Default::default()` for default values.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
pub struct KNNClassifierParameters<T: Number, D: Distance<Vec<T>>> {
#[cfg_attr(feature = "serde", serde(default))]
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
@@ -71,15 +71,44 @@ pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// K Nearest Neighbors Classifier
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KNNClassifier<T: RealNumber, D: Distance<Vec<T>, T>> {
classes: Vec<T>,
y: Vec<usize>,
knn_algorithm: KNNAlgorithm<T, D>,
weight: KNNWeightFunction,
k: usize,
pub struct KNNClassifier<
TX: Number,
TY: Number + Ord,
X: Array2<TX>,
Y: Array1<TY>,
D: Distance<Vec<TX>>,
> {
classes: Option<Vec<TY>>,
y: Option<Vec<usize>>,
knn_algorithm: Option<KNNAlgorithm<TX, D>>,
weight: Option<KNNWeightFunction>,
k: Option<usize>,
_phantom_tx: PhantomData<TX>,
_phantom_x: PhantomData<X>,
_phantom_y: PhantomData<Y>,
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
KNNClassifier<TX, TY, X, Y, D>
{
fn classes(&self) -> &Vec<TY> {
self.classes.as_ref().unwrap()
}
fn y(&self) -> &Vec<usize> {
self.y.as_ref().unwrap()
}
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
self.knn_algorithm.as_ref().unwrap()
}
fn weight(&self) -> &KNNWeightFunction {
self.weight.as_ref().unwrap()
}
fn k(&self) -> usize {
self.k.unwrap()
}
}
impl<T: Number, D: Distance<Vec<T>>> KNNClassifierParameters<T, D> {
/// number of training samples to consider when estimating class for new point. Default value is 3.
pub fn with_k(mut self, k: usize) -> Self {
self.k = k;
@@ -88,7 +117,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub fn with_distance<DD: Distance<Vec<T>, T>>(
pub fn with_distance<DD: Distance<Vec<T>>>(
self,
distance: DD,
) -> KNNClassifierParameters<T, DD> {
@@ -112,7 +141,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
}
}
impl<T: RealNumber> Default for KNNClassifierParameters<T, Euclidian> {
impl<T: Number> Default for KNNClassifierParameters<T, Euclidian<T>> {
fn default() -> Self {
KNNClassifierParameters {
distance: Distances::euclidian(),
@@ -124,21 +153,23 @@ impl<T: RealNumber> Default for KNNClassifierParameters<T, Euclidian> {
}
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNClassifier<T, D> {
impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>> PartialEq
for KNNClassifier<TX, TY, X, Y, D>
{
fn eq(&self, other: &Self) -> bool {
if self.classes.len() != other.classes.len()
|| self.k != other.k
|| self.y.len() != other.y.len()
if self.classes().len() != other.classes().len()
|| self.k() != other.k()
|| self.y().len() != other.y().len()
{
false
} else {
for i in 0..self.classes.len() {
if (self.classes[i] - other.classes[i]).abs() > T::epsilon() {
for i in 0..self.classes().len() {
if self.classes()[i] != other.classes()[i] {
return false;
}
}
for i in 0..self.y.len() {
if self.y[i] != other.y[i] {
for i in 0..self.y().len() {
if self.y().get(i) != other.y().get(i) {
return false;
}
}
@@ -147,48 +178,59 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNClassifier<T, D> {
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>>
SupervisedEstimator<M, M::RowVector, KNNClassifierParameters<T, D>> for KNNClassifier<T, D>
impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
SupervisedEstimator<X, Y, KNNClassifierParameters<TX, D>> for KNNClassifier<TX, TY, X, Y, D>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: KNNClassifierParameters<T, D>,
) -> Result<Self, Failed> {
fn new() -> Self {
Self {
classes: Option::None,
y: Option::None,
knn_algorithm: Option::None,
weight: Option::None,
k: Option::None,
_phantom_tx: PhantomData,
_phantom_x: PhantomData,
_phantom_y: PhantomData,
}
}
fn fit(x: &X, y: &Y, parameters: KNNClassifierParameters<TX, D>) -> Result<Self, Failed> {
KNNClassifier::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
for KNNClassifier<T, D>
impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
Predictor<X, Y> for KNNClassifier<TX, TY, X, Y, D>
{
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
fn predict(&self, x: &X) -> Result<Y, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
impl<TX: Number, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
KNNClassifier<TX, TY, X, Y, D>
{
/// Fits KNN classifier to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data
/// * `y` - vector with target values (classes) of length N
/// * `parameters` - additional parameters like search algorithm and k
pub fn fit<M: Matrix<T>>(
x: &M,
y: &M::RowVector,
parameters: KNNClassifierParameters<T, D>,
) -> Result<KNNClassifier<T, D>, Failed> {
let y_m = M::from_row_vector(y.clone());
let (_, y_n) = y_m.shape();
pub fn fit(
x: &X,
y: &Y,
parameters: KNNClassifierParameters<TX, D>,
) -> Result<KNNClassifier<TX, TY, X, Y, D>, Failed> {
let y_n = y.shape();
let (x_n, _) = x.shape();
let data = row_iter(x).collect();
let data = x
.row_iter()
.map(|row| row.iterator(0).copied().collect())
.collect();
let mut yi: Vec<usize> = vec![0; y_n];
let classes = y_m.unique();
let classes = y.unique();
for (i, yi_i) in yi.iter_mut().enumerate().take(y_n) {
let yc = y_m.get(0, i);
let yc = *y.get(i);
*yi_i = classes.iter().position(|c| yc == *c).unwrap();
}
@@ -207,43 +249,50 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
}
Ok(KNNClassifier {
classes,
y: yi,
k: parameters.k,
knn_algorithm: parameters.algorithm.fit(data, parameters.distance)?,
weight: parameters.weight,
classes: Some(classes),
y: Some(yi),
k: Some(parameters.k),
knn_algorithm: Some(parameters.algorithm.fit(data, parameters.distance)?),
weight: Some(parameters.weight),
_phantom_tx: PhantomData,
_phantom_x: PhantomData,
_phantom_y: PhantomData,
})
}
/// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
/// Returns a vector of size N with class estimates.
pub fn predict<M: Matrix<T>>(&self, x: &M) -> Result<M::RowVector, Failed> {
let mut result = M::zeros(1, x.shape().0);
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
let mut result = Y::zeros(x.shape().0);
for (i, x) in row_iter(x).enumerate() {
result.set(0, i, self.classes[self.predict_for_row(x)?]);
let mut row_vec = vec![TX::zero(); x.shape().1];
for (i, row) in x.row_iter().enumerate() {
row.iterator(0)
.zip(row_vec.iter_mut())
.for_each(|(&s, v)| *v = s);
result.set(i, self.classes()[self.predict_for_row(&row_vec)?]);
}
Ok(result.to_row_vector())
Ok(result)
}
fn predict_for_row(&self, x: Vec<T>) -> Result<usize, Failed> {
let search_result = self.knn_algorithm.find(&x, self.k)?;
fn predict_for_row(&self, row: &Vec<TX>) -> Result<usize, Failed> {
let search_result = self.knn_algorithm().find(row, self.k())?;
let weights = self
.weight
.weight()
.calc_weights(search_result.iter().map(|v| v.1).collect());
let w_sum = weights.iter().copied().sum();
let w_sum: f64 = weights.iter().copied().sum();
let mut c = vec![T::zero(); self.classes.len()];
let mut max_c = T::zero();
let mut c = vec![0f64; self.classes().len()];
let mut max_c = 0f64;
let mut max_i = 0;
for (r, w) in search_result.iter().zip(weights.iter()) {
c[self.y[r.0]] += *w / w_sum;
if c[self.y[r.0]] > max_c {
max_c = c[self.y[r.0]];
max_i = self.y[r.0];
c[self.y()[r.0]] += *w / w_sum;
if c[self.y()[r.0]] > max_c {
max_c = c[self.y()[r.0]];
max_i = self.y()[r.0];
}
}
@@ -254,14 +303,14 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::linalg::basic::matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_fit_predict() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y = vec![2., 2., 2., 3., 3.];
let y = vec![2, 2, 2, 3, 3];
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
let y_hat = knn.predict(&x).unwrap();
assert_eq!(5, Vec::len(&y_hat));
@@ -272,7 +321,7 @@ mod tests {
#[test]
fn knn_fit_predict_weighted() {
let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]);
let y = vec![2., 2., 2., 3., 3.];
let y = vec![2, 2, 2, 3, 3];
let knn = KNNClassifier::fit(
&x,
&y,
@@ -283,7 +332,7 @@ mod tests {
)
.unwrap();
let y_hat = knn.predict(&DenseMatrix::from_2d_array(&[&[4.1]])).unwrap();
assert_eq!(vec![3.0], y_hat);
assert_eq!(vec![3], y_hat);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
@@ -292,7 +341,7 @@ mod tests {
fn serde() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y = vec![2., 2., 2., 3., 3.];
let y = vec![2, 2, 2, 3, 3];
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
+109 -56
View File
@@ -14,9 +14,9 @@
//! To fit the model to a 5 x 2 matrix with 5 training samples, 2 features per sample:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::linalg::basic::matrix::DenseMatrix;
//! use smartcore::neighbors::knn_regressor::*;
//! use smartcore::math::distance::*;
//! use smartcore::metrics::distance::*;
//!
//! //your explanatory variables. Each row is a training sample with 2 numerical features
//! let x = DenseMatrix::from_2d_array(&[
@@ -42,16 +42,16 @@ use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::{row_iter, BaseVector, Matrix};
use crate::math::distance::euclidian::Euclidian;
use crate::math::distance::{Distance, Distances};
use crate::math::num::RealNumber;
use crate::linalg::basic::arrays::{Array1, Array2};
use crate::metrics::distance::euclidian::Euclidian;
use crate::metrics::distance::{Distance, Distances};
use crate::neighbors::KNNWeightFunction;
use crate::numbers::basenum::Number;
/// `KNNRegressor` parameters. Use `Default::default()` for default values.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
pub struct KNNRegressorParameters<T: Number, D: Distance<Vec<T>>> {
#[cfg_attr(feature = "serde", serde(default))]
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
@@ -74,14 +74,45 @@ pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// K Nearest Neighbors Regressor
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KNNRegressor<T: RealNumber, D: Distance<Vec<T>, T>> {
y: Vec<T>,
knn_algorithm: KNNAlgorithm<T, D>,
weight: KNNWeightFunction,
k: usize,
pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
{
y: Option<Y>,
knn_algorithm: Option<KNNAlgorithm<TX, D>>,
weight: Option<KNNWeightFunction>,
k: Option<usize>,
_phantom_tx: PhantomData<TX>,
_phantom_ty: PhantomData<TY>,
_phantom_x: PhantomData<X>,
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
KNNRegressor<TX, TY, X, Y, D>
{
///
fn y(&self) -> &Y {
self.y.as_ref().unwrap()
}
///
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
self.knn_algorithm
.as_ref()
.expect("Missing parameter: KNNAlgorithm")
}
///
fn weight(&self) -> &KNNWeightFunction {
self.weight.as_ref().expect("Missing parameter: weight")
}
#[allow(dead_code)]
///
fn k(&self) -> usize {
self.k.unwrap()
}
}
impl<T: Number, D: Distance<Vec<T>>> KNNRegressorParameters<T, D> {
/// number of training samples to consider when estimating the target value for a new point. Default value is 3.
pub fn with_k(mut self, k: usize) -> Self {
self.k = k;
@@ -90,7 +121,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub fn with_distance<DD: Distance<Vec<T>, T>>(
pub fn with_distance<DD: Distance<Vec<T>>>(
self,
distance: DD,
) -> KNNRegressorParameters<T, DD> {
@@ -114,7 +145,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
}
}
impl<T: RealNumber> Default for KNNRegressorParameters<T, Euclidian> {
impl<T: Number> Default for KNNRegressorParameters<T, Euclidian<T>> {
fn default() -> Self {
KNNRegressorParameters {
distance: Distances::euclidian(),
@@ -126,13 +157,15 @@ impl<T: RealNumber> Default for KNNRegressorParameters<T, Euclidian> {
}
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNRegressor<T, D> {
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>> PartialEq
for KNNRegressor<TX, TY, X, Y, D>
{
fn eq(&self, other: &Self) -> bool {
if self.k != other.k || self.y.len() != other.y.len() {
if self.k != other.k || self.y().shape() != other.y().shape() {
false
} else {
for i in 0..self.y.len() {
if (self.y[i] - other.y[i]).abs() > T::epsilon() {
for i in 0..self.y().shape() {
if self.y().get(i) != other.y().get(i) {
return false;
}
}
@@ -141,42 +174,53 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNRegressor<T, D> {
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>>
SupervisedEstimator<M, M::RowVector, KNNRegressorParameters<T, D>> for KNNRegressor<T, D>
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
SupervisedEstimator<X, Y, KNNRegressorParameters<TX, D>> for KNNRegressor<TX, TY, X, Y, D>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: KNNRegressorParameters<T, D>,
) -> Result<Self, Failed> {
fn new() -> Self {
Self {
y: Option::None,
knn_algorithm: Option::None,
weight: Option::None,
k: Option::None,
_phantom_tx: PhantomData,
_phantom_ty: PhantomData,
_phantom_x: PhantomData,
}
}
fn fit(x: &X, y: &Y, parameters: KNNRegressorParameters<TX, D>) -> Result<Self, Failed> {
KNNRegressor::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
for KNNRegressor<T, D>
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>> Predictor<X, Y>
for KNNRegressor<TX, TY, X, Y, D>
{
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
fn predict(&self, x: &X) -> Result<Y, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
KNNRegressor<TX, TY, X, Y, D>
{
/// Fits KNN regressor to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data
/// * `y` - vector with real values
/// * `parameters` - additional parameters like search algorithm and k
pub fn fit<M: Matrix<T>>(
x: &M,
y: &M::RowVector,
parameters: KNNRegressorParameters<T, D>,
) -> Result<KNNRegressor<T, D>, Failed> {
let y_m = M::from_row_vector(y.clone());
let (_, y_n) = y_m.shape();
pub fn fit(
x: &X,
y: &Y,
parameters: KNNRegressorParameters<TX, D>,
) -> Result<KNNRegressor<TX, TY, X, Y, D>, Failed> {
let y_n = y.shape();
let (x_n, _) = x.shape();
let data = row_iter(x).collect();
let data = x
.row_iter()
.map(|row| row.iterator(0).copied().collect())
.collect();
if x_n != y_n {
return Err(Failed::fit(&format!(
@@ -192,38 +236,47 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
)));
}
let knn_algo = parameters.algorithm.fit(data, parameters.distance)?;
Ok(KNNRegressor {
y: y.to_vec(),
k: parameters.k,
knn_algorithm: parameters.algorithm.fit(data, parameters.distance)?,
weight: parameters.weight,
y: Some(y.clone()),
k: Some(parameters.k),
knn_algorithm: Some(knn_algo),
weight: Some(parameters.weight),
_phantom_tx: PhantomData,
_phantom_ty: PhantomData,
_phantom_x: PhantomData,
})
}
/// Predict the target for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
/// Returns a vector of size N with estimates.
pub fn predict<M: Matrix<T>>(&self, x: &M) -> Result<M::RowVector, Failed> {
let mut result = M::zeros(1, x.shape().0);
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
let mut result = Y::zeros(x.shape().0);
for (i, x) in row_iter(x).enumerate() {
result.set(0, i, self.predict_for_row(x)?);
let mut row_vec = vec![TX::zero(); x.shape().1];
for (i, row) in x.row_iter().enumerate() {
row.iterator(0)
.zip(row_vec.iter_mut())
.for_each(|(&s, v)| *v = s);
result.set(i, self.predict_for_row(&row_vec)?);
}
Ok(result.to_row_vector())
Ok(result)
}
fn predict_for_row(&self, x: Vec<T>) -> Result<T, Failed> {
let search_result = self.knn_algorithm.find(&x, self.k)?;
let mut result = T::zero();
fn predict_for_row(&self, row: &Vec<TX>) -> Result<TY, Failed> {
let search_result = self.knn_algorithm().find(row, self.k.unwrap())?;
let mut result = TY::zero();
let weights = self
.weight
.weight()
.calc_weights(search_result.iter().map(|v| v.1).collect());
let w_sum = weights.iter().copied().sum();
let w_sum: f64 = weights.iter().copied().sum();
for (r, w) in search_result.iter().zip(weights.iter()) {
result += self.y[r.0] * (*w / w_sum);
result += *self.y().get(r.0) * TY::from_f64(*w / w_sum).unwrap();
}
Ok(result)
@@ -233,8 +286,8 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::math::distance::Distances;
use crate::linalg::basic::matrix::DenseMatrix;
use crate::metrics::distance::Distances;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
+5 -6
View File
@@ -32,7 +32,6 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@@ -65,21 +64,21 @@ impl Default for KNNWeightFunction {
}
impl KNNWeightFunction {
fn calc_weights<T: RealNumber>(&self, distances: Vec<T>) -> std::vec::Vec<T> {
fn calc_weights(&self, distances: Vec<f64>) -> std::vec::Vec<f64> {
match *self {
KNNWeightFunction::Distance => {
// if there are any points that has zero distance from one or more training points,
// those training points are weighted as 1.0 and the other points as 0.0
if distances.iter().any(|&e| e == T::zero()) {
if distances.iter().any(|&e| e == 0f64) {
distances
.iter()
.map(|e| if *e == T::zero() { T::one() } else { T::zero() })
.map(|e| if *e == 0f64 { 1f64 } else { 0f64 })
.collect()
} else {
distances.iter().map(|e| T::one() / *e).collect()
distances.iter().map(|e| 1f64 / *e).collect()
}
}
KNNWeightFunction::Uniform => vec![T::one(); distances.len()],
KNNWeightFunction::Uniform => vec![1f64; distances.len()],
}
}
}