Merge potential next release v0.4 (#187) Breaking Changes
* First draft of the new n-dimensional arrays + NB use case * Improves default implementation of multiple Array methods * Refactors tree methods * Adds matrix decomposition routines * Adds matrix decomposition methods to ndarray and nalgebra bindings * Refactoring + linear regression now uses array2 * Ridge & Linear regression * LBFGS optimizer & logistic regression * LBFGS optimizer & logistic regression * Changes linear methods, metrics and model selection methods to new n-dimensional arrays * Switches KNN and clustering algorithms to new n-d array layer * Refactors distance metrics * Optimizes knn and clustering methods * Refactors metrics module * Switches decomposition methods to n-dimensional arrays * Linalg refactoring - cleanup rng merge (#172) * Remove legacy DenseMatrix and BaseMatrix implementation. Port the new Number, FloatNumber and Array implementation into module structure. * Exclude AUC metrics. Needs reimplementation * Improve developers walkthrough New traits system in place at `src/numbers` and `src/linalg` Co-authored-by: Lorenzo <tunedconsulting@gmail.com> * Provide SupervisedEstimator with a constructor to avoid explicit dynamical box allocation in 'cross_validate' and 'cross_validate_predict' as required by the use of 'dyn' as per Rust 2021 * Implement getters to use as_ref() in src/neighbors * Implement getters to use as_ref() in src/naive_bayes * Implement getters to use as_ref() in src/linear * Add Clone to src/naive_bayes * Change signature for cross_validate and other model_selection functions to abide to use of dyn in Rust 2021 * Implement ndarray-bindings. Remove FloatNumber from implementations * Drop nalgebra-bindings support (as decided in conf-call to go for ndarray) * Remove benches. Benches will have their own repo at smartcore-benches * Implement SVC * Implement SVC serialization. Move search parameters in dedicated module * Implement SVR. 
Definitely too slow * Fix compilation issues for wasm (#202) Co-authored-by: Luis Moreno <morenol@users.noreply.github.com> * Fix tests (#203) * Port linalg/traits/stats.rs * Improve methods naming * Improve Display for DenseMatrix Co-authored-by: Montana Low <montanalow@users.noreply.github.com> Co-authored-by: VolodymyrOrlov <volodymyr.orlov@gmail.com>
This commit is contained in:
@@ -0,0 +1,89 @@
|
||||
//! # Euclidean Metric Distance
|
||||
//!
|
||||
//! The Euclidean distance (L2) between two points \\( x \\) and \\( y \\) in n-space is defined as
|
||||
//!
|
||||
//! \\[ d(x, y) = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \\]
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::euclidian::Euclidian;
|
||||
//!
|
||||
//! let x = vec![1., 1.];
|
||||
//! let y = vec![2., 2.];
|
||||
//!
|
||||
//! let l2: f64 = Euclidian::new().distance(&x, &y);
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use super::Distance;
|
||||
|
||||
/// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Euclidian<T> {
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Default for Euclidian<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number> Euclidian<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new() -> Euclidian<T> {
|
||||
Euclidian { _t: PhantomData }
|
||||
}
|
||||
|
||||
/// return sum of squared distances
|
||||
#[inline]
|
||||
pub(crate) fn squared_distance<A: ArrayView1<T>>(x: &A, y: &A) -> f64 {
|
||||
if x.shape() != y.shape() {
|
||||
panic!("Input vector sizes are different.");
|
||||
}
|
||||
|
||||
let sum: f64 = x
|
||||
.iterator(0)
|
||||
.zip(y.iterator(0))
|
||||
.map(|(&a, &b)| {
|
||||
let r = a - b;
|
||||
(r * r).to_f64().unwrap()
|
||||
})
|
||||
.sum();
|
||||
|
||||
sum
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Euclidian<T> {
|
||||
fn distance(&self, x: &A, y: &A) -> f64 {
|
||||
Euclidian::squared_distance(x, y).sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
|
||||
#[test]
|
||||
fn squared_distance() {
|
||||
let a = vec![1, 2, 3];
|
||||
let b = vec![4, 5, 6];
|
||||
|
||||
let l2: f64 = Euclidian::new().distance(&a, &b);
|
||||
|
||||
assert!((l2 - 5.19615242).abs() < 1e-8);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
//! # Hamming Distance
|
||||
//!
|
||||
//! Hamming Distance measures the similarity between two integer-valued vectors of the same length.
|
||||
//! Given two vectors \\( x \in ℝ^n \\), \\( y \in ℝ^n \\) the hamming distance between \\( x \\) and \\( y \\), \\( d(x, y) \\), is the number of places where \\( x \\) and \\( y \\) differ.
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::hamming::Hamming;
|
||||
//!
|
||||
//! let a = vec![1, 0, 0, 1, 0, 0, 1];
|
||||
//! let b = vec![1, 1, 0, 0, 1, 0, 1];
|
||||
//!
|
||||
//! let h: f64 = Hamming::new().distance(&a, &b);
|
||||
//!
|
||||
//! ```
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use super::Distance;
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
/// While comparing two integer-valued vectors of equal length, Hamming distance is the number of bit positions in which the two bits are different
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Hamming<T: Number> {
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Hamming<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new() -> Hamming<T> {
|
||||
Hamming { _t: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number> Default for Hamming<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Hamming<T> {
    /// Returns the *normalized* Hamming distance: the fraction of positions
    /// (in `[0, 1]`) at which `x` and `y` differ.
    ///
    /// # Panics
    /// Panics if `x` and `y` have different lengths.
    fn distance(&self, x: &A, y: &A) -> f64 {
        if x.shape() != y.shape() {
            panic!("Input vector sizes are different");
        }

        // Count mismatching positions; `filter` + `count` replaces the
        // manual match-to-0/1 mapping followed by `sum`.
        let mismatches = x
            .iterator(0)
            .zip(y.iterator(0))
            .filter(|(a, b)| a != b)
            .count();

        mismatches as f64 / x.shape() as f64
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn hamming_distance() {
        let x = vec![1, 0, 0, 1, 0, 0, 1];
        let y = vec![1, 1, 0, 0, 1, 0, 1];

        // 3 of 7 positions differ => 3/7 ≈ 0.42857142
        let dist: f64 = Hamming::new().distance(&x, &y);

        assert!((dist - 0.42857142).abs() < 1e-8);
    }
}
|
||||
@@ -0,0 +1,162 @@
|
||||
//! # Mahalanobis Distance
|
||||
//!
|
||||
//! The Mahalanobis distance (MD) is the distance between two points in multivariate space.
|
||||
//! In a regular Euclidean space the distance between any two points can be measured with [Euclidean distance](../euclidian/index.html).
|
||||
//! For uncorrelated variables, the Euclidean distance equals the MD. However, if two or more variables are correlated the measurements become impossible
|
||||
//! with Euclidean distance because the axes are no longer at right angles to each other. MD on the other hand, is scale-invariant,
|
||||
//! it takes into account the covariance matrix of the dataset when calculating distance between 2 points that belong to the same space as the dataset.
|
||||
//!
|
||||
//! MD between two vectors \\( x \in ℝ^n \\) and \\( y \in ℝ^n \\) is defined as
|
||||
//! \\[ d(x, y) = \sqrt{(x - y)^TS^{-1}(x - y)}\\]
|
||||
//!
|
||||
//! where \\( S \\) is the covariance matrix of the dataset.
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::linalg::basic::matrix::DenseMatrix;
|
||||
//! use smartcore::linalg::basic::arrays::ArrayView2;
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::mahalanobis::Mahalanobis;
|
||||
//!
|
||||
//! let data = DenseMatrix::from_2d_array(&[
|
||||
//! &[64., 580., 29.],
|
||||
//! &[66., 570., 33.],
|
||||
//! &[68., 590., 37.],
|
||||
//! &[69., 660., 46.],
|
||||
//! &[73., 600., 55.],
|
||||
//! ]);
|
||||
//!
|
||||
//! let a = data.mean_by(0);
|
||||
//! let b = vec![66., 640., 44.];
|
||||
//!
|
||||
//! let mahalanobis = Mahalanobis::new(&data);
|
||||
//!
|
||||
//! mahalanobis.distance(&a, &b);
|
||||
//! ```
|
||||
//!
|
||||
//! ## References
|
||||
//! * ["Introduction to Multivariate Statistical Analysis in Chemometrics", Varmuza, K., Filzmoser, P., 2016, p.46](https://www.taylorfrancis.com/books/9780429145049)
|
||||
//! * ["Example of Calculating the Mahalanobis Distance", McCaffrey, J.D.](https://jamesmccaffrey.wordpress.com/2017/11/09/example-of-calculating-the-mahalanobis-distance/)
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use super::Distance;
|
||||
use crate::linalg::basic::arrays::{Array, Array2, ArrayView1};
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::linalg::traits::lu::LUDecomposable;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
/// Mahalanobis distance.
///
/// Stores the covariance matrix of the reference dataset together with its
/// inverse; both are computed once at construction so that `distance` only
/// has to evaluate the quadratic form.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Mahalanobis<T: Number, M: Array2<f64>> {
    /// covariance matrix of the dataset
    pub sigma: M,
    /// inverse of the covariance matrix
    pub sigmaInv: M,
    // zero-sized marker for the element type of the points being compared
    _t: PhantomData<T>,
}
|
||||
|
||||
impl<T: Number, M: Array2<f64> + LUDecomposable<f64>> Mahalanobis<T, M> {
|
||||
/// Constructs new instance of `Mahalanobis` from given dataset
|
||||
/// * `data` - a matrix of _NxM_ where _N_ is number of observations and _M_ is number of attributes
|
||||
pub fn new<X: Array2<T>>(data: &X) -> Mahalanobis<T, M> {
|
||||
let (_, m) = data.shape();
|
||||
let mut sigma = M::zeros(m, m);
|
||||
data.cov(&mut sigma);
|
||||
let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap();
|
||||
Mahalanobis {
|
||||
sigma,
|
||||
sigmaInv,
|
||||
_t: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs new instance of `Mahalanobis` from given covariance matrix
|
||||
/// * `cov` - a covariance matrix
|
||||
pub fn new_from_covariance<X: Array2<f64> + LUDecomposable<f64>>(cov: &X) -> Mahalanobis<T, X> {
|
||||
let sigma = cov.clone();
|
||||
let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap();
|
||||
Mahalanobis {
|
||||
sigma,
|
||||
sigmaInv,
|
||||
_t: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Mahalanobis<T, DenseMatrix<f64>> {
    /// Mahalanobis distance between `x` and `y`:
    /// sqrt((x - y)^T * S^-1 * (x - y)), using the inverse covariance
    /// matrix precomputed in `self.sigmaInv`.
    ///
    /// Panics if either vector's length does not match the dimension of
    /// the covariance matrix.
    fn distance(&self, x: &A, y: &A) -> f64 {
        let (nrows, ncols) = self.sigma.shape();
        if x.shape() != nrows {
            panic!(
                "Array x[{}] has different dimension with Sigma[{}][{}].",
                x.shape(),
                nrows,
                ncols
            );
        }

        if y.shape() != nrows {
            panic!(
                "Array y[{}] has different dimension with Sigma[{}][{}].",
                y.shape(),
                nrows,
                ncols
            );
        }

        let n = x.shape();

        // z = x - y, widened to f64 for the quadratic form below.
        let z: Vec<f64> = x
            .iterator(0)
            .zip(y.iterator(0))
            .map(|(&a, &b)| (a - b).to_f64().unwrap())
            .collect();

        // Quadratic form z^T * sigmaInv * z, equivalent to NumPy's
        // np.dot(np.dot((a-b),VI),(a-b).T)
        let mut s = 0f64;
        for j in 0..n {
            for i in 0..n {
                s += *self.sigmaInv.get((i, j)) * z[i] * z[j];
            }
        }

        s.sqrt()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::basic::arrays::ArrayView2;
    use crate::linalg::basic::matrix::DenseMatrix;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn mahalanobis_distance() {
        let data = DenseMatrix::from_2d_array(&[
            &[64., 580., 29.],
            &[66., 570., 33.],
            &[68., 590., 37.],
            &[69., 660., 46.],
            &[73., 600., 55.],
        ]);

        // Distance from the per-column mean of the dataset to a new observation.
        let centroid = data.mean_by(0);
        let observation = vec![66., 640., 44.];

        let metric = Mahalanobis::new(&data);
        let dist: f64 = metric.distance(&centroid, &observation);

        // Expected value taken from McCaffrey's worked example (see module docs).
        assert!((dist - 5.33).abs() < 1e-2);
    }
}
|
||||
@@ -0,0 +1,79 @@
|
||||
//! # Manhattan Distance
|
||||
//!
|
||||
//! The Manhattan distance between two points \\(x \in ℝ^n \\) and \\( y \in ℝ^n \\) in n-dimensional space is the sum of the distances in each dimension.
|
||||
//!
|
||||
//! \\[ d(x, y) = \sum_{i=1}^n \lvert x_i - y_i \rvert \\]
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::manhattan::Manhattan;
|
||||
//!
|
||||
//! let x = vec![1., 1.];
|
||||
//! let y = vec![2., 2.];
|
||||
//!
|
||||
//! let l1: f64 = Manhattan::new().distance(&x, &y);
|
||||
//! ```
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use super::Distance;
|
||||
|
||||
/// Manhattan distance
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Manhattan<T: Number> {
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Manhattan<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new() -> Manhattan<T> {
|
||||
Manhattan { _t: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number> Default for Manhattan<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Manhattan<T> {
    /// Returns the L1 (taxicab) distance: the sum of absolute element-wise differences.
    ///
    /// # Panics
    /// Panics if `x` and `y` have different lengths.
    fn distance(&self, x: &A, y: &A) -> f64 {
        if x.shape() != y.shape() {
            panic!("Input vector sizes are different");
        }

        // Return the sum directly instead of binding it first (clippy: let_and_return).
        x.iterator(0)
            .zip(y.iterator(0))
            .map(|(&a, &b)| (a - b).to_f64().unwrap().abs())
            .sum()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn manhattan_distance() {
        let x = vec![1., 2., 3.];
        let y = vec![4., 5., 6.];

        // |1-4| + |2-5| + |3-6| = 9
        let dist: f64 = Manhattan::new().distance(&x, &y);

        assert!((dist - 9.0).abs() < 1e-8);
    }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
//! # Minkowski Distance
|
||||
//!
|
||||
//! The Minkowski distance of order _p_ (where _p_ is an integer) is a metric in a normed vector space which can be considered as a generalization of both the Euclidean distance and the Manhattan distance.
|
||||
//! The Minkowski distance between two points \\(x \in ℝ^n \\) and \\( y \in ℝ^n \\) in n-dimensional space is defined as:
|
||||
//!
|
||||
//! \\[ d(x, y) = \left(\sum_{i=1}^n \lvert x_i - y_i \rvert^p\right)^{1/p} \\]
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```
|
||||
//! use smartcore::metrics::distance::Distance;
|
||||
//! use smartcore::metrics::distance::minkowski::Minkowski;
|
||||
//!
|
||||
//! let x = vec![1., 1.];
|
||||
//! let y = vec![2., 2.];
|
||||
//!
|
||||
//! let l1: f64 = Minkowski::new(1).distance(&x, &y);
|
||||
//! let l2: f64 = Minkowski::new(2).distance(&x, &y);
|
||||
//!
|
||||
//! ```
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::linalg::basic::arrays::ArrayView1;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
use super::Distance;
|
||||
|
||||
/// Defines the Minkowski distance of order `p`
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Minkowski<T: Number> {
|
||||
/// order, integer
|
||||
pub p: u16,
|
||||
_t: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Number> Minkowski<T> {
|
||||
/// instatiate the initial structure
|
||||
pub fn new(p: u16) -> Minkowski<T> {
|
||||
Minkowski { p, _t: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Number, A: ArrayView1<T>> Distance<A> for Minkowski<T> {
    /// Returns the Minkowski distance of order `self.p`.
    ///
    /// # Panics
    /// Panics if `x` and `y` have different lengths, or if `self.p < 1`.
    fn distance(&self, x: &A, y: &A) -> f64 {
        if x.shape() != y.shape() {
            panic!("Input vector sizes are different");
        }
        if self.p < 1 {
            panic!("p must be at least 1");
        }

        let p_t = f64::from(self.p);

        let dist: f64 = x
            .iterator(0)
            .zip(y.iterator(0))
            // `powi` is exact and faster than `powf` for integer exponents;
            // `p` always fits in i32 since it is a u16.
            .map(|(&a, &b)| (a - b).to_f64().unwrap().abs().powi(i32::from(self.p)))
            .sum();

        // The outer 1/p root is fractional, so `powf` is required here.
        dist.powf(1f64 / p_t)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn minkowski_distance() {
        let x = vec![1., 2., 3.];
        let y = vec![4., 5., 6.];

        // Order 1 reduces to Manhattan, order 2 to Euclidean.
        let d1: f64 = Minkowski::new(1).distance(&x, &y);
        let d2: f64 = Minkowski::new(2).distance(&x, &y);
        let d3: f64 = Minkowski::new(3).distance(&x, &y);

        assert!((d1 - 9.0).abs() < 1e-8);
        assert!((d2 - 5.19615242).abs() < 1e-8);
        assert!((d3 - 4.32674871).abs() < 1e-8);
    }

    #[test]
    #[should_panic(expected = "p must be at least 1")]
    fn minkowski_distance_negative_p() {
        let x = vec![1., 2., 3.];
        let y = vec![4., 5., 6.];

        let _: f64 = Minkowski::new(0).distance(&x, &y);
    }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
//! # Collection of Distance Functions
|
||||
//!
|
||||
//! Many algorithms in machine learning require a measure of distance between data points. Distance metric (or metric) is a function that defines a distance between a pair of point elements of a set.
|
||||
//! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions:
|
||||
//! 1. \\( d(x, y) = 0 \\) if and only if \\( x = y \\), positive definiteness
|
||||
//! 1. \\( d(x, y) = d(y, x) \\), symmetry
|
||||
//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality
|
||||
//!
|
||||
//! for all \\(x, y, z \in Z \\)
|
||||
//!
|
||||
//! A good distance metric helps to improve the performance of classification, clustering and information retrieval algorithms significantly.
|
||||
//!
|
||||
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||||
|
||||
/// Euclidean Distance is the straight-line distance between two points in Euclidean space; it represents the shortest distance between these points.
|
||||
pub mod euclidian;
|
||||
/// Hamming Distance between two strings is the number of positions at which the corresponding symbols are different.
|
||||
pub mod hamming;
|
||||
/// The Mahalanobis distance is the distance between two points in multivariate space.
|
||||
pub mod mahalanobis;
|
||||
/// Also known as rectilinear distance, city block distance, taxicab metric.
|
||||
pub mod manhattan;
|
||||
/// A generalization of both the Euclidean distance and the Manhattan distance.
|
||||
pub mod minkowski;
|
||||
|
||||
use crate::linalg::basic::arrays::Array2;
|
||||
use crate::linalg::traits::lu::LUDecomposable;
|
||||
use crate::numbers::basenum::Number;
|
||||
|
||||
/// Distance metric, a function that calculates distance between two points
///
/// Implementations are expected to satisfy the metric axioms listed in the
/// module docs (non-negativity, symmetry, triangle inequality); the trait
/// itself does not enforce them.
pub trait Distance<T>: Clone {
    /// Calculates distance between _a_ and _b_
    fn distance(&self, a: &T, b: &T) -> f64;
}

/// Multitude of distance metric functions
///
/// Zero-sized namespace struct; use its associated functions to construct
/// the individual distance metrics defined in the submodules.
pub struct Distances {}
|
||||
|
||||
impl Distances {
|
||||
/// Euclidian distance, see [`Euclidian`](euclidian/index.html)
|
||||
pub fn euclidian<T: Number>() -> euclidian::Euclidian<T> {
|
||||
euclidian::Euclidian::new()
|
||||
}
|
||||
|
||||
/// Minkowski distance, see [`Minkowski`](minkowski/index.html)
|
||||
/// * `p` - function order. Should be >= 1
|
||||
pub fn minkowski<T: Number>(p: u16) -> minkowski::Minkowski<T> {
|
||||
minkowski::Minkowski::new(p)
|
||||
}
|
||||
|
||||
/// Manhattan distance, see [`Manhattan`](manhattan/index.html)
|
||||
pub fn manhattan<T: Number>() -> manhattan::Manhattan<T> {
|
||||
manhattan::Manhattan::new()
|
||||
}
|
||||
|
||||
/// Hamming distance, see [`Hamming`](hamming/index.html)
|
||||
pub fn hamming<T: Number>() -> hamming::Hamming<T> {
|
||||
hamming::Hamming::new()
|
||||
}
|
||||
|
||||
/// Mahalanobis distance, see [`Mahalanobis`](mahalanobis/index.html)
|
||||
pub fn mahalanobis<T: Number, M: Array2<T>, C: Array2<f64> + LUDecomposable<f64>>(
|
||||
data: &M,
|
||||
) -> mahalanobis::Mahalanobis<T, C> {
|
||||
mahalanobis::Mahalanobis::new(data)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user