feat: documents distance and num modules
This commit is contained in:
@@ -1,14 +1,34 @@
|
|||||||
|
//! # Euclidean Metric Distance
|
||||||
|
//!
|
||||||
|
//! The Euclidean distance (L2) between two points \\( x \\) and \\( y \\) in n-space is defined as
|
||||||
|
//!
|
||||||
|
//! \\[ d(x, y) = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \\]
|
||||||
|
//!
|
||||||
|
//! Example:
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use smartcore::math::distance::Distance;
|
||||||
|
//! use smartcore::math::distance::euclidian::Euclidian;
|
||||||
|
//!
|
||||||
|
//! let x = vec![1., 1.];
|
||||||
|
//! let y = vec![2., 2.];
|
||||||
|
//!
|
||||||
|
//! let l2: f64 = Euclidian{}.distance(&x, &y);
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::math::num::RealNumber;
|
use crate::math::num::RealNumber;
|
||||||
|
|
||||||
use super::Distance;
|
use super::Distance;
|
||||||
|
|
||||||
|
/// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct Euclidian {}
|
pub struct Euclidian {}
|
||||||
|
|
||||||
impl Euclidian {
|
impl Euclidian {
|
||||||
pub fn squared_distance<T: RealNumber>(x: &Vec<T>, y: &Vec<T>) -> T {
|
pub(crate) fn squared_distance<T: RealNumber>(x: &Vec<T>, y: &Vec<T>) -> T {
|
||||||
if x.len() != y.len() {
|
if x.len() != y.len() {
|
||||||
panic!("Input vector sizes are different.");
|
panic!("Input vector sizes are different.");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,30 @@
|
|||||||
|
//! # Hamming Distance
|
||||||
|
//!
|
||||||
|
//! Hamming Distance measures the similarity between two integer-valued vectors of the same length.
|
||||||
|
//! Given two vectors \\( x \in ℝ^n \\), \\( y \in ℝ^n \\), the Hamming distance between \\( x \\) and \\( y \\), \\( d(x, y) \\), is the number of places where \\( x \\) and \\( y \\) differ.
|
||||||
|
//!
|
||||||
|
//! Example:
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use smartcore::math::distance::Distance;
|
||||||
|
//! use smartcore::math::distance::hamming::Hamming;
|
||||||
|
//!
|
||||||
|
//! let a = vec![1, 0, 0, 1, 0, 0, 1];
|
||||||
|
//! let b = vec![1, 1, 0, 0, 1, 0, 1];
|
||||||
|
//!
|
||||||
|
//! let h: f64 = Hamming {}.distance(&a, &b);
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::math::num::RealNumber;
|
use crate::math::num::RealNumber;
|
||||||
|
|
||||||
use super::Distance;
|
use super::Distance;
|
||||||
|
|
||||||
|
/// While comparing two integer-valued vectors of equal length, Hamming distance is the number of positions at which the corresponding values differ.
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct Hamming {}
|
pub struct Hamming {}
|
||||||
|
|
||||||
@@ -29,7 +50,7 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn minkowski_distance() {
|
fn hamming_distance() {
|
||||||
let a = vec![1, 0, 0, 1, 0, 0, 1];
|
let a = vec![1, 0, 0, 1, 0, 0, 1];
|
||||||
let b = vec![1, 1, 0, 0, 1, 0, 1];
|
let b = vec![1, 1, 0, 0, 1, 0, 1];
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,44 @@
|
|||||||
|
//! # Mahalanobis Distance
|
||||||
|
//!
|
||||||
|
//! The Mahalanobis distance (MD) is the distance between two points in multivariate space.
|
||||||
|
//! In a regular Euclidean space the distance between any two points can be measured with [Euclidean distance](euclidian/index.html).
|
||||||
|
//! For uncorrelated variables, the Euclidean distance equals the MD. However, if two or more variables are correlated the measurements become impossible
|
||||||
|
//! with Euclidean distance because the axes are no longer at right angles to each other. MD, on the other hand, is scale-invariant:
|
||||||
|
//! it takes into account the covariance matrix of the dataset when calculating distance between 2 points that belong to the same space as the dataset.
|
||||||
|
//!
|
||||||
|
//! MD between two vectors \\( x \in ℝ^n \\) and \\( y \in ℝ^n \\) is defined as
|
||||||
|
//! \\[ d(x, y) = \sqrt{(x - y)^TS^{-1}(x - y)}\\]
|
||||||
|
//!
|
||||||
|
//! where \\( S \\) is the covariance matrix of the dataset.
|
||||||
|
//!
|
||||||
|
//! Example:
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use smartcore::linalg::naive::dense_matrix::*;
|
||||||
|
//! use smartcore::math::distance::Distance;
|
||||||
|
//! use smartcore::math::distance::mahalanobis::Mahalanobis;
|
||||||
|
//!
|
||||||
|
//! let data = DenseMatrix::from_array(&[
|
||||||
|
//! &[64., 580., 29.],
|
||||||
|
//! &[66., 570., 33.],
|
||||||
|
//! &[68., 590., 37.],
|
||||||
|
//! &[69., 660., 46.],
|
||||||
|
//! &[73., 600., 55.],
|
||||||
|
//! ]);
|
||||||
|
//!
|
||||||
|
//! let a = data.column_mean();
|
||||||
|
//! let b = vec![66., 640., 44.];
|
||||||
|
//!
|
||||||
|
//! let mahalanobis = Mahalanobis::new(&data);
|
||||||
|
//!
|
||||||
|
//! mahalanobis.distance(&a, &b);
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ## References
|
||||||
|
//! * ["Introduction to Multivariate Statistical Analysis in Chemometrics", Varmuza, K., Filzmoser, P., 2016, p.46](https://www.taylorfrancis.com/books/9780429145049)
|
||||||
|
//! * ["Example of Calculating the Mahalanobis Distance", McCaffrey, J.D.](https://jamesmccaffrey.wordpress.com/2017/11/09/example-of-calculating-the-mahalanobis-distance/)
|
||||||
|
//!
|
||||||
|
//! <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
#![allow(non_snake_case)]
|
#![allow(non_snake_case)]
|
||||||
|
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
@@ -9,14 +50,19 @@ use crate::math::num::RealNumber;
|
|||||||
use super::Distance;
|
use super::Distance;
|
||||||
use crate::linalg::Matrix;
|
use crate::linalg::Matrix;
|
||||||
|
|
||||||
|
/// Mahalanobis distance.
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct Mahalanobis<T: RealNumber, M: Matrix<T>> {
|
pub struct Mahalanobis<T: RealNumber, M: Matrix<T>> {
|
||||||
|
/// covariance matrix of the dataset
|
||||||
pub sigma: M,
|
pub sigma: M,
|
||||||
|
/// inverse of the covariance matrix
|
||||||
pub sigmaInv: M,
|
pub sigmaInv: M,
|
||||||
t: PhantomData<T>,
|
t: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: RealNumber, M: Matrix<T>> Mahalanobis<T, M> {
|
impl<T: RealNumber, M: Matrix<T>> Mahalanobis<T, M> {
|
||||||
|
/// Constructs new instance of `Mahalanobis` from given dataset
|
||||||
|
/// * `data` - an _NxM_ matrix, where _N_ is the number of observations and _M_ is the number of attributes
|
||||||
pub fn new(data: &M) -> Mahalanobis<T, M> {
|
pub fn new(data: &M) -> Mahalanobis<T, M> {
|
||||||
let sigma = data.cov();
|
let sigma = data.cov();
|
||||||
let sigmaInv = sigma.lu().inverse();
|
let sigmaInv = sigma.lu().inverse();
|
||||||
@@ -27,6 +73,8 @@ impl<T: RealNumber, M: Matrix<T>> Mahalanobis<T, M> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Constructs new instance of `Mahalanobis` from given covariance matrix
|
||||||
|
/// * `cov` - a covariance matrix
|
||||||
pub fn new_from_covariance(cov: &M) -> Mahalanobis<T, M> {
|
pub fn new_from_covariance(cov: &M) -> Mahalanobis<T, M> {
|
||||||
let sigma = cov.clone();
|
let sigma = cov.clone();
|
||||||
let sigmaInv = sigma.lu().inverse();
|
let sigmaInv = sigma.lu().inverse();
|
||||||
@@ -99,6 +147,8 @@ mod tests {
|
|||||||
|
|
||||||
let mahalanobis = Mahalanobis::new(&data);
|
let mahalanobis = Mahalanobis::new(&data);
|
||||||
|
|
||||||
println!("{}", mahalanobis.distance(&a, &b));
|
let md: f64 = mahalanobis.distance(&a, &b);
|
||||||
|
|
||||||
|
assert!((md - 5.33).abs() < 1e-2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,28 @@
|
|||||||
|
//! # Manhattan Distance
|
||||||
|
//!
|
||||||
|
//! The Manhattan distance between two points \\(x \in ℝ^n \\) and \\( y \in ℝ^n \\) in n-dimensional space is the sum of the distances in each dimension.
|
||||||
|
//!
|
||||||
|
//! \\[ d(x, y) = \sum_{i=1}^n \lvert x_i - y_i \rvert \\]
|
||||||
|
//!
|
||||||
|
//! Example:
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use smartcore::math::distance::Distance;
|
||||||
|
//! use smartcore::math::distance::manhattan::Manhattan;
|
||||||
|
//!
|
||||||
|
//! let x = vec![1., 1.];
|
||||||
|
//! let y = vec![2., 2.];
|
||||||
|
//!
|
||||||
|
//! let l1: f64 = Manhattan {}.distance(&x, &y);
|
||||||
|
//! ```
|
||||||
|
//! <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::math::num::RealNumber;
|
use crate::math::num::RealNumber;
|
||||||
|
|
||||||
use super::Distance;
|
use super::Distance;
|
||||||
|
|
||||||
|
/// Manhattan distance
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct Manhattan {}
|
pub struct Manhattan {}
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,35 @@
|
|||||||
|
//! # Minkowski Distance
|
||||||
|
//!
|
||||||
|
//! The Minkowski distance of order _p_ (where _p_ is an integer) is a metric in a normed vector space which can be considered as a generalization of both the Euclidean distance and the Manhattan distance.
|
||||||
|
//! The Minkowski distance between two points \\(x \in ℝ^n \\) and \\( y \in ℝ^n \\) in n-dimensional space is defined as:
|
||||||
|
//!
|
||||||
|
//! \\[ d(x, y) = \left(\sum_{i=1}^n \lvert x_i - y_i \rvert^p\right)^{1/p} \\]
|
||||||
|
//!
|
||||||
|
//! Example:
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use smartcore::math::distance::Distance;
|
||||||
|
//! use smartcore::math::distance::minkowski::Minkowski;
|
||||||
|
//!
|
||||||
|
//! let x = vec![1., 1.];
|
||||||
|
//! let y = vec![2., 2.];
|
||||||
|
//!
|
||||||
|
//! let l1: f64 = Minkowski { p: 1 }.distance(&x, &y);
|
||||||
|
//! let l2: f64 = Minkowski { p: 2 }.distance(&x, &y);
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::math::num::RealNumber;
|
use crate::math::num::RealNumber;
|
||||||
|
|
||||||
use super::Distance;
|
use super::Distance;
|
||||||
|
|
||||||
|
/// Defines the Minkowski distance of order `p`
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct Minkowski {
|
pub struct Minkowski {
|
||||||
|
/// order, integer
|
||||||
pub p: u16,
|
pub p: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ pub mod manhattan;
|
|||||||
/// A generalization of both the Euclidean distance and the Manhattan distance.
|
/// A generalization of both the Euclidean distance and the Manhattan distance.
|
||||||
pub mod minkowski;
|
pub mod minkowski;
|
||||||
|
|
||||||
|
use crate::linalg::Matrix;
|
||||||
use crate::math::num::RealNumber;
|
use crate::math::num::RealNumber;
|
||||||
|
|
||||||
/// Distance metric, a function that calculates distance between two points
|
/// Distance metric, a function that calculates distance between two points
|
||||||
@@ -35,24 +36,29 @@ pub trait Distance<T, F: RealNumber> {
|
|||||||
pub struct Distances {}
|
pub struct Distances {}
|
||||||
|
|
||||||
impl Distances {
|
impl Distances {
|
||||||
/// Euclidian distance
|
/// Euclidian distance, see [`Euclidian`](euclidian/index.html)
|
||||||
pub fn euclidian() -> euclidian::Euclidian {
|
pub fn euclidian() -> euclidian::Euclidian {
|
||||||
euclidian::Euclidian {}
|
euclidian::Euclidian {}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Minkowski distance
|
/// Minkowski distance, see [`Minkowski`](minkowski/index.html)
|
||||||
/// * `p` - function order. Should be >= 1
|
/// * `p` - function order. Should be >= 1
|
||||||
pub fn minkowski(p: u16) -> minkowski::Minkowski {
|
pub fn minkowski(p: u16) -> minkowski::Minkowski {
|
||||||
minkowski::Minkowski { p: p }
|
minkowski::Minkowski { p: p }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Manhattan distance
|
/// Manhattan distance, see [`Manhattan`](manhattan/index.html)
|
||||||
pub fn manhattan() -> manhattan::Manhattan {
|
pub fn manhattan() -> manhattan::Manhattan {
|
||||||
manhattan::Manhattan {}
|
manhattan::Manhattan {}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Hamming distance
|
/// Hamming distance, see [`Hamming`](hamming/index.html)
|
||||||
pub fn hamming() -> hamming::Hamming {
|
pub fn hamming() -> hamming::Hamming {
|
||||||
hamming::Hamming {}
|
hamming::Hamming {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Mahalanobis distance, see [`Mahalanobis`](mahalanobis/index.html)
|
||||||
|
pub fn mahalanobis<T: RealNumber, M: Matrix<T>>(data: &M) -> mahalanobis::Mahalanobis<T, M> {
|
||||||
|
mahalanobis::Mahalanobis::new(data)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,21 +1,34 @@
|
|||||||
|
//! # Real Number
|
||||||
|
//! Most algorithms in SmartCore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
|
||||||
|
//! This module defines the real number trait and some useful functions that are used in the [Linear Algebra](../../linalg/index.html) module.
|
||||||
|
|
||||||
use num_traits::{Float, FromPrimitive};
|
use num_traits::{Float, FromPrimitive};
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use std::fmt::{Debug, Display};
|
use std::fmt::{Debug, Display};
|
||||||
use std::iter::{Product, Sum};
|
use std::iter::{Product, Sum};
|
||||||
|
|
||||||
|
/// Defines real number
|
||||||
|
/// <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
|
||||||
pub trait RealNumber: Float + FromPrimitive + Debug + Display + Copy + Sum + Product {
|
pub trait RealNumber: Float + FromPrimitive + Debug + Display + Copy + Sum + Product {
|
||||||
|
/// Copy sign from `sign` - another real number
|
||||||
fn copysign(self, sign: Self) -> Self;
|
fn copysign(self, sign: Self) -> Self;
|
||||||
|
|
||||||
|
/// Calculates \\( \ln(1+e^x) \\) without overflow.
|
||||||
fn ln_1pe(self) -> Self;
|
fn ln_1pe(self) -> Self;
|
||||||
|
|
||||||
|
/// Efficient implementation of Sigmoid function, \\( S(x) = \frac{1}{1 + e^{-x}} \\), see [Sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function)
|
||||||
fn sigmoid(self) -> Self;
|
fn sigmoid(self) -> Self;
|
||||||
|
|
||||||
|
/// Returns pseudorandom number between 0 and 1
|
||||||
fn rand() -> Self;
|
fn rand() -> Self;
|
||||||
|
|
||||||
|
/// Returns 2
|
||||||
fn two() -> Self;
|
fn two() -> Self;
|
||||||
|
|
||||||
|
/// Returns 0.5
|
||||||
fn half() -> Self;
|
fn half() -> Self;
|
||||||
|
|
||||||
|
/// Returns \\( x^2 \\)
|
||||||
fn square(self) -> Self {
|
fn square(self) -> Self {
|
||||||
self * self
|
self * self
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user