feat: add new distance measures + LU decomposition

This commit is contained in:
Volodymyr Orlov
2020-06-05 10:40:17 -07:00
parent f8f1e75fe2
commit e20e9ca6e0
16 changed files with 594 additions and 28 deletions
+4 -8
View File
@@ -21,17 +21,13 @@ impl Euclidian {
sum
}
pub fn distance<T: FloatExt>(x: &Vec<T>, y: &Vec<T>) -> T {
Euclidian::squared_distance(x, y).sqrt()
}
}
impl<T: FloatExt> Distance<Vec<T>, T> for Euclidian {
fn distance(x: &Vec<T>, y: &Vec<T>) -> T {
Self::distance(x, y)
fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
Euclidian::squared_distance(x, y).sqrt()
}
}
@@ -46,9 +42,9 @@ mod tests {
let a = vec![1., 2., 3.];
let b = vec![4., 5., 6.];
let d_arr: f64 = Euclidian::distance(&a, &b);
let l2: f64 = Euclidian{}.distance(&a, &b);
assert!((d_arr - 5.19615242).abs() < 1e-8);
assert!((l2 - 5.19615242).abs() < 1e-8);
}
}
+45
View File
@@ -0,0 +1,45 @@
use serde::{Serialize, Deserialize};
use crate::math::num::FloatExt;
use super::Distance;
/// Hamming distance: the fraction of vector positions at which the
/// corresponding elements differ. Stateless marker type; the metric is
/// provided through the `Distance` impl below.
#[derive(Serialize, Deserialize, Debug)]
pub struct Hamming {
}
impl<T: PartialEq, F: FloatExt> Distance<Vec<T>, F> for Hamming {
    /// Fraction of positions at which `x` and `y` differ, as a float in [0, 1].
    ///
    /// # Panics
    /// Panics when the two vectors have different lengths.
    fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> F {
        if x.len() != y.len() {
            panic!("Input vector sizes are different");
        }
        // Count mismatching positions, then normalize by the vector length.
        let mismatches = x.iter().zip(y.iter()).filter(|(a, b)| a != b).count();
        F::from_usize(mismatches).unwrap() / F::from_usize(x.len()).unwrap()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Renamed from `minkowski_distance` — the name was copy-pasted from
    // minkowski.rs; this module tests the Hamming metric.
    #[test]
    fn hamming_distance() {
        // 3 mismatching positions out of 7 -> 3/7 = 0.42857142...
        let a = vec![1, 0, 0, 1, 0, 0, 1];
        let b = vec![1, 1, 0, 0, 1, 0, 1];
        let h: f64 = Hamming {}.distance(&a, &b);
        assert!((h - 0.42857142).abs() < 1e-8);
    }
}
+97
View File
@@ -0,0 +1,97 @@
#![allow(non_snake_case)]
use std::marker::PhantomData;
use serde::{Serialize, Deserialize};
use crate::math::num::FloatExt;
use super::Distance;
use crate::linalg::Matrix;
/// Mahalanobis distance metric.
///
/// Stores the covariance matrix `sigma` of the underlying data together
/// with its inverse `sigmaInv`, so the inverse is computed once at
/// construction time rather than on every distance evaluation.
#[derive(Serialize, Deserialize, Debug)]
pub struct Mahalanobis<T: FloatExt, M: Matrix<T>> {
    /// Covariance matrix of the data.
    pub sigma: M,
    /// Inverse of `sigma`, obtained via LU decomposition.
    pub sigmaInv: M,
    // Marker tying the element type `T` to the struct without storing one.
    t: PhantomData<T>
}
impl<T: FloatExt, M: Matrix<T>> Mahalanobis<T, M> {
    /// Builds the metric from a data matrix: estimates the covariance of
    /// `data` and inverts it (via LU decomposition) up front.
    pub fn new(data: &M) -> Mahalanobis<T, M> {
        let sigma = data.cov();
        let sigmaInv = sigma.lu().inverse();
        Mahalanobis {
            sigma,
            sigmaInv,
            t: PhantomData,
        }
    }

    /// Builds the metric directly from a precomputed covariance matrix.
    pub fn new_from_covariance(cov: &M) -> Mahalanobis<T, M> {
        let sigma = cov.clone();
        let sigmaInv = sigma.lu().inverse();
        Mahalanobis {
            sigma,
            sigmaInv,
            t: PhantomData,
        }
    }
}
impl<T: FloatExt, M: Matrix<T>> Distance<Vec<T>, T> for Mahalanobis<T, M> {
    /// d(x, y) = sqrt((x - y)^T * Sigma^-1 * (x - y))
    ///
    /// # Panics
    /// Panics when `x` or `y` does not match the dimension of `sigma`.
    fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
        let (nrows, ncols) = self.sigma.shape();
        if x.len() != nrows {
            panic!("Array x[{}] has different dimension with Sigma[{}][{}].", x.len(), nrows, ncols);
        }
        if y.len() != nrows {
            panic!("Array y[{}] has different dimension with Sigma[{}][{}].", y.len(), nrows, ncols);
        }
        // Removed leftover debug output: `println!("{}", self.sigmaInv)` was
        // printing the inverse covariance matrix on every call.
        let n = x.len();
        // z = x - y
        let mut z = vec![T::zero(); n];
        for i in 0..n {
            z[i] = x[i] - y[i];
        }
        // s = z^T * Sigma^-1 * z, i.e. np.dot(np.dot((a-b),VI),(a-b).T)
        let mut s = T::zero();
        for j in 0..n {
            for i in 0..n {
                s = s + self.sigmaInv.get(i, j) * z[i] * z[j];
            }
        }
        s.sqrt()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;

    #[test]
    fn mahalanobis_distance() {
        let data = DenseMatrix::from_array(&[
            &[64., 580., 29.],
            &[66., 570., 33.],
            &[68., 590., 37.],
            &[69., 660., 46.],
            &[73., 600., 55.]]);
        // Column means are [68, 600, 40].
        let a = data.column_mean();
        let b = vec![66., 640., 44.];
        let mahalanobis = Mahalanobis::new(&data);
        // The test previously only printed the result and asserted nothing.
        // Expected value computed by hand with the sample covariance of `data`:
        // sqrt((a-b)^T * Sigma^-1 * (a-b)) ~= 5.33
        assert!((mahalanobis.distance(&a, &b) - 5.33).abs() < 1e-2);
    }
}
+43
View File
@@ -0,0 +1,43 @@
use serde::{Serialize, Deserialize};
use crate::math::num::FloatExt;
use super::Distance;
/// Manhattan (L1, "taxicab") distance: the sum of absolute coordinate
/// differences. Stateless marker type; the metric is provided through the
/// `Distance` impl below.
#[derive(Serialize, Deserialize, Debug)]
pub struct Manhattan {
}
impl<T: FloatExt> Distance<Vec<T>, T> for Manhattan {
    /// Sum of absolute coordinate differences between `x` and `y`.
    ///
    /// # Panics
    /// Panics when the two vectors have different lengths.
    fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
        if x.len() != y.len() {
            panic!("Input vector sizes are different");
        }
        // Accumulate |x_i - y_i| left to right, same order as an index loop.
        x.iter()
            .zip(y.iter())
            .fold(T::zero(), |acc, (&a, &b)| acc + (a - b).abs())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn manhattan_distance() {
        // |1-4| + |2-5| + |3-6| = 9
        let x = vec![1., 2., 3.];
        let y = vec![4., 5., 6.];
        let dist: f64 = Manhattan {}.distance(&x, &y);
        assert!((dist - 9.0).abs() < 1e-8);
    }
}
+63
View File
@@ -0,0 +1,63 @@
use serde::{Serialize, Deserialize};
use crate::math::num::FloatExt;
use super::Distance;
/// Minkowski distance of order `p`, generalizing Manhattan (`p = 1`) and
/// Euclidean (`p = 2`) distances.
#[derive(Serialize, Deserialize, Debug)]
pub struct Minkowski<T: FloatExt> {
    /// Order of the distance; must be >= 1 (checked in `distance`).
    pub p: T
}
impl<T: FloatExt> Distance<Vec<T>, T> for Minkowski<T> {
    /// (sum_i |x_i - y_i|^p)^(1/p)
    ///
    /// # Panics
    /// Panics when the vectors differ in length or when `p < 1`.
    fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
        if x.len() != y.len() {
            panic!("Input vector sizes are different");
        }
        if self.p < T::one() {
            panic!("p must be at least 1");
        }
        // Accumulate |x_i - y_i|^p left to right, same order as an index loop.
        let sum = x
            .iter()
            .zip(y.iter())
            .fold(T::zero(), |acc, (&a, &b)| acc + (a - b).abs().powf(self.p));
        sum.powf(T::one() / self.p)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn minkowski_distance() {
        let x = vec![1., 2., 3.];
        let y = vec![4., 5., 6.];

        let d1: f64 = Minkowski { p: 1.0 }.distance(&x, &y);
        let d2: f64 = Minkowski { p: 2.0 }.distance(&x, &y);
        let d3: f64 = Minkowski { p: 3.0 }.distance(&x, &y);

        // p = 1 matches Manhattan, p = 2 matches Euclidean.
        assert!((d1 - 9.0).abs() < 1e-8);
        assert!((d2 - 5.19615242).abs() < 1e-8);
        assert!((d3 - 4.32674871).abs() < 1e-8);
    }

    #[test]
    #[should_panic(expected = "p must be at least 1")]
    fn minkowski_distance_negative_p() {
        let x = vec![1., 2., 3.];
        let y = vec![4., 5., 6.];
        let _: f64 = Minkowski { p: 0.0 }.distance(&x, &y);
    }
}
+17 -1
View File
@@ -1,9 +1,13 @@
pub mod euclidian;
pub mod minkowski;
pub mod manhattan;
pub mod hamming;
pub mod mahalanobis;
use crate::math::num::FloatExt;
pub trait Distance<T, F: FloatExt>{
fn distance(a: &T, b: &T) -> F;
fn distance(&self, a: &T, b: &T) -> F;
}
pub struct Distances{
@@ -13,4 +17,16 @@ impl Distances {
pub fn euclidian() -> euclidian::Euclidian{
euclidian::Euclidian {}
}
pub fn minkowski<T: FloatExt>(p: T) -> minkowski::Minkowski<T>{
minkowski::Minkowski {p: p}
}
pub fn manhattan() -> manhattan::Manhattan{
manhattan::Manhattan {}
}
pub fn hamming() -> hamming::Hamming{
hamming::Hamming {}
}
}