fix: renames FloatExt to RealNumber

This commit is contained in:
Volodymyr Orlov
2020-08-29 20:17:01 -07:00
parent 8705867386
commit fa0918cee3
43 changed files with 238 additions and 208 deletions
+3 -3
View File
@@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::math::num::FloatExt;
use crate::math::num::RealNumber;
use super::Distance;
@@ -8,7 +8,7 @@ use super::Distance;
pub struct Euclidian {}
impl Euclidian {
pub fn squared_distance<T: FloatExt>(x: &Vec<T>, y: &Vec<T>) -> T {
pub fn squared_distance<T: RealNumber>(x: &Vec<T>, y: &Vec<T>) -> T {
if x.len() != y.len() {
panic!("Input vector sizes are different.");
}
@@ -22,7 +22,7 @@ impl Euclidian {
}
}
impl<T: FloatExt> Distance<Vec<T>, T> for Euclidian {
impl<T: RealNumber> Distance<Vec<T>, T> for Euclidian {
fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
Euclidian::squared_distance(x, y).sqrt()
}
+2 -2
View File
@@ -1,13 +1,13 @@
use serde::{Deserialize, Serialize};
use crate::math::num::FloatExt;
use crate::math::num::RealNumber;
use super::Distance;
#[derive(Serialize, Deserialize, Debug)]
pub struct Hamming {}
impl<T: PartialEq, F: FloatExt> Distance<Vec<T>, F> for Hamming {
impl<T: PartialEq, F: RealNumber> Distance<Vec<T>, F> for Hamming {
fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> F {
if x.len() != y.len() {
panic!("Input vector sizes are different");
+4 -4
View File
@@ -4,19 +4,19 @@ use std::marker::PhantomData;
use serde::{Deserialize, Serialize};
use crate::math::num::FloatExt;
use crate::math::num::RealNumber;
use super::Distance;
use crate::linalg::Matrix;
#[derive(Serialize, Deserialize, Debug)]
pub struct Mahalanobis<T: FloatExt, M: Matrix<T>> {
pub struct Mahalanobis<T: RealNumber, M: Matrix<T>> {
pub sigma: M,
pub sigmaInv: M,
t: PhantomData<T>,
}
impl<T: FloatExt, M: Matrix<T>> Mahalanobis<T, M> {
impl<T: RealNumber, M: Matrix<T>> Mahalanobis<T, M> {
pub fn new(data: &M) -> Mahalanobis<T, M> {
let sigma = data.cov();
let sigmaInv = sigma.lu().inverse();
@@ -38,7 +38,7 @@ impl<T: FloatExt, M: Matrix<T>> Mahalanobis<T, M> {
}
}
impl<T: FloatExt, M: Matrix<T>> Distance<Vec<T>, T> for Mahalanobis<T, M> {
impl<T: RealNumber, M: Matrix<T>> Distance<Vec<T>, T> for Mahalanobis<T, M> {
fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
let (nrows, ncols) = self.sigma.shape();
if x.len() != nrows {
+2 -2
View File
@@ -1,13 +1,13 @@
use serde::{Deserialize, Serialize};
use crate::math::num::FloatExt;
use crate::math::num::RealNumber;
use super::Distance;
#[derive(Serialize, Deserialize, Debug)]
pub struct Manhattan {}
impl<T: FloatExt> Distance<Vec<T>, T> for Manhattan {
impl<T: RealNumber> Distance<Vec<T>, T> for Manhattan {
fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
if x.len() != y.len() {
panic!("Input vector sizes are different");
+13 -11
View File
@@ -1,30 +1,32 @@
use serde::{Deserialize, Serialize};
use crate::math::num::FloatExt;
use crate::math::num::RealNumber;
use super::Distance;
#[derive(Serialize, Deserialize, Debug)]
pub struct Minkowski<T: FloatExt> {
pub p: T,
pub struct Minkowski {
pub p: u16,
}
impl<T: FloatExt> Distance<Vec<T>, T> for Minkowski<T> {
impl<T: RealNumber> Distance<Vec<T>, T> for Minkowski {
fn distance(&self, x: &Vec<T>, y: &Vec<T>) -> T {
if x.len() != y.len() {
panic!("Input vector sizes are different");
}
if self.p < T::one() {
if self.p < 1 {
panic!("p must be at least 1");
}
let mut dist = T::zero();
let p_t = T::from_u16(self.p).unwrap();
for i in 0..x.len() {
let d = (x[i] - y[i]).abs();
dist = dist + d.powf(self.p);
dist = dist + d.powf(p_t);
}
dist.powf(T::one() / self.p)
dist.powf(T::one() / p_t)
}
}
@@ -37,9 +39,9 @@ mod tests {
let a = vec![1., 2., 3.];
let b = vec![4., 5., 6.];
let l1: f64 = Minkowski { p: 1.0 }.distance(&a, &b);
let l2: f64 = Minkowski { p: 2.0 }.distance(&a, &b);
let l3: f64 = Minkowski { p: 3.0 }.distance(&a, &b);
let l1: f64 = Minkowski { p: 1 }.distance(&a, &b);
let l2: f64 = Minkowski { p: 2 }.distance(&a, &b);
let l3: f64 = Minkowski { p: 3 }.distance(&a, &b);
assert!((l1 - 9.0).abs() < 1e-8);
assert!((l2 - 5.19615242).abs() < 1e-8);
@@ -52,6 +54,6 @@ mod tests {
let a = vec![1., 2., 3.];
let b = vec![4., 5., 6.];
let _: f64 = Minkowski { p: 0.0 }.distance(&a, &b);
let _: f64 = Minkowski { p: 0 }.distance(&a, &b);
}
}
+30 -3
View File
@@ -1,30 +1,57 @@
//! # Collection of Distance Functions
//!
//! Many algorithms in machine learning require a measure of distance between data points. A distance metric (or simply metric) is a function that defines a distance between each pair of elements of a set.
//! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions:
//! 1. \\( d(x, y) = 0 \\) if and only if \\( x = y \\), positive definiteness
//! 1. \\( d(x, y) = d(y, x) \\), symmetry
//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality
//!
//! for all \\(x, y, z \in Z \\)
//!
//! A good distance metric helps to improve the performance of classification, clustering and information retrieval algorithms significantly.
//!
//! <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML"></script>
/// Euclidean Distance is the straight-line distance between two points in Euclidean space that represents the shortest distance between these points.
pub mod euclidian;
/// Hamming Distance between two strings is the number of positions at which the corresponding symbols are different.
pub mod hamming;
/// The Mahalanobis distance is the distance between two points in multivariate space.
pub mod mahalanobis;
/// Also known as rectilinear distance, city block distance, taxicab metric.
pub mod manhattan;
/// A generalization of both the Euclidean distance and the Manhattan distance.
pub mod minkowski;
use crate::math::num::FloatExt;
use crate::math::num::RealNumber;
pub trait Distance<T, F: FloatExt> {
/// Distance metric, a function that calculates distance between two points
pub trait Distance<T, F: RealNumber> {
/// Calculates distance between _a_ and _b_
fn distance(&self, a: &T, b: &T) -> F;
}
/// Multitude of distance metric functions
pub struct Distances {}
impl Distances {
/// Euclidean distance
pub fn euclidian() -> euclidian::Euclidian {
euclidian::Euclidian {}
}
pub fn minkowski<T: FloatExt>(p: T) -> minkowski::Minkowski<T> {
/// Minkowski distance
/// * `p` - function order. Should be >= 1
pub fn minkowski(p: u16) -> minkowski::Minkowski {
minkowski::Minkowski { p: p }
}
/// Manhattan distance
pub fn manhattan() -> manhattan::Manhattan {
manhattan::Manhattan {}
}
/// Hamming distance
pub fn hamming() -> hamming::Hamming {
hamming::Hamming {}
}
+2 -1
View File
@@ -1,2 +1,3 @@
/// A multitude of distance metrics is defined here
pub mod distance;
pub(crate) mod num;
pub mod num;
+3 -3
View File
@@ -3,7 +3,7 @@ use rand::prelude::*;
use std::fmt::{Debug, Display};
use std::iter::{Product, Sum};
pub trait FloatExt: Float + FromPrimitive + Debug + Display + Copy + Sum + Product {
pub trait RealNumber: Float + FromPrimitive + Debug + Display + Copy + Sum + Product {
fn copysign(self, sign: Self) -> Self;
fn ln_1pe(self) -> Self;
@@ -21,7 +21,7 @@ pub trait FloatExt: Float + FromPrimitive + Debug + Display + Copy + Sum + Produ
}
}
impl FloatExt for f64 {
impl RealNumber for f64 {
fn copysign(self, sign: Self) -> Self {
self.copysign(sign)
}
@@ -58,7 +58,7 @@ impl FloatExt for f64 {
}
}
impl FloatExt for f32 {
impl RealNumber for f32 {
fn copysign(self, sign: Self) -> Self {
self.copysign(sign)
}