From c1d7c038a687301d2d1282bc1cf07b8a6afa9f34 Mon Sep 17 00:00:00 2001
From: Volodymyr Orlov <vorl@Echo.local>
Date: Mon, 23 Dec 2019 10:33:19 -0800
Subject: [PATCH] feat: add basic Matrix implementation for ndarray

---
 Cargo.toml                                |   7 +-
 src/classification/logistic_regression.rs |  44 +-
 src/linalg/mod.rs                         |   1 +
 src/linalg/naive/dense_matrix.rs          |  14 +-
 src/linalg/ndarray_bindings.rs            | 492 ++++++++++++++++++++++
 src/regression/linear_regression.rs       |   2 +-
 6 files changed, 545 insertions(+), 15 deletions(-)
 create mode 100644 src/linalg/ndarray_bindings.rs
diff --git a/Cargo.toml b/Cargo.toml
index bdd9631..5054d5f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,14 +5,13 @@ authors = ["Vlad Orlov"]
 edition = "2018"
 
 [dependencies]
-ndarray = "0.12.1"
-ndarray-linalg = "0.10"
+ndarray = "0.13"
 num-traits = "0.2"
 rand = "0.7.2"
 
 [dev-dependencies]
-ndarray = "0.12.1"
-criterion = "0.2"
+ndarray = "0.13"
+criterion = "0.3"
 
 [[bench]]
 name = "distance"
diff --git a/src/classification/logistic_regression.rs b/src/classification/logistic_regression.rs
index b784e92..5212973 100644
--- a/src/classification/logistic_regression.rs
+++ b/src/classification/logistic_regression.rs
@@ -193,13 +193,13 @@ impl<M: Matrix> LogisticRegression<M> {
     pub fn predict(&self, x: &M) -> M {
         if self.num_classes == 2 {            
             let (nrows, _) = x.shape();
-            let x_and_bias = x.h_stack(&M::ones(nrows, 1));
+            let x_and_bias = x.v_stack(&M::ones(nrows, 1));
             let y_hat: Vec<f64> = x_and_bias.dot(&self.weights.transpose()).to_raw_vector();
             M::from_vec(1, nrows, y_hat.iter().map(|y_hat| self.classes[if y_hat.sigmoid() > 0.5 { 1 } else { 0 }]).collect())            
 
         } else {
             let (nrows, _) = x.shape();
-            let x_and_bias = x.h_stack(&M::ones(nrows, 1));        
+            let x_and_bias = x.v_stack(&M::ones(nrows, 1));        
             let y_hat = x_and_bias.dot(&self.weights.transpose());            
             let class_idxs = y_hat.argmax();
             M::from_vec(1, nrows, class_idxs.iter().map(|class_idx| self.classes[*class_idx]).collect())
@@ -235,7 +235,9 @@ impl<M: Matrix> LogisticRegression<M> {
 #[cfg(test)]
 mod tests {    
     use super::*; 
-    use crate::linalg::naive::dense_matrix::DenseMatrix;    
+    use crate::linalg::naive::dense_matrix::DenseMatrix;   
+    use crate::linalg::ndarray_bindings;
+    use ndarray::{arr2, Array};
 
     #[test]
     fn multiclass_objective_f() { 
@@ -388,4 +390,40 @@ mod tests {
         
 
     }
+
+    #[test]
+    fn tt() { 
+        let x = arr2(&[
+            [5.1, 3.5, 1.4, 0.2],
+            [4.9, 3.0, 1.4, 0.2],
+            [4.7, 3.2, 1.3, 0.2],
+            [4.6, 3.1, 1.5, 0.2],
+            [5.0, 3.6, 1.4, 0.2],
+            [5.4, 3.9, 1.7, 0.4],
+            [4.6, 3.4, 1.4, 0.3],
+            [5.0, 3.4, 1.5, 0.2],
+            [4.4, 2.9, 1.4, 0.2],
+            [4.9, 3.1, 1.5, 0.1],
+            [7.0, 3.2, 4.7, 1.4],
+            [6.4, 3.2, 4.5, 1.5],
+            [6.9, 3.1, 4.9, 1.5],
+            [5.5, 2.3, 4.0, 1.3],
+            [6.5, 2.8, 4.6, 1.5],
+            [5.7, 2.8, 4.5, 1.3],
+            [6.3, 3.3, 4.7, 1.6],
+            [4.9, 2.4, 3.3, 1.0],
+            [6.6, 2.9, 4.6, 1.3],
+            [5.2, 2.7, 3.9, 1.4]]);
+        let y = Array::from_shape_vec((1, 20), vec![0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]).unwrap();        
+
+        let lr = LogisticRegression::fit(&x, &y);        
+
+        println!("{:?}", lr);
+
+        let y_hat = lr.predict(&x).to_raw_vector();                        
+
+        assert_eq!(y_hat, vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]);
+        
+    }
+
 }
\ No newline at end of file
diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs
index 869313e..f3eebfb 100644
--- a/src/linalg/mod.rs
+++ b/src/linalg/mod.rs
@@ -2,6 +2,7 @@ use std::ops::Range;
 use std::fmt::Debug;
 
 pub mod naive;
+pub mod ndarray_bindings;
 
 pub trait Matrix: Clone + Debug {  
 
diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs
index ec86a00..067844e 100644
--- a/src/linalg/naive/dense_matrix.rs
+++ b/src/linalg/naive/dense_matrix.rs
@@ -153,7 +153,7 @@ impl Matrix for DenseMatrix {
         (self.nrows, self.ncols)
     }
 
-    fn v_stack(&self, other: &Self) -> Self {
+    fn h_stack(&self, other: &Self) -> Self {
         if self.ncols != other.ncols {
             panic!("Number of columns in both matrices should be equal");
         }
@@ -170,7 +170,7 @@ impl Matrix for DenseMatrix {
         result    
     }
 
-    fn h_stack(&self, other: &Self) -> Self{
+    fn v_stack(&self, other: &Self) -> Self{
         if self.nrows != other.nrows {
             panic!("Number of rows in both matrices should be equal");
         }
@@ -989,7 +989,7 @@ mod tests {
     }
 
     #[test]
-    fn v_stack() { 
+    fn h_stack() { 
 
             let a = DenseMatrix::from_2d_array(
                 &[
@@ -1007,12 +1007,12 @@ mod tests {
                     &[7., 8., 9.], 
                     &[1., 2., 3.], 
                     &[4., 5., 6.]]);
-            let result = a.v_stack(&b);               
+            let result = a.h_stack(&b);               
             assert_eq!(result, expected);
     }
 
     #[test]
-    fn h_stack() { 
+    fn v_stack() { 
 
             let a = DenseMatrix::from_2d_array(
                 &[
@@ -1029,7 +1029,7 @@ mod tests {
                     &[1., 2., 3., 1., 2.], 
                     &[4., 5., 6., 3., 4.], 
                     &[7., 8., 9., 5., 6.]]);
-            let result = a.h_stack(&b);               
+            let result = a.v_stack(&b);               
             assert_eq!(result, expected);
     }
 
@@ -1144,6 +1144,6 @@ mod tests {
             assert!((prob.get(0, 0) - 0.09).abs() < 0.01);     
             assert!((prob.get(0, 1) - 0.24).abs() < 0.01);     
             assert!((prob.get(0, 2) - 0.66).abs() < 0.01);            
-    }
+    }    
 
 }
diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs
new file mode 100644
index 0000000..0a18ace
--- /dev/null
+++ b/src/linalg/ndarray_bindings.rs
@@ -0,0 +1,492 @@
+use std::ops::Range;
+use crate::linalg::{Matrix};
+use ndarray::{Array, ArrayBase, OwnedRepr, Ix2, Axis, stack, s};
+
+impl Matrix for ArrayBase<OwnedRepr<f64>, Ix2>
+{
+
+    fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> Self {
+        Array::from_shape_vec((nrows, ncols), values.to_vec()).unwrap()     
+    }
+    
+    fn from_vec(nrows: usize, ncols: usize, values: Vec<f64>) -> Self {
+        Array::from_shape_vec((nrows, ncols), values).unwrap()     
+    }
+
+    fn get(&self, row: usize, col: usize) -> f64 {
+        self[[row, col]]
+    }
+
+    fn set(&mut self, row: usize, col: usize, x: f64) {
+        self[[row, col]] = x;
+    }
+
+    fn qr_solve_mut(&mut self, b: Self) -> Self {
+        panic!("qr_solve_mut method is not implemented for ndarray");
+    }
+
+    fn svd_solve_mut(&mut self, b: Self) -> Self {
+        panic!("qr_solve_mut method is not implemented for ndarray");
+    }
+
+    fn zeros(nrows: usize, ncols: usize) -> Self {
+        Array::zeros((nrows, ncols))
+    }
+
+    fn ones(nrows: usize, ncols: usize) -> Self {
+        Array::ones((nrows, ncols))
+    }
+
+    fn to_raw_vector(&self) -> Vec<f64> {
+        self.to_owned().iter().map(|v| *v).collect()
+    }
+
+    fn fill(nrows: usize, ncols: usize, value: f64) -> Self {
+        Array::from_elem((nrows, ncols), value)
+    }
+
+    fn shape(&self) -> (usize, usize) {
+        (self.rows(), self.cols())
+    }    
+
+    fn v_stack(&self, other: &Self) -> Self {
+        stack(Axis(1), &[self.view(), other.view()]).unwrap()
+    }
+
+    fn h_stack(&self, other: &Self) -> Self {
+        stack(Axis(0), &[self.view(), other.view()]).unwrap()
+    }
+
+    fn dot(&self, other: &Self) -> Self {
+       self.dot(other)
+    }
+
+    fn vector_dot(&self, other: &Self) -> f64 {
+        self.dot(&other.view().reversed_axes())[[0, 0]]
+    }    
+
+    fn slice(&self, rows: Range<usize>, cols: Range<usize>) -> Self {
+        self.slice(s![rows, cols]).to_owned()
+    }
+
+    fn approximate_eq(&self, other: &Self, error: f64) -> bool {
+        false
+    }
+
+    fn add_mut(&mut self, other: &Self) -> &Self {
+        *self += other;
+        self        
+    }
+
+    fn sub_mut(&mut self, other: &Self) -> &Self {
+        *self -= other;
+        self
+    }
+
+    fn mul_mut(&mut self, other: &Self) -> &Self {
+        *self *= other;
+        self        
+    }
+
+    fn div_mut(&mut self, other: &Self) -> &Self{
+        *self /= other;
+        self
+    }
+
+    fn add_scalar_mut(&mut self, scalar: f64) -> &Self{
+        *self += scalar;
+        self
+    }
+
+    fn sub_scalar_mut(&mut self, scalar: f64) -> &Self{
+        *self -= scalar;
+        self
+    }
+
+    fn mul_scalar_mut(&mut self, scalar: f64) -> &Self{
+        *self *= scalar;
+        self
+    }
+
+    fn div_scalar_mut(&mut self, scalar: f64) -> &Self{
+        *self /= scalar;
+        self
+    }
+
+    fn transpose(&self) -> Self{
+        self.clone().reversed_axes()
+    }
+
+    fn generate_positive_definite(nrows: usize, ncols: usize) -> Self{
+        panic!("generate_positive_definite method is not implemented for ndarray");
+    }
+
+    fn rand(nrows: usize, ncols: usize) -> Self{
+        panic!("rand method is not implemented for ndarray");
+    }
+
+    fn norm2(&self) -> f64{
+        self.iter().map(|x| x * x).sum::<f64>().sqrt()
+    }
+
+    fn norm(&self, p:f64) -> f64 {
+        if p.is_infinite() && p.is_sign_positive() {
+            self.iter().fold(std::f64::NEG_INFINITY, |f, &val| {
+                let v = val.abs();
+                if f > v {
+                    f
+                } else {
+                    v
+                }
+            })            
+        } else if p.is_infinite() && p.is_sign_negative() {
+            self.iter().fold(std::f64::INFINITY, |f, &val| {
+                let v = val.abs();
+                if f < v {
+                    f
+                } else {
+                    v
+                }
+            })
+        } else {
+
+            let mut norm = 0f64;
+
+            for xi in self.iter() {
+                norm += xi.abs().powf(p);
+            }
+
+            norm.powf(1.0/p)
+        }
+    }
+
+    fn negative_mut(&mut self){
+        *self *= -1.;
+    }
+
+    fn reshape(&self, nrows: usize, ncols: usize) -> Self{
+        self.clone().into_shape((nrows, ncols)).unwrap()
+    }
+
+    fn copy_from(&mut self, other: &Self){
+        self.assign(&other);
+    }
+
+    fn abs_mut(&mut self) -> &Self{
+        self
+    }
+
+    fn sum(&self) -> f64{
+        self.sum()
+    }
+
+    fn max_diff(&self, other: &Self) -> f64{
+        let mut max_diff = 0f64;
+        for r in 0..self.nrows() {
+            for c in 0..self.ncols() {
+                max_diff = max_diff.max((self[(r, c)] - other[(r, c)]).abs());
+            }
+        }
+        max_diff
+    }
+    
+    fn softmax_mut(&mut self){
+        let max = self.iter().map(|x| x.abs()).fold(std::f64::NEG_INFINITY, |a, b| a.max(b));
+        let mut z = 0.;
+        for r in 0..self.nrows() {
+            for c in 0..self.ncols() {
+                let p = (self[(r, c)] - max).exp();
+                self.set(r, c, p);
+                z += p;
+            }
+        }
+        for r in 0..self.nrows() {
+            for c in 0..self.ncols() {
+                self.set(r, c, self[(r, c)] / z);
+            }
+        }
+    }
+
+    fn pow_mut(&mut self, p: f64) -> &Self{
+        for r in 0..self.nrows() {
+            for c in 0..self.ncols() {
+                self.set(r, c, self[(r, c)].powf(p));
+            }
+        }
+        self
+    }
+
+    fn argmax(&self) -> Vec<usize>{
+        let mut res = vec![0usize; self.nrows()];
+
+        for r in 0..self.nrows() {
+            let mut max = std::f64::NEG_INFINITY;
+            let mut max_pos = 0usize;
+            for c in 0..self.ncols() {
+                let v = self[(r, c)];
+                if max < v {
+                    max = v;
+                    max_pos = c; 
+                }
+            }
+            res[r] = max_pos;
+        }
+
+        res
+
+    }
+    
+    fn unique(&self) -> Vec<f64> {
+        let mut result = self.clone().into_raw_vec();
+        result.sort_by(|a, b| a.partial_cmp(b).unwrap());
+        result.dedup();
+        result
+    }
+
+}
+
+#[cfg(test)]
+mod tests {    
+    use super::*; 
+    use ndarray::{arr2, Array2};
+
+    #[test]
+    fn add_mut() { 
+
+        let mut a1 = arr2(&[[ 1.,  2.,  3.],
+                            [4., 5., 6.]]);
+        let a2 = a1.clone();
+        let a3 = a1.clone() + a2.clone();
+        a1.add_mut(&a2);
+
+        assert_eq!(a1, a3);        
+
+    }
+
+    #[test]
+    fn sub_mut() { 
+
+        let mut a1 = arr2(&[[ 1.,  2.,  3.],
+                            [4., 5., 6.]]);
+        let a2 = a1.clone();
+        let a3 = a1.clone() - a2.clone();
+        a1.sub_mut(&a2);
+
+        assert_eq!(a1, a3);        
+
+    }
+
+    #[test]
+    fn mul_mut() { 
+
+        let mut a1 = arr2(&[[ 1.,  2.,  3.],
+                            [4., 5., 6.]]);
+        let a2 = a1.clone();
+        let a3 = a1.clone() * a2.clone();
+        a1.mul_mut(&a2);        
+
+        assert_eq!(a1, a3);        
+
+    }
+
+    #[test]
+    fn div_mut() { 
+
+        let mut a1 = arr2(&[[ 1.,  2.,  3.],
+                            [4., 5., 6.]]);
+        let a2 = a1.clone();
+        let a3 = a1.clone() / a2.clone();
+        a1.div_mut(&a2);        
+
+        assert_eq!(a1, a3);        
+
+    }
+
+    #[test]
+    fn from_array_from_vec() { 
+
+        let a1 = arr2(&[[ 1.,  2.,  3.],
+                        [4., 5., 6.]]);
+        let a2 = Array2::from_array(2, 3, &[1., 2., 3., 4., 5., 6.]);
+        let a3 = Array2::from_vec(2, 3, vec![1., 2., 3., 4., 5., 6.]);
+        
+        assert_eq!(a1, a2);
+        assert_eq!(a1, a3);        
+
+    }
+
+    #[test]
+    fn vstack_hstack() { 
+
+        let a1 = arr2(&[[1.,  2.,  3.],
+                        [4., 5., 6.]]);
+        let a2 = arr2(&[[ 7.], [8.]]);
+
+        let a3 = arr2(&[[9., 10., 11., 12.]]);
+
+        let expected = arr2(&[[1., 2., 3., 7.],
+                              [4., 5., 6., 8.],
+                              [9., 10., 11., 12.]]);
+
+        let result = a1.v_stack(&a2).h_stack(&a3);        
+        
+        assert_eq!(result, expected);      
+
+    }
+
+    #[test]
+    fn to_raw_vector() {
+        let result = arr2(&[[1.,  2.,  3.], [4., 5., 6.]]).to_raw_vector();
+        let expected = vec![1., 2., 3., 4., 5., 6.];
+
+        assert_eq!(result, expected);     
+    }
+
+    #[test]
+    fn get_set() {
+        let mut result = arr2(&[[1.,  2.,  3.], [4., 5., 6.]]);
+        let expected = arr2(&[[1.,  2.,  3.], [4., 10., 6.]]);
+
+        result.set(1, 1, 10.);
+
+        assert_eq!(result, expected);
+        assert_eq!(10., Matrix::get(&result, 1, 1));     
+    }
+
+    #[test]
+    fn dot() { 
+
+            let a = arr2(&[
+                [1., 2., 3.],
+                [4., 5., 6.]]);
+            let b = arr2(&[
+                 [1., 2.],
+                 [3., 4.],
+                 [5., 6.]]);
+            let expected = arr2(&[
+                    [22., 28.], 
+                    [49., 64.]]);            
+            let result = Matrix::dot(&a, &b);
+            assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn vector_dot() { 
+            let a = arr2(&[[1., 2., 3.]]);
+            let b = arr2(&[[1., 2., 3.]]);            
+            assert_eq!(14., a.vector_dot(&b));
+    }
+
+    #[test]
+    fn slice() { 
+
+            let a = arr2(
+                &[
+                    [1., 2., 3., 1., 2.], 
+                    [4., 5., 6., 3., 4.], 
+                    [7., 8., 9., 5., 6.]]);
+            let expected = arr2(
+                &[
+                    [2., 3.], 
+                    [5., 6.]]);
+            let result = Matrix::slice(&a, 0..2, 1..3);
+            assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn scalar_ops() { 
+            let a = arr2(&[[1., 2., 3.]]);            
+            assert_eq!(&arr2(&[[2., 3., 4.]]), a.clone().add_scalar_mut(1.));
+            assert_eq!(&arr2(&[[0., 1., 2.]]), a.clone().sub_scalar_mut(1.));
+            assert_eq!(&arr2(&[[2., 4., 6.]]), a.clone().mul_scalar_mut(2.));
+            assert_eq!(&arr2(&[[0.5, 1., 1.5]]), a.clone().div_scalar_mut(2.));
+    }
+
+    #[test]
+    fn transpose() {
+        let m = arr2(&[[1.0, 3.0], [2.0, 4.0]]);
+        let expected = arr2(&[[1.0, 2.0], [3.0, 4.0]]);
+        let m_transposed = m.transpose();
+        assert_eq!(m_transposed, expected);       
+    }
+
+    #[test]
+    fn norm() { 
+        let v = arr2(&[[3., -2., 6.]]);            
+        assert_eq!(v.norm(1.), 11.);
+        assert_eq!(v.norm(2.), 7.);
+        assert_eq!(v.norm(std::f64::INFINITY), 6.);
+        assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.);
+    }
+
+    #[test]
+    fn negative_mut() { 
+        let mut v = arr2(&[[3., -2., 6.]]);       
+        v.negative_mut();     
+        assert_eq!(v, arr2(&[[-3., 2., -6.]]));        
+    }
+
+    #[test]
+    fn reshape() {
+        let m_orig = arr2(&[[1., 2., 3., 4., 5., 6.]]);
+        let m_2_by_3 = Matrix::reshape(&m_orig, 2, 3);
+        let m_result = Matrix::reshape(&m_2_by_3, 1, 6);        
+        assert_eq!(Matrix::shape(&m_2_by_3), (2, 3));
+        assert_eq!(Matrix::get(&m_2_by_3, 1, 1), 5.);
+        assert_eq!(Matrix::get(&m_result, 0, 1), 2.);
+        assert_eq!(Matrix::get(&m_result, 0, 3), 4.);
+    }
+
+    #[test]
+    fn copy_from() {
+        let mut src = arr2(&[[1., 2., 3.]]);
+        let dst = Array2::<f64>::zeros((1, 3)); 
+        src.copy_from(&dst);
+        assert_eq!(src, dst);
+    }
+
+    #[test]
+    fn sum() {
+        let src = arr2(&[[1., 2., 3.]]);        
+        assert_eq!(src.sum(), 6.);
+    }
+
+    #[test]
+    fn max_diff() {
+        let a1 = arr2(&[[1., 2., 3.], [4., -5., 6.]]);    
+        let a2 = arr2(&[[2., 3., 4.], [1., 0., -12.]]);        
+        assert_eq!(a1.max_diff(&a2), 18.);
+        assert_eq!(a2.max_diff(&a2), 0.);
+    }
+
+    #[test]
+    fn softmax_mut(){
+        let mut prob = arr2(&[[1., 2., 3.]]);  
+        prob.softmax_mut();            
+        assert!((Matrix::get(&prob, 0, 0) - 0.09).abs() < 0.01);     
+        assert!((Matrix::get(&prob, 0, 1) - 0.24).abs() < 0.01);     
+        assert!((Matrix::get(&prob, 0, 2) - 0.66).abs() < 0.01); 
+    }
+
+    #[test]
+    fn pow_mut(){
+        let mut a = arr2(&[[1., 2., 3.]]);  
+        a.pow_mut(3.);
+        assert_eq!(a, arr2(&[[1., 8., 27.]]));
+    }
+
+    #[test]
+    fn argmax(){
+        let a = arr2(&[[1., 2., 3.], [-5., -6., -7.], [0.1, 0.2, 0.1]]);  
+        let res = a.argmax();
+        assert_eq!(res, vec![2, 0, 1]);
+    }
+
+    #[test]
+    fn unique(){
+        let a = arr2(&[[1., 2., 2.], [-2., -6., -7.], [2., 3., 4.]]);  
+        let res = a.unique();
+        assert_eq!(res.len(), 7);
+        assert_eq!(res, vec![-7., -6., -2., 1., 2., 3., 4.]);
+    }
+}
\ No newline at end of file
diff --git a/src/regression/linear_regression.rs b/src/regression/linear_regression.rs
index 4b42f98..e042dd4 100644
--- a/src/regression/linear_regression.rs
+++ b/src/regression/linear_regression.rs
@@ -27,7 +27,7 @@ impl<M: Matrix> LinearRegression<M> {
         }
         
         let b = y.clone();
-        let mut a = x.h_stack(&M::ones(x_nrows, 1));
+        let mut a = x.v_stack(&M::ones(x_nrows, 1));
 
         let w = match solver {
             LinearRegressionSolver::QR => a.qr_solve_mut(b),