From c1d7c038a687301d2d1282bc1cf07b8a6afa9f34 Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Mon, 23 Dec 2019 10:33:19 -0800 Subject: [PATCH] feat: add basic Matrix implementation for ndarray --- Cargo.toml | 7 +- src/classification/logistic_regression.rs | 44 +- src/linalg/mod.rs | 1 + src/linalg/naive/dense_matrix.rs | 14 +- src/linalg/ndarray_bindings.rs | 492 ++++++++++++++++++++++ src/regression/linear_regression.rs | 2 +- 6 files changed, 545 insertions(+), 15 deletions(-) create mode 100644 src/linalg/ndarray_bindings.rs diff --git a/Cargo.toml b/Cargo.toml index bdd9631..5054d5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,14 +5,13 @@ authors = ["Vlad Orlov"] edition = "2018" [dependencies] -ndarray = "0.12.1" -ndarray-linalg = "0.10" +ndarray = "0.13" num-traits = "0.2" rand = "0.7.2" [dev-dependencies] -ndarray = "0.12.1" -criterion = "0.2" +ndarray = "0.13" +criterion = "0.3" [[bench]] name = "distance" diff --git a/src/classification/logistic_regression.rs b/src/classification/logistic_regression.rs index b784e92..5212973 100644 --- a/src/classification/logistic_regression.rs +++ b/src/classification/logistic_regression.rs @@ -193,13 +193,13 @@ impl LogisticRegression { pub fn predict(&self, x: &M) -> M { if self.num_classes == 2 { let (nrows, _) = x.shape(); - let x_and_bias = x.h_stack(&M::ones(nrows, 1)); + let x_and_bias = x.v_stack(&M::ones(nrows, 1)); let y_hat: Vec = x_and_bias.dot(&self.weights.transpose()).to_raw_vector(); M::from_vec(1, nrows, y_hat.iter().map(|y_hat| self.classes[if y_hat.sigmoid() > 0.5 { 1 } else { 0 }]).collect()) } else { let (nrows, _) = x.shape(); - let x_and_bias = x.h_stack(&M::ones(nrows, 1)); + let x_and_bias = x.v_stack(&M::ones(nrows, 1)); let y_hat = x_and_bias.dot(&self.weights.transpose()); let class_idxs = y_hat.argmax(); M::from_vec(1, nrows, class_idxs.iter().map(|class_idx| self.classes[*class_idx]).collect()) @@ -235,7 +235,9 @@ impl LogisticRegression { #[cfg(test)] mod tests { use super::*; - use crate::linalg::naive::dense_matrix::DenseMatrix; + use crate::linalg::naive::dense_matrix::DenseMatrix; + use crate::linalg::ndarray_bindings; + use ndarray::{arr2, Array}; #[test] fn multiclass_objective_f() { @@ -388,4 +390,40 @@ mod tests { } + + #[test] + fn tt() { + let x = arr2(&[ + [5.1, 3.5, 1.4, 0.2], + [4.9, 3.0, 1.4, 0.2], + [4.7, 3.2, 1.3, 0.2], + [4.6, 3.1, 1.5, 0.2], + [5.0, 3.6, 1.4, 0.2], + [5.4, 3.9, 1.7, 0.4], + [4.6, 3.4, 1.4, 0.3], + [5.0, 3.4, 1.5, 0.2], + [4.4, 2.9, 1.4, 0.2], + [4.9, 3.1, 1.5, 0.1], + [7.0, 3.2, 4.7, 1.4], + [6.4, 3.2, 4.5, 1.5], + [6.9, 3.1, 4.9, 1.5], + [5.5, 2.3, 4.0, 1.3], + [6.5, 2.8, 4.6, 1.5], + [5.7, 2.8, 4.5, 1.3], + [6.3, 3.3, 4.7, 1.6], + [4.9, 2.4, 3.3, 1.0], + [6.6, 2.9, 4.6, 1.3], + [5.2, 2.7, 3.9, 1.4]]); + let y = Array::from_shape_vec((1, 20), vec![0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]).unwrap(); + + let lr = LogisticRegression::fit(&x, &y); + + println!("{:?}", lr); + + let y_hat = lr.predict(&x).to_raw_vector(); + + assert_eq!(y_hat, vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]); + + } + } \ No newline at end of file diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 869313e..f3eebfb 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -2,6 +2,7 @@ use std::ops::Range; use std::fmt::Debug; pub mod naive; +pub mod ndarray_bindings; pub trait Matrix: Clone + Debug { diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index ec86a00..067844e 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -153,7 +153,7 @@ impl Matrix for DenseMatrix { (self.nrows, self.ncols) } - fn v_stack(&self, other: &Self) -> Self { + fn h_stack(&self, other: &Self) -> Self { if self.ncols != other.ncols { panic!("Number of columns in both matrices should be equal"); } @@ -170,7 +170,7 @@ impl Matrix for DenseMatrix { result } - fn h_stack(&self, other: &Self) -> Self{ + fn v_stack(&self, other: &Self) -> Self{ if self.nrows != other.nrows { panic!("Number of rows in both matrices should be equal"); } @@ -989,7 +989,7 @@ mod tests { } #[test] - fn v_stack() { + fn h_stack() { let a = DenseMatrix::from_2d_array( &[ @@ -1007,12 +1007,12 @@ mod tests { &[7., 8., 9.], &[1., 2., 3.], &[4., 5., 6.]]); - let result = a.v_stack(&b); + let result = a.h_stack(&b); assert_eq!(result, expected); } #[test] - fn h_stack() { + fn v_stack() { let a = DenseMatrix::from_2d_array( &[ @@ -1029,7 +1029,7 @@ mod tests { &[1., 2., 3., 1., 2.], &[4., 5., 6., 3., 4.], &[7., 8., 9., 5., 6.]]); - let result = a.h_stack(&b); + let result = a.v_stack(&b); assert_eq!(result, expected); } @@ -1144,6 +1144,6 @@ mod tests { assert!((prob.get(0, 0) - 0.09).abs() < 0.01); assert!((prob.get(0, 1) - 0.24).abs() < 0.01); assert!((prob.get(0, 2) - 0.66).abs() < 0.01); - } + } } diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs new file mode 100644 index 0000000..0a18ace --- /dev/null +++ b/src/linalg/ndarray_bindings.rs @@ -0,0 +1,492 @@ +use std::ops::Range; +use crate::linalg::{Matrix}; +use ndarray::{Array, ArrayBase, OwnedRepr, Ix2, Axis, stack, s}; + +impl Matrix for ArrayBase, Ix2> +{ + + fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> Self { + Array::from_shape_vec((nrows, ncols), values.to_vec()).unwrap() + } + + fn from_vec(nrows: usize, ncols: usize, values: Vec) -> Self { + Array::from_shape_vec((nrows, ncols), values).unwrap() + } + + fn get(&self, row: usize, col: usize) -> f64 { + self[[row, col]] + } + + fn set(&mut self, row: usize, col: usize, x: f64) { + self[[row, col]] = x; + } + + fn qr_solve_mut(&mut self, b: Self) -> Self { + panic!("qr_solve_mut method is not implemented for ndarray"); + } + + fn svd_solve_mut(&mut self, b: Self) -> Self { + panic!("qr_solve_mut method is not implemented for ndarray"); + } + + fn zeros(nrows: usize, ncols: usize) -> Self { + Array::zeros((nrows, ncols)) + } + + fn ones(nrows: usize, ncols: usize) -> Self { + Array::ones((nrows, ncols)) + } + + fn to_raw_vector(&self) -> Vec { + self.to_owned().iter().map(|v| *v).collect() + } + + fn fill(nrows: usize, ncols: usize, value: f64) -> Self { + Array::from_elem((nrows, ncols), value) + } + + fn shape(&self) -> (usize, usize) { + (self.rows(), self.cols()) + } + + fn v_stack(&self, other: &Self) -> Self { + stack(Axis(1), &[self.view(), other.view()]).unwrap() + } + + fn h_stack(&self, other: &Self) -> Self { + stack(Axis(0), &[self.view(), other.view()]).unwrap() + } + + fn dot(&self, other: &Self) -> Self { + self.dot(other) + } + + fn vector_dot(&self, other: &Self) -> f64 { + self.dot(&other.view().reversed_axes())[[0, 0]] + } + + fn slice(&self, rows: Range, cols: Range) -> Self { + self.slice(s![rows, cols]).to_owned() + } + + fn approximate_eq(&self, other: &Self, error: f64) -> bool { + false + } + + fn add_mut(&mut self, other: &Self) -> &Self { + *self += other; + self + } + + fn sub_mut(&mut self, other: &Self) -> &Self { + *self -= other; + self + } + + fn mul_mut(&mut self, other: &Self) -> &Self { + *self *= other; + self + } + + fn div_mut(&mut self, other: &Self) -> &Self{ + *self /= other; + self + } + + fn add_scalar_mut(&mut self, scalar: f64) -> &Self{ + *self += scalar; + self + } + + fn sub_scalar_mut(&mut self, scalar: f64) -> &Self{ + *self -= scalar; + self + } + + fn mul_scalar_mut(&mut self, scalar: f64) -> &Self{ + *self *= scalar; + self + } + + fn div_scalar_mut(&mut self, scalar: f64) -> &Self{ + *self /= scalar; + self + } + + fn transpose(&self) -> Self{ + self.clone().reversed_axes() + } + + fn generate_positive_definite(nrows: usize, ncols: usize) -> Self{ + panic!("generate_positive_definite method is not implemented for ndarray"); + } + + fn rand(nrows: usize, ncols: usize) -> Self{ + panic!("rand method is not implemented for ndarray"); + } + + fn norm2(&self) -> f64{ + self.iter().map(|x| x * x).sum::().sqrt() + } + + fn norm(&self, p:f64) -> f64 { + if p.is_infinite() && p.is_sign_positive() { + self.iter().fold(std::f64::NEG_INFINITY, |f, &val| { + let v = val.abs(); + if f > v { + f + } else { + v + } + }) + } else if p.is_infinite() && p.is_sign_negative() { + self.iter().fold(std::f64::INFINITY, |f, &val| { + let v = val.abs(); + if f < v { + f + } else { + v + } + }) + } else { + + let mut norm = 0f64; + + for xi in self.iter() { + norm += xi.abs().powf(p); + } + + norm.powf(1.0/p) + } + } + + fn negative_mut(&mut self){ + *self *= -1.; + } + + fn reshape(&self, nrows: usize, ncols: usize) -> Self{ + self.clone().into_shape((nrows, ncols)).unwrap() + } + + fn copy_from(&mut self, other: &Self){ + self.assign(&other); + } + + fn abs_mut(&mut self) -> &Self{ + self + } + + fn sum(&self) -> f64{ + self.sum() + } + + fn max_diff(&self, other: &Self) -> f64{ + let mut max_diff = 0f64; + for r in 0..self.nrows() { + for c in 0..self.ncols() { + max_diff = max_diff.max((self[(r, c)] - other[(r, c)]).abs()); + } + } + max_diff + } + + fn softmax_mut(&mut self){ + let max = self.iter().map(|x| x.abs()).fold(std::f64::NEG_INFINITY, |a, b| a.max(b)); + let mut z = 0.; + for r in 0..self.nrows() { + for c in 0..self.ncols() { + let p = (self[(r, c)] - max).exp(); + self.set(r, c, p); + z += p; + } + } + for r in 0..self.nrows() { + for c in 0..self.ncols() { + self.set(r, c, self[(r, c)] / z); + } + } + } + + fn pow_mut(&mut self, p: f64) -> &Self{ + for r in 0..self.nrows() { + for c in 0..self.ncols() { + self.set(r, c, self[(r, c)].powf(p)); + } + } + self + } + + fn argmax(&self) -> Vec{ + let mut res = vec![0usize; self.nrows()]; + + for r in 0..self.nrows() { + let mut max = std::f64::NEG_INFINITY; + let mut max_pos = 0usize; + for c in 0..self.ncols() { + let v = self[(r, c)]; + if max < v { + max = v; + max_pos = c; + } + } + res[r] = max_pos; + } + + res + + } + + fn unique(&self) -> Vec { + let mut result = self.clone().into_raw_vec(); + result.sort_by(|a, b| a.partial_cmp(b).unwrap()); + result.dedup(); + result + } + +} + +#[cfg(test)] +mod tests { + use super::*; + use ndarray::{arr2, Array2}; + + #[test] + fn add_mut() { + + let mut a1 = arr2(&[[ 1., 2., 3.], + [4., 5., 6.]]); + let a2 = a1.clone(); + let a3 = a1.clone() + a2.clone(); + a1.add_mut(&a2); + + assert_eq!(a1, a3); + + } + + #[test] + fn sub_mut() { + + let mut a1 = arr2(&[[ 1., 2., 3.], + [4., 5., 6.]]); + let a2 = a1.clone(); + let a3 = a1.clone() - a2.clone(); + a1.sub_mut(&a2); + + assert_eq!(a1, a3); + + } + + #[test] + fn mul_mut() { + + let mut a1 = arr2(&[[ 1., 2., 3.], + [4., 5., 6.]]); + let a2 = a1.clone(); + let a3 = a1.clone() * a2.clone(); + a1.mul_mut(&a2); + + assert_eq!(a1, a3); + + } + + #[test] + fn div_mut() { + + let mut a1 = arr2(&[[ 1., 2., 3.], + [4., 5., 6.]]); + let a2 = a1.clone(); + let a3 = a1.clone() / a2.clone(); + a1.div_mut(&a2); + + assert_eq!(a1, a3); + + } + + #[test] + fn from_array_from_vec() { + + let a1 = arr2(&[[ 1., 2., 3.], + [4., 5., 6.]]); + let a2 = Array2::from_array(2, 3, &[1., 2., 3., 4., 5., 6.]); + let a3 = Array2::from_vec(2, 3, vec![1., 2., 3., 4., 5., 6.]); + + assert_eq!(a1, a2); + assert_eq!(a1, a3); + + } + + #[test] + fn vstack_hstack() { + + let a1 = arr2(&[[1., 2., 3.], + [4., 5., 6.]]); + let a2 = arr2(&[[ 7.], [8.]]); + + let a3 = arr2(&[[9., 10., 11., 12.]]); + + let expected = arr2(&[[1., 2., 3., 7.], + [4., 5., 6., 8.], + [9., 10., 11., 12.]]); + + let result = a1.v_stack(&a2).h_stack(&a3); + + assert_eq!(result, expected); + + } + + #[test] + fn to_raw_vector() { + let result = arr2(&[[1., 2., 3.], [4., 5., 6.]]).to_raw_vector(); + let expected = vec![1., 2., 3., 4., 5., 6.]; + + assert_eq!(result, expected); + } + + #[test] + fn get_set() { + let mut result = arr2(&[[1., 2., 3.], [4., 5., 6.]]); + let expected = arr2(&[[1., 2., 3.], [4., 10., 6.]]); + + result.set(1, 1, 10.); + + assert_eq!(result, expected); + assert_eq!(10., Matrix::get(&result, 1, 1)); + } + + #[test] + fn dot() { + + let a = arr2(&[ + [1., 2., 3.], + [4., 5., 6.]]); + let b = arr2(&[ + [1., 2.], + [3., 4.], + [5., 6.]]); + let expected = arr2(&[ + [22., 28.], + [49., 64.]]); + let result = Matrix::dot(&a, &b); + assert_eq!(result, expected); + } + + #[test] + fn vector_dot() { + let a = arr2(&[[1., 2., 3.]]); + let b = arr2(&[[1., 2., 3.]]); + assert_eq!(14., a.vector_dot(&b)); + } + + #[test] + fn slice() { + + let a = arr2( + &[ + [1., 2., 3., 1., 2.], + [4., 5., 6., 3., 4.], + [7., 8., 9., 5., 6.]]); + let expected = arr2( + &[ + [2., 3.], + [5., 6.]]); + let result = Matrix::slice(&a, 0..2, 1..3); + assert_eq!(result, expected); + } + + #[test] + fn scalar_ops() { + let a = arr2(&[[1., 2., 3.]]); + assert_eq!(&arr2(&[[2., 3., 4.]]), a.clone().add_scalar_mut(1.)); + assert_eq!(&arr2(&[[0., 1., 2.]]), a.clone().sub_scalar_mut(1.)); + assert_eq!(&arr2(&[[2., 4., 6.]]), a.clone().mul_scalar_mut(2.)); + assert_eq!(&arr2(&[[0.5, 1., 1.5]]), a.clone().div_scalar_mut(2.)); + } + + #[test] + fn transpose() { + let m = arr2(&[[1.0, 3.0], [2.0, 4.0]]); + let expected = arr2(&[[1.0, 2.0], [3.0, 4.0]]); + let m_transposed = m.transpose(); + assert_eq!(m_transposed, expected); + } + + #[test] + fn norm() { + let v = arr2(&[[3., -2., 6.]]); + assert_eq!(v.norm(1.), 11.); + assert_eq!(v.norm(2.), 7.); + assert_eq!(v.norm(std::f64::INFINITY), 6.); + assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.); + } + + #[test] + fn negative_mut() { + let mut v = arr2(&[[3., -2., 6.]]); + v.negative_mut(); + assert_eq!(v, arr2(&[[-3., 2., -6.]])); + } + + #[test] + fn reshape() { + let m_orig = arr2(&[[1., 2., 3., 4., 5., 6.]]); + let m_2_by_3 = Matrix::reshape(&m_orig, 2, 3); + let m_result = Matrix::reshape(&m_2_by_3, 1, 6); + assert_eq!(Matrix::shape(&m_2_by_3), (2, 3)); + assert_eq!(Matrix::get(&m_2_by_3, 1, 1), 5.); + assert_eq!(Matrix::get(&m_result, 0, 1), 2.); + assert_eq!(Matrix::get(&m_result, 0, 3), 4.); + } + + #[test] + fn copy_from() { + let mut src = arr2(&[[1., 2., 3.]]); + let dst = Array2::::zeros((1, 3)); + src.copy_from(&dst); + assert_eq!(src, dst); + } + + #[test] + fn sum() { + let src = arr2(&[[1., 2., 3.]]); + assert_eq!(src.sum(), 6.); + } + + #[test] + fn max_diff() { + let a1 = arr2(&[[1., 2., 3.], [4., -5., 6.]]); + let a2 = arr2(&[[2., 3., 4.], [1., 0., -12.]]); + assert_eq!(a1.max_diff(&a2), 18.); + assert_eq!(a2.max_diff(&a2), 0.); + } + + #[test] + fn softmax_mut(){ + let mut prob = arr2(&[[1., 2., 3.]]); + prob.softmax_mut(); + assert!((Matrix::get(&prob, 0, 0) - 0.09).abs() < 0.01); + assert!((Matrix::get(&prob, 0, 1) - 0.24).abs() < 0.01); + assert!((Matrix::get(&prob, 0, 2) - 0.66).abs() < 0.01); + } + + #[test] + fn pow_mut(){ + let mut a = arr2(&[[1., 2., 3.]]); + a.pow_mut(3.); + assert_eq!(a, arr2(&[[1., 8., 27.]])); + } + + #[test] + fn argmax(){ + let a = arr2(&[[1., 2., 3.], [-5., -6., -7.], [0.1, 0.2, 0.1]]); + let res = a.argmax(); + assert_eq!(res, vec![2, 0, 1]); + } + + #[test] + fn unique(){ + let a = arr2(&[[1., 2., 2.], [-2., -6., -7.], [2., 3., 4.]]); + let res = a.unique(); + assert_eq!(res.len(), 7); + assert_eq!(res, vec![-7., -6., -2., 1., 2., 3., 4.]); + } +} \ No newline at end of file diff --git a/src/regression/linear_regression.rs b/src/regression/linear_regression.rs index 4b42f98..e042dd4 100644 --- a/src/regression/linear_regression.rs +++ b/src/regression/linear_regression.rs @@ -27,7 +27,7 @@ impl LinearRegression { } let b = y.clone(); - let mut a = x.h_stack(&M::ones(x_nrows, 1)); + let mut a = x.v_stack(&M::ones(x_nrows, 1)); let w = match solver { LinearRegressionSolver::QR => a.qr_solve_mut(b),