diff --git a/src/classification/knn.rs b/src/classification/knn.rs index 02be3a6..b3d8266 100644 --- a/src/classification/knn.rs +++ b/src/classification/knn.rs @@ -1,9 +1,7 @@ -use crate::linalg::Matrix; +use crate::linalg::{Matrix, row_iter}; use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName}; use crate::algorithm::neighbour::linear_search::LinearKNNSearch; use crate::algorithm::neighbour::cover_tree::CoverTree; -use crate::common::Nominal; -use ndarray::{ArrayBase, Data, Ix1, Ix2}; type F = dyn Fn(&Vec, &Vec) -> f64; @@ -24,7 +22,7 @@ impl<'a> KNNClassifier<'a> { let (_, y_n) = y_m.shape(); let (x_n, _) = x.shape(); - let data = x.to_vector(); + let data = row_iter(x).collect(); let mut yi: Vec = vec![0; y_n]; let classes = y_m.unique(); @@ -48,20 +46,16 @@ impl<'a> KNNClassifier<'a> { } pub fn predict(&self, x: &M) -> M::RowVector { - let mut result = M::zeros(1, x.shape().0); - - let (n, _) = x.shape(); + let mut result = M::zeros(1, x.shape().0); - for i in 0..n { - result.set(0, i, self.classes[self.predict_for_row(x, i)]); - } + row_iter(x).enumerate().for_each(|(i, x)| result.set(0, i, self.classes[self.predict_for_row(x)])); result.to_row_vector() } - pub(in crate) fn predict_for_row(&self, x: &M, row: usize) -> usize { + fn predict_for_row(&self, x: Vec) -> usize { - let idxs = self.knn_algorithm.find(&x.get_row_as_vec(row), self.k); + let idxs = self.knn_algorithm.find(&x, self.k); let mut c = vec![0; self.classes.len()]; let mut max_c = 0; let mut max_i = 0; diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 6ff1167..89fcc2c 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -1,12 +1,14 @@ +pub mod naive; +pub mod svd; +pub mod ndarray_bindings; + use std::ops::Range; use std::fmt::Debug; - -pub mod naive; -pub mod ndarray_bindings; +use svd::SVD; pub trait Matrix: Clone + Debug { - type RowVector: Clone + Debug; + type RowVector: Clone + Debug; fn from_row_vector(vec: Self::RowVector) -> Self; @@ -16,25 +18,24 @@ pub trait Matrix: Clone + Debug { fn get_row_as_vec(&self, row: usize) -> Vec; - fn get_col_as_vec(&self, col: usize) -> Vec; - - fn to_vector(&self) -> Vec> { - - let (n, _) = self.shape(); - let mut data = Vec::new(); - - for i in 0..n { - data.push(self.get_row_as_vec(i)); - } - - data - } + fn get_col_as_vec(&self, col: usize) -> Vec; fn set(&mut self, row: usize, col: usize, x: f64); fn qr_solve_mut(&mut self, b: Self) -> Self; - fn svd_solve_mut(&mut self, b: Self) -> Self; + fn svd(&self) -> SVD; + + fn svd_solve_mut(&mut self, b: Self) -> Self { + self.svd_solve(b) + } + + fn svd_solve(&self, b: Self) -> Self { + + let svd = self.svd(); + svd.solve(b) + + } fn zeros(nrows: usize, ncols: usize) -> Self; @@ -170,4 +171,35 @@ pub trait Matrix: Clone + Debug { fn unique(&self) -> Vec; +} + +pub fn row_iter(m: &M) -> RowIter { + RowIter{ + m: m, + pos: 0, + max_pos: m.shape().0 + } +} + +pub struct RowIter<'a, M: Matrix> { + m: &'a M, + pos: usize, + max_pos: usize +} + +impl<'a, M: Matrix> Iterator for RowIter<'a, M> { + + type Item = Vec; + + fn next(&mut self) -> Option> { + let res; + if self.pos < self.max_pos { + res = Some(self.m.get_row_as_vec(self.pos)) + } else { + res = None + } + self.pos += 1; + res + } + } \ No newline at end of file diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index 772afcd..9f56dc3 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -1,5 +1,6 @@ use std::ops::Range; use crate::linalg::{Matrix}; +use crate::linalg::svd::SVD; use crate::math; use rand::prelude::*; @@ -338,14 +339,12 @@ impl Matrix for DenseMatrix { } - fn svd_solve_mut(&mut self, mut b: DenseMatrix) -> DenseMatrix { + fn svd(&self) -> SVD { - if self.nrows != b.nrows { - panic!("Dimensions do not agree. Self.nrows should equal b.nrows but is {}, {}", self.nrows, b.nrows); - } + let mut U = self.clone(); - let m = self.nrows; - let n = self.ncols; + let m = U.nrows; + let n = U.ncols; let (mut l, mut nm) = (0usize, 0usize); let (mut anorm, mut g, mut scale) = (0f64, 0f64, 0f64); @@ -363,32 +362,32 @@ impl Matrix for DenseMatrix { if i < m { for k in i..m { - scale += self.get(k, i).abs(); + scale += U.get(k, i).abs(); } if scale.abs() > math::EPSILON { for k in i..m { - self.div_element_mut(k, i, scale); - s += self.get(k, i) * self.get(k, i); + U.div_element_mut(k, i, scale); + s += U.get(k, i) * U.get(k, i); } - let mut f = self.get(i, i); + let mut f = U.get(i, i); g = -s.sqrt().copysign(f); let h = f * g - s; - self.set(i, i, f - g); + U.set(i, i, f - g); for j in l - 1..n { s = 0f64; for k in i..m { - s += self.get(k, i) * self.get(k, j); + s += U.get(k, i) * U.get(k, j); } f = s / h; for k in i..m { - self.add_element_mut(k, j, f * self.get(k, i)); + U.add_element_mut(k, j, f * U.get(k, i)); } } for k in i..m { - self.mul_element_mut(k, i, scale); + U.mul_element_mut(k, i, scale); } } } @@ -400,37 +399,37 @@ impl Matrix for DenseMatrix { if i + 1 <= m && i + 1 != n { for k in l - 1..n { - scale += self.get(i, k).abs(); + scale += U.get(i, k).abs(); } if scale.abs() > math::EPSILON { for k in l - 1..n { - self.div_element_mut(i, k, scale); - s += self.get(i, k) * self.get(i, k); + U.div_element_mut(i, k, scale); + s += U.get(i, k) * U.get(i, k); } - let f = self.get(i, l - 1); + let f = U.get(i, l - 1); g = -s.sqrt().copysign(f); let h = f * g - s; - self.set(i, l - 1, f - g); + U.set(i, l - 1, f - g); for k in l - 1..n { - rv1[k] = self.get(i, k) / h; + rv1[k] = U.get(i, k) / h; } for j in l - 1..m { s = 0f64; for k in l - 1..n { - s += self.get(j, k) * self.get(i, k); + s += U.get(j, k) * U.get(i, k); } for k in l - 1..n { - self.add_element_mut(j, k, s * rv1[k]); + U.add_element_mut(j, k, s * rv1[k]); } } for k in l - 1..n { - self.mul_element_mut(i, k, scale); + U.mul_element_mut(i, k, scale); } } } @@ -443,12 +442,12 @@ impl Matrix for DenseMatrix { if i < n - 1 { if g != 0.0 { for j in l..n { - v.set(j, i, (self.get(i, j) / self.get(i, l)) / g); + v.set(j, i, (U.get(i, j) / U.get(i, l)) / g); } for j in l..n { let mut s = 0f64; for k in l..n { - s += self.get(i, k) * v.get(k, j); + s += U.get(i, k) * v.get(k, j); } for k in l..n { v.add_element_mut(k, j, s * v.get(k, i)); @@ -469,7 +468,7 @@ impl Matrix for DenseMatrix { l = i + 1; g = w[i]; for j in l..n { - self.set(i, j, 0f64); + U.set(i, j, 0f64); } if g.abs() > math::EPSILON { @@ -477,23 +476,23 @@ impl Matrix for DenseMatrix { for j in l..n { let mut s = 0f64; for k in l..m { - s += self.get(k, i) * self.get(k, j); + s += U.get(k, i) * U.get(k, j); } - let f = (s / self.get(i, i)) * g; + let f = (s / U.get(i, i)) * g; for k in i..m { - self.add_element_mut(k, j, f * self.get(k, i)); + U.add_element_mut(k, j, f * U.get(k, i)); } } for j in i..m { - self.mul_element_mut(j, i, g); + U.mul_element_mut(j, i, g); } } else { for j in i..m { - self.set(j, i, 0f64); + U.set(j, i, 0f64); } } - self.add_element_mut(i, i, 1f64); + U.add_element_mut(i, i, 1f64); } for k in (0..n).rev() { @@ -528,10 +527,10 @@ impl Matrix for DenseMatrix { c = g * h; s = -f * h; for j in 0..m { - let y = self.get(j, nm); - let z = self.get(j, i); - self.set(j, nm, y * c + z * s); - self.set(j, i, z * c - y * s); + let y = U.get(j, nm); + let z = U.get(j, i); + U.set(j, nm, y * c + z * s); + U.set(j, i, z * c - y * s); } } } @@ -595,10 +594,10 @@ impl Matrix for DenseMatrix { f = c * g + s * y; x = c * y - s * g; for jj in 0..m { - y = self.get(jj, j); - z = self.get(jj, i); - self.set(jj, j, y * c + z * s); - self.set(jj, i, z * c - y * s); + y = U.get(jj, j); + z = U.get(jj, i); + U.set(jj, j, y * c + z * s); + U.set(jj, i, z * c - y * s); } } @@ -625,7 +624,7 @@ impl Matrix for DenseMatrix { for i in inc..n { let sw = w[i]; for k in 0..m { - su[k] = self.get(k, i); + su[k] = U.get(k, i); } for k in 0..n { sv[k] = v.get(k, i); @@ -634,7 +633,7 @@ impl Matrix for DenseMatrix { while w[j - inc] < sw { w[j] = w[j - inc]; for k in 0..m { - self.set(k, j, self.get(k, j - inc)); + U.set(k, j, U.get(k, j - inc)); } for k in 0..n { v.set(k, j, v.get(k, j - inc)); @@ -646,7 +645,7 @@ impl Matrix for DenseMatrix { } w[j] = sw; for k in 0..m { - self.set(k, j, su[k]); + U.set(k, j, su[k]); } for k in 0..n { v.set(k, j, sv[k]); @@ -661,7 +660,7 @@ impl Matrix for DenseMatrix { for k in 0..n { let mut s = 0.; for i in 0..m { - if self.get(i, k) < 0. { + if U.get(i, k) < 0. { s += 1.; } } @@ -672,43 +671,17 @@ impl Matrix for DenseMatrix { } if s > (m + n) as f64 / 2. { for i in 0..m { - self.set(i, k, -self.get(i, k)); + U.set(i, k, -U.get(i, k)); } for j in 0..n { v.set(j, k, -v.get(j, k)); } } - } + } - let tol = 0.5 * ((m + n) as f64 + 1.).sqrt() * w[0] * math::EPSILON; + SVD::new(U, v, w) - let p = b.ncols; - - for k in 0..p { - let mut tmp = vec![0f64; v.nrows]; - for j in 0..n { - let mut r = 0f64; - if w[j] > tol { - for i in 0..m { - r += self.get(i, j) * b.get(i, k); - } - r /= w[j]; - } - tmp[j] = r; - } - - for j in 0..n { - let mut r = 0.0; - for jj in 0..n { - r += v.get(j, jj) * tmp[jj]; - } - b.set(j, k, r); - } - } - - b - - } + } fn approximate_eq(&self, other: &Self, error: f64) -> bool { if self.ncols != other.ncols || self.nrows != other.nrows { @@ -1007,17 +980,7 @@ mod tests { let expected_w = DenseMatrix::new(3, 2, vec![-0.20, 0.87, 0.47, -1.28, 2.22, 0.66]); let w = a.qr_solve_mut(b); assert!(w.approximate_eq(&expected_w, 1e-2)); - } - - #[test] - fn svd_solve_mut() { - - let mut a = DenseMatrix::from_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); - let b = DenseMatrix::from_array(&[&[0.5, 0.2],&[0.5, 0.8], &[0.5, 0.3]]); - let expected_w = DenseMatrix::new(3, 2, vec![-0.20, 0.87, 0.47, -1.28, 2.22, 0.66]); - let w = a.svd_solve_mut(b); - assert!(w.approximate_eq(&expected_w, 1e-2)); - } + } #[test] fn h_stack() { diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs index 13947ad..f0bb767 100644 --- a/src/linalg/ndarray_bindings.rs +++ b/src/linalg/ndarray_bindings.rs @@ -1,5 +1,6 @@ use std::ops::Range; use crate::linalg::{Matrix}; +use crate::linalg::svd::SVD; use ndarray::{Array, ArrayBase, OwnedRepr, Ix2, Ix1, Axis, stack, s}; impl Matrix for ArrayBase, Ix2> @@ -32,13 +33,13 @@ impl Matrix for ArrayBase, Ix2> self[[row, col]] = x; } - fn qr_solve_mut(&mut self, b: Self) -> Self { - panic!("qr_solve_mut method is not implemented for ndarray"); + fn svd(&self) -> SVD{ + panic!("svd method is not implemented for ndarray"); } - fn svd_solve_mut(&mut self, b: Self) -> Self { + fn qr_solve_mut(&mut self, b: Self) -> Self { panic!("qr_solve_mut method is not implemented for ndarray"); - } + } fn zeros(nrows: usize, ncols: usize) -> Self { Array::zeros((nrows, ncols)) diff --git a/src/linalg/svd.rs b/src/linalg/svd.rs new file mode 100644 index 0000000..ce4b79c --- /dev/null +++ b/src/linalg/svd.rs @@ -0,0 +1,160 @@ +use crate::linalg::{Matrix}; + +#[derive(Debug, Clone)] +pub struct SVD { + U: M, + V: M, + s: Vec, + full: bool, + m: usize, + n: usize, + tol: f64 +} + +impl SVD { + pub fn new(U: M, V: M, s: Vec) -> SVD { + let m = U.shape().0; + let n = V.shape().0; + let full = s.len() == m.min(n); + let tol = 0.5 * ((m + n) as f64 + 1.).sqrt() * s[0] * std::f64::EPSILON; + SVD { + U: U, + V: V, + s: s, + full: full, + m: m, + n: n, + tol: tol + } + } + + pub fn solve(&self, mut b: M) -> M { + let p = b.shape().1; + + if self.U.shape().0 != b.shape().0 { + panic!("Dimensions do not agree. U.nrows should equal b.nrows but is {}, {}", self.U.shape().0, b.shape().0); + } + + for k in 0..p { + let mut tmp = vec![0f64; self.n]; + for j in 0..self.n { + let mut r = 0f64; + if self.s[j] > self.tol { + for i in 0..self.m { + r += self.U.get(i, j) * b.get(i, k); + } + r /= self.s[j]; + } + tmp[j] = r; + } + + for j in 0..self.n { + let mut r = 0.0; + for jj in 0..self.n { + r += self.V.get(j, jj) * tmp[jj]; + } + b.set(j, k, r); + } + } + + b + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::DenseMatrix; + + #[test] + fn decompose_symmetric() { + + let A = DenseMatrix::from_array(&[ + &[0.9000, 0.4000, 0.7000], + &[0.4000, 0.5000, 0.3000], + &[0.7000, 0.3000, 0.8000]]); + + let s = vec![1.7498382, 0.3165784, 0.1335834]; + + let U = DenseMatrix::from_array(&[ + &[0.6881997, -0.07121225, 0.7220180], + &[0.3700456, 0.89044952, -0.2648886], + &[0.6240573, -0.44947578, -0.639158] + ]); + + let V = DenseMatrix::from_array(&[ + &[0.6881997, -0.07121225, 0.7220180], + &[0.3700456, 0.89044952, -0.2648886], + &[0.6240573, -0.44947578, -0.6391588] + ]); + + let svd = A.svd(); + + assert!(V.abs().approximate_eq(&svd.V.abs(), 1e-4)); + assert!(U.abs().approximate_eq(&svd.U.abs(), 1e-4)); + for i in 0..s.len() { + assert!((s[i] - svd.s[i]).abs() < 1e-4); + } + + } + + #[test] + fn decompose_asymmetric() { + + let A = DenseMatrix::from_array(&[ + &[1.19720880, -1.8391378, 0.3019585, -1.1165701, -1.7210814, 0.4918882, -0.04247433], + &[0.06605075, 1.0315583, 0.8294362, -0.3646043, -1.6038017, -0.9188110, -0.63760340], + &[-1.02637715, 1.0747931, -0.8089055, -0.4726863, -0.2064826, -0.3325532, 0.17966051], + &[-1.45817729, -0.8942353, 0.3459245, 1.5068363, -2.0180708, -0.3696350, -1.19575563], + &[-0.07318103, -0.2783787, 1.2237598, 0.1995332, 0.2545336, -0.1392502, -1.88207227], + &[0.88248425, -0.9360321, 0.1393172, 0.1393281, -0.3277873, -0.5553013, 1.63805985], + &[0.12641406, -0.8710055, -0.2712301, 0.2296515, 1.1781535, -0.2158704, -0.27529472] + ]); + + let s = vec![3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515]; + + let U = DenseMatrix::from_array(&[ + &[-0.3082776, 0.77676231, 0.01330514, 0.23231424, -0.47682758, 0.13927109, 0.02640713], + &[-0.4013477, -0.09112050, 0.48754440, 0.47371793, 0.40636608, 0.24600706, -0.37796295], + &[0.0599719, -0.31406586, 0.45428229, -0.08071283, -0.38432597, 0.57320261, 0.45673993], + &[-0.7694214, -0.12681435, -0.05536793, -0.62189972, -0.02075522, -0.01724911, -0.03681864], + &[-0.3319069, -0.17984404, -0.54466777, 0.45335157, 0.19377726, 0.12333423, 0.55003852], + &[0.1259351, 0.49087824, 0.16349687, -0.32080176, 0.64828744, 0.20643772, 0.38812467], + &[0.1491884, 0.01768604, -0.47884363, -0.14108924, 0.03922507, 0.73034065, -0.43965505] + ]); + + let V = DenseMatrix::from_array(&[ + &[-0.2122609, -0.54650056, 0.08071332, -0.43239135, -0.2925067, 0.1414550, 0.59769207], + &[-0.1943605, 0.63132116, -0.54059857, -0.37089970, -0.1363031, 0.2892641, 0.17774114], + &[0.3031265, -0.06182488, 0.18579097, -0.38606409, -0.5364911, 0.2983466, -0.58642548], + &[0.1844063, 0.24425278, 0.25923756, 0.59043765, -0.4435443, 0.3959057, 0.37019098], + &[-0.7164205, 0.30694911, 0.58264743, -0.07458095, -0.1142140, -0.1311972, -0.13124764], + &[-0.1103067, -0.10633600, 0.18257905, -0.03638501, 0.5722925, 0.7784398, -0.09153611], + &[-0.5156083, -0.36573746, -0.47613340, 0.41342817, -0.2659765, 0.1654796, -0.32346758] + ]); + + let svd = A.svd(); + + assert!(V.abs().approximate_eq(&svd.V.abs(), 1e-4)); + assert!(U.abs().approximate_eq(&svd.U.abs(), 1e-4)); + for i in 0..s.len() { + assert!((s[i] - svd.s[i]).abs() < 1e-4); + } + + } + + #[test] + fn solve() { + + let mut a = DenseMatrix::from_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); + let b = DenseMatrix::from_array(&[&[0.5, 0.2],&[0.5, 0.8], &[0.5, 0.3]]); + let expected_w = DenseMatrix::from_array(&[ + &[-0.20, -1.28], + &[0.87, 2.22], + &[0.47, 0.66] + ]); + let w = a.svd_solve_mut(b); + assert!(w.approximate_eq(&expected_w, 1e-2)); + } + +} \ No newline at end of file