From 50744208a988d7a084c9ce12f987541151e4bb7d Mon Sep 17 00:00:00 2001 From: Volodymyr Orlov Date: Thu, 10 Oct 2019 08:53:42 -0700 Subject: [PATCH] Adds OLS and naive linear algebra routine --- src/lib.rs | 2 + src/linalg/mod.rs | 31 +++ src/linalg/naive/dense_matrix.rs | 413 ++++++++++++++++++++++++++++ src/linalg/naive/mod.rs | 1 + src/math/distance/euclidian.rs | 8 +- src/regression/linear_regression.rs | 90 ++++++ src/regression/mod.rs | 9 + 7 files changed, 547 insertions(+), 7 deletions(-) create mode 100644 src/linalg/mod.rs create mode 100644 src/linalg/naive/dense_matrix.rs create mode 100644 src/linalg/naive/mod.rs create mode 100644 src/regression/linear_regression.rs create mode 100644 src/regression/mod.rs diff --git a/src/lib.rs b/src/lib.rs index 3203152..a887a67 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ pub mod classification; +pub mod regression; +pub mod linalg; pub mod math; pub mod error; pub mod algorithm; diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs new file mode 100644 index 0000000..7df3abb --- /dev/null +++ b/src/linalg/mod.rs @@ -0,0 +1,31 @@ +use std::ops::Range; + +pub mod naive; + +pub trait Matrix: Into>{ + + fn get(&self, row: usize, col: usize) -> f64; + + fn qr_solve_mut(&mut self, b: Self) -> Self; + + fn zeros(nrows: usize, ncols: usize) -> Self; + + fn ones(nrows: usize, ncols: usize) -> Self; + + fn fill(nrows: usize, ncols: usize, value: f64) -> Self; + + fn shape(&self) -> (usize, usize); + + fn v_stack(&self, other: &Self) -> Self; + + fn h_stack(&self, other: &Self) -> Self; + + fn dot(&self, other: &Self) -> Self; + + fn slice(&self, rows: Range, cols: Range) -> Self; + + fn approximate_eq(&self, other: &Self, error: f64) -> bool; + + fn add_mut(&mut self, other: &Self); + +} \ No newline at end of file diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs new file mode 100644 index 0000000..1c96bdd --- /dev/null +++ b/src/linalg/naive/dense_matrix.rs @@ -0,0 +1,413 @@ +use std::ops::Range; +use crate::linalg::Matrix; +use crate::math; + +#[derive(Debug)] +pub struct DenseMatrix { + + ncols: usize, + nrows: usize, + values: Vec + +} + +impl DenseMatrix { + + pub fn from_2d_array(values: &[&[f64]]) -> DenseMatrix { + DenseMatrix::from_2d_vec(&values.into_iter().map(|row| Vec::from(*row)).collect()) + } + + pub fn from_2d_vec(values: &Vec>) -> DenseMatrix { + let nrows = values.len(); + let ncols = values.first().unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector")).len(); + let mut m = DenseMatrix { + ncols: ncols, + nrows: nrows, + values: vec![0f64; ncols*nrows] + }; + for row in 0..nrows { + for col in 0..ncols { + m.set(row, col, values[row][col]); + } + } + m + } + + pub fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> DenseMatrix { + DenseMatrix::from_vec(nrows, ncols, Vec::from(values)) + } + + pub fn from_vec(nrows: usize, ncols: usize, values: Vec) -> DenseMatrix { + DenseMatrix { + ncols: ncols, + nrows: nrows, + values: values + } + } + + pub fn div_mut(&mut self, b: DenseMatrix) -> () { + if self.nrows != b.nrows || self.ncols != b.ncols { + panic!("Can't divide matrices of different sizes."); + } + + for i in 0..self.values.len() { + self.values[i] /= b.values[i]; + } + } + + fn set(&mut self, row: usize, col: usize, x: f64) { + self.values[col*self.nrows + row] = x; + } + + fn div_element_mut(&mut self, row: usize, col: usize, x: f64) { + self.values[col*self.nrows + row] /= x; + } + + fn add_element_mut(&mut self, row: usize, col: usize, x: f64) { + self.values[col*self.nrows + row] += x + } + + fn sub_element_mut(&mut self, row: usize, col: usize, x: f64) { + self.values[col*self.nrows + row] -= x; + } + +} + +impl PartialEq for DenseMatrix { + fn eq(&self, other: &Self) -> bool { + if self.ncols != other.ncols || self.nrows != other.nrows { + return false + } + + let len = self.values.len(); + let other_len = other.values.len(); + + if len != other_len { + return false; + } + + for i in 0..len { + if (self.values[i] - other.values[i]).abs() > math::SMALL_ERROR { + return false; + } + } + + true + } +} + +impl Into> for DenseMatrix { + fn into(self) -> Vec { + self.values + } +} + +impl Matrix for DenseMatrix { + + fn get(&self, row: usize, col: usize) -> f64 { + self.values[col*self.nrows + row] + } + + fn zeros(nrows: usize, ncols: usize) -> DenseMatrix { + DenseMatrix::fill(nrows, ncols, 0f64) + } + + fn ones(nrows: usize, ncols: usize) -> DenseMatrix { + DenseMatrix::fill(nrows, ncols, 1f64) + } + + fn shape(&self) -> (usize, usize) { + (self.nrows, self.ncols) + } + + fn v_stack(&self, other: &Self) -> Self { + if self.ncols != other.ncols { + panic!("Number of columns in both matrices should be equal"); + } + let mut result = DenseMatrix::zeros(self.nrows + other.nrows, self.ncols); + for c in 0..self.ncols { + for r in 0..self.nrows+other.nrows { + if r < self.nrows { + result.set(r, c, self.get(r, c)); + } else { + result.set(r, c, other.get(r - self.nrows, c)); + } + } + } + result + } + + fn h_stack(&self, other: &Self) -> Self{ + if self.nrows != other.nrows { + panic!("Number of rows in both matrices should be equal"); + } + let mut result = DenseMatrix::zeros(self.nrows, self.ncols + other.ncols); + for r in 0..self.nrows { + for c in 0..self.ncols+other.ncols { + if c < self.ncols { + result.set(r, c, self.get(r, c)); + } else { + result.set(r, c, other.get(r, c - self.ncols)); + } + } + } + result + } + + fn dot(&self, other: &Self) -> Self { + if self.ncols != other.nrows { + panic!("Number of rows of A should equal number of columns of B"); + } + let inner_d = self.ncols; + let mut result = DenseMatrix::zeros(self.nrows, other.ncols); + + for r in 0..self.nrows { + for c in 0..other.ncols { + let mut s = 0f64; + for i in 0..inner_d { + s += self.get(r, i) * other.get(i, c); + } + result.set(r, c, s); + } + } + + result + } + + fn slice(&self, rows: Range, cols: Range) -> DenseMatrix { + + let ncols = cols.len(); + let nrows = rows.len(); + + let mut m = DenseMatrix::from_vec(nrows, ncols, vec![0f64; nrows * ncols]); + + for r in rows.start..rows.end { + for c in cols.start..cols.end { + m.set(r-rows.start, c-cols.start, self.get(r, c)); + } + } + + m + } + + fn qr_solve_mut(&mut self, mut b: DenseMatrix) -> DenseMatrix { + let m = self.nrows; + let n = self.ncols; + let nrhs = b.ncols; + + let mut r_diagonal: Vec = vec![0f64; n]; + + for k in 0..n { + let mut nrm = 0f64; + for i in k..m { + nrm = nrm.hypot(self.get(i, k)); + } + + if nrm > math::SMALL_ERROR { + + if self.get(k, k) < 0f64 { + nrm = -nrm; + } + for i in k..m { + self.div_element_mut(i, k, nrm); + } + self.add_element_mut(k, k, 1f64); + + for j in k+1..n { + let mut s = 0f64; + for i in k..m { + s += self.get(i, k) * self.get(i, j); + } + s = -s / self.get(k, k); + for i in k..m { + self.add_element_mut(i, j, s * self.get(i, k)); + } + } + } + r_diagonal[k] = -nrm; + } + + for j in 0..r_diagonal.len() { + if r_diagonal[j].abs() < math::SMALL_ERROR { + panic!("Matrix is rank deficient."); + } + } + + for k in 0..n { + for j in 0..nrhs { + let mut s = 0f64; + for i in k..m { + s += self.get(i, k) * b.get(i, j); + } + s = -s / self.get(k, k); + for i in k..m { + b.add_element_mut(i, j, s * self.get(i, k)); + } + } + } + + for k in (0..n).rev() { + for j in 0..nrhs { + b.set(k, j, b.get(k, j) / r_diagonal[k]); + } + + for i in 0..k { + for j in 0..nrhs { + b.sub_element_mut(i, j, b.get(k, j) * self.get(i, k)); + } + } + } + + b + + } + + fn approximate_eq(&self, other: &Self, error: f64) -> bool { + if self.ncols != other.ncols || self.nrows != other.nrows { + return false + } + + for c in 0..self.ncols { + for r in 0..self.nrows { + if (self.get(r, c) - other.get(r, c)).abs() > error { + return false + } + } + } + + true + } + + fn fill(nrows: usize, ncols: usize, value: f64) -> Self { + DenseMatrix::from_vec(nrows, ncols, vec![value; ncols * nrows]) + } + + fn add_mut(&mut self, other: &Self) { + if self.ncols != other.ncols || self.nrows != other.nrows { + panic!("A and B should have the same shape"); + } + for c in 0..self.ncols { + for r in 0..self.nrows { + self.add_element_mut(r, c, other.get(r, c)); + } + } + } + +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn qr_solve_mut() { + + let mut a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); + let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2],&[0.5, 0.8], &[0.5, 0.3]]); + let expected_w = DenseMatrix::from_array(3, 2, &[-0.20270270270270263, 0.8783783783783784, 0.4729729729729729, -1.2837837837837829, 2.2297297297297303, 0.6621621621621613]); + let w = a.qr_solve_mut(b); + assert_eq!(w, expected_w); + } + + #[test] + fn v_stack() { + + let a = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3.], + &[4., 5., 6.], + &[7., 8., 9.]]); + let b = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3.], + &[4., 5., 6.]]); + let expected = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3.], + &[4., 5., 6.], + &[7., 8., 9.], + &[1., 2., 3.], + &[4., 5., 6.]]); + let result = a.v_stack(&b); + assert_eq!(result, expected); + } + + #[test] + fn h_stack() { + + let a = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3.], + &[4., 5., 6.], + &[7., 8., 9.]]); + let b = DenseMatrix::from_2d_array( + &[ + &[1., 2.], + &[3., 4.], + &[5., 6.]]); + let expected = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3., 1., 2.], + &[4., 5., 6., 3., 4.], + &[7., 8., 9., 5., 6.]]); + let result = a.h_stack(&b); + assert_eq!(result, expected); + } + + #[test] + fn dot() { + + let a = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3.], + &[4., 5., 6.]]); + let b = DenseMatrix::from_2d_array( + &[ + &[1., 2.], + &[3., 4.], + &[5., 6.]]); + let expected = DenseMatrix::from_2d_array( + &[ + &[22., 28.], + &[49., 64.]]); + let result = a.dot(&b); + assert_eq!(result, expected); + } + + #[test] + fn slice() { + + let m = DenseMatrix::from_2d_array( + &[ + &[1., 2., 3., 1., 2.], + &[4., 5., 6., 3., 4.], + &[7., 8., 9., 5., 6.]]); + let expected = DenseMatrix::from_2d_array( + &[ + &[2., 3.], + &[5., 6.]]); + let result = m.slice(0..2, 1..3); + assert_eq!(result, expected); + } + + + #[test] + fn approximate_eq() { + let m = DenseMatrix::from_2d_array( + &[ + &[2., 3.], + &[5., 6.]]); + let m_eq = DenseMatrix::from_2d_array( + &[ + &[2.5, 3.0], + &[5., 5.5]]); + let m_neq = DenseMatrix::from_2d_array( + &[ + &[3.0, 3.0], + &[5., 6.5]]); + assert!(m.approximate_eq(&m_eq, 0.5)); + assert!(!m.approximate_eq(&m_neq, 0.5)); + } + +} + diff --git a/src/linalg/naive/mod.rs b/src/linalg/naive/mod.rs new file mode 100644 index 0000000..54d0c2d --- /dev/null +++ b/src/linalg/naive/mod.rs @@ -0,0 +1 @@ +pub mod dense_matrix; \ No newline at end of file diff --git a/src/math/distance/euclidian.rs b/src/math/distance/euclidian.rs index 89894e1..83945a4 100644 --- a/src/math/distance/euclidian.rs +++ b/src/math/distance/euclidian.rs @@ -36,16 +36,10 @@ mod tests { fn measure_simple_euclidian_distance() { let a = arr1(&[1, 2, 3]); let b = arr1(&[4, 5, 6]); - - // let r1 = a.distance_to(&b); - // let r2 = a.view().distance_to(&b.view()); + let d_arr = Array1::distance(&a, &b); let d_view = ArrayView1::distance(&a.view(), &b.view()); - - - // assert!((r1 - 5.19615242).abs() < 1e-8); - // assert!((r2 - 5.19615242).abs() < 1e-8); assert!((d_arr - 5.19615242).abs() < 1e-8); assert!((d_view - 5.19615242).abs() < 1e-8); } diff --git a/src/regression/linear_regression.rs b/src/regression/linear_regression.rs new file mode 100644 index 0000000..3ce7926 --- /dev/null +++ b/src/regression/linear_regression.rs @@ -0,0 +1,90 @@ +use crate::linalg::Matrix; +use crate::regression::Regression; +use std::fmt::Debug; + +#[derive(Debug)] +pub enum LinearRegressionSolver { + QR, + SVD +} + +#[derive(Debug)] +pub struct LinearRegression { + coefficients: M, + intercept: f64, + solver: LinearRegressionSolver +} + +impl LinearRegression { + + pub fn fit(x: &M, y: &M, solver: LinearRegressionSolver) -> LinearRegression{ + + let (x_nrows, num_attributes) = x.shape(); + let (y_nrows, _) = y.shape(); + + if x_nrows != y_nrows { + panic!("Number of rows of X doesn't match number of rows of Y"); + } + + let b = y.v_stack(&M::ones(1, 1)); + let mut a = x.h_stack(&M::ones(x_nrows, 1)); + + let w = a.qr_solve_mut(b); + + let wights = w.slice(0..num_attributes, 0..1); + + LinearRegression { + intercept: w.get(num_attributes, 0), + coefficients: wights, + solver: solver + } + } + +} + +impl Regression for LinearRegression { + + + fn predict(&self, x: M) -> M { + let (nrows, _) = x.shape(); + let mut y_hat = x.dot(&self.coefficients); + y_hat.add_mut(&M::fill(nrows, 1, self.intercept)); + y_hat + } + +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::DenseMatrix; + + #[test] + fn knn_fit_predict() { + + let x = DenseMatrix::from_2d_array(&[ + &[234.289, 235.6, 159.0, 107.608, 1947., 60.323], + &[259.426, 232.5, 145.6, 108.632, 1948., 61.122], + &[258.054, 368.2, 161.6, 109.773, 1949., 60.171], + &[284.599, 335.1, 165.0, 110.929, 1950., 61.187], + &[328.975, 209.9, 309.9, 112.075, 1951., 63.221], + &[346.999, 193.2, 359.4, 113.270, 1952., 63.639], + &[365.385, 187.0, 354.7, 115.094, 1953., 64.989], + &[363.112, 357.8, 335.0, 116.219, 1954., 63.761], + &[397.469, 290.4, 304.8, 117.388, 1955., 66.019], + &[419.180, 282.2, 285.7, 118.734, 1956., 67.857], + &[442.769, 293.6, 279.8, 120.445, 1957., 68.169], + &[444.546, 468.1, 263.7, 121.950, 1958., 66.513], + &[482.704, 381.3, 255.2, 123.366, 1959., 68.655], + &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], + &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], + &[554.894, 400.7, 282.7, 130.081, 1962., 70.551]]); + let y = DenseMatrix::from_array(16, 1, &[83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]); + + let lr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR); + + let y_hat = lr.predict(x); + + assert!(y.approximate_eq(&y_hat, 5.)); + } +} \ No newline at end of file diff --git a/src/regression/mod.rs b/src/regression/mod.rs new file mode 100644 index 0000000..319bf20 --- /dev/null +++ b/src/regression/mod.rs @@ -0,0 +1,9 @@ +pub mod linear_regression; + +use crate::linalg::Matrix; + +pub trait Regression { + + fn predict(&self, x: M) -> M; + +} \ No newline at end of file