Adds OLS and naive linear algebra routine

2019-10-10 08:53:42 -07:00
parent b8e1bf9a80
commit 50744208a9
7 changed files with 547 additions and 7 deletions
@@ -1,4 +1,6 @@
 pub mod classification;
+pub mod regression;
+pub mod linalg;
 pub mod math;
 pub mod error;
 pub mod algorithm;
@@ -0,0 +1,31 @@
+use std::ops::Range;
+
+pub mod naive;
+
+pub trait Matrix: Into<Vec<f64>>{ 
+
+    fn get(&self, row: usize, col: usize) -> f64; 
+
+    fn qr_solve_mut(&mut self, b: Self) -> Self;
+
+    fn zeros(nrows: usize, ncols: usize) -> Self;
+
+    fn ones(nrows: usize, ncols: usize) -> Self;
+
+    fn fill(nrows: usize, ncols: usize, value: f64) -> Self;
+
+    fn shape(&self) -> (usize, usize);
+
+    fn v_stack(&self, other: &Self) -> Self;
+
+    fn h_stack(&self, other: &Self) -> Self;
+
+    fn dot(&self, other: &Self) -> Self;
+
+    fn slice(&self, rows: Range<usize>, cols: Range<usize>) -> Self;
+
+    fn approximate_eq(&self, other: &Self, error: f64) -> bool;
+
+    fn add_mut(&mut self, other: &Self);
+
+}
@@ -0,0 +1,413 @@
+use std::ops::Range;
+use crate::linalg::Matrix;
+use crate::math;
+
+#[derive(Debug)]
+pub struct DenseMatrix {
+
+    ncols: usize,
+    nrows: usize,
+    values: Vec<f64> 
+
+}
+
+impl DenseMatrix {
+
+    pub fn from_2d_array(values: &[&[f64]]) -> DenseMatrix {
+        DenseMatrix::from_2d_vec(&values.into_iter().map(|row| Vec::from(*row)).collect())
+    }
+
+    pub fn from_2d_vec(values: &Vec<Vec<f64>>) -> DenseMatrix {
+        let nrows = values.len();
+        let ncols = values.first().unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector")).len();
+        let mut m = DenseMatrix {
+            ncols: ncols,
+            nrows: nrows,
+            values: vec![0f64; ncols*nrows]
+        };
+        for row in 0..nrows {
+            for col in 0..ncols {
+                m.set(row, col, values[row][col]);
+            }
+        }
+        m
+    }
+
+    pub fn from_array(nrows: usize, ncols: usize, values: &[f64]) -> DenseMatrix {
+       DenseMatrix::from_vec(nrows, ncols, Vec::from(values)) 
+    }
+
+    pub fn from_vec(nrows: usize, ncols: usize, values: Vec<f64>) -> DenseMatrix {
+        DenseMatrix {
+            ncols: ncols,
+            nrows: nrows,
+            values: values
+        }
+    }
+
+    pub fn div_mut(&mut self, b: DenseMatrix) -> () {
+        if self.nrows != b.nrows || self.ncols != b.ncols {
+            panic!("Can't divide matrices of different sizes.");
+        }
+
+        for i in 0..self.values.len() {
+            self.values[i] /= b.values[i];
+        }
+    }
+
+    fn set(&mut self, row: usize, col: usize, x: f64) {
+        self.values[col*self.nrows + row] = x;        
+    }
+
+    fn div_element_mut(&mut self, row: usize, col: usize, x: f64) {
+        self.values[col*self.nrows + row] /= x;
+    }
+
+    fn add_element_mut(&mut self, row: usize, col: usize, x: f64) {
+        self.values[col*self.nrows + row] += x
+    }
+
+    fn sub_element_mut(&mut self, row: usize, col: usize, x: f64) {
+        self.values[col*self.nrows + row] -= x;
+    }
+    
+}
+
+impl PartialEq for DenseMatrix {
+    fn eq(&self, other: &Self) -> bool {
+        if self.ncols != other.ncols || self.nrows != other.nrows {
+            return false
+        }
+
+        let len = self.values.len();
+        let other_len = other.values.len();
+
+        if len != other_len {
+            return false;
+        }
+
+        for i in 0..len {
+            if (self.values[i] - other.values[i]).abs() > math::SMALL_ERROR {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+impl Into<Vec<f64>> for DenseMatrix {
+    fn into(self) -> Vec<f64> {
+        self.values
+    }
+}
+
+impl Matrix for DenseMatrix {
+
+    fn get(&self, row: usize, col: usize) -> f64 {
+        self.values[col*self.nrows + row]
+    }
+
+    fn zeros(nrows: usize, ncols: usize) -> DenseMatrix {
+        DenseMatrix::fill(nrows, ncols, 0f64)
+    }
+
+    fn ones(nrows: usize, ncols: usize) -> DenseMatrix {
+        DenseMatrix::fill(nrows, ncols, 1f64)
+    }
+
+    fn shape(&self) -> (usize, usize) {
+        (self.nrows, self.ncols)
+    }
+
+    fn v_stack(&self, other: &Self) -> Self {
+        if self.ncols != other.ncols {
+            panic!("Number of columns in both matrices should be equal");
+        }
+        let mut result = DenseMatrix::zeros(self.nrows + other.nrows, self.ncols);
+        for c in 0..self.ncols {
+            for r in 0..self.nrows+other.nrows {                 
+                if r <  self.nrows {              
+                    result.set(r, c, self.get(r, c));
+                } else {
+                    result.set(r, c, other.get(r - self.nrows, c));
+                }
+            }
+        }    
+        result    
+    }
+
+    fn h_stack(&self, other: &Self) -> Self{
+        if self.nrows != other.nrows {
+            panic!("Number of rows in both matrices should be equal");
+        }
+        let mut result = DenseMatrix::zeros(self.nrows, self.ncols + other.ncols);
+        for r in 0..self.nrows {
+            for c in 0..self.ncols+other.ncols {                             
+                if c <  self.ncols {              
+                    result.set(r, c, self.get(r, c));
+                } else {
+                    result.set(r, c, other.get(r, c - self.ncols));
+                }
+            }
+        }
+        result
+    }
+
+    fn dot(&self, other: &Self) -> Self {
+        if self.ncols != other.nrows {
+            panic!("Number of rows of A should equal number of columns of B");
+        }
+        let inner_d = self.ncols;
+        let mut result = DenseMatrix::zeros(self.nrows, other.ncols);
+
+        for r in 0..self.nrows {
+            for c in 0..other.ncols {
+                let mut s = 0f64;
+                for i in 0..inner_d {
+                    s += self.get(r, i) * other.get(i, c);
+                }
+                result.set(r, c, s);
+            }
+        }
+
+        result
+    }
+
+    fn slice(&self, rows: Range<usize>, cols: Range<usize>) -> DenseMatrix {
+
+        let ncols = cols.len();
+        let nrows = rows.len();
+
+        let mut m = DenseMatrix::from_vec(nrows, ncols, vec![0f64; nrows * ncols]);
+
+        for r in rows.start..rows.end {
+            for c in cols.start..cols.end {
+                m.set(r-rows.start, c-cols.start, self.get(r, c));
+            }
+        }
+
+        m
+    }
+
+    fn qr_solve_mut(&mut self, mut b: DenseMatrix) -> DenseMatrix {
+        let m = self.nrows;
+        let n = self.ncols;
+        let nrhs = b.ncols;
+
+        let mut r_diagonal: Vec<f64> = vec![0f64; n];
+
+        for k in 0..n {
+            let mut nrm = 0f64;
+            for i in k..m {
+                nrm = nrm.hypot(self.get(i, k));
+            }
+
+            if nrm > math::SMALL_ERROR {
+
+                if self.get(k, k) < 0f64 {
+                    nrm = -nrm;
+                }
+                for i in k..m {
+                    self.div_element_mut(i, k, nrm);
+                }
+                self.add_element_mut(k, k, 1f64);
+
+                for j in k+1..n {
+                    let mut s = 0f64;
+                    for i in k..m {
+                        s += self.get(i, k) * self.get(i, j);
+                    }
+                    s = -s / self.get(k, k);
+                    for i in k..m {
+                        self.add_element_mut(i, j, s * self.get(i, k));
+                    }
+                }
+            }            
+            r_diagonal[k] = -nrm;
+        }        
+        
+        for j in 0..r_diagonal.len() {
+            if r_diagonal[j].abs() < math::SMALL_ERROR  {
+                panic!("Matrix is rank deficient.");                
+            }
+        }
+
+        for k in 0..n {
+            for j in 0..nrhs {
+                let mut s = 0f64;
+                for i in k..m {
+                    s += self.get(i, k) * b.get(i, j);
+                }
+                s = -s / self.get(k, k);
+                for i in k..m {
+                    b.add_element_mut(i, j, s * self.get(i, k));
+                }
+            }
+        }         
+
+        for k in (0..n).rev() {
+            for j in 0..nrhs {
+                b.set(k, j, b.get(k, j) / r_diagonal[k]);
+            }
+            
+            for i in 0..k {
+                for j in 0..nrhs {
+                    b.sub_element_mut(i, j, b.get(k, j) * self.get(i, k));
+                }
+            }
+        }
+
+        b
+
+    }
+
+    fn approximate_eq(&self, other: &Self, error: f64) -> bool {
+        if self.ncols != other.ncols || self.nrows != other.nrows {
+            return false
+        }
+
+        for c in 0..self.ncols {
+            for r in 0..self.nrows {
+                if (self.get(r, c) - other.get(r, c)).abs() > error {
+                    return false
+                }
+            }
+        }
+
+        true
+    }
+
+    fn fill(nrows: usize, ncols: usize, value: f64) -> Self {
+        DenseMatrix::from_vec(nrows, ncols, vec![value; ncols * nrows])
+    }
+
+    fn add_mut(&mut self, other: &Self) {
+        if self.ncols != other.ncols || self.nrows != other.nrows {
+            panic!("A and B should have the same shape");
+        }        
+        for c in 0..self.ncols {
+            for r in 0..self.nrows {
+                self.add_element_mut(r, c, other.get(r, c));
+            }
+        }
+    }
+
+}
+
+#[cfg(test)]
+mod tests {    
+    use super::*; 
+
+    #[test]
+    fn qr_solve_mut() { 
+
+            let mut a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
+            let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2],&[0.5, 0.8], &[0.5, 0.3]]);
+            let expected_w = DenseMatrix::from_array(3, 2, &[-0.20270270270270263, 0.8783783783783784, 0.4729729729729729, -1.2837837837837829, 2.2297297297297303, 0.6621621621621613]);
+            let w = a.qr_solve_mut(b);   
+            assert_eq!(w, expected_w);
+    }
+
+    #[test]
+    fn v_stack() { 
+
+            let a = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3.],
+                    &[4., 5., 6.],
+                    &[7., 8., 9.]]);
+            let b = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3.],
+                    &[4., 5., 6.]]);
+            let expected = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3.], 
+                    &[4., 5., 6.], 
+                    &[7., 8., 9.], 
+                    &[1., 2., 3.], 
+                    &[4., 5., 6.]]);
+            let result = a.v_stack(&b);               
+            assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn h_stack() { 
+
+            let a = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3.],
+                    &[4., 5., 6.],
+                    &[7., 8., 9.]]);
+            let b = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2.],
+                    &[3., 4.],
+                    &[5., 6.]]);
+            let expected = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3., 1., 2.], 
+                    &[4., 5., 6., 3., 4.], 
+                    &[7., 8., 9., 5., 6.]]);
+            let result = a.h_stack(&b);               
+            assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn dot() { 
+
+            let a = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3.],
+                    &[4., 5., 6.]]);
+            let b = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2.],
+                    &[3., 4.],
+                    &[5., 6.]]);
+            let expected = DenseMatrix::from_2d_array(
+                &[
+                    &[22., 28.], 
+                    &[49., 64.]]);
+            let result = a.dot(&b);               
+            assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn slice() { 
+
+            let m = DenseMatrix::from_2d_array(
+                &[
+                    &[1., 2., 3., 1., 2.], 
+                    &[4., 5., 6., 3., 4.], 
+                    &[7., 8., 9., 5., 6.]]);
+            let expected = DenseMatrix::from_2d_array(
+                &[
+                    &[2., 3.], 
+                    &[5., 6.]]);
+            let result = m.slice(0..2, 1..3);
+            assert_eq!(result, expected);
+    }
+    
+
+    #[test]
+    fn approximate_eq() {             
+            let m = DenseMatrix::from_2d_array(
+                &[
+                    &[2., 3.], 
+                    &[5., 6.]]);
+            let m_eq = DenseMatrix::from_2d_array(
+                &[
+                    &[2.5, 3.0], 
+                    &[5., 5.5]]);
+            let m_neq = DenseMatrix::from_2d_array(
+                &[
+                    &[3.0, 3.0], 
+                    &[5., 6.5]]);            
+            assert!(m.approximate_eq(&m_eq, 0.5));
+            assert!(!m.approximate_eq(&m_neq, 0.5));
+    }
+
+}
+
@@ -0,0 +1 @@
+pub mod dense_matrix;
@@ -37,15 +37,9 @@ mod tests {
        let a = arr1(&[1, 2, 3]);
        let b = arr1(&[4, 5, 6]);             
        
-        // let r1 = a.distance_to(&b);
-        // let r2 = a.view().distance_to(&b.view());
        let d_arr = Array1::distance(&a, &b);
        let d_view = ArrayView1::distance(&a.view(), &b.view());

-        
-
-        // assert!((r1 - 5.19615242).abs() < 1e-8);
-        // assert!((r2 - 5.19615242).abs() < 1e-8);
        assert!((d_arr - 5.19615242).abs() < 1e-8);
        assert!((d_view - 5.19615242).abs() < 1e-8);
    }    
@@ -0,0 +1,90 @@
+use crate::linalg::Matrix;
+use crate::regression::Regression;
+use std::fmt::Debug;
+
+#[derive(Debug)]
+pub enum LinearRegressionSolver {
+    QR,
+    SVD
+}
+
+#[derive(Debug)]
+pub struct LinearRegression<M: Matrix> {    
+    coefficients: M,
+    intercept: f64,
+    solver: LinearRegressionSolver
+}
+
+impl<M: Matrix> LinearRegression<M> {
+
+    pub fn fit(x: &M, y: &M, solver: LinearRegressionSolver) -> LinearRegression<M>{
+
+        let (x_nrows, num_attributes) = x.shape();
+        let (y_nrows, _) = y.shape();
+
+        if x_nrows != y_nrows {
+            panic!("Number of rows of X doesn't match number of rows of Y");
+        }
+
+        let b = y.v_stack(&M::ones(1, 1));
+        let mut a = x.h_stack(&M::ones(x_nrows, 1));
+
+        let w  = a.qr_solve_mut(b); 
+
+        let wights = w.slice(0..num_attributes, 0..1);  
+
+        LinearRegression {
+            intercept: w.get(num_attributes, 0),
+            coefficients: wights,
+            solver: solver
+        }
+    }
+
+}
+
+impl<M: Matrix> Regression<M> for LinearRegression<M> {
+
+
+    fn predict(&self, x: M) -> M {
+        let (nrows, _) = x.shape();
+        let mut y_hat = x.dot(&self.coefficients);
+        y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
+        y_hat
+    }
+
+}
+
+#[cfg(test)]
+mod tests {    
+    use super::*; 
+    use crate::linalg::naive::dense_matrix::DenseMatrix;
+
+    #[test]
+    fn knn_fit_predict() { 
+
+            let x = DenseMatrix::from_2d_array(&[
+                &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+                &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
+                &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
+                &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
+                &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
+                &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
+                &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
+                &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
+                &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
+                &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
+                &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
+                &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
+                &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
+                &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
+                &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
+                &[554.894, 400.7, 282.7, 130.081, 1962., 70.551]]);
+            let y = DenseMatrix::from_array(16, 1, &[83.0,  88.5,  88.2,  89.5,  96.2,  98.1,  99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]);        
+
+            let lr = LinearRegression::fit(&x, &y, LinearRegressionSolver::QR);
+
+            let y_hat = lr.predict(x);
+
+            assert!(y.approximate_eq(&y_hat, 5.));
+    }
+}
@@ -0,0 +1,9 @@
+pub mod linear_regression;
+
+use crate::linalg::Matrix;
+
+pub trait Regression<M: Matrix> {    
+
+    fn predict(&self, x: M) -> M;
+
+}