diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 15b3906..895db0f 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -37,6 +37,8 @@ $ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 ``` * find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This need a compiled binary so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file. +* Please take a look at the output of a profiler to spot the most evident performance problems; see [this guide about using a profiler](http://www.codeofview.com/fix-rs/2017/01/24/how-to-optimize-rust-programs-on-linux/). + ## Issue Report Process 1. Go to the project's issues. diff --git a/Cargo.toml b/Cargo.toml index a30db16..48d9180 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "smartcore" description = "Machine Learning in Rust." homepage = "https://smartcorelib.org" -version = "0.3.1" +version = "0.3.2" authors = ["smartcore Developers"] edition = "2021" license = "Apache-2.0" diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index f21e04f..e108cea 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -431,9 +431,9 @@ impl SVDDecomposable for DenseMatrix {} impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView<'a, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { - &self.values[(pos.0 + pos.1 * self.stride)] + &self.values[pos.0 + pos.1 * self.stride] } else { - &self.values[(pos.0 * self.stride + pos.1)] + &self.values[pos.0 * self.stride + pos.1] } } @@ -495,9 +495,9 @@ impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for DenseMatrixView<'a impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixMutView<'a, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { - &self.values[(pos.0 + pos.1 * self.stride)] + &self.values[pos.0 + pos.1 * 
self.stride] } else { - &self.values[(pos.0 * self.stride + pos.1)] + &self.values[pos.0 * self.stride + pos.1] } } @@ -519,9 +519,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray { fn set(&mut self, pos: (usize, usize), x: T) { if self.column_major { - self.values[(pos.0 + pos.1 * self.stride)] = x; + self.values[pos.0 + pos.1 * self.stride] = x; } else { - self.values[(pos.0 * self.stride + pos.1)] = x; + self.values[pos.0 * self.stride + pos.1] = x; } } diff --git a/src/linalg/basic/vector.rs b/src/linalg/basic/vector.rs index 99da981..5d79ab2 100644 --- a/src/linalg/basic/vector.rs +++ b/src/linalg/basic/vector.rs @@ -15,6 +15,25 @@ pub struct VecView<'a, T: Debug + Display + Copy + Sized> { ptr: &'a [T], } +impl Array for &[T] { + fn get(&self, i: usize) -> &T { + &self[i] + } + + fn shape(&self) -> usize { + self.len() + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn iterator<'b>(&'b self, axis: u8) -> Box + 'b> { + assert!(axis == 0, "For one dimensional array `axis` should == 0"); + Box::new(self.iter()) + } +} + impl Array for Vec { fn get(&self, i: usize) -> &T { &self[i] @@ -46,6 +65,7 @@ impl MutArray for Vec { } impl ArrayView1 for Vec {} +impl ArrayView1 for &[T] {} impl MutArrayView1 for Vec {} diff --git a/src/model_selection/kfold.rs b/src/model_selection/kfold.rs index 760881b..d7ad22d 100644 --- a/src/model_selection/kfold.rs +++ b/src/model_selection/kfold.rs @@ -283,9 +283,7 @@ mod tests { (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), ]; - for ((train, test), (expected_train, expected_test)) in - k.split(&x).into_iter().zip(expected) - { + for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) { assert_eq!(test, expected_test); assert_eq!(train, expected_train); } @@ -307,9 +305,7 @@ mod tests { (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), ]; - for ((train, test), (expected_train, expected_test)) in - 
k.split(&x).into_iter().zip(expected) - { + for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) { assert_eq!(test.len(), expected_test.len()); assert_eq!(train.len(), expected_train.len()); } diff --git a/src/readers/csv.rs b/src/readers/csv.rs index 730f293..d67d4b5 100644 --- a/src/readers/csv.rs +++ b/src/readers/csv.rs @@ -83,7 +83,7 @@ where Matrix: Array2, { let csv_text = read_string_from_source(source)?; - let rows: Vec> = extract_row_vectors_from_csv_text::( + let rows: Vec> = extract_row_vectors_from_csv_text( &csv_text, &definition, detect_row_format(&csv_text, &definition)?, @@ -103,12 +103,7 @@ where /// Given a string containing the contents of a csv file, extract its value /// into row-vectors. -fn extract_row_vectors_from_csv_text< - 'a, - T: Number + RealNumber + std::str::FromStr, - RowVector: Array1, - Matrix: Array2, ->( +fn extract_row_vectors_from_csv_text<'a, T: Number + RealNumber + std::str::FromStr>( csv_text: &'a str, definition: &'a CSVDefinition<'_>, row_format: CSVRowFormat<'_>, @@ -305,12 +300,11 @@ mod tests { } mod extract_row_vectors_from_csv_text { use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat}; - use crate::linalg::basic::matrix::DenseMatrix; #[test] fn read_default_csv() { assert_eq!( - extract_row_vectors_from_csv_text::, DenseMatrix<_>>( + extract_row_vectors_from_csv_text::( "column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0", &CSVDefinition::default(), CSVRowFormat { diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 131f44c..252d43a 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -322,19 +322,26 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array let (n, _) = x.shape(); let mut y_hat: Vec = Array1::zeros(n); + let mut row = Vec::with_capacity(n); for i in 0..n { - let row_pred: TX = - self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)); + row.clear(); + 
row.extend(x.get_row(i).iterator(0).copied()); + let row_pred: TX = self.predict_for_row(&row); y_hat.set(i, row_pred); } Ok(y_hat) } - fn predict_for_row(&self, x: Vec) -> TX { + fn predict_for_row(&self, x: &[TX]) -> TX { let mut f = self.b.unwrap(); + let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.instances.as_ref().unwrap().len() { + let xj: Vec<_> = self.instances.as_ref().unwrap()[i] + .iter() + .map(|e| e.to_f64().unwrap()) + .collect(); f += self.w.as_ref().unwrap()[i] * TX::from( self.parameters @@ -343,13 +350,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array .kernel .as_ref() .unwrap() - .apply( - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.instances.as_ref().unwrap()[i] - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), - ) + .apply(&xi, &xj) .unwrap(), ) .unwrap(); @@ -472,14 +473,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let tol = self.parameters.tol; let good_enough = TX::from_i32(1000).unwrap(); + let mut x = Vec::with_capacity(n); for _ in 0..self.parameters.epoch { for i in self.permutate(n) { - self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - &mut cache, - ); + x.clear(); + x.extend(self.x.get_row(i).iterator(0).take(n).copied()); + self.process(i, &x, *self.y.get(i), &mut cache); loop { self.reprocess(tol, &mut cache); self.find_min_max_gradient(); @@ -511,24 +510,17 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let mut cp = 0; let mut cn = 0; + let mut x = Vec::with_capacity(n); for i in self.permutate(n) { + x.clear(); + x.extend(self.x.get_row(i).iterator(0).take(n).copied()); if *self.y.get(i) == TY::one() && cp < few { - if self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - cache, - ) { + if self.process(i, &x, *self.y.get(i), cache) { cp += 1; } } else if *self.y.get(i) == 
TY::from(-1).unwrap() && cn < few - && self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - cache, - ) + && self.process(i, &x, *self.y.get(i), cache) { cn += 1; } @@ -539,7 +531,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } - fn process(&mut self, i: usize, x: Vec, y: TY, cache: &mut Cache) -> bool { + fn process(&mut self, i: usize, x: &[TX], y: TY, cache: &mut Cache) -> bool { for j in 0..self.sv.len() { if self.sv[j].index == i { return true; @@ -551,15 +543,14 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let mut cache_values: Vec<((usize, usize), TX)> = Vec::new(); for v in self.sv.iter() { + let xi: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); + let xj: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); let k = self .parameters .kernel .as_ref() .unwrap() - .apply( - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(); cache_values.push(((i, v.index), TX::from(k).unwrap())); g -= v.alpha * k; @@ -578,7 +569,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 cache.insert(v.0, v.1.to_f64().unwrap()); } - let x_f64 = x.iter().map(|e| e.to_f64().unwrap()).collect(); + let x_f64: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); let k_v = self .parameters .kernel @@ -701,8 +692,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let km = sv1.k; let gm = sv1.grad; let mut best = 0f64; + let xi: Vec<_> = sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { let v = &self.sv[i]; + let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); let z = v.grad - gm; let k = cache.get( sv1, @@ -711,10 +704,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &sv1.x.iter().map(|e| 
e.to_f64().unwrap()).collect(), - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(), ); let mut curv = km + v.k - 2f64 * k; @@ -732,6 +722,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } + let xi: Vec<_> = self.sv[idx_1] + .x + .iter() + .map(|e| e.to_f64().unwrap()) + .collect::>(); + idx_2.map(|idx_2| { ( idx_1, @@ -742,16 +738,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .as_ref() .unwrap() .apply( - &self.sv[idx_1] - .x - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), + &xi, &self.sv[idx_2] .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), ) .unwrap() }), @@ -765,8 +757,11 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let km = sv2.k; let gm = sv2.grad; let mut best = 0f64; + + let xi: Vec<_> = sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { let v = &self.sv[i]; + let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); let z = gm - v.grad; let k = cache.get( sv2, @@ -775,10 +770,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(), ); let mut curv = km + v.k - 2f64 * k; @@ -797,6 +789,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } + let xj: Vec<_> = self.sv[idx_2] + .x + .iter() + .map(|e| e.to_f64().unwrap()) + .collect(); + idx_1.map(|idx_1| { ( idx_1, @@ -811,12 +809,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), - &self.sv[idx_2] - .x - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), + &xj, ) .unwrap() }), @@ -835,12 +829,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .x .iter() 
.map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), &self.sv[idx_2] .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), ) .unwrap(), )), @@ -895,7 +889,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 self.sv[v1].alpha -= step.to_f64().unwrap(); self.sv[v2].alpha += step.to_f64().unwrap(); + let xi_v1: Vec<_> = self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(); + let xi_v2: Vec<_> = self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { + let xj: Vec<_> = self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(); let k2 = cache.get( &self.sv[v2], &self.sv[i], @@ -903,10 +900,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi_v2, &xj) .unwrap(), ); let k1 = cache.get( @@ -916,10 +910,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi_v1, &xj) .unwrap(), ); self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1); diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 6fbd15b..7511aea 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -248,19 +248,20 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' let mut y_hat: Vec = Vec::::zeros(n); + let mut x_i = Vec::with_capacity(n); for i in 0..n { - y_hat.set( - i, - self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)), - ); + x_i.clear(); + x_i.extend(x.get_row(i).iterator(0).copied()); + y_hat.set(i, self.predict_for_row(&x_i)); } Ok(y_hat) } - pub(crate) fn predict_for_row(&self, x: Vec) -> T { + pub(crate) fn predict_for_row(&self, x: &[T]) -> T { let mut f = 
self.b; + let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.instances.as_ref().unwrap().len() { f += self.w.as_ref().unwrap()[i] * T::from( @@ -270,10 +271,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' .kernel .as_ref() .unwrap() - .apply( - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.instances.as_ref().unwrap()[i], - ) + .apply(&xi, &self.instances.as_ref().unwrap()[i]) .unwrap(), ) .unwrap()