diff --git a/.circleci/config.yml b/.circleci/config.yml index 4ed3135..17da167 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,5 +1,11 @@ version: 2.1 +workflows: + version: 2.1 + build: + jobs: + - build + - clippy jobs: build: docker: @@ -24,3 +30,14 @@ jobs: paths: - "~/.cargo" - "./target" + clippy: + docker: + - image: circleci/rust:latest + steps: + - checkout + - run: + name: Install cargo clippy + command: rustup component add clippy + - run: + name: Run cargo clippy + command: cargo clippy --all-features -- -Drust-2018-idioms -Dwarnings diff --git a/src/algorithm/neighbour/bbd_tree.rs b/src/algorithm/neighbour/bbd_tree.rs index cc71f54..85e6628 100644 --- a/src/algorithm/neighbour/bbd_tree.rs +++ b/src/algorithm/neighbour/bbd_tree.rs @@ -50,8 +50,8 @@ impl BBDTree { } let mut tree = BBDTree { - nodes: nodes, - index: index, + nodes, + index, root: 0, }; @@ -113,7 +113,7 @@ impl BBDTree { } } - if !self.nodes[node].lower.is_none() { + if self.nodes[node].lower.is_some() { let mut new_candidates = vec![0; k]; let mut newk = 0; @@ -134,7 +134,7 @@ impl BBDTree { return self.filter( self.nodes[node].lower.unwrap(), centroids, - &mut new_candidates, + &new_candidates, newk, sums, counts, @@ -142,7 +142,7 @@ impl BBDTree { ) + self.filter( self.nodes[node].upper.unwrap(), centroids, - &mut new_candidates, + &new_candidates, newk, sums, counts, @@ -152,7 +152,7 @@ impl BBDTree { } for i in 0..d { - sums[closest][i] = sums[closest][i] + self.nodes[node].sum[i]; + sums[closest][i] += self.nodes[node].sum[i]; } counts[closest] += self.nodes[node].count; @@ -184,11 +184,11 @@ impl BBDTree { let mut rhs = T::zero(); for i in 0..d { let diff = test[i] - best[i]; - lhs = lhs + diff * diff; + lhs += diff * diff; if diff > T::zero() { - rhs = rhs + (center[i] + radius[i] - best[i]) * diff; + rhs += (center[i] + radius[i] - best[i]) * diff; } else { - rhs = rhs + (center[i] - radius[i] - best[i]) * diff; + rhs += (center[i] - radius[i] - best[i]) * 
diff; } } @@ -244,7 +244,7 @@ impl BBDTree { if end > begin + 1 { let len = end - begin; for i in 0..d { - node.sum[i] = node.sum[i] * T::from(len).unwrap(); + node.sum[i] *= T::from(len).unwrap(); } } @@ -261,9 +261,7 @@ impl BBDTree { let mut i2_good = data.get(self.index[i2], split_index) >= split_cutoff; if !i1_good && !i2_good { - let temp = self.index[i1]; - self.index[i1] = self.index[i2]; - self.index[i2] = temp; + self.index.swap(i1, i2); i1_good = true; i2_good = true; } @@ -302,7 +300,7 @@ impl BBDTree { let mut scatter = T::zero(); for i in 0..d { let x = (node.sum[i] / T::from(node.count).unwrap()) - center[i]; - scatter = scatter + x * x; + scatter += x * x; } node.cost + T::from(node.count).unwrap() * scatter } diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index da870d2..e7dbac0 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -51,7 +51,7 @@ impl> PartialEq for CoverTree { return false; } } - return true; + true } } @@ -84,11 +84,11 @@ impl> CoverTree scale: 0, }; let mut tree = CoverTree { - base: base, + base, inv_log_base: F::one() / base.ln(), - distance: distance, - root: root, - data: data, + distance, + root, + data, identical_excluded: false, }; @@ -101,7 +101,7 @@ impl> CoverTree /// * `p` - look for k nearest points to `p` /// * `k` - the number of nearest neighbors to return pub fn find(&self, p: &T, k: usize) -> Result, Failed> { - if k <= 0 { + if k == 0 { return Err(Failed::because(FailedError::FindFailed, "k should be > 0")); } @@ -147,10 +147,11 @@ impl> CoverTree *heap.peek() }; if d <= (upper_bound + child.max_dist) { - if c > 0 && d < upper_bound { - if !self.identical_excluded || self.get_data_value(child.idx) != p { - heap.add(d); - } + if c > 0 + && d < upper_bound + && (!self.identical_excluded || self.get_data_value(child.idx) != p) + { + heap.add(d); } if !child.children.is_empty() { @@ -234,7 +235,7 @@ impl> CoverTree fn 
new_leaf(&self, idx: usize) -> Node { Node { - idx: idx, + idx, max_dist: F::zero(), parent_dist: F::zero(), children: Vec::new(), @@ -298,7 +299,7 @@ impl> CoverTree idx: p, max_dist: F::zero(), parent_dist: F::zero(), - children: children, + children, scale: 100, } } else { @@ -368,7 +369,7 @@ impl> CoverTree idx: p, max_dist: self.max(consumed_set), parent_dist: F::zero(), - children: children, + children, scale: (top_scale - max_scale), } } @@ -442,7 +443,7 @@ impl> CoverTree max = n.dist[n.dist.len() - 1]; } } - return max; + max } } diff --git a/src/algorithm/neighbour/linear_search.rs b/src/algorithm/neighbour/linear_search.rs index e89a793..d09f2ed 100644 --- a/src/algorithm/neighbour/linear_search.rs +++ b/src/algorithm/neighbour/linear_search.rs @@ -44,8 +44,8 @@ impl> LinearKNNSearch { /// * `distance` - distance metric to use for searching. This function should extend [`Distance`](../../../math/distance/index.html) interface. pub fn new(data: Vec, distance: D) -> Result, Failed> { Ok(LinearKNNSearch { - data: data, - distance: distance, + data, + distance, f: PhantomData, }) } @@ -157,7 +157,7 @@ mod tests { .iter() .map(|v| v.0) .collect(); - found_idxs1.sort(); + found_idxs1.sort_unstable(); assert_eq!(vec!(0, 1, 2), found_idxs1); @@ -167,7 +167,7 @@ mod tests { .iter() .map(|v| *v.2) .collect(); - found_idxs1.sort(); + found_idxs1.sort_unstable(); assert_eq!(vec!(2, 3, 4, 5, 6, 7, 8), found_idxs1); @@ -187,7 +187,7 @@ mod tests { .iter() .map(|v| v.0) .collect(); - found_idxs2.sort(); + found_idxs2.sort_unstable(); assert_eq!(vec!(1, 2, 3), found_idxs2); } diff --git a/src/algorithm/neighbour/mod.rs b/src/algorithm/neighbour/mod.rs index 0a4f21a..7ef1c5c 100644 --- a/src/algorithm/neighbour/mod.rs +++ b/src/algorithm/neighbour/mod.rs @@ -66,10 +66,10 @@ impl KNNAlgorithmName { ) -> Result, Failed> { match *self { KNNAlgorithmName::LinearSearch => { - LinearKNNSearch::new(data, distance).map(|a| KNNAlgorithm::LinearSearch(a)) + 
LinearKNNSearch::new(data, distance).map(KNNAlgorithm::LinearSearch) } KNNAlgorithmName::CoverTree => { - CoverTree::new(data, distance).map(|a| KNNAlgorithm::CoverTree(a)) + CoverTree::new(data, distance).map(KNNAlgorithm::CoverTree) } } } diff --git a/src/algorithm/sort/heap_select.rs b/src/algorithm/sort/heap_select.rs index ae3ff18..a44b2bb 100644 --- a/src/algorithm/sort/heap_select.rs +++ b/src/algorithm/sort/heap_select.rs @@ -15,7 +15,7 @@ pub struct HeapSelection { impl<'a, T: PartialOrd + Debug> HeapSelection { pub fn with_capacity(k: usize) -> HeapSelection { HeapSelection { - k: k, + k, n: 0, sorted: false, heap: Vec::new(), @@ -51,7 +51,7 @@ impl<'a, T: PartialOrd + Debug> HeapSelection { pub fn peek(&self) -> &T { if self.sorted { - return &self.heap[0]; + &self.heap[0] } else { &self .heap @@ -62,11 +62,11 @@ impl<'a, T: PartialOrd + Debug> HeapSelection { } pub fn peek_mut(&mut self) -> &mut T { - return &mut self.heap[0]; + &mut self.heap[0] } pub fn get(self) -> Vec { - return self.heap; + self.heap } fn sift_down(&mut self, k: usize, n: usize) { diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 488a7ac..e595028 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -29,8 +29,6 @@ //! * ["A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise", Ester M., Kriegel HP., Sander J., Xu X.](http://faculty.marshall.usc.edu/gareth-james/ISL/) //! 
* ["Density-Based Clustering in Spatial Databases: The Algorithm GDBSCAN and its Applications", Sander J., Ester M., Kriegel HP., Xu X.](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.63.1629&rep=rep1&type=pdf) -extern crate rand; - use std::fmt::Debug; use std::iter::Sum; @@ -93,11 +91,11 @@ impl, T>> DBSCAN { parameters: DBSCANParameters, ) -> Result, Failed> { if parameters.min_samples < 1 { - return Err(Failed::fit(&format!("Invalid minPts"))); + return Err(Failed::fit(&"Invalid minPts".to_string())); } if parameters.eps <= T::zero() { - return Err(Failed::fit(&format!("Invalid radius: "))); + return Err(Failed::fit(&"Invalid radius: ".to_string())); } let mut k = 0; diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index eff65aa..26a4038 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -52,8 +52,6 @@ //! * ["An Introduction to Statistical Learning", James G., Witten D., Hastie T., Tibshirani R., 10.3.1 K-Means Clustering](http://faculty.marshall.usc.edu/gareth-james/ISL/) //! 
* ["k-means++: The Advantages of Careful Seeding", Arthur D., Vassilvitskii S.](http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf) -extern crate rand; - use rand::Rng; use std::fmt::Debug; use std::iter::Sum; @@ -129,7 +127,7 @@ impl KMeans { return Err(Failed::fit(&format!("invalid number of clusters: {}", k))); } - if parameters.max_iter <= 0 { + if parameters.max_iter == 0 { return Err(Failed::fit(&format!( "invalid maximum number of iterations: {}", parameters.max_iter @@ -149,13 +147,13 @@ impl KMeans { for i in 0..n { for j in 0..d { - centroids[y[i]][j] = centroids[y[i]][j] + data.get(i, j); + centroids[y[i]][j] += data.get(i, j); } } for i in 0..k { for j in 0..d { - centroids[i][j] = centroids[i][j] / T::from(size[i]).unwrap(); + centroids[i][j] /= T::from(size[i]).unwrap(); } } @@ -178,11 +176,11 @@ impl KMeans { } Ok(KMeans { - k: k, - y: y, - size: size, - distortion: distortion, - centroids: centroids, + k, + y, + size, + distortion, + centroids, }) } @@ -235,13 +233,13 @@ impl KMeans { let mut sum: T = T::zero(); for i in d.iter() { - sum = sum + *i; + sum += *i; } let cutoff = T::from(rng.gen::()).unwrap() * sum; let mut cost = T::zero(); let mut index = 0; while index < n { - cost = cost + d[index]; + cost += d[index]; if cost >= cutoff { break; } diff --git a/src/dataset/boston.rs b/src/dataset/boston.rs index 2a0d30e..33f7700 100644 --- a/src/dataset/boston.rs +++ b/src/dataset/boston.rs @@ -38,8 +38,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", diff --git a/src/dataset/breast_cancer.rs b/src/dataset/breast_cancer.rs index 0a88f31..e469794 100644 --- a/src/dataset/breast_cancer.rs +++ b/src/dataset/breast_cancer.rs @@ -40,8 +40,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: 
num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "mean radius", "mean texture", "mean perimeter", "mean area", "mean smoothness", "mean compactness", "mean concavity", diff --git a/src/dataset/diabetes.rs b/src/dataset/diabetes.rs index 352fd46..2a3e20c 100644 --- a/src/dataset/diabetes.rs +++ b/src/dataset/diabetes.rs @@ -33,8 +33,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6", ] diff --git a/src/dataset/digits.rs b/src/dataset/digits.rs index 10068ab..fd643d5 100644 --- a/src/dataset/digits.rs +++ b/src/dataset/digits.rs @@ -23,8 +23,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "sepal length (cm)", "sepal width (cm)", diff --git a/src/dataset/generator.rs b/src/dataset/generator.rs index fd4f400..e0b2939 100644 --- a/src/dataset/generator.rs +++ b/src/dataset/generator.rs @@ -39,8 +39,8 @@ pub fn make_blobs( Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: (0..num_features).map(|n| n.to_string()).collect(), target_names: vec!["label".to_string()], description: "Isotropic Gaussian blobs".to_string(), @@ -49,7 +49,7 @@ pub fn make_blobs( /// Make a large circle containing a smaller circle in 2d. 
pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset { - if factor >= 1.0 || factor < 0.0 { + if !(0.0..1.0).contains(&factor) { panic!("'factor' has to be between 0 and 1."); } @@ -82,7 +82,7 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "sepal length (cm)", "sepal width (cm)", diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index bfcd1c9..da790b4 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -56,19 +56,19 @@ pub(crate) fn serialize_data( ) -> Result<(), io::Error> { match File::create(filename) { Ok(mut file) => { - file.write(&dataset.num_features.to_le_bytes())?; - file.write(&dataset.num_samples.to_le_bytes())?; + file.write_all(&dataset.num_features.to_le_bytes())?; + file.write_all(&dataset.num_samples.to_le_bytes())?; let x: Vec = dataset .data .iter() - .map(|v| *v) + .copied() .flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec().into_iter()) .collect(); file.write_all(&x)?; let y: Vec = dataset .target .iter() - .map(|v| *v) + .copied() .flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec().into_iter()) .collect(); file.write_all(&y)?; diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs index f66ca9b..f25aaad 100644 --- a/src/decomposition/pca.rs +++ b/src/decomposition/pca.rs @@ -68,14 +68,14 @@ impl> PartialEq for PCA { if self.eigenvectors != other.eigenvectors || self.eigenvalues.len() != other.eigenvalues.len() { - return false; + false } else { for i in 0..self.eigenvalues.len() { if (self.eigenvalues[i] - other.eigenvalues[i]).abs() > T::epsilon() { return false; } } - return true; + true } } } @@ -190,16 +190,16 @@ impl> PCA { let mut pmu = vec![T::zero(); n_components]; for k in 0..n { for i in 0..n_components { - pmu[i] = pmu[i] + projection.get(i, k) * mu[k]; + pmu[i] += projection.get(i, k) * mu[k]; } } Ok(PCA { 
- eigenvectors: eigenvectors, - eigenvalues: eigenvalues, + eigenvectors, + eigenvalues, projection: projection.transpose(), - mu: mu, - pmu: pmu, + mu, + pmu, }) } diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index add6079..011b0ba 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -45,8 +45,6 @@ //! //! //! -extern crate rand; - use std::default::Default; use std::fmt::Debug; @@ -89,7 +87,7 @@ pub struct RandomForestClassifier { impl PartialEq for RandomForestClassifier { fn eq(&self, other: &Self) -> bool { if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -139,13 +137,13 @@ impl RandomForestClassifier { yi[i] = classes.iter().position(|c| yc == *c).unwrap(); } - let mtry = parameters.m.unwrap_or( + let mtry = parameters.m.unwrap_or_else(|| { (T::from(num_attributes).unwrap()) .sqrt() .floor() .to_usize() - .unwrap(), - ); + .unwrap() + }); let classes = y_m.unique(); let k = classes.len(); @@ -164,8 +162,8 @@ impl RandomForestClassifier { } Ok(RandomForestClassifier { - parameters: parameters, - trees: trees, + parameters, + trees, classes, }) } @@ -191,7 +189,7 @@ impl RandomForestClassifier { result[tree.predict_for_row(x, row)] += 1; } - return which_max(&result); + which_max(&result) } fn sample_with_replacement(y: &Vec, num_classes: usize) -> Vec { diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index d25c850..36fa096 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -42,7 +42,6 @@ //! //! //! 
-extern crate rand; use std::default::Default; use std::fmt::Debug; @@ -95,7 +94,7 @@ impl Default for RandomForestRegressorParameters { impl PartialEq for RandomForestRegressor { fn eq(&self, other: &Self) -> bool { if self.trees.len() != other.trees.len() { - return false; + false } else { for i in 0..self.trees.len() { if self.trees[i] != other.trees[i] { @@ -135,10 +134,7 @@ impl RandomForestRegressor { trees.push(tree); } - Ok(RandomForestRegressor { - parameters: parameters, - trees: trees, - }) + Ok(RandomForestRegressor { parameters, trees }) } /// Predict class for `x` @@ -161,7 +157,7 @@ impl RandomForestRegressor { let mut result = T::zero(); for tree in self.trees.iter() { - result = result + tree.predict_for_row(x, row); + result += tree.predict_for_row(x, row); } result / T::from(n_trees).unwrap() diff --git a/src/error/mod.rs b/src/error/mod.rs index c411e87..1615290 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -61,7 +61,7 @@ impl Failed { /// new instance of `err` pub fn because(err: FailedError, msg: &str) -> Self { Failed { - err: err, + err, msg: msg.to_string(), } } @@ -82,7 +82,7 @@ impl PartialEq for Failed { } impl fmt::Display for FailedError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let failed_err_str = match self { FailedError::FitFailed => "Fit failed", FailedError::PredictFailed => "Predict failed", @@ -96,7 +96,7 @@ impl fmt::Display for FailedError { } impl fmt::Display for Failed { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}: {}", self.err, self.msg) } } diff --git a/src/lib.rs b/src/lib.rs index 083b95f..ada7925 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,10 @@ +#![allow( + clippy::needless_range_loop, + clippy::ptr_arg, + clippy::type_complexity, + clippy::too_many_arguments, + clippy::many_single_char_names +)] #![warn(missing_docs)] 
#![warn(missing_doc_code_examples)] @@ -85,6 +92,8 @@ pub mod math; /// Functions for assessing prediction error. pub mod metrics; pub mod model_selection; +/// Supervised learning algorithms based on applying the Bayes theorem with the independence assumptions between predictors +pub mod naive_bayes; /// Supervised neighbors-based learning methods pub mod neighbors; pub(crate) mod optimization; diff --git a/src/linalg/cholesky.rs b/src/linalg/cholesky.rs index e55d6bb..724dc8a 100644 --- a/src/linalg/cholesky.rs +++ b/src/linalg/cholesky.rs @@ -46,10 +46,7 @@ pub struct Cholesky> { impl> Cholesky { pub(crate) fn new(R: M) -> Cholesky { - Cholesky { - R: R, - t: PhantomData, - } + Cholesky { R, t: PhantomData } } /// Get lower triangular matrix. @@ -90,7 +87,8 @@ impl> Cholesky { if bn != rn { return Err(Failed::because( FailedError::SolutionFailed, - &format!("Can't solve Ax = b for x. Number of rows in b != number of rows in R."), + &"Can\'t solve Ax = b for x. Number of rows in b != number of rows in R." 
+ .to_string(), )); } @@ -130,7 +128,7 @@ pub trait CholeskyDecomposableMatrix: BaseMatrix { if m != n { return Err(Failed::because( FailedError::DecompositionFailed, - &format!("Can't do Cholesky decomposition on a non-square matrix"), + &"Can\'t do Cholesky decomposition on a non-square matrix".to_string(), )); } @@ -143,14 +141,14 @@ pub trait CholeskyDecomposableMatrix: BaseMatrix { } s = (self.get(j, k) - s) / self.get(k, k); self.set(j, k, s); - d = d + s * s; + d += s * s; } d = self.get(j, j) - d; if d < T::zero() { return Err(Failed::because( FailedError::DecompositionFailed, - &format!("The matrix is not positive definite."), + &"The matrix is not positive definite.".to_string(), )); } diff --git a/src/linalg/evd.rs b/src/linalg/evd.rs index 60602ce..c216696 100644 --- a/src/linalg/evd.rs +++ b/src/linalg/evd.rs @@ -93,7 +93,7 @@ pub trait EVDDecomposableMatrix: BaseMatrix { sort(&mut d, &mut e, &mut V); } - Ok(EVD { V: V, d: d, e: e }) + Ok(EVD { V, d, e }) } } @@ -107,7 +107,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec let mut scale = T::zero(); let mut h = T::zero(); for k in 0..i { - scale = scale + d[k].abs(); + scale += d[k].abs(); } if scale == T::zero() { e[i] = d[i - 1]; @@ -118,8 +118,8 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec } } else { for k in 0..i { - d[k] = d[k] / scale; - h = h + d[k] * d[k]; + d[k] /= scale; + h += d[k] * d[k]; } let mut f = d[i - 1]; let mut g = h.sqrt(); @@ -127,7 +127,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec g = -g; } e[i] = scale * g; - h = h - f * g; + h -= f * g; d[i - 1] = f - g; for j in 0..i { e[j] = T::zero(); @@ -138,19 +138,19 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec V.set(j, i, f); g = e[j] + V.get(j, j) * f; for k in j + 1..=i - 1 { - g = g + V.get(k, j) * d[k]; - e[k] = e[k] + V.get(k, j) * f; + g += V.get(k, j) * d[k]; + e[k] += V.get(k, j) * f; } e[j] = g; } f = T::zero(); for j in 0..i { - e[j] = e[j] / h; - f = f + e[j] * d[j]; + e[j] /= h; + f += e[j] * d[j]; } let hh = 
f / (h + h); for j in 0..i { - e[j] = e[j] - hh * d[j]; + e[j] -= hh * d[j]; } for j in 0..i { f = d[j]; @@ -176,7 +176,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec for j in 0..=i { let mut g = T::zero(); for k in 0..=i { - g = g + V.get(k, i + 1) * V.get(k, j); + g += V.get(k, i + 1) * V.get(k, j); } for k in 0..=i { V.sub_element_mut(k, j, g * d[k]); @@ -239,9 +239,9 @@ fn tql2>(V: &mut M, d: &mut Vec, e: &mut Vec< let dl1 = d[l + 1]; let mut h = g - d[l]; for i in l + 2..n { - d[i] = d[i] - h; + d[i] -= h; } - f = f + h; + f += h; p = d[m]; let mut c = T::one(); @@ -278,7 +278,7 @@ fn tql2>(V: &mut M, d: &mut Vec, e: &mut Vec< } } } - d[l] = d[l] + f; + d[l] += f; e[l] = T::zero(); } @@ -321,8 +321,8 @@ fn balance>(A: &mut M) -> Vec { let mut c = T::zero(); for j in 0..n { if j != i { - c = c + A.get(j, i).abs(); - r = r + A.get(i, j).abs(); + c += A.get(j, i).abs(); + r += A.get(i, j).abs(); } } if c != T::zero() && r != T::zero() { @@ -330,18 +330,18 @@ fn balance>(A: &mut M) -> Vec { let mut f = T::one(); let s = c + r; while c < g { - f = f * radix; - c = c * sqrdx; + f *= radix; + c *= sqrdx; } g = r * radix; while c > g { - f = f / radix; - c = c / sqrdx; + f /= radix; + c /= sqrdx; } if (c + r) / f < t * s { done = false; g = T::one() / f; - scale[i] = scale[i] * f; + scale[i] *= f; for j in 0..n { A.mul_element_mut(i, j, g); } @@ -353,7 +353,7 @@ fn balance>(A: &mut M) -> Vec { } } - return scale; + scale } fn elmhes>(A: &mut M) -> Vec { @@ -386,7 +386,7 @@ fn elmhes>(A: &mut M) -> Vec { for i in (m + 1)..n { let mut y = A.get(i, m - 1); if y != T::zero() { - y = y / x; + y /= x; A.set(i, m - 1, y); for j in m..n { A.sub_element_mut(i, j, y * A.get(m, j)); @@ -399,7 +399,7 @@ fn elmhes>(A: &mut M) -> Vec { } } - return perm; + perm } fn eltran>(A: &M, V: &mut M, perm: &Vec) { @@ -430,7 +430,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..n { for j in i32::max(i as i32 - 1, 0)..n as i32 { - anorm = anorm + A.get(i, j as usize).abs(); 
+ anorm += A.get(i, j as usize).abs(); } } @@ -467,7 +467,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e p = T::half() * (y - x); q = p * p + w; z = q.abs().sqrt(); - x = x + t; + x += t; A.set(nn, nn, x); A.set(nn - 1, nn - 1, y + t); if q >= T::zero() { @@ -482,8 +482,8 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e p = x / s; q = z / s; r = (p * p + q * q).sqrt(); - p = p / r; - q = q / r; + p /= r; + q /= r; for j in nn - 1..n { z = A.get(nn - 1, j); A.set(nn - 1, j, q * z + p * A.get(nn, j)); @@ -516,7 +516,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e panic!("Too many iterations in hqr"); } if its == 10 || its == 20 { - t = t + x; + t += x; for i in 0..nn + 1 { A.sub_element_mut(i, i, x); } @@ -535,9 +535,9 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e q = A.get(m + 1, m + 1) - z - r - s; r = A.get(m + 2, m + 1); s = p.abs() + q.abs() + r.abs(); - p = p / s; - q = q / s; - r = r / s; + p /= s; + q /= s; + r /= s; if m == l { break; } @@ -565,9 +565,9 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e } x = p.abs() + q.abs() + r.abs(); if x != T::zero() { - p = p / x; - q = q / x; - r = r / x; + p /= x; + q /= x; + r /= x; } } let s = (p * p + q * q + r * r).sqrt().copysign(p); @@ -579,16 +579,16 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e } else { A.set(k, k - 1, -s * x); } - p = p + s; + p += s; x = p / s; y = q / s; z = r / s; - q = q / p; - r = r / p; + q /= p; + r /= p; for j in k..n { p = A.get(k, j) + q * A.get(k + 1, j); if k + 1 != nn { - p = p + r * A.get(k + 2, j); + p += r * A.get(k + 2, j); A.sub_element_mut(k + 2, j, p * z); } A.sub_element_mut(k + 1, j, p * y); @@ -603,7 +603,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..mmin + 1 { p = x * A.get(i, k) + y * A.get(i, k + 1); if k + 1 != nn { - p = p + z * A.get(i, k + 2); + p += z * A.get(i, k + 2); A.sub_element_mut(i, k + 2, p * r); } A.sub_element_mut(i, k + 1, p * q); @@ -612,7 +612,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..n { p = x * 
V.get(i, k) + y * V.get(i, k + 1); if k + 1 != nn { - p = p + z * V.get(i, k + 2); + p += z * V.get(i, k + 2); V.sub_element_mut(i, k + 2, p * r); } V.sub_element_mut(i, k + 1, p * q); @@ -642,7 +642,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e let w = A.get(i, i) - p; r = T::zero(); for j in m..=nn { - r = r + A.get(i, j) * A.get(j, nn); + r += A.get(i, j) * A.get(j, nn); } if e[i] < T::zero() { z = w; @@ -701,8 +701,8 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e let mut ra = T::zero(); let mut sa = T::zero(); for j in m..=nn { - ra = ra + A.get(i, j) * A.get(j, na); - sa = sa + A.get(i, j) * A.get(j, nn); + ra += A.get(i, j) * A.get(j, na); + sa += A.get(i, j) * A.get(j, nn); } if e[i] < T::zero() { z = w; @@ -766,7 +766,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..n { z = T::zero(); for k in 0..=j { - z = z + V.get(i, k) * A.get(k, j); + z += V.get(i, k) * A.get(k, j); } V.set(i, j, z); } diff --git a/src/linalg/lu.rs b/src/linalg/lu.rs index a4cc58d..bfc7fff 100644 --- a/src/linalg/lu.rs +++ b/src/linalg/lu.rs @@ -33,6 +33,7 @@ //! 
#![allow(non_snake_case)] +use std::cmp::Ordering; use std::fmt::Debug; use std::marker::PhantomData; @@ -63,10 +64,10 @@ impl> LU { } LU { - LU: LU, - pivot: pivot, - pivot_sign: pivot_sign, - singular: singular, + LU, + pivot, + pivot_sign, + singular, phantom: PhantomData, } } @@ -78,12 +79,10 @@ impl> LU { for i in 0..n_rows { for j in 0..n_cols { - if i > j { - L.set(i, j, self.LU.get(i, j)); - } else if i == j { - L.set(i, j, T::one()); - } else { - L.set(i, j, T::zero()); + match i.cmp(&j) { + Ordering::Greater => L.set(i, j, self.LU.get(i, j)), + Ordering::Equal => L.set(i, j, T::one()), + Ordering::Less => L.set(i, j, T::zero()), } } } @@ -220,10 +219,10 @@ pub trait LUDecomposableMatrix: BaseMatrix { let kmax = usize::min(i, j); let mut s = T::zero(); for k in 0..kmax { - s = s + self.get(i, k) * LUcolj[k]; + s += self.get(i, k) * LUcolj[k]; } - LUcolj[i] = LUcolj[i] - s; + LUcolj[i] -= s; self.set(i, j, LUcolj[i]); } @@ -239,9 +238,7 @@ pub trait LUDecomposableMatrix: BaseMatrix { self.set(p, k, self.get(j, k)); self.set(j, k, t); } - let k = piv[p]; - piv[p] = piv[j]; - piv[j] = k; + piv.swap(p, j); pivsign = -pivsign; } diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 41ec415..46f09c9 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -78,6 +78,11 @@ pub trait BaseVector: Clone + Debug { /// Get number of elevemnt in the vector fn len(&self) -> usize; + /// Returns true if the vector is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Return a vector with the elements of the one-dimensional array. 
fn to_vec(&self) -> Vec; @@ -542,9 +547,9 @@ pub trait Matrix: { } -pub(crate) fn row_iter>(m: &M) -> RowIter { +pub(crate) fn row_iter>(m: &M) -> RowIter<'_, F, M> { RowIter { - m: m, + m, pos: 0, max_pos: m.shape().0, phantom: PhantomData, diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index e34dd91..7486329 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -1,4 +1,3 @@ -extern crate num; use std::fmt; use std::fmt::Debug; use std::marker::PhantomData; @@ -31,8 +30,7 @@ impl BaseVector for Vec { } fn to_vec(&self) -> Vec { - let v = self.clone(); - v + self.clone() } fn zeros(len: usize) -> Self { @@ -54,7 +52,7 @@ impl BaseVector for Vec { let mut result = T::zero(); for i in 0..self.len() { - result = result + self[i] * other[i]; + result += self[i] * other[i]; } result @@ -64,7 +62,7 @@ impl BaseVector for Vec { let mut norm = T::zero(); for xi in self.iter() { - norm = norm + *xi * *xi; + norm += *xi * *xi; } norm.sqrt() @@ -83,7 +81,7 @@ impl BaseVector for Vec { let mut norm = T::zero(); for xi in self.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -91,19 +89,19 @@ impl BaseVector for Vec { } fn div_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] / x; + self[pos] /= x; } fn mul_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] * x; + self[pos] *= x; } fn add_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] + x + self[pos] += x } fn sub_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] - x; + self[pos] -= x; } fn add_mut(&mut self, other: &Self) -> &Self { @@ -166,7 +164,7 @@ impl BaseVector for Vec { fn sum(&self) -> T { let mut sum = T::zero(); for i in 0..self.len() { - sum = sum + self[i]; + sum += self[i]; } sum } @@ -198,7 +196,7 @@ pub struct DenseMatrixIterator<'a, T: RealNumber> { } impl fmt::Display for DenseMatrix { - fn fmt(&self, f: &mut fmt::Formatter) 
-> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut rows: Vec> = Vec::new(); for r in 0..self.nrows { rows.push( @@ -217,15 +215,15 @@ impl DenseMatrix { /// `values` should be in column-major order. pub fn new(nrows: usize, ncols: usize, values: Vec) -> Self { DenseMatrix { - ncols: ncols, - nrows: nrows, - values: values, + ncols, + nrows, + values, } } /// New instance of `DenseMatrix` from 2d array. pub fn from_2d_array(values: &[&[T]]) -> Self { - DenseMatrix::from_2d_vec(&values.into_iter().map(|row| Vec::from(*row)).collect()) + DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect()) } /// New instance of `DenseMatrix` from 2d vector. @@ -236,8 +234,8 @@ impl DenseMatrix { .unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector")) .len(); let mut m = DenseMatrix { - ncols: ncols, - nrows: nrows, + ncols, + nrows, values: vec![T::zero(); ncols * nrows], }; for row in 0..nrows { @@ -262,8 +260,8 @@ impl DenseMatrix { /// * `values` - values to initialize the matrix. pub fn from_vec(nrows: usize, ncols: usize, values: &Vec) -> DenseMatrix { let mut m = DenseMatrix { - ncols: ncols, - nrows: nrows, + ncols, + nrows, values: vec![T::zero(); ncols * nrows], }; for row in 0..nrows { @@ -286,7 +284,7 @@ impl DenseMatrix { DenseMatrix { ncols: values.len(), nrows: 1, - values: values, + values, } } @@ -302,13 +300,13 @@ impl DenseMatrix { DenseMatrix { ncols: 1, nrows: values.len(), - values: values, + values, } } /// Creates new column vector (_1xN_ matrix) from a vector. /// * `values` - values to initialize the matrix. 
- pub fn iter<'a>(&'a self) -> DenseMatrixIterator<'a, T> { + pub fn iter(&self) -> DenseMatrixIterator<'_, T> { DenseMatrixIterator { cur_c: 0, cur_r: 0, @@ -357,7 +355,7 @@ impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for De impl<'a, T: RealNumber + fmt::Debug + Deserialize<'a>> Visitor<'a> for DenseMatrixVisitor { type Value = DenseMatrix; - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("struct DenseMatrix") } @@ -413,7 +411,7 @@ impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for De } } - const FIELDS: &'static [&'static str] = &["nrows", "ncols", "values"]; + const FIELDS: &[&str] = &["nrows", "ncols", "values"]; deserializer.deserialize_struct( "DenseMatrix", FIELDS, @@ -565,7 +563,7 @@ impl BaseMatrix for DenseMatrix { matrix.set(i, i, T::one()); } - return matrix; + matrix } fn shape(&self) -> (usize, usize) { @@ -617,7 +615,7 @@ impl BaseMatrix for DenseMatrix { for c in 0..other.ncols { let mut s = T::zero(); for i in 0..inner_d { - s = s + self.get(r, i) * other.get(i, c); + s += self.get(r, i) * other.get(i, c); } result.set(r, c, s); } @@ -636,7 +634,7 @@ impl BaseMatrix for DenseMatrix { let mut result = T::zero(); for i in 0..(self.nrows * self.ncols) { - result = result + self.values[i] * other.values[i]; + result += self.values[i] * other.values[i]; } result @@ -730,19 +728,19 @@ impl BaseMatrix for DenseMatrix { } fn div_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] / x; + self.values[col * self.nrows + row] /= x; } fn mul_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] * x; + self.values[col * self.nrows + row] *= x; } fn add_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + 
row] = self.values[col * self.nrows + row] + x + self.values[col * self.nrows + row] += x } fn sub_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] - x; + self.values[col * self.nrows + row] -= x; } fn transpose(&self) -> Self { @@ -762,9 +760,9 @@ impl BaseMatrix for DenseMatrix { fn rand(nrows: usize, ncols: usize) -> Self { let values: Vec = (0..nrows * ncols).map(|_| T::rand()).collect(); DenseMatrix { - ncols: ncols, - nrows: nrows, - values: values, + ncols, + nrows, + values, } } @@ -772,7 +770,7 @@ impl BaseMatrix for DenseMatrix { let mut norm = T::zero(); for xi in self.values.iter() { - norm = norm + *xi * *xi; + norm += *xi * *xi; } norm.sqrt() @@ -793,7 +791,7 @@ impl BaseMatrix for DenseMatrix { let mut norm = T::zero(); for xi in self.values.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -805,12 +803,12 @@ impl BaseMatrix for DenseMatrix { for r in 0..self.nrows { for c in 0..self.ncols { - mean[c] = mean[c] + self.get(r, c); + mean[c] += self.get(r, c); } } for i in 0..mean.len() { - mean[i] = mean[i] / T::from(self.nrows).unwrap(); + mean[i] /= T::from(self.nrows).unwrap(); } mean @@ -818,28 +816,28 @@ impl BaseMatrix for DenseMatrix { fn add_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] + scalar; + self.values[i] += scalar; } self } fn sub_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] - scalar; + self.values[i] -= scalar; } self } fn mul_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] * scalar; + self.values[i] *= scalar; } self } fn div_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] / scalar; + self.values[i] /= scalar; } self } @@ -905,7 +903,7 @@ impl BaseMatrix 
for DenseMatrix { fn sum(&self) -> T { let mut sum = T::zero(); for i in 0..self.values.len() { - sum = sum + self.values[i]; + sum += self.values[i]; } sum } @@ -937,7 +935,7 @@ impl BaseMatrix for DenseMatrix { for c in 0..self.ncols { let p = (self.get(r, c) - max).exp(); self.set(r, c, p); - z = z + p; + z += p; } } for r in 0..self.nrows { @@ -1061,7 +1059,7 @@ mod tests { DenseMatrix::new(1, 3, vec![1., 2., 3.]) ); assert_eq!( - DenseMatrix::from_row_vector(vec.clone()).to_row_vector(), + DenseMatrix::from_row_vector(vec).to_row_vector(), vec![1., 2., 3.] ); } diff --git a/src/linalg/nalgebra_bindings.rs b/src/linalg/nalgebra_bindings.rs index ad39057..8ddfdb6 100644 --- a/src/linalg/nalgebra_bindings.rs +++ b/src/linalg/nalgebra_bindings.rs @@ -65,7 +65,7 @@ impl BaseVector for MatrixMN { } fn to_vec(&self) -> Vec { - self.row(0).iter().map(|v| *v).collect() + self.row(0).iter().copied().collect() } fn zeros(len: usize) -> Self { @@ -113,7 +113,7 @@ impl BaseVector for MatrixMN { let mut norm = T::zero(); for xi in self.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -175,7 +175,7 @@ impl BaseVector for MatrixMN { } fn unique(&self) -> Vec { - let mut result: Vec = self.iter().map(|v| *v).collect(); + let mut result: Vec = self.iter().copied().collect(); result.sort_by(|a, b| a.partial_cmp(b).unwrap()); result.dedup(); result @@ -200,7 +200,7 @@ impl Vec { - self.row(row).iter().map(|v| *v).collect() + self.row(row).iter().copied().collect() } fn get_row(&self, row: usize) -> Self::RowVector { @@ -208,22 +208,18 @@ impl) { - let mut r = 0; - for e in self.row(row).iter() { + for (r, e) in self.row(row).iter().enumerate() { result[r] = *e; - r += 1; } } fn get_col_as_vec(&self, col: usize) -> Vec { - self.column(col).iter().map(|v| *v).collect() + self.column(col).iter().copied().collect() } fn copy_col_as_vec(&self, col: usize, result: &mut Vec) { - let mut r = 0; - for e in self.column(col).iter() { - 
result[r] = *e; - r += 1; + for (c, e) in self.column(col).iter().enumerate() { + result[c] = *e; } } @@ -369,7 +365,7 @@ impl Vec { - let mut result: Vec = self.iter().map(|v| *v).collect(); + let mut result: Vec = self.iter().copied().collect(); result.sort_by(|a, b| a.partial_cmp(b).unwrap()); result.dedup(); result diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs index e8de983..b5058ab 100644 --- a/src/linalg/ndarray_bindings.rs +++ b/src/linalg/ndarray_bindings.rs @@ -118,7 +118,7 @@ impl BaseVector for ArrayBase, Ix let mut norm = T::zero(); for xi in self.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -126,19 +126,19 @@ impl BaseVector for ArrayBase, Ix } fn div_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] / x; + self[pos] /= x; } fn mul_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] * x; + self[pos] *= x; } fn add_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] + x; + self[pos] += x; } fn sub_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] - x; + self[pos] -= x; } fn approximate_eq(&self, other: &Self, error: T) -> bool { @@ -205,10 +205,8 @@ impl) { - let mut r = 0; - for e in self.row(row).iter() { + for (r, e) in self.row(row).iter().enumerate() { result[r] = *e; - r += 1; } } @@ -217,10 +215,8 @@ impl) { - let mut r = 0; - for e in self.column(col).iter() { - result[r] = *e; - r += 1; + for (c, e) in self.column(col).iter().enumerate() { + result[c] = *e; } } @@ -348,7 +344,7 @@ impl> QR { } } - QR { - QR: QR, - tau: tau, - singular: singular, - } + QR { QR, tau, singular } } /// Get upper triangular matrix. @@ -68,7 +64,7 @@ impl> QR { R.set(i, j, self.QR.get(i, j)); } } - return R; + R } /// Get an orthogonal matrix. 
@@ -82,7 +78,7 @@ impl> QR { if self.QR.get(k, k) != T::zero() { let mut s = T::zero(); for i in k..m { - s = s + self.QR.get(i, k) * Q.get(i, j); + s += self.QR.get(i, k) * Q.get(i, j); } s = -s / self.QR.get(k, k); for i in k..m { @@ -96,7 +92,7 @@ impl> QR { k -= 1; } } - return Q; + Q } fn solve(&self, mut b: M) -> Result { @@ -118,7 +114,7 @@ impl> QR { for j in 0..b_ncols { let mut s = T::zero(); for i in k..m { - s = s + self.QR.get(i, k) * b.get(i, j); + s += self.QR.get(i, k) * b.get(i, j); } s = -s / self.QR.get(k, k); for i in k..m { @@ -175,7 +171,7 @@ pub trait QRDecomposableMatrix: BaseMatrix { for j in k + 1..n { let mut s = T::zero(); for i in k..m { - s = s + self.get(i, k) * self.get(i, j); + s += self.get(i, k) * self.get(i, j); } s = -s / self.get(k, k); for i in k..m { diff --git a/src/linalg/svd.rs b/src/linalg/svd.rs index 8866ba9..9271f5b 100644 --- a/src/linalg/svd.rs +++ b/src/linalg/svd.rs @@ -106,13 +106,13 @@ pub trait SVDDecomposableMatrix: BaseMatrix { if i < m { for k in i..m { - scale = scale + U.get(k, i).abs(); + scale += U.get(k, i).abs(); } if scale.abs() > T::epsilon() { for k in i..m { U.div_element_mut(k, i, scale); - s = s + U.get(k, i) * U.get(k, i); + s += U.get(k, i) * U.get(k, i); } let mut f = U.get(i, i); @@ -122,7 +122,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l - 1..n { s = T::zero(); for k in i..m { - s = s + U.get(k, i) * U.get(k, j); + s += U.get(k, i) * U.get(k, j); } f = s / h; for k in i..m { @@ -140,15 +140,15 @@ pub trait SVDDecomposableMatrix: BaseMatrix { let mut s = T::zero(); scale = T::zero(); - if i + 1 <= m && i + 1 != n { + if i < m && i + 1 != n { for k in l - 1..n { - scale = scale + U.get(i, k).abs(); + scale += U.get(i, k).abs(); } if scale.abs() > T::epsilon() { for k in l - 1..n { U.div_element_mut(i, k, scale); - s = s + U.get(i, k) * U.get(i, k); + s += U.get(i, k) * U.get(i, k); } let f = U.get(i, l - 1); @@ -163,7 +163,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for 
j in l - 1..m { s = T::zero(); for k in l - 1..n { - s = s + U.get(j, k) * U.get(i, k); + s += U.get(j, k) * U.get(i, k); } for k in l - 1..n { @@ -189,7 +189,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l..n { let mut s = T::zero(); for k in l..n { - s = s + U.get(i, k) * v.get(k, j); + s += U.get(i, k) * v.get(k, j); } for k in l..n { v.add_element_mut(k, j, s * v.get(k, i)); @@ -218,7 +218,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l..n { let mut s = T::zero(); for k in l..m { - s = s + U.get(k, i) * U.get(k, j); + s += U.get(k, i) * U.get(k, j); } let f = (s / U.get(i, i)) * g; for k in i..m { @@ -316,7 +316,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { f = x * c + g * s; g = g * c - x * s; h = y * s; - y = y * c; + y *= c; for jj in 0..n { x = v.get(jj, j); @@ -431,13 +431,13 @@ impl> SVD { let full = s.len() == m.min(n); let tol = T::half() * (T::from(m + n).unwrap() + T::one()).sqrt() * s[0] * T::epsilon(); SVD { - U: U, - V: V, - s: s, - full: full, - m: m, - n: n, - tol: tol, + U, + V, + s, + full, + m, + n, + tol, } } @@ -458,9 +458,9 @@ impl> SVD { let mut r = T::zero(); if self.s[j] > self.tol { for i in 0..self.m { - r = r + self.U.get(i, j) * b.get(i, k); + r += self.U.get(i, j) * b.get(i, k); } - r = r / self.s[j]; + r /= self.s[j]; } tmp[j] = r; } @@ -468,7 +468,7 @@ impl> SVD { for j in 0..self.n { let mut r = T::zero(); for jj in 0..self.n { - r = r + self.V.get(j, jj) * tmp[jj]; + r += self.V.get(j, jj) * tmp[jj]; } b.set(j, k, r); } diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index 5de5007..d01b817 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -123,9 +123,9 @@ impl> LinearRegression { let (y_nrows, _) = b.shape(); if x_nrows != y_nrows { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + 
)); } let a = x.h_stack(&M::ones(x_nrows, 1)); diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 2df9b87..addede7 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -84,7 +84,7 @@ trait ObjectiveFunction> { let mut sum = T::zero(); let p = x.shape().1; for i in 0..p { - sum = sum + x.get(m_row, i) * w.get(0, i + v_col); + sum += x.get(m_row, i) * w.get(0, i + v_col); } sum + w.get(0, p + v_col) @@ -103,14 +103,14 @@ impl> PartialEq for LogisticRegression { || self.num_attributes != other.num_attributes || self.classes.len() != other.classes.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { return false; } } - + return self.coefficients == other.coefficients && self.intercept == other.intercept; } } @@ -125,7 +125,7 @@ impl<'a, T: RealNumber, M: Matrix> ObjectiveFunction for i in 0..n { let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i); - f = f + (wx.ln_1pe() - (T::from(self.y[i]).unwrap()) * wx); + f += wx.ln_1pe() - (T::from(self.y[i]).unwrap()) * wx; } f @@ -171,7 +171,7 @@ impl<'a, T: RealNumber, M: Matrix> ObjectiveFunction ); } prob.softmax_mut(); - f = f - prob.get(0, self.y[i]).ln(); + f -= prob.get(0, self.y[i]).ln(); } f @@ -217,9 +217,9 @@ impl> LogisticRegression { let (_, y_nrows) = y_m.shape(); if x_nrows != y_nrows { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let classes = y_m.unique(); @@ -248,6 +248,7 @@ impl> LogisticRegression { }; let result = LogisticRegression::minimize(x0, objective); + let weights = result.x; Ok(LogisticRegression { @@ -269,7 +270,6 @@ impl> LogisticRegression { }; let result = LogisticRegression::minimize(x0, objective); - let weights = result.x.reshape(k, num_attributes + 1); 
Ok(LogisticRegression { @@ -332,8 +332,10 @@ impl> LogisticRegression { let df = |g: &mut M, w: &M| objective.df(g, w); - let mut ls: Backtracking = Default::default(); - ls.order = FunctionOrder::THIRD; + let ls: Backtracking = Backtracking { + order: FunctionOrder::THIRD, + ..Default::default() + }; let optimizer: LBFGS = Default::default(); optimizer.optimize(&f, &df, &x0, &ls) @@ -371,7 +373,7 @@ mod tests { let objective = MultiClassObjectiveFunction { x: &x, - y: y, + y, k: 3, phantom: PhantomData, }; @@ -420,7 +422,7 @@ mod tests { let objective = BinaryObjectiveFunction { x: &x, - y: y, + y, phantom: PhantomData, }; diff --git a/src/math/distance/euclidian.rs b/src/math/distance/euclidian.rs index 4ec0ad0..31503bd 100644 --- a/src/math/distance/euclidian.rs +++ b/src/math/distance/euclidian.rs @@ -38,7 +38,7 @@ impl Euclidian { let mut sum = T::zero(); for i in 0..x.len() { let d = x[i] - y[i]; - sum = sum + d * d; + sum += d * d; } sum diff --git a/src/math/distance/mahalanobis.rs b/src/math/distance/mahalanobis.rs index 6c205e5..fd320c3 100644 --- a/src/math/distance/mahalanobis.rs +++ b/src/math/distance/mahalanobis.rs @@ -68,8 +68,8 @@ impl> Mahalanobis { let sigma = data.cov(); let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap(); Mahalanobis { - sigma: sigma, - sigmaInv: sigmaInv, + sigma, + sigmaInv, t: PhantomData, } } @@ -80,8 +80,8 @@ impl> Mahalanobis { let sigma = cov.clone(); let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap(); Mahalanobis { - sigma: sigma, - sigmaInv: sigmaInv, + sigma, + sigmaInv, t: PhantomData, } } @@ -118,7 +118,7 @@ impl> Distance, T> for Mahalanobis { let mut s = T::zero(); for j in 0..n { for i in 0..n { - s = s + self.sigmaInv.get(i, j) * z[i] * z[j]; + s += self.sigmaInv.get(i, j) * z[i] * z[j]; } } diff --git a/src/math/distance/manhattan.rs b/src/math/distance/manhattan.rs index 9b46a0c..66125a5 100644 --- a/src/math/distance/manhattan.rs +++ b/src/math/distance/manhattan.rs @@ -35,7 +35,7 @@ 
impl Distance, T> for Manhattan { let mut dist = T::zero(); for i in 0..x.len() { - dist = dist + (x[i] - y[i]).abs(); + dist += (x[i] - y[i]).abs(); } dist diff --git a/src/math/distance/minkowski.rs b/src/math/distance/minkowski.rs index 667e0db..b7c5691 100644 --- a/src/math/distance/minkowski.rs +++ b/src/math/distance/minkowski.rs @@ -48,7 +48,7 @@ impl Distance, T> for Minkowski { for i in 0..x.len() { let d = (x[i] - y[i]).abs(); - dist = dist + d.powf(p_t); + dist += d.powf(p_t); } dist.powf(T::one() / p_t) diff --git a/src/math/distance/mod.rs b/src/math/distance/mod.rs index 0532e86..696b5ff 100644 --- a/src/math/distance/mod.rs +++ b/src/math/distance/mod.rs @@ -4,7 +4,7 @@ //! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions: //! 1. \\( d(x, y) = 0 \\) if and only \\( x = y \\), positive definiteness //! 1. \\( d(x, y) = d(y, x) \\), symmetry -//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality +//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality //! //! for all \\(x, y, z \in Z \\) //! @@ -45,7 +45,7 @@ impl Distances { /// Minkowski distance, see [`Minkowski`](minkowski/index.html) /// * `p` - function order. Should be >= 1 pub fn minkowski(p: u16) -> minkowski::Minkowski { - minkowski::Minkowski { p: p } + minkowski::Minkowski { p } } /// Manhattan distance, see [`Manhattan`](manhattan/index.html) diff --git a/src/math/num.rs b/src/math/num.rs index 894e5a3..490623c 100644 --- a/src/math/num.rs +++ b/src/math/num.rs @@ -57,19 +57,19 @@ impl RealNumber for f64 { fn ln_1pe(self) -> f64 { if self > 15. { - return self; + self } else { - return self.exp().ln_1p(); + self.exp().ln_1p() } } fn sigmoid(self) -> f64 { if self < -40. { - return 0.; + 0. } else if self > 40. { - return 1.; + 1. } else { - return 1. / (1. + f64::exp(-self)); + 1. / (1. 
+ f64::exp(-self)) } } @@ -98,19 +98,19 @@ impl RealNumber for f32 { fn ln_1pe(self) -> f32 { if self > 15. { - return self; + self } else { - return self.exp().ln_1p(); + self.exp().ln_1p() } } fn sigmoid(self) -> f32 { if self < -40. { - return 0.; + 0. } else if self > 40. { - return 1.; + 1. } else { - return 1. / (1. + f32::exp(-self)); + 1. / (1. + f32::exp(-self)) } } diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs index 99e6cbd..571dd49 100644 --- a/src/metrics/auc.rs +++ b/src/metrics/auc.rs @@ -42,9 +42,9 @@ impl AUC { for i in 0..n { if y_true.get(i) == T::zero() { - neg = neg + T::one(); + neg += T::one(); } else if y_true.get(i) == T::one() { - pos = pos + T::one(); + pos += T::one(); } else { panic!( "AUC is only for binary classification. Invalid label: {}", @@ -79,7 +79,7 @@ impl AUC { let mut auc = T::zero(); for i in 0..n { if y_true.get(label_idx[i]) == T::one() { - auc = auc + rank[i]; + auc += rank[i]; } } diff --git a/src/metrics/cluster_hcv.rs b/src/metrics/cluster_hcv.rs index bdefc8d..29a9db2 100644 --- a/src/metrics/cluster_hcv.rs +++ b/src/metrics/cluster_hcv.rs @@ -24,8 +24,8 @@ impl HCVScore { let contingency = contingency_matrix(&labels_true, &labels_pred); let mi: T = mutual_info_score(&contingency); - let homogeneity = entropy_c.map(|e| mi / e).unwrap_or(T::one()); - let completeness = entropy_k.map(|e| mi / e).unwrap_or(T::one()); + let homogeneity = entropy_c.map(|e| mi / e).unwrap_or_else(T::one); + let completeness = entropy_k.map(|e| mi / e).unwrap_or_else(T::one); let v_measure_score = if homogeneity + completeness == T::zero() { T::zero() diff --git a/src/metrics/cluster_helpers.rs b/src/metrics/cluster_helpers.rs index 76cd643..dd5bbb3 100644 --- a/src/metrics/cluster_helpers.rs +++ b/src/metrics/cluster_helpers.rs @@ -37,7 +37,7 @@ pub fn entropy(data: &Vec) -> Option { for &c in bincounts.values() { if c > 0 { let pi = T::from_usize(c).unwrap(); - entropy = entropy - (pi / sum) * (pi.ln() - sum.ln()); + entropy -= (pi 
/ sum) * (pi.ln() - sum.ln()); } } @@ -89,9 +89,8 @@ pub fn mutual_info_score(contingency: &Vec>) -> T { let mut result = T::zero(); for i in 0..log_outer.len() { - result = result - + ((contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln)) - + contingency_nm[i] * log_outer[i]) + result += (contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln)) + + contingency_nm[i] * log_outer[i] } result.max(T::zero()) diff --git a/src/metrics/mean_absolute_error.rs b/src/metrics/mean_absolute_error.rs index 3e5099e..a069335 100644 --- a/src/metrics/mean_absolute_error.rs +++ b/src/metrics/mean_absolute_error.rs @@ -43,7 +43,7 @@ impl MeanAbsoluteError { let n = y_true.len(); let mut ras = T::zero(); for i in 0..n { - ras = ras + (y_true.get(i) - y_pred.get(i)).abs(); + ras += (y_true.get(i) - y_pred.get(i)).abs(); } ras / T::from_usize(n).unwrap() diff --git a/src/metrics/mean_squared_error.rs b/src/metrics/mean_squared_error.rs index 816cc70..137c8e6 100644 --- a/src/metrics/mean_squared_error.rs +++ b/src/metrics/mean_squared_error.rs @@ -43,7 +43,7 @@ impl MeanSquareError { let n = y_true.len(); let mut rss = T::zero(); for i in 0..n { - rss = rss + (y_true.get(i) - y_pred.get(i)).square(); + rss += (y_true.get(i) - y_pred.get(i)).square(); } rss / T::from_usize(n).unwrap() diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 4fe199b..f49300d 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -101,7 +101,7 @@ impl ClassificationMetrics { /// F1 score, also known as balanced F-score or F-measure, see [F1](f1/index.html). pub fn f1(beta: T) -> f1::F1 { - f1::F1 { beta: beta } + f1::F1 { beta } } /// Area Under the Receiver Operating Characteristic Curve (ROC AUC), see [AUC](auc/index.html). 
diff --git a/src/metrics/r2.rs b/src/metrics/r2.rs index e689c6f..cbcf7e4 100644 --- a/src/metrics/r2.rs +++ b/src/metrics/r2.rs @@ -45,10 +45,10 @@ impl R2 { let mut mean = T::zero(); for i in 0..n { - mean = mean + y_true.get(i); + mean += y_true.get(i); } - mean = mean / T::from_usize(n).unwrap(); + mean /= T::from_usize(n).unwrap(); let mut ss_tot = T::zero(); let mut ss_res = T::zero(); @@ -56,8 +56,8 @@ impl R2 { for i in 0..n { let y_i = y_true.get(i); let f_i = y_pred.get(i); - ss_tot = ss_tot + (y_i - mean).square(); - ss_res = ss_res + (y_i - f_i).square(); + ss_tot += (y_i - mean).square(); + ss_res += (y_i - f_i).square(); } T::one() - (ss_res / ss_tot) diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index 49938cf..b066b30 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -8,7 +8,6 @@ //! your data. //! //! In SmartCore you can split your data into training and test datasets using `train_test_split` function. -extern crate rand; use crate::linalg::BaseVector; use crate::linalg::Matrix; @@ -111,7 +110,7 @@ pub struct KFold { impl Default for KFold { fn default() -> KFold { KFold { - n_splits: 3 as usize, + n_splits: 3_usize, shuffle: true, } } @@ -127,7 +126,7 @@ impl BaseKFold for KFold { // initialise indices let mut indices: Vec = (0..n_samples).collect(); - if self.shuffle == true { + if self.shuffle { indices.shuffle(&mut thread_rng()); } // return a new array of given shape n_split, filled with each element of n_samples divided by n_splits. 
@@ -135,7 +134,7 @@ impl BaseKFold for KFold { // increment by one if odd for i in 0..(n_samples % self.n_splits) { - fold_sizes[i] = fold_sizes[i] + 1; + fold_sizes[i] += 1; } // generate the right array of arrays for test indices @@ -175,13 +174,13 @@ impl BaseKFold for KFold { .clone() .iter() .enumerate() - .filter(|&(idx, _)| test_index[idx] == false) + .filter(|&(idx, _)| !test_index[idx]) .map(|(idx, _)| idx) .collect::>(); // filter train indices out according to mask let test_index = indices .iter() .enumerate() - .filter(|&(idx, _)| test_index[idx] == true) + .filter(|&(idx, _)| test_index[idx]) .map(|(idx, _)| idx) .collect::>(); // filter tests indices out according to mask return_values.push((train_index, test_index)) @@ -293,10 +292,10 @@ mod tests { let x: DenseMatrix = DenseMatrix::rand(23, 100); let train_test_splits = k.split(&x); - assert_eq!(train_test_splits[0].1.len(), 12 as usize); - assert_eq!(train_test_splits[0].0.len(), 11 as usize); - assert_eq!(train_test_splits[1].0.len(), 12 as usize); - assert_eq!(train_test_splits[1].1.len(), 11 as usize); + assert_eq!(train_test_splits[0].1.len(), 12_usize); + assert_eq!(train_test_splits[0].0.len(), 11_usize); + assert_eq!(train_test_splits[1].0.len(), 12_usize); + assert_eq!(train_test_splits[1].1.len(), 11_usize); } #[test] diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs new file mode 100644 index 0000000..f948aeb --- /dev/null +++ b/src/naive_bayes/categorical.rs @@ -0,0 +1,232 @@ +use crate::error::Failed; +use crate::linalg::BaseVector; +use crate::linalg::Matrix; +use crate::math::num::RealNumber; +use crate::naive_bayes::{BaseNaiveBayes, NBDistribution}; +use serde::{Deserialize, Serialize}; + +/// Naive Bayes classifier for categorical features +struct CategoricalNBDistribution { + class_labels: Vec, + class_probabilities: Vec, + coef: Vec>>, + feature_categories: Vec>, +} + +impl> NBDistribution for CategoricalNBDistribution { + fn prior(&self, class_index: 
usize) -> T { + if class_index >= self.class_labels.len() { + T::zero() + } else { + self.class_probabilities[class_index] + } + } + + fn conditional_probability(&self, class_index: usize, j: &M::RowVector) -> T { + if class_index < self.class_labels.len() { + let mut prob = T::one(); + for feature in 0..j.len() { + let value = j.get(feature); + match self.feature_categories[feature] + .iter() + .position(|&t| t == value) + { + Some(_i) => prob *= self.coef[class_index][feature][_i], + None => return T::zero(), + } + } + prob + } else { + T::zero() + } + } + + fn classes(&self) -> &Vec { + &self.class_labels + } +} + +impl CategoricalNBDistribution { + /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features. + /// * `x` - training data. + /// * `y` - vector with target values (classes) of length N. + /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). + pub fn fit>(x: &M, y: &M::RowVector, alpha: T) -> Result { + if alpha < T::zero() { + return Err(Failed::fit(&format!( + "alpha should be >= 0, alpha=[{}]", + alpha + ))); + } + + let (n_samples, n_features) = x.shape(); + let y_samples = y.len(); + if y_samples != n_samples { + return Err(Failed::fit(&format!( + "Size of x should equal size of y; |x|=[{}], |y|=[{}]", + n_samples, y_samples + ))); + } + + if n_samples == 0 { + return Err(Failed::fit(&format!( + "Size of x and y should greater than 0; |x|=[{}]", + n_samples + ))); + } + + let mut y_sorted = y.to_vec(); + y_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut class_labels = Vec::with_capacity(y.len()); + class_labels.push(y_sorted[0]); + let mut classes_count = Vec::with_capacity(y.len()); + let mut current_count = T::one(); + for idx in 1..y_samples { + if y_sorted[idx] == y_sorted[idx - 1] { + current_count += T::one(); + } else { + classes_count.push(current_count); + class_labels.push(y_sorted[idx]); + current_count = T::one() + } + 
classes_count.push(current_count); + } + + let mut feature_categories: Vec> = Vec::with_capacity(n_features); + + for feature in 0..n_features { + let feature_types = x.get_col_as_vec(feature).unique(); + feature_categories.push(feature_types); + } + let mut coef: Vec>> = Vec::with_capacity(class_labels.len()); + for (label, label_count) in class_labels.iter().zip(classes_count.iter()) { + let mut coef_i: Vec> = Vec::with_capacity(n_features); + for (feature_index, feature_options) in + feature_categories.iter().enumerate().take(n_features) + { + let col = x + .get_col_as_vec(feature_index) + .iter() + .enumerate() + .filter(|(i, _j)| y.get(*i) == *label) + .map(|(_, j)| *j) + .collect::>(); + let mut feat_count: Vec = Vec::with_capacity(feature_options.len()); + for k in feature_options.iter() { + let feat_k_count = col.iter().filter(|&v| v == k).count(); + feat_count.push(feat_k_count); + } + + let coef_i_j = feat_count + .iter() + .map(|c| { + (T::from(*c).unwrap() + alpha) + / (T::from(*label_count).unwrap() + + T::from(feature_options.len()).unwrap() * alpha) + }) + .collect::>(); + coef_i.push(coef_i_j); + } + coef.push(coef_i); + } + let class_probabilities = classes_count + .into_iter() + .map(|count| count / T::from(n_samples).unwrap()) + .collect::>(); + + Ok(Self { + class_labels, + class_probabilities, + coef, + feature_categories, + }) + } +} + +/// `CategoricalNB` parameters. Use `Default::default()` for default values. +#[derive(Serialize, Deserialize, Debug)] +pub struct CategoricalNBParameters { + /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). + pub alpha: T, +} + +impl CategoricalNBParameters { + /// Create CategoricalNBParameters with specific parameters.
+ pub fn new(alpha: T) -> Result { + if alpha > T::zero() { + Ok(Self { alpha }) + } else { + Err(Failed::fit(&format!( + "alpha should be >= 0, alpha=[{}]", + alpha + ))) + } + } +} +impl Default for CategoricalNBParameters { + fn default() -> Self { + Self { alpha: T::one() } + } +} + +/// CategoricalNB implements the categorical naive Bayes algorithm for categorically distributed data. +pub struct CategoricalNB> { + inner: BaseNaiveBayes>, +} + +impl> CategoricalNB { + /// Fits CategoricalNB with given data + /// * `x` - training data of size NxM where N is the number of samples and M is the number of + /// features. + /// * `y` - vector with target values (classes) of length N. + /// * `parameters` - additional parameters like alpha for smoothing + pub fn fit( + x: &M, + y: &M::RowVector, + parameters: CategoricalNBParameters, + ) -> Result { + let alpha = parameters.alpha; + let distribution = CategoricalNBDistribution::fit(x, y, alpha)?; + let inner = BaseNaiveBayes::fit(distribution)?; + Ok(Self { inner }) + } + + /// Estimates the class labels for the provided data. + /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// Returns a vector of size N with class estimates. 
+ pub fn predict(&self, x: &M) -> Result { + self.inner.predict(x) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::DenseMatrix; + + #[test] + fn run_base_naive_bayes() { + let x = DenseMatrix::from_2d_array(&[ + &[0., 2., 1., 0.], + &[0., 2., 1., 1.], + &[1., 2., 1., 0.], + &[2., 1., 1., 0.], + &[2., 0., 0., 0.], + &[2., 0., 0., 1.], + &[1., 0., 0., 1.], + &[0., 1., 1., 0.], + &[0., 0., 0., 0.], + &[2., 1., 0., 0.], + &[0., 1., 0., 1.], + &[1., 1., 1., 1.], + &[1., 2., 0., 0.], + &[2., 1., 1., 1.], + ]); + let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.]; + + let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); + let x_test = DenseMatrix::from_2d_array(&[&[0., 2., 1., 0.], &[2., 2., 0., 0.]]); + let y_hat = cnb.predict(&x_test).unwrap(); + assert_eq!(y_hat, vec![0., 1.]); + } +} diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs new file mode 100644 index 0000000..e9ab792 --- /dev/null +++ b/src/naive_bayes/mod.rs @@ -0,0 +1,69 @@ +use crate::error::Failed; +use crate::linalg::BaseVector; +use crate::linalg::Matrix; +use crate::math::num::RealNumber; +use std::marker::PhantomData; + +/// Distribution used in the Naive Bayes classifier. +pub(crate) trait NBDistribution> { + /// Prior of class at the given index. + fn prior(&self, class_index: usize) -> T; + + /// Conditional probability of sample j given class in the specified index. + fn conditional_probability(&self, class_index: usize, j: &M::RowVector) -> T; + + /// Possible classes of the distribution. + fn classes(&self) -> &Vec; +} + +/// Base struct for the Naive Bayes classifier. +pub(crate) struct BaseNaiveBayes, D: NBDistribution> { + distribution: D, + _phantom_t: PhantomData, + _phantom_m: PhantomData, +} + +impl, D: NBDistribution> BaseNaiveBayes { + /// Fits NB classifier to a given NBdistribution. 
+ /// * `distribution` - NBDistribution of the training data + pub fn fit(distribution: D) -> Result { + Ok(Self { + distribution, + _phantom_t: PhantomData, + _phantom_m: PhantomData, + }) + } + + /// Estimates the class labels for the provided data. + /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// Returns a vector of size N with class estimates. + pub fn predict(&self, x: &M) -> Result { + let y_classes = self.distribution.classes(); + let (rows, _) = x.shape(); + let predictions = (0..rows) + .map(|row_index| { + let row = x.get_row(row_index); + let (prediction, _probability) = y_classes + .iter() + .enumerate() + .map(|(class_index, class)| { + ( + class, + self.distribution.conditional_probability(class_index, &row) + * self.distribution.prior(class_index), + ) + }) + .max_by(|(_, p1), (_, p2)| p1.partial_cmp(p2).unwrap()) + .unwrap(); + *prediction + }) + .collect::>(); + let mut y_hat = M::RowVector::zeros(rows); + for (i, prediction) in predictions.iter().enumerate().take(rows) { + y_hat.set(i, *prediction); + } + Ok(y_hat) + } +} +mod categorical; +pub use categorical::{CategoricalNB, CategoricalNBParameters}; diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index 3ad4297..135594a 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -78,7 +78,7 @@ impl, T>> PartialEq for KNNClassifier { || self.k != other.k || self.y.len() != other.y.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -139,7 +139,7 @@ impl, T>> KNNClassifier { } Ok(KNNClassifier { - classes: classes, + classes, y: yi, k: parameters.k, knn_algorithm: parameters.algorithm.fit(data, distance)?, @@ -166,13 +166,13 @@ impl, T>> KNNClassifier { let weights = self .weight .calc_weights(search_result.iter().map(|v| v.1).collect()); - let w_sum = weights.iter().map(|w| *w).sum(); + let 
w_sum = weights.iter().copied().sum(); let mut c = vec![T::zero(); self.classes.len()]; let mut max_c = T::zero(); let mut max_i = 0; for (r, w) in search_result.iter().zip(weights.iter()) { - c[self.y[r.0]] = c[self.y[r.0]] + (*w / w_sum); + c[self.y[r.0]] += *w / w_sum; if c[self.y[r.0]] > max_c { max_c = c[self.y[r.0]]; max_i = self.y[r.0]; diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index 0bf283f..b7c0f2d 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -76,7 +76,7 @@ impl Default for KNNRegressorParameters { impl, T>> PartialEq for KNNRegressor { fn eq(&self, other: &Self) -> bool { if self.k != other.k || self.y.len() != other.y.len() { - return false; + false } else { for i in 0..self.y.len() { if (self.y[i] - other.y[i]).abs() > T::epsilon() { @@ -151,10 +151,10 @@ impl, T>> KNNRegressor { let weights = self .weight .calc_weights(search_result.iter().map(|v| v.1).collect()); - let w_sum = weights.iter().map(|w| *w).sum(); + let w_sum = weights.iter().copied().sum(); for (r, w) in search_result.iter().zip(weights.iter()) { - result = result + self.y[r.0] * (*w / w_sum); + result += self.y[r.0] * (*w / w_sum); } Ok(result) diff --git a/src/neighbors/mod.rs b/src/neighbors/mod.rs index 6d542f6..be1ad4d 100644 --- a/src/neighbors/mod.rs +++ b/src/neighbors/mod.rs @@ -10,7 +10,7 @@ //! and follows three conditions: //! 1. \\( d(x, y) = 0 \\) if and only \\( x = y \\), positive definiteness //! 1. \\( d(x, y) = d(y, x) \\), symmetry -//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality +//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality //! //! for all \\(x, y, z \in Z \\) //! 
diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs index c860084..d57896f 100644 --- a/src/optimization/first_order/gradient_descent.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -25,8 +25,8 @@ impl Default for GradientDescent { impl FirstOrderOptimizer for GradientDescent { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &'a F, - df: &'a DF, + f: &'a F<'_, T, X>, + df: &'a DF<'_, X>, x0: &X, ls: &'a LS, ) -> OptimizerResult { @@ -74,8 +74,8 @@ impl FirstOrderOptimizer for GradientDescent { let f_x = f(&x); OptimizerResult { - x: x, - f_x: f_x, + x, + f_x, iterations: iter, } } diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs index b63f617..5dedfe6 100644 --- a/src/optimization/first_order/lbfgs.rs +++ b/src/optimization/first_order/lbfgs.rs @@ -100,8 +100,8 @@ impl LBFGS { fn update_state<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &'a F, - df: &'a DF, + f: &'a F<'_, T, X>, + df: &'a DF<'_, X>, ls: &'a LS, state: &mut LBFGSState, ) { @@ -162,7 +162,7 @@ impl LBFGS { g_converged || x_converged || state.counter_f_tol > self.successive_f_tol } - fn update_hessian<'a, X: Matrix>(&self, _: &'a DF, state: &mut LBFGSState) { + fn update_hessian<'a, X: Matrix>(&self, _: &'a DF<'_, X>, state: &mut LBFGSState) { state.dg = state.x_df.sub(&state.x_df_prev); let rho_iteration = T::one() / state.dx.dot(&state.dg); if !rho_iteration.is_infinite() { @@ -198,8 +198,8 @@ struct LBFGSState> { impl FirstOrderOptimizer for LBFGS { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &F, - df: &'a DF, + f: &F<'_, T, X>, + df: &'a DF<'_, X>, x0: &X, ls: &'a LS, ) -> OptimizerResult { diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs index d1c628f..f2e476f 100644 --- a/src/optimization/first_order/mod.rs +++ b/src/optimization/first_order/mod.rs @@ -12,8 +12,8 @@ use crate::optimization::{DF, F}; pub 
trait FirstOrderOptimizer { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &F, - df: &'a DF, + f: &F<'_, T, X>, + df: &'a DF<'_, X>, x0: &X, ls: &'a LS, ) -> OptimizerResult; diff --git a/src/optimization/line_search.rs b/src/optimization/line_search.rs index 3481c87..e6a3b80 100644 --- a/src/optimization/line_search.rs +++ b/src/optimization/line_search.rs @@ -2,7 +2,7 @@ use crate::optimization::FunctionOrder; use num_traits::Float; pub trait LineSearchMethod { - fn search<'a>( + fn search( &self, f: &(dyn Fn(T) -> T), df: &(dyn Fn(T) -> T), diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 84a405e..1f563c1 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -48,7 +48,7 @@ impl Kernels { /// Radial basis function kernel (Gaussian) pub fn rbf(gamma: T) -> RBFKernel { - RBFKernel { gamma: gamma } + RBFKernel { gamma } } /// Polynomial kernel @@ -57,9 +57,9 @@ impl Kernels { /// * `coef0` - independent term in kernel function pub fn polynomial(degree: T, gamma: T, coef0: T) -> PolynomialKernel { PolynomialKernel { - degree: degree, - gamma: gamma, - coef0: coef0, + degree, + gamma, + coef0, } } @@ -79,17 +79,14 @@ impl Kernels { /// * `gamma` - kernel coefficient /// * `coef0` - independent term in kernel function pub fn sigmoid(gamma: T, coef0: T) -> SigmoidKernel { - SigmoidKernel { - gamma: gamma, - coef0: coef0, - } + SigmoidKernel { gamma, coef0 } } /// Sigmoid kernel /// * `gamma` - kernel coefficient pub fn sigmoid_with_gamma(gamma: T) -> SigmoidKernel { SigmoidKernel { - gamma: gamma, + gamma, coef0: T::one(), } } diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 119b812..4fd70df 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -173,9 +173,9 @@ impl, K: Kernel> SVC { let (n, _) = x.shape(); if n != y.len() { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let classes = y.unique(); 
@@ -204,11 +204,11 @@ impl, K: Kernel> SVC { let (support_vectors, weight, b) = optimizer.optimize(); Ok(SVC { - classes: classes, - kernel: kernel, + classes, + kernel, instances: support_vectors, w: weight, - b: b, + b, }) } @@ -251,7 +251,7 @@ impl, K: Kernel> PartialEq for SVC< || self.w.len() != other.w.len() || self.instances.len() != other.instances.len() { - return false; + false } else { for i in 0..self.w.len() { if (self.w[i] - other.w[i]).abs() > T::epsilon() { @@ -263,7 +263,7 @@ impl, K: Kernel> PartialEq for SVC< return false; } } - return true; + true } } } @@ -278,12 +278,12 @@ impl> SupportVector { }; SupportVector { index: i, - x: x, + x, grad: g, k: k_v, alpha: T::zero(), - cmin: cmin, - cmax: cmax, + cmin, + cmax, } } } @@ -291,7 +291,7 @@ impl> SupportVector { impl<'a, T: RealNumber, M: Matrix, K: Kernel> Cache<'a, T, M, K> { fn new(kernel: &'a K) -> Cache<'a, T, M, K> { Cache { - kernel: kernel, + kernel, data: HashMap::new(), phantom: PhantomData, } @@ -300,11 +300,12 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Cache<'a, T, M fn get(&mut self, i: &SupportVector, j: &SupportVector) -> T { let idx_i = i.index; let idx_j = j.index; - if !self.data.contains_key(&(idx_i, idx_j)) { - let v = self.kernel.apply(&i.x, &j.x); - self.data.insert((idx_i, idx_j), v); - } - *self.data.get(&(idx_i, idx_j)).unwrap() + #[allow(clippy::or_fun_call)] + let entry = self + .data + .entry((idx_i, idx_j)) + .or_insert(self.kernel.apply(&i.x, &j.x)); + *entry } fn insert(&mut self, key: (usize, usize), value: T) { @@ -326,8 +327,8 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, let (n, _) = x.shape(); Optimizer { - x: x, - y: y, + x, + y, parameters: &parameters, svmin: 0, svmax: 0, @@ -335,7 +336,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, gmax: T::min_value(), tau: T::from_f64(1e-12).unwrap(), sv: Vec::with_capacity(n), - kernel: kernel, + kernel, recalculate_minmax_grad: true, } } @@ -378,7 +379,7 @@ impl<'a, T: RealNumber,
M: Matrix, K: Kernel> Optimizer<'a, (support_vectors, w, b) } - fn initialize(&mut self, cache: &mut Cache) { + fn initialize(&mut self, cache: &mut Cache<'_, T, M, K>) { let (n, _) = self.x.shape(); let few = 5; let mut cp = 0; @@ -389,10 +390,11 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, if self.process(i, self.x.get_row(i), self.y.get(i), cache) { cp += 1; } - } else if self.y.get(i) == -T::one() && cn < few { - if self.process(i, self.x.get_row(i), self.y.get(i), cache) { - cn += 1; - } + } else if self.y.get(i) == -T::one() + && cn < few + && self.process(i, self.x.get_row(i), self.y.get(i), cache) + { + cn += 1; } if cp >= few && cn >= few { @@ -401,7 +403,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, } } - fn process(&mut self, i: usize, x: M::RowVector, y: T, cache: &mut Cache) -> bool { + fn process(&mut self, i: usize, x: M::RowVector, y: T, cache: &mut Cache<'_, T, M, K>) -> bool { for j in 0..self.sv.len() { if self.sv[j].index == i { return true; @@ -420,10 +422,10 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.find_min_max_gradient(); - if self.gmin < self.gmax { - if (y > T::zero() && g < self.gmin) || (y < T::zero() && g > self.gmax) { - return false; - } + if self.gmin < self.gmax + && ((y > T::zero() && g < self.gmin) || (y < T::zero() && g > self.gmax)) + { + return false; } for v in cache_values { @@ -444,13 +446,13 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, true } - fn reprocess(&mut self, tol: T, cache: &mut Cache) -> bool { + fn reprocess(&mut self, tol: T, cache: &mut Cache<'_, T, M, K>) -> bool { let status = self.smo(None, None, tol, cache); self.clean(cache); status } - fn finish(&mut self, cache: &mut Cache) { + fn finish(&mut self, cache: &mut Cache<'_, T, M, K>) { let mut max_iter = self.sv.len(); while self.smo(None, None, self.parameters.tol, cache) && max_iter > 0 { @@ -485,7 +487,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, 
self.recalculate_minmax_grad = false } - fn clean(&mut self, cache: &mut Cache) { + fn clean(&mut self, cache: &mut Cache<'_, T, M, K>) { self.find_min_max_gradient(); let gmax = self.gmax; @@ -494,13 +496,12 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, let mut idxs_to_drop: HashSet = HashSet::new(); self.sv.retain(|v| { - if v.alpha == T::zero() { - if (v.grad >= gmax && T::zero() >= v.cmax) - || (v.grad <= gmin && T::zero() <= v.cmin) - { - idxs_to_drop.insert(v.index); - return false; - } + if v.alpha == T::zero() + && ((v.grad >= gmax && T::zero() >= v.cmax) + || (v.grad <= gmin && T::zero() <= v.cmin)) + { + idxs_to_drop.insert(v.index); + return false; }; true }); @@ -520,7 +521,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, &mut self, idx_1: Option, idx_2: Option, - cache: &mut Cache, + cache: &mut Cache<'_, T, M, K>, ) -> Option<(usize, usize, T)> { match (idx_1, idx_2) { (None, None) => { @@ -561,7 +562,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, ( idx_1, idx_2, - k_v_12.unwrap_or(self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x)), + k_v_12.unwrap_or_else(|| { + self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x) + }), ) }) } @@ -597,7 +600,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, ( idx_1, idx_2, - k_v_12.unwrap_or(self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x)), + k_v_12.unwrap_or_else(|| { + self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x) + }), ) }) } @@ -614,7 +619,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, idx_1: Option, idx_2: Option, tol: T, - cache: &mut Cache, + cache: &mut Cache<'_, T, M, K>, ) -> bool { match self.select_pair(idx_1, idx_2, cache) { Some((idx_1, idx_2, k_v_12)) => { @@ -647,13 +652,13 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.update(idx_1, idx_2, step, cache); - return self.gmax - self.gmin > tol; + self.gmax - self.gmin > tol } None => false, } } - fn update(&mut self, v1: 
usize, v2: usize, step: T, cache: &mut Cache) { + fn update(&mut self, v1: usize, v2: usize, step: T, cache: &mut Cache<'_, T, M, K>) { self.sv[v1].alpha -= step; self.sv[v2].alpha += step; diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 61feb80..5d007d7 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -160,9 +160,9 @@ impl, K: Kernel> SVR { let (n, _) = x.shape(); if n != y.len() { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let optimizer = Optimizer::new(x, y, &kernel, ¶meters); @@ -170,10 +170,10 @@ impl, K: Kernel> SVR { let (support_vectors, weight, b) = optimizer.smo(); Ok(SVR { - kernel: kernel, + kernel, instances: support_vectors, w: weight, - b: b, + b, }) } @@ -198,7 +198,7 @@ impl, K: Kernel> SVR { f += self.w[i] * self.kernel.apply(&x, &self.instances[i]); } - return f; + f } } @@ -208,7 +208,7 @@ impl, K: Kernel> PartialEq for SVR< || self.w.len() != other.w.len() || self.instances.len() != other.instances.len() { - return false; + false } else { for i in 0..self.w.len() { if (self.w[i] - other.w[i]).abs() > T::epsilon() { @@ -220,7 +220,7 @@ impl, K: Kernel> PartialEq for SVR< return false; } } - return true; + true } } } @@ -230,7 +230,7 @@ impl> SupportVector { let k_v = k.apply(&x, &x); SupportVector { index: i, - x: x, + x, grad: [eps + y, eps - y], k: k_v, alpha: [T::zero(), T::zero()], @@ -270,7 +270,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, gmaxindex: 0, tau: T::from_f64(1e-12).unwrap(), sv: support_vectors, - kernel: kernel, + kernel, } } @@ -392,11 +392,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v2].alpha[j] = T::zero(); self.sv[v1].alpha[i] = diff; } - } else { - if self.sv[v1].alpha[i] < T::zero() { - self.sv[v1].alpha[i] = T::zero(); - self.sv[v2].alpha[j] = -diff; - } + } else if self.sv[v1].alpha[i] < T::zero() { + 
self.sv[v1].alpha[i] = T::zero(); + self.sv[v2].alpha[j] = -diff; } if diff > T::zero() { @@ -404,11 +402,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v1].alpha[i] = self.c; self.sv[v2].alpha[j] = self.c - diff; } - } else { - if self.sv[v2].alpha[j] > self.c { - self.sv[v2].alpha[j] = self.c; - self.sv[v1].alpha[i] = self.c + diff; - } + } else if self.sv[v2].alpha[j] > self.c { + self.sv[v2].alpha[j] = self.c; + self.sv[v1].alpha[i] = self.c + diff; } } else { let delta = (self.sv[v1].grad[i] - self.sv[v2].grad[j]) / curv; @@ -421,11 +417,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v1].alpha[i] = self.c; self.sv[v2].alpha[j] = sum - self.c; } - } else { - if self.sv[v2].alpha[j] < T::zero() { - self.sv[v2].alpha[j] = T::zero(); - self.sv[v1].alpha[i] = sum; - } + } else if self.sv[v2].alpha[j] < T::zero() { + self.sv[v2].alpha[j] = T::zero(); + self.sv[v1].alpha[i] = sum; } if sum > self.c { @@ -433,11 +427,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v2].alpha[j] = self.c; self.sv[v1].alpha[i] = sum - self.c; } - } else { - if self.sv[v1].alpha[i] < T::zero() { - self.sv[v1].alpha[i] = T::zero(); - self.sv[v2].alpha[j] = sum; - } + } else if self.sv[v1].alpha[i] < T::zero() { + self.sv[v1].alpha[i] = T::zero(); + self.sv[v2].alpha[j] = sum; } } @@ -477,7 +469,7 @@ impl Cache { } } - fn get Vec>(&self, i: usize, or: F) -> Ref> { + fn get Vec>(&self, i: usize, or: F) -> Ref<'_, Vec> { if self.data[i].borrow().is_none() { self.data[i].replace(Some(or())); } diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 25704e6..9fe1b1a 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -126,7 +126,7 @@ impl PartialEq for DecisionTreeClassifier { || self.num_classes != other.num_classes || self.nodes.len() != other.nodes.len() { - return false; + false } else { for i in 0..self.classes.len() { if 
(self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -138,7 +138,7 @@ impl PartialEq for DecisionTreeClassifier { return false; } } - return true; + true } } } @@ -174,8 +174,8 @@ impl Default for DecisionTreeClassifierParameters { impl Node { fn new(index: usize, output: usize) -> Self { Node { - index: index, - output: output, + index, + output, split_feature: 0, split_value: Option::None, split_score: Option::None, @@ -206,7 +206,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: us for i in 0..count.len() { if count[i] > 0 { let p = T::from(count[i]).unwrap() / T::from(n).unwrap(); - impurity = impurity - p * p; + impurity -= p * p; } } } @@ -215,7 +215,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: us for i in 0..count.len() { if count[i] > 0 { let p = T::from(count[i]).unwrap() / T::from(n).unwrap(); - impurity = impurity - p * p.log2(); + impurity -= p * p.log2(); } } } @@ -229,7 +229,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: us } } - return impurity; + impurity } impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { @@ -242,14 +242,14 @@ impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { level: u16, ) -> Self { NodeVisitor { - x: x, - y: y, + x, + y, node: node_id, - samples: samples, - order: order, + samples, + order, true_child_output: 0, false_child_output: 0, - level: level, + level, phantom: PhantomData, } } @@ -266,7 +266,7 @@ pub(in crate) fn which_max(x: &Vec) -> usize { } } - return which; + which } impl DecisionTreeClassifier { @@ -325,16 +325,16 @@ impl DecisionTreeClassifier { } let mut tree = DecisionTreeClassifier { - nodes: nodes, - parameters: parameters, + nodes, + parameters, num_classes: k, - classes: classes, + classes, depth: 0, }; let mut visitor = NodeVisitor::::new(0, samples, &order, &x, &yi, 1); - let mut visitor_queue: LinkedList> = LinkedList::new(); + let mut visitor_queue: LinkedList> = LinkedList::new(); if tree.find_best_cutoff(&mut visitor, mtry) { 
visitor_queue.push_back(visitor); @@ -376,24 +376,24 @@ impl DecisionTreeClassifier { let node = &self.nodes[node_id]; if node.true_child == None && node.false_child == None { result = node.output; + } else if x.get(row, node.split_feature) + <= node.split_value.unwrap_or_else(T::nan) + { + queue.push_back(node.true_child.unwrap()); } else { - if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) { - queue.push_back(node.true_child.unwrap()); - } else { - queue.push_back(node.false_child.unwrap()); - } + queue.push_back(node.false_child.unwrap()); } } None => break, }; } - return result; + result } fn find_best_cutoff>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, mtry: usize, ) -> bool { let (n_rows, n_attr) = visitor.x.shape(); @@ -456,7 +456,7 @@ impl DecisionTreeClassifier { fn find_best_split>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, n: usize, count: &Vec, false_count: &mut Vec, @@ -530,7 +530,7 @@ impl DecisionTreeClassifier { for i in 0..n { if visitor.samples[i] > 0 { if visitor.x.get(i, self.nodes[visitor.node].split_feature) - <= self.nodes[visitor.node].split_value.unwrap_or(T::nan()) + <= self.nodes[visitor.node].split_value.unwrap_or_else(T::nan) { true_samples[i] = visitor.samples[i]; tc += true_samples[i]; diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 0f88d4d..c30c9e2 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -113,8 +113,8 @@ impl Default for DecisionTreeRegressorParameters { impl Node { fn new(index: usize, output: T) -> Self { Node { - index: index, - output: output, + index, + output, split_feature: 0, split_value: Option::None, split_score: Option::None, @@ -144,14 +144,14 @@ impl PartialEq for Node { impl PartialEq for DecisionTreeRegressor { fn eq(&self, other: &Self) -> bool { if self.depth != other.depth || self.nodes.len() != other.nodes.len() { - 
return false; + false } else { for i in 0..self.nodes.len() { if self.nodes[i] != other.nodes[i] { return false; } } - return true; + true } } } @@ -177,14 +177,14 @@ impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { level: u16, ) -> Self { NodeVisitor { - x: x, - y: y, + x, + y, node: node_id, - samples: samples, - order: order, + samples, + order, true_child_output: T::zero(), false_child_output: T::zero(), - level: level, + level, } } } @@ -221,7 +221,7 @@ impl DecisionTreeRegressor { let mut sum = T::zero(); for i in 0..y_ncols { n += samples[i]; - sum = sum + T::from(samples[i]).unwrap() * y_m.get(0, i); + sum += T::from(samples[i]).unwrap() * y_m.get(0, i); } let root = Node::new(0, sum / T::from(n).unwrap()); @@ -233,14 +233,14 @@ impl DecisionTreeRegressor { } let mut tree = DecisionTreeRegressor { - nodes: nodes, - parameters: parameters, + nodes, + parameters, depth: 0, }; let mut visitor = NodeVisitor::::new(0, samples, &order, &x, &y_m, 1); - let mut visitor_queue: LinkedList> = LinkedList::new(); + let mut visitor_queue: LinkedList> = LinkedList::new(); if tree.find_best_cutoff(&mut visitor, mtry) { visitor_queue.push_back(visitor); @@ -282,24 +282,24 @@ impl DecisionTreeRegressor { let node = &self.nodes[node_id]; if node.true_child == None && node.false_child == None { result = node.output; + } else if x.get(row, node.split_feature) + <= node.split_value.unwrap_or_else(T::nan) + { + queue.push_back(node.true_child.unwrap()); } else { - if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) { - queue.push_back(node.true_child.unwrap()); - } else { - queue.push_back(node.false_child.unwrap()); - } + queue.push_back(node.false_child.unwrap()); } } None => break, }; } - return result; + result } fn find_best_cutoff>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, mtry: usize, ) -> bool { let (_, n_attr) = visitor.x.shape(); @@ -333,7 +333,7 @@ impl DecisionTreeRegressor { fn find_best_split>( 
&mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, n: usize, sum: T, parent_gain: T, @@ -348,8 +348,7 @@ impl DecisionTreeRegressor { if prevx.is_nan() || visitor.x.get(*i, j) == prevx { prevx = visitor.x.get(*i, j); true_count += visitor.samples[*i]; - true_sum = - true_sum + T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); + true_sum += T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); continue; } @@ -360,8 +359,7 @@ impl DecisionTreeRegressor { { prevx = visitor.x.get(*i, j); true_count += visitor.samples[*i]; - true_sum = - true_sum + T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); + true_sum += T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); continue; } @@ -384,7 +382,7 @@ impl DecisionTreeRegressor { } prevx = visitor.x.get(*i, j); - true_sum = true_sum + T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); + true_sum += T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); true_count += visitor.samples[*i]; } } @@ -404,7 +402,7 @@ impl DecisionTreeRegressor { for i in 0..n { if visitor.samples[i] > 0 { if visitor.x.get(i, self.nodes[visitor.node].split_feature) - <= self.nodes[visitor.node].split_value.unwrap_or(T::nan()) + <= self.nodes[visitor.node].split_value.unwrap_or_else(T::nan) { true_samples[i] = visitor.samples[i]; tc += true_samples[i];