From ba03ef4678345229ad5f04c8986049f6823defcb Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Fri, 6 Nov 2020 19:41:32 -0400 Subject: [PATCH 01/21] Add clippy CI job --- .circleci/config.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4ed3135..dd616af 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,5 +1,11 @@ version: 2.1 +workflows: + version: 2.1 + build: + jobs: + - build + - clippy jobs: build: docker: @@ -24,3 +30,14 @@ jobs: paths: - "~/.cargo" - "./target" + clippy: + docker: + - image: circleci/rust:latest + steps: + - checkout + - run: + name: Install cargo clippy + command: rustup component add clippy + - run: + name: Run cargo clippy + command: cargo clippy From 8281a1620ebbef9e527408f4813319288e69d9fe Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Fri, 6 Nov 2020 20:24:14 -0400 Subject: [PATCH 02/21] Fix clippy errors --- src/algorithm/neighbour/cover_tree.rs | 2 +- src/cluster/kmeans.rs | 2 +- src/dataset/mod.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index da870d2..bbd7254 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -101,7 +101,7 @@ impl> CoverTree /// * `p` - look for k nearest points to `p` /// * `k` - the number of nearest neighbors to return pub fn find(&self, p: &T, k: usize) -> Result, Failed> { - if k <= 0 { + if k == 0 { return Err(Failed::because(FailedError::FindFailed, "k should be > 0")); } diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index eff65aa..278024f 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -129,7 +129,7 @@ impl KMeans { return Err(Failed::fit(&format!("invalid number of clusters: {}", k))); } - if parameters.max_iter <= 0 { + if parameters.max_iter == 0 { return Err(Failed::fit(&format!( "invalid maximum number of iterations: {}", parameters.max_iter diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index bfcd1c9..85829fe 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -56,8 +56,8 @@ pub(crate) fn serialize_data( ) -> Result<(), io::Error> { match File::create(filename) { Ok(mut file) => { - file.write(&dataset.num_features.to_le_bytes())?; - file.write(&dataset.num_samples.to_le_bytes())?; + file.write_all(&dataset.num_features.to_le_bytes())?; + file.write_all(&dataset.num_samples.to_le_bytes())?; let x: Vec = dataset .data .iter() From 860056c3bab6a6c8ace5bf45c9938f2a1f8e3bf3 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 19:39:11 -0400 Subject: [PATCH 03/21] Run: cargo clippy --fix -Z unstable-options and cargo fmt --- src/algorithm/neighbour/bbd_tree.rs | 22 +++-- src/algorithm/neighbour/cover_tree.rs | 27 +++--- src/algorithm/neighbour/linear_search.rs | 10 +-- src/algorithm/neighbour/mod.rs | 4 +- src/algorithm/sort/heap_select.rs | 8 +- src/cluster/dbscan.rs | 4 +- src/cluster/kmeans.rs | 18 ++-- src/dataset/boston.rs | 4 +- src/dataset/breast_cancer.rs | 4 +- src/dataset/diabetes.rs | 4 +- src/dataset/digits.rs | 4 +- src/dataset/generator.rs | 6 +- src/dataset/iris.rs | 4 +- src/dataset/mod.rs | 4 +- src/decomposition/pca.rs | 14 +-- src/ensemble/random_forest_classifier.rs | 8 +- src/ensemble/random_forest_regressor.rs | 9 +- src/error/mod.rs | 2 +- src/linalg/cholesky.rs | 14 ++- src/linalg/evd.rs | 90 +++++++++---------- src/linalg/lu.rs | 16 ++-- src/linalg/mod.rs | 2 +- src/linalg/naive/dense_matrix.rs | 80 ++++++++--------- src/linalg/qr.rs | 16 ++-- src/linalg/svd.rs | 40 ++++----- src/linear/linear_regression.rs | 6 +- src/linear/logistic_regression.rs | 36 ++++---- src/math/distance/euclidian.rs | 2 +- src/math/distance/mahalanobis.rs | 10 +-- src/math/distance/manhattan.rs | 2 +- src/math/distance/minkowski.rs | 2 +- src/math/distance/mod.rs | 2 +- src/math/num.rs | 20 ++--- src/metrics/auc.rs | 6 +- src/metrics/cluster_helpers.rs | 7 +- src/metrics/mean_absolute_error.rs | 2 +- src/metrics/mean_squared_error.rs | 2 +- src/metrics/mod.rs | 2 +- src/metrics/r2.rs | 8 +- src/model_selection/mod.rs | 8 +- src/neighbors/knn_classifier.rs | 8 +- src/neighbors/knn_regressor.rs | 6 +- .../first_order/gradient_descent.rs | 4 +- src/svm/mod.rs | 15 ++-- src/svm/svc.rs | 62 ++++++------- src/svm/svr.rs | 52 +++++------ src/tree/decision_tree_classifier.rs | 43 +++++---- src/tree/decision_tree_regressor.rs | 43 +++++---- 48 files changed, 367 insertions(+), 395 deletions(-) diff --git a/src/algorithm/neighbour/bbd_tree.rs b/src/algorithm/neighbour/bbd_tree.rs index cc71f54..632da86 100644 --- a/src/algorithm/neighbour/bbd_tree.rs +++ b/src/algorithm/neighbour/bbd_tree.rs @@ -50,8 +50,8 @@ impl BBDTree { } let mut tree = BBDTree { - nodes: nodes, - index: index, + nodes, + index, root: 0, }; @@ -113,7 +113,7 @@ impl BBDTree { } } - if !self.nodes[node].lower.is_none() { + if self.nodes[node].lower.is_some() { let mut new_candidates = vec![0; k]; let mut newk = 0; @@ -152,7 +152,7 @@ impl BBDTree { } for i in 0..d { - sums[closest][i] = sums[closest][i] + self.nodes[node].sum[i]; + sums[closest][i] += self.nodes[node].sum[i]; } counts[closest] += self.nodes[node].count; @@ -184,11 +184,11 @@ impl BBDTree { let mut rhs = T::zero(); for i in 0..d { let diff = test[i] - best[i]; - lhs = lhs + diff * diff; + lhs += diff * diff; if diff > T::zero() { - rhs = rhs + (center[i] + radius[i] - best[i]) * diff; + rhs += (center[i] + radius[i] - best[i]) * diff; } else { - rhs = rhs + (center[i] - radius[i] - best[i]) * diff; + rhs += (center[i] - radius[i] - best[i]) * diff; } } @@ -244,7 +244,7 @@ impl BBDTree { if end > begin + 1 { let len = end - begin; for i in 0..d { - node.sum[i] = node.sum[i] * T::from(len).unwrap(); + node.sum[i] *= T::from(len).unwrap(); } } @@ -261,9 +261,7 @@ impl BBDTree { let mut i2_good = data.get(self.index[i2], split_index) >= split_cutoff; if !i1_good && !i2_good { - let temp = self.index[i1]; - self.index[i1] = self.index[i2]; - self.index[i2] = temp; + self.index.swap(i1, i2); i1_good = true; i2_good = true; } @@ -302,7 +300,7 @@ impl BBDTree { let mut scatter = T::zero(); for i in 0..d { let x = (node.sum[i] / T::from(node.count).unwrap()) - center[i]; - scatter = scatter + x * x; + scatter += x * x; } node.cost + T::from(node.count).unwrap() * scatter } diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index bbd7254..e7dbac0 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -51,7 +51,7 @@ impl> PartialEq for CoverTree { return false; } } - return true; + true } } @@ -84,11 +84,11 @@ impl> CoverTree scale: 0, }; let mut tree = CoverTree { - base: base, + base, inv_log_base: F::one() / base.ln(), - distance: distance, - root: root, - data: data, + distance, + root, + data, identical_excluded: false, }; @@ -147,10 +147,11 @@ impl> CoverTree *heap.peek() }; if d <= (upper_bound + child.max_dist) { - if c > 0 && d < upper_bound { - if !self.identical_excluded || self.get_data_value(child.idx) != p { - heap.add(d); - } + if c > 0 + && d < upper_bound + && (!self.identical_excluded || self.get_data_value(child.idx) != p) + { + heap.add(d); } if !child.children.is_empty() { @@ -234,7 +235,7 @@ impl> CoverTree fn new_leaf(&self, idx: usize) -> Node { Node { - idx: idx, + idx, max_dist: F::zero(), parent_dist: F::zero(), children: Vec::new(), @@ -298,7 +299,7 @@ impl> CoverTree idx: p, max_dist: F::zero(), parent_dist: F::zero(), - children: children, + children, scale: 100, } } else { @@ -368,7 +369,7 @@ impl> CoverTree idx: p, max_dist: self.max(consumed_set), parent_dist: F::zero(), - children: children, + children, scale: (top_scale - max_scale), } } @@ -442,7 +443,7 @@ impl> CoverTree max = n.dist[n.dist.len() - 1]; } } - return max; + max } } diff --git a/src/algorithm/neighbour/linear_search.rs b/src/algorithm/neighbour/linear_search.rs index e89a793..d09f2ed 100644 --- a/src/algorithm/neighbour/linear_search.rs +++ b/src/algorithm/neighbour/linear_search.rs @@ -44,8 +44,8 @@ impl> LinearKNNSearch { /// * `distance` - distance metric to use for searching. This function should extend [`Distance`](../../../math/distance/index.html) interface. pub fn new(data: Vec, distance: D) -> Result, Failed> { Ok(LinearKNNSearch { - data: data, - distance: distance, + data, + distance, f: PhantomData, }) } @@ -157,7 +157,7 @@ mod tests { .iter() .map(|v| v.0) .collect(); - found_idxs1.sort(); + found_idxs1.sort_unstable(); assert_eq!(vec!(0, 1, 2), found_idxs1); @@ -167,7 +167,7 @@ mod tests { .iter() .map(|v| *v.2) .collect(); - found_idxs1.sort(); + found_idxs1.sort_unstable(); assert_eq!(vec!(2, 3, 4, 5, 6, 7, 8), found_idxs1); @@ -187,7 +187,7 @@ mod tests { .iter() .map(|v| v.0) .collect(); - found_idxs2.sort(); + found_idxs2.sort_unstable(); assert_eq!(vec!(1, 2, 3), found_idxs2); } diff --git a/src/algorithm/neighbour/mod.rs b/src/algorithm/neighbour/mod.rs index 0a4f21a..7ef1c5c 100644 --- a/src/algorithm/neighbour/mod.rs +++ b/src/algorithm/neighbour/mod.rs @@ -66,10 +66,10 @@ impl KNNAlgorithmName { ) -> Result, Failed> { match *self { KNNAlgorithmName::LinearSearch => { - LinearKNNSearch::new(data, distance).map(|a| KNNAlgorithm::LinearSearch(a)) + LinearKNNSearch::new(data, distance).map(KNNAlgorithm::LinearSearch) } KNNAlgorithmName::CoverTree => { - CoverTree::new(data, distance).map(|a| KNNAlgorithm::CoverTree(a)) + CoverTree::new(data, distance).map(KNNAlgorithm::CoverTree) } } } diff --git a/src/algorithm/sort/heap_select.rs b/src/algorithm/sort/heap_select.rs index ae3ff18..a44b2bb 100644 --- a/src/algorithm/sort/heap_select.rs +++ b/src/algorithm/sort/heap_select.rs @@ -15,7 +15,7 @@ pub struct HeapSelection { impl<'a, T: PartialOrd + Debug> HeapSelection { pub fn with_capacity(k: usize) -> HeapSelection { HeapSelection { - k: k, + k, n: 0, sorted: false, heap: Vec::new(), @@ -51,7 +51,7 @@ impl<'a, T: PartialOrd + Debug> HeapSelection { pub fn peek(&self) -> &T { if self.sorted { - return &self.heap[0]; + &self.heap[0] } else { &self .heap @@ -62,11 +62,11 @@ impl<'a, T: PartialOrd + Debug> HeapSelection { } pub fn peek_mut(&mut self) -> &mut T { - return &mut self.heap[0]; + &mut self.heap[0] } pub fn get(self) -> Vec { - return self.heap; + self.heap } fn sift_down(&mut self, k: usize, n: usize) { diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 488a7ac..787d8d3 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -93,11 +93,11 @@ impl, T>> DBSCAN { parameters: DBSCANParameters, ) -> Result, Failed> { if parameters.min_samples < 1 { - return Err(Failed::fit(&format!("Invalid minPts"))); + return Err(Failed::fit(&"Invalid minPts".to_string())); } if parameters.eps <= T::zero() { - return Err(Failed::fit(&format!("Invalid radius: "))); + return Err(Failed::fit(&"Invalid radius: ".to_string())); } let mut k = 0; diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 278024f..0da8a72 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -149,13 +149,13 @@ impl KMeans { for i in 0..n { for j in 0..d { - centroids[y[i]][j] = centroids[y[i]][j] + data.get(i, j); + centroids[y[i]][j] += data.get(i, j); } } for i in 0..k { for j in 0..d { - centroids[i][j] = centroids[i][j] / T::from(size[i]).unwrap(); + centroids[i][j] /= T::from(size[i]).unwrap(); } } @@ -178,11 +178,11 @@ impl KMeans { } Ok(KMeans { - k: k, - y: y, - size: size, - distortion: distortion, - centroids: centroids, + k, + y, + size, + distortion, + centroids, }) } @@ -235,13 +235,13 @@ impl KMeans { let mut sum: T = T::zero(); for i in d.iter() { - sum = sum + *i; + sum += *i; } let cutoff = T::from(rng.gen::()).unwrap() * sum; let mut cost = T::zero(); let mut index = 0; while index < n { - cost = cost + d[index]; + cost += d[index]; if cost >= cutoff { break; } diff --git a/src/dataset/boston.rs b/src/dataset/boston.rs index 2a0d30e..33f7700 100644 --- a/src/dataset/boston.rs +++ b/src/dataset/boston.rs @@ -38,8 +38,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", diff --git a/src/dataset/breast_cancer.rs b/src/dataset/breast_cancer.rs index 0a88f31..e469794 100644 --- a/src/dataset/breast_cancer.rs +++ b/src/dataset/breast_cancer.rs @@ -40,8 +40,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "mean radius", "mean texture", "mean perimeter", "mean area", "mean smoothness", "mean compactness", "mean concavity", diff --git a/src/dataset/diabetes.rs b/src/dataset/diabetes.rs index 352fd46..2a3e20c 100644 --- a/src/dataset/diabetes.rs +++ b/src/dataset/diabetes.rs @@ -33,8 +33,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6", ] diff --git a/src/dataset/digits.rs b/src/dataset/digits.rs index 10068ab..fd643d5 100644 --- a/src/dataset/digits.rs +++ b/src/dataset/digits.rs @@ -23,8 +23,8 @@ pub fn load_dataset() -> Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "sepal length (cm)", "sepal width (cm)", diff --git a/src/dataset/generator.rs b/src/dataset/generator.rs index fd4f400..2514134 100644 --- a/src/dataset/generator.rs +++ b/src/dataset/generator.rs @@ -39,8 +39,8 @@ pub fn make_blobs( Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: (0..num_features).map(|n| n.to_string()).collect(), target_names: vec!["label".to_string()], description: "Isotropic Gaussian blobs".to_string(), @@ -82,7 +82,7 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset Dataset { Dataset { data: x, target: y, - num_samples: num_samples, - num_features: num_features, + num_samples, + num_features, feature_names: vec![ "sepal length (cm)", "sepal width (cm)", diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index 85829fe..da790b4 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -61,14 +61,14 @@ pub(crate) fn serialize_data( let x: Vec = dataset .data .iter() - .map(|v| *v) + .copied() .flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec().into_iter()) .collect(); file.write_all(&x)?; let y: Vec = dataset .target .iter() - .map(|v| *v) + .copied() .flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec().into_iter()) .collect(); file.write_all(&y)?; diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs index f66ca9b..f25aaad 100644 --- a/src/decomposition/pca.rs +++ b/src/decomposition/pca.rs @@ -68,14 +68,14 @@ impl> PartialEq for PCA { if self.eigenvectors != other.eigenvectors || self.eigenvalues.len() != other.eigenvalues.len() { - return false; + false } else { for i in 0..self.eigenvalues.len() { if (self.eigenvalues[i] - other.eigenvalues[i]).abs() > T::epsilon() { return false; } } - return true; + true } } } @@ -190,16 +190,16 @@ impl> PCA { let mut pmu = vec![T::zero(); n_components]; for k in 0..n { for i in 0..n_components { - pmu[i] = pmu[i] + projection.get(i, k) * mu[k]; + pmu[i] += projection.get(i, k) * mu[k]; } } Ok(PCA { - eigenvectors: eigenvectors, - eigenvalues: eigenvalues, + eigenvectors, + eigenvalues, projection: projection.transpose(), - mu: mu, - pmu: pmu, + mu, + pmu, }) } diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index add6079..0cfebf1 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -89,7 +89,7 @@ pub struct RandomForestClassifier { impl PartialEq for RandomForestClassifier { fn eq(&self, other: &Self) -> bool { if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -164,8 +164,8 @@ impl RandomForestClassifier { } Ok(RandomForestClassifier { - parameters: parameters, - trees: trees, + parameters, + trees, classes, }) } @@ -191,7 +191,7 @@ impl RandomForestClassifier { result[tree.predict_for_row(x, row)] += 1; } - return which_max(&result); + which_max(&result) } fn sample_with_replacement(y: &Vec, num_classes: usize) -> Vec { diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index d25c850..c704a8f 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -95,7 +95,7 @@ impl Default for RandomForestRegressorParameters { impl PartialEq for RandomForestRegressor { fn eq(&self, other: &Self) -> bool { if self.trees.len() != other.trees.len() { - return false; + false } else { for i in 0..self.trees.len() { if self.trees[i] != other.trees[i] { @@ -135,10 +135,7 @@ impl RandomForestRegressor { trees.push(tree); } - Ok(RandomForestRegressor { - parameters: parameters, - trees: trees, - }) + Ok(RandomForestRegressor { parameters, trees }) } /// Predict class for `x` @@ -161,7 +158,7 @@ impl RandomForestRegressor { let mut result = T::zero(); for tree in self.trees.iter() { - result = result + tree.predict_for_row(x, row); + result += tree.predict_for_row(x, row); } result / T::from(n_trees).unwrap() diff --git a/src/error/mod.rs b/src/error/mod.rs index c411e87..679f685 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -61,7 +61,7 @@ impl Failed { /// new instance of `err` pub fn because(err: FailedError, msg: &str) -> Self { Failed { - err: err, + err, msg: msg.to_string(), } } diff --git a/src/linalg/cholesky.rs b/src/linalg/cholesky.rs index e55d6bb..724dc8a 100644 --- a/src/linalg/cholesky.rs +++ b/src/linalg/cholesky.rs @@ -46,10 +46,7 @@ pub struct Cholesky> { impl> Cholesky { pub(crate) fn new(R: M) -> Cholesky { - Cholesky { - R: R, - t: PhantomData, - } + Cholesky { R, t: PhantomData } } /// Get lower triangular matrix. @@ -90,7 +87,8 @@ impl> Cholesky { if bn != rn { return Err(Failed::because( FailedError::SolutionFailed, - &format!("Can't solve Ax = b for x. Number of rows in b != number of rows in R."), + &"Can\'t solve Ax = b for x. Number of rows in b != number of rows in R." + .to_string(), )); } @@ -130,7 +128,7 @@ pub trait CholeskyDecomposableMatrix: BaseMatrix { if m != n { return Err(Failed::because( FailedError::DecompositionFailed, - &format!("Can't do Cholesky decomposition on a non-square matrix"), + &"Can\'t do Cholesky decomposition on a non-square matrix".to_string(), )); } @@ -143,14 +141,14 @@ pub trait CholeskyDecomposableMatrix: BaseMatrix { } s = (self.get(j, k) - s) / self.get(k, k); self.set(j, k, s); - d = d + s * s; + d += s * s; } d = self.get(j, j) - d; if d < T::zero() { return Err(Failed::because( FailedError::DecompositionFailed, - &format!("The matrix is not positive definite."), + &"The matrix is not positive definite.".to_string(), )); } diff --git a/src/linalg/evd.rs b/src/linalg/evd.rs index 60602ce..c216696 100644 --- a/src/linalg/evd.rs +++ b/src/linalg/evd.rs @@ -93,7 +93,7 @@ pub trait EVDDecomposableMatrix: BaseMatrix { sort(&mut d, &mut e, &mut V); } - Ok(EVD { V: V, d: d, e: e }) + Ok(EVD { V, d, e }) } } @@ -107,7 +107,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec let mut scale = T::zero(); let mut h = T::zero(); for k in 0..i { - scale = scale + d[k].abs(); + scale += d[k].abs(); } if scale == T::zero() { e[i] = d[i - 1]; @@ -118,8 +118,8 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec } } else { for k in 0..i { - d[k] = d[k] / scale; - h = h + d[k] * d[k]; + d[k] /= scale; + h += d[k] * d[k]; } let mut f = d[i - 1]; let mut g = h.sqrt(); @@ -127,7 +127,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec g = -g; } e[i] = scale * g; - h = h - f * g; + h -= f * g; d[i - 1] = f - g; for j in 0..i { e[j] = T::zero(); @@ -138,19 +138,19 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec V.set(j, i, f); g = e[j] + V.get(j, j) * f; for k in j + 1..=i - 1 { - g = g + V.get(k, j) * d[k]; - e[k] = e[k] + V.get(k, j) * f; + g += V.get(k, j) * d[k]; + e[k] += V.get(k, j) * f; } e[j] = g; } f = T::zero(); for j in 0..i { - e[j] = e[j] / h; - f = f + e[j] * d[j]; + e[j] /= h; + f += e[j] * d[j]; } let hh = f / (h + h); for j in 0..i { - e[j] = e[j] - hh * d[j]; + e[j] -= hh * d[j]; } for j in 0..i { f = d[j]; @@ -176,7 +176,7 @@ fn tred2>(V: &mut M, d: &mut Vec, e: &mut Vec for j in 0..=i { let mut g = T::zero(); for k in 0..=i { - g = g + V.get(k, i + 1) * V.get(k, j); + g += V.get(k, i + 1) * V.get(k, j); } for k in 0..=i { V.sub_element_mut(k, j, g * d[k]); @@ -239,9 +239,9 @@ fn tql2>(V: &mut M, d: &mut Vec, e: &mut Vec< let dl1 = d[l + 1]; let mut h = g - d[l]; for i in l + 2..n { - d[i] = d[i] - h; + d[i] -= h; } - f = f + h; + f += h; p = d[m]; let mut c = T::one(); @@ -278,7 +278,7 @@ fn tql2>(V: &mut M, d: &mut Vec, e: &mut Vec< } } } - d[l] = d[l] + f; + d[l] += f; e[l] = T::zero(); } @@ -321,8 +321,8 @@ fn balance>(A: &mut M) -> Vec { let mut c = T::zero(); for j in 0..n { if j != i { - c = c + A.get(j, i).abs(); - r = r + A.get(i, j).abs(); + c += A.get(j, i).abs(); + r += A.get(i, j).abs(); } } if c != T::zero() && r != T::zero() { @@ -330,18 +330,18 @@ fn balance>(A: &mut M) -> Vec { let mut f = T::one(); let s = c + r; while c < g { - f = f * radix; - c = c * sqrdx; + f *= radix; + c *= sqrdx; } g = r * radix; while c > g { - f = f / radix; - c = c / sqrdx; + f /= radix; + c /= sqrdx; } if (c + r) / f < t * s { done = false; g = T::one() / f; - scale[i] = scale[i] * f; + scale[i] *= f; for j in 0..n { A.mul_element_mut(i, j, g); } @@ -353,7 +353,7 @@ fn balance>(A: &mut M) -> Vec { } } - return scale; + scale } fn elmhes>(A: &mut M) -> Vec { @@ -386,7 +386,7 @@ fn elmhes>(A: &mut M) -> Vec { for i in (m + 1)..n { let mut y = A.get(i, m - 1); if y != T::zero() { - y = y / x; + y /= x; A.set(i, m - 1, y); for j in m..n { A.sub_element_mut(i, j, y * A.get(m, j)); @@ -399,7 +399,7 @@ fn elmhes>(A: &mut M) -> Vec { } } - return perm; + perm } fn eltran>(A: &M, V: &mut M, perm: &Vec) { @@ -430,7 +430,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..n { for j in i32::max(i as i32 - 1, 0)..n as i32 { - anorm = anorm + A.get(i, j as usize).abs(); + anorm += A.get(i, j as usize).abs(); } } @@ -467,7 +467,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e p = T::half() * (y - x); q = p * p + w; z = q.abs().sqrt(); - x = x + t; + x += t; A.set(nn, nn, x); A.set(nn - 1, nn - 1, y + t); if q >= T::zero() { @@ -482,8 +482,8 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e p = x / s; q = z / s; r = (p * p + q * q).sqrt(); - p = p / r; - q = q / r; + p /= r; + q /= r; for j in nn - 1..n { z = A.get(nn - 1, j); A.set(nn - 1, j, q * z + p * A.get(nn, j)); @@ -516,7 +516,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e panic!("Too many iterations in hqr"); } if its == 10 || its == 20 { - t = t + x; + t += x; for i in 0..nn + 1 { A.sub_element_mut(i, i, x); } @@ -535,9 +535,9 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e q = A.get(m + 1, m + 1) - z - r - s; r = A.get(m + 2, m + 1); s = p.abs() + q.abs() + r.abs(); - p = p / s; - q = q / s; - r = r / s; + p /= s; + q /= s; + r /= s; if m == l { break; } @@ -565,9 +565,9 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e } x = p.abs() + q.abs() + r.abs(); if x != T::zero() { - p = p / x; - q = q / x; - r = r / x; + p /= x; + q /= x; + r /= x; } } let s = (p * p + q * q + r * r).sqrt().copysign(p); @@ -579,16 +579,16 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e } else { A.set(k, k - 1, -s * x); } - p = p + s; + p += s; x = p / s; y = q / s; z = r / s; - q = q / p; - r = r / p; + q /= p; + r /= p; for j in k..n { p = A.get(k, j) + q * A.get(k + 1, j); if k + 1 != nn { - p = p + r * A.get(k + 2, j); + p += r * A.get(k + 2, j); A.sub_element_mut(k + 2, j, p * z); } A.sub_element_mut(k + 1, j, p * y); @@ -603,7 +603,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..mmin + 1 { p = x * A.get(i, k) + y * A.get(i, k + 1); if k + 1 != nn { - p = p + z * A.get(i, k + 2); + p += z * A.get(i, k + 2); A.sub_element_mut(i, k + 2, p * r); } A.sub_element_mut(i, k + 1, p * q); @@ -612,7 +612,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..n { p = x * V.get(i, k) + y * V.get(i, k + 1); if k + 1 != nn { - p = p + z * V.get(i, k + 2); + p += z * V.get(i, k + 2); V.sub_element_mut(i, k + 2, p * r); } V.sub_element_mut(i, k + 1, p * q); @@ -642,7 +642,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e let w = A.get(i, i) - p; r = T::zero(); for j in m..=nn { - r = r + A.get(i, j) * A.get(j, nn); + r += A.get(i, j) * A.get(j, nn); } if e[i] < T::zero() { z = w; @@ -701,8 +701,8 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e let mut ra = T::zero(); let mut sa = T::zero(); for j in m..=nn { - ra = ra + A.get(i, j) * A.get(j, na); - sa = sa + A.get(i, j) * A.get(j, nn); + ra += A.get(i, j) * A.get(j, na); + sa += A.get(i, j) * A.get(j, nn); } if e[i] < T::zero() { z = w; @@ -766,7 +766,7 @@ fn hqr2>(A: &mut M, V: &mut M, d: &mut Vec, e for i in 0..n { z = T::zero(); for k in 0..=j { - z = z + V.get(i, k) * A.get(k, j); + z += V.get(i, k) * A.get(k, j); } V.set(i, j, z); } diff --git a/src/linalg/lu.rs b/src/linalg/lu.rs index a4cc58d..cbe195f 100644 --- a/src/linalg/lu.rs +++ b/src/linalg/lu.rs @@ -63,10 +63,10 @@ impl> LU { } LU { - LU: LU, - pivot: pivot, - pivot_sign: pivot_sign, - singular: singular, + LU, + pivot, + pivot_sign, + singular, phantom: PhantomData, } } @@ -220,10 +220,10 @@ pub trait LUDecomposableMatrix: BaseMatrix { let kmax = usize::min(i, j); let mut s = T::zero(); for k in 0..kmax { - s = s + self.get(i, k) * LUcolj[k]; + s += self.get(i, k) * LUcolj[k]; } - LUcolj[i] = LUcolj[i] - s; + LUcolj[i] -= s; self.set(i, j, LUcolj[i]); } @@ -239,9 +239,7 @@ pub trait LUDecomposableMatrix: BaseMatrix { self.set(p, k, self.get(j, k)); self.set(j, k, t); } - let k = piv[p]; - piv[p] = piv[j]; - piv[j] = k; + piv.swap(p, j); pivsign = -pivsign; } diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index fb12909..09a9687 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -517,7 +517,7 @@ pub trait Matrix: pub(crate) fn row_iter>(m: &M) -> RowIter { RowIter { - m: m, + m, pos: 0, max_pos: m.shape().0, phantom: PhantomData, diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index d3d6353..c1ba650 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -53,7 +53,7 @@ impl BaseVector for Vec { let mut result = T::zero(); for i in 0..self.len() { - result = result + self[i] * other[i]; + result += self[i] * other[i]; } result @@ -63,7 +63,7 @@ impl BaseVector for Vec { let mut norm = T::zero(); for xi in self.iter() { - norm = norm + *xi * *xi; + norm += *xi * *xi; } norm.sqrt() @@ -82,7 +82,7 @@ impl BaseVector for Vec { let mut norm = T::zero(); for xi in self.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -90,19 +90,19 @@ impl BaseVector for Vec { } fn div_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] / x; + self[pos] /= x; } fn mul_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] * x; + self[pos] *= x; } fn add_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] + x + self[pos] += x } fn sub_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] - x; + self[pos] -= x; } fn add_mut(&mut self, other: &Self) -> &Self { @@ -165,7 +165,7 @@ impl BaseVector for Vec { fn sum(&self) -> T { let mut sum = T::zero(); for i in 0..self.len() { - sum = sum + self[i]; + sum += self[i]; } sum } @@ -216,15 +216,15 @@ impl DenseMatrix { /// `values` should be in column-major order. pub fn new(nrows: usize, ncols: usize, values: Vec) -> Self { DenseMatrix { - ncols: ncols, - nrows: nrows, - values: values, + ncols, + nrows, + values, } } /// New instance of `DenseMatrix` from 2d array. pub fn from_2d_array(values: &[&[T]]) -> Self { - DenseMatrix::from_2d_vec(&values.into_iter().map(|row| Vec::from(*row)).collect()) + DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect()) } /// New instance of `DenseMatrix` from 2d vector. @@ -235,8 +235,8 @@ impl DenseMatrix { .unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector")) .len(); let mut m = DenseMatrix { - ncols: ncols, - nrows: nrows, + ncols, + nrows, values: vec![T::zero(); ncols * nrows], }; for row in 0..nrows { @@ -261,8 +261,8 @@ impl DenseMatrix { /// * `values` - values to initialize the matrix. pub fn from_vec(nrows: usize, ncols: usize, values: &Vec) -> DenseMatrix { let mut m = DenseMatrix { - ncols: ncols, - nrows: nrows, + ncols, + nrows, values: vec![T::zero(); ncols * nrows], }; for row in 0..nrows { @@ -285,7 +285,7 @@ impl DenseMatrix { DenseMatrix { ncols: values.len(), nrows: 1, - values: values, + values, } } @@ -301,7 +301,7 @@ impl DenseMatrix { DenseMatrix { ncols: 1, nrows: values.len(), - values: values, + values, } } @@ -412,7 +412,7 @@ impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for De } } - const FIELDS: &'static [&'static str] = &["nrows", "ncols", "values"]; + const FIELDS: &[&str] = &["nrows", "ncols", "values"]; deserializer.deserialize_struct( "DenseMatrix", FIELDS, @@ -562,7 +562,7 @@ impl BaseMatrix for DenseMatrix { matrix.set(i, i, T::one()); } - return matrix; + matrix } fn shape(&self) -> (usize, usize) { @@ -614,7 +614,7 @@ impl BaseMatrix for DenseMatrix { for c in 0..other.ncols { let mut s = T::zero(); for i in 0..inner_d { - s = s + self.get(r, i) * other.get(i, c); + s += self.get(r, i) * other.get(i, c); } result.set(r, c, s); } @@ -633,7 +633,7 @@ impl BaseMatrix for DenseMatrix { let mut result = T::zero(); for i in 0..(self.nrows * self.ncols) { - result = result + self.values[i] * other.values[i]; + result += self.values[i] * other.values[i]; } result @@ -727,19 +727,19 @@ impl BaseMatrix for DenseMatrix { } fn div_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] / x; + self.values[col * self.nrows + row] /= x; } fn mul_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] * x; + self.values[col * self.nrows + row] *= x; } fn add_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] + x + self.values[col * self.nrows + row] += x } fn sub_element_mut(&mut self, row: usize, col: usize, x: T) { - self.values[col * self.nrows + row] = self.values[col * self.nrows + row] - x; + self.values[col * self.nrows + row] -= x; } fn transpose(&self) -> Self { @@ -759,9 +759,9 @@ impl BaseMatrix for DenseMatrix { fn rand(nrows: usize, ncols: usize) -> Self { let values: Vec = (0..nrows * ncols).map(|_| T::rand()).collect(); DenseMatrix { - ncols: ncols, - nrows: nrows, - values: values, + ncols, + nrows, + values, } } @@ -769,7 +769,7 @@ impl BaseMatrix for DenseMatrix { let mut norm = T::zero(); for xi in self.values.iter() { - norm = norm + *xi * *xi; + norm += *xi * *xi; } norm.sqrt() @@ -790,7 +790,7 @@ impl BaseMatrix for DenseMatrix { let mut norm = T::zero(); for xi in self.values.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -802,12 +802,12 @@ impl BaseMatrix for DenseMatrix { for r in 0..self.nrows { for c in 0..self.ncols { - mean[c] = mean[c] + self.get(r, c); + mean[c] += self.get(r, c); } } for i in 0..mean.len() { - mean[i] = mean[i] / T::from(self.nrows).unwrap(); + mean[i] /= T::from(self.nrows).unwrap(); } mean @@ -815,28 +815,28 @@ impl BaseMatrix for DenseMatrix { fn add_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] + scalar; + self.values[i] += scalar; } self } fn sub_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] - scalar; + self.values[i] -= scalar; } self } fn mul_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] * scalar; + self.values[i] *= scalar; } self } fn div_scalar_mut(&mut self, scalar: T) -> &Self { for i in 0..self.values.len() { - self.values[i] = self.values[i] / scalar; + self.values[i] /= scalar; } self } @@ -902,7 +902,7 @@ impl BaseMatrix for DenseMatrix { fn sum(&self) -> T { let mut sum = T::zero(); for i in 0..self.values.len() { - sum = sum + self.values[i]; + sum += self.values[i]; } sum } @@ -934,7 +934,7 @@ impl BaseMatrix for DenseMatrix { for c in 0..self.ncols { let p = (self.get(r, c) - max).exp(); self.set(r, c, p); - z = z + p; + z += p; } } for r in 0..self.nrows { @@ -1058,7 +1058,7 @@ mod tests { DenseMatrix::new(1, 3, vec![1., 2., 3.]) ); assert_eq!( - DenseMatrix::from_row_vector(vec.clone()).to_row_vector(), + DenseMatrix::from_row_vector(vec).to_row_vector(), vec![1., 2., 3.] ); } diff --git a/src/linalg/qr.rs b/src/linalg/qr.rs index e0e5860..c3a7978 100644 --- a/src/linalg/qr.rs +++ b/src/linalg/qr.rs @@ -51,11 +51,7 @@ impl> QR { } } - QR { - QR: QR, - tau: tau, - singular: singular, - } + QR { QR, tau, singular } } /// Get upper triangular matrix. @@ -68,7 +64,7 @@ impl> QR { R.set(i, j, self.QR.get(i, j)); } } - return R; + R } /// Get an orthogonal matrix. @@ -82,7 +78,7 @@ impl> QR { if self.QR.get(k, k) != T::zero() { let mut s = T::zero(); for i in k..m { - s = s + self.QR.get(i, k) * Q.get(i, j); + s += self.QR.get(i, k) * Q.get(i, j); } s = -s / self.QR.get(k, k); for i in k..m { @@ -96,7 +92,7 @@ impl> QR { k -= 1; } } - return Q; + Q } fn solve(&self, mut b: M) -> Result { @@ -118,7 +114,7 @@ impl> QR { for j in 0..b_ncols { let mut s = T::zero(); for i in k..m { - s = s + self.QR.get(i, k) * b.get(i, j); + s += self.QR.get(i, k) * b.get(i, j); } s = -s / self.QR.get(k, k); for i in k..m { @@ -175,7 +171,7 @@ pub trait QRDecomposableMatrix: BaseMatrix { for j in k + 1..n { let mut s = T::zero(); for i in k..m { - s = s + self.get(i, k) * self.get(i, j); + s += self.get(i, k) * self.get(i, j); } s = -s / self.get(k, k); for i in k..m { diff --git a/src/linalg/svd.rs b/src/linalg/svd.rs index 8866ba9..9271f5b 100644 --- a/src/linalg/svd.rs +++ b/src/linalg/svd.rs @@ -106,13 +106,13 @@ pub trait SVDDecomposableMatrix: BaseMatrix { if i < m { for k in i..m { - scale = scale + U.get(k, i).abs(); + scale += U.get(k, i).abs(); } if scale.abs() > T::epsilon() { for k in i..m { U.div_element_mut(k, i, scale); - s = s + U.get(k, i) * U.get(k, i); + s += U.get(k, i) * U.get(k, i); } let mut f = U.get(i, i); @@ -122,7 +122,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l - 1..n { s = T::zero(); for k in i..m { - s = s + U.get(k, i) * U.get(k, j); + s += U.get(k, i) * U.get(k, j); } f = s / h; for k in i..m { @@ -140,15 +140,15 @@ pub trait SVDDecomposableMatrix: BaseMatrix { let mut s = T::zero(); scale = T::zero(); - if i + 1 <= m && i + 1 != n { + if i < m && i + 1 != n { for k in l - 1..n { - scale = scale + U.get(i, k).abs(); + scale += U.get(i, k).abs(); } if scale.abs() > T::epsilon() { for k in l - 1..n { U.div_element_mut(i, k, scale); - s = s + U.get(i, k) * U.get(i, k); + s += U.get(i, k) * U.get(i, k); } let f = U.get(i, l - 1); @@ -163,7 +163,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l - 1..m { s = T::zero(); for k in l - 1..n { - s = s + U.get(j, k) * U.get(i, k); + s += U.get(j, k) * U.get(i, k); } for k in l - 1..n { @@ -189,7 +189,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l..n { let mut s = T::zero(); for k in l..n { - s = s + U.get(i, k) * v.get(k, j); + s += U.get(i, k) * v.get(k, j); } for k in l..n { v.add_element_mut(k, j, s * v.get(k, i)); @@ -218,7 +218,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { for j in l..n { let mut s = T::zero(); for k in l..m { - s = s + U.get(k, i) * U.get(k, j); + s += U.get(k, i) * U.get(k, j); } let f = (s / U.get(i, i)) * g; for k in i..m { @@ -316,7 +316,7 @@ pub trait SVDDecomposableMatrix: BaseMatrix { f = x * c + g * s; g = g * c - x * s; h = y * s; - y = y * c; + y *= c; for jj in 0..n { x = v.get(jj, j); @@ -431,13 +431,13 @@ impl> SVD { let full = s.len() == m.min(n); let tol = T::half() * (T::from(m + n).unwrap() + T::one()).sqrt() * s[0] * T::epsilon(); SVD { - U: U, - V: V, - s: s, - full: full, - m: m, - n: n, - tol: tol, + U, + V, + s, + full, + m, + n, + tol, } } @@ -458,9 +458,9 @@ impl> SVD { let mut r = T::zero(); if self.s[j] > self.tol { for i in 0..self.m { - r = r + self.U.get(i, j) * b.get(i, k); + r += self.U.get(i, j) * b.get(i, k); } - r = r / self.s[j]; + r /= self.s[j]; } tmp[j] = r; } @@ -468,7 +468,7 @@ impl> SVD { for j in 0..self.n { let mut r = T::zero(); for jj in 0..self.n { - r = r + self.V.get(j, jj) * tmp[jj]; + r += self.V.get(j, jj) * tmp[jj]; } b.set(j, k, r); } diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index 61bb678..d8ff1ff 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -123,9 +123,9 @@ impl> LinearRegression { let (y_nrows, _) = b.shape(); if x_nrows != y_nrows { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let a = x.h_stack(&M::ones(x_nrows, 1)); diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index ec09184..ec90af1 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -82,7 +82,7 @@ trait ObjectiveFunction> { let mut sum = T::zero(); let p = x.shape().1; for i in 0..p { - sum = sum + x.get(m_row, i) * w.get(0, i + v_col); + sum += x.get(m_row, i) * w.get(0, i + v_col); } sum + w.get(0, p + v_col) @@ -101,7 +101,7 @@ impl> PartialEq for LogisticRegression { || self.num_attributes != other.num_attributes || self.classes.len() != other.classes.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -109,7 +109,7 @@ impl> PartialEq for LogisticRegression { } } - return self.weights == other.weights; + self.weights == other.weights } } } @@ -123,7 +123,7 @@ impl<'a, T: RealNumber, M: Matrix> ObjectiveFunction for i in 0..n { let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i); - f = f + (wx.ln_1pe() - (T::from(self.y[i]).unwrap()) * wx); + f += wx.ln_1pe() - (T::from(self.y[i]).unwrap()) * wx; } f @@ -169,7 +169,7 @@ impl<'a, T: RealNumber, M: Matrix> ObjectiveFunction ); } prob.softmax_mut(); - f = f - prob.get(0, self.y[i]).ln(); + f -= prob.get(0, self.y[i]).ln(); } f @@ -215,9 +215,9 @@ impl> LogisticRegression { let (_, y_nrows) = y_m.shape(); if x_nrows != y_nrows { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let classes = y_m.unique(); @@ -240,7 +240,7 @@ impl> LogisticRegression { let x0 = M::zeros(1, num_attributes + 1); let objective = BinaryObjectiveFunction { - x: x, + x, y: yi, phantom: PhantomData, }; @@ -249,17 +249,17 @@ impl> LogisticRegression { Ok(LogisticRegression { weights: result.x, - classes: classes, - num_attributes: num_attributes, + classes, + num_attributes, num_classes: k, }) } else { let x0 = M::zeros(1, (num_attributes + 1) * k); let objective = MultiClassObjectiveFunction { - x: x, + x, y: yi, - k: k, + k, phantom: PhantomData, }; @@ -268,9 +268,9 @@ impl> LogisticRegression { let weights = result.x.reshape(k, num_attributes + 1); Ok(LogisticRegression { - weights: weights, - classes: classes, - num_attributes: num_attributes, + weights, + classes, + num_attributes, num_classes: k, }) } @@ -362,7 +362,7 @@ mod tests { let objective = MultiClassObjectiveFunction { x: &x, - y: y, + y, k: 3, phantom: PhantomData, }; @@ -411,7 +411,7 @@ mod tests { let objective = BinaryObjectiveFunction { x: &x, - y: y, + y, phantom: PhantomData, }; diff --git a/src/math/distance/euclidian.rs b/src/math/distance/euclidian.rs index 4ec0ad0..31503bd 100644 --- a/src/math/distance/euclidian.rs +++ b/src/math/distance/euclidian.rs @@ -38,7 +38,7 @@ impl Euclidian { let mut sum = T::zero(); for i in 0..x.len() { let d = x[i] - y[i]; - sum = sum + d * d; + sum += d * d; } sum diff --git a/src/math/distance/mahalanobis.rs b/src/math/distance/mahalanobis.rs index 6c205e5..fd320c3 100644 --- a/src/math/distance/mahalanobis.rs +++ b/src/math/distance/mahalanobis.rs @@ -68,8 +68,8 @@ impl> Mahalanobis { let sigma = data.cov(); let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap(); Mahalanobis { - sigma: sigma, - sigmaInv: sigmaInv, + sigma, + sigmaInv, t: PhantomData, } } @@ -80,8 +80,8 @@ impl> Mahalanobis { let sigma = cov.clone(); let sigmaInv = sigma.lu().and_then(|lu| lu.inverse()).unwrap(); Mahalanobis { - sigma: sigma, - sigmaInv: sigmaInv, + sigma, + sigmaInv, t: PhantomData, } } @@ -118,7 +118,7 @@ impl> Distance, T> for Mahalanobis { let mut s = T::zero(); for j in 0..n { for i in 0..n { - s = s + self.sigmaInv.get(i, j) * z[i] * z[j]; + s += self.sigmaInv.get(i, j) * z[i] * z[j]; } } diff --git a/src/math/distance/manhattan.rs b/src/math/distance/manhattan.rs index 9b46a0c..66125a5 100644 --- a/src/math/distance/manhattan.rs +++ b/src/math/distance/manhattan.rs @@ -35,7 +35,7 @@ impl Distance, T> for Manhattan { let mut dist = T::zero(); for i in 0..x.len() { - dist = dist + (x[i] - y[i]).abs(); + dist += (x[i] - y[i]).abs(); } dist diff --git a/src/math/distance/minkowski.rs b/src/math/distance/minkowski.rs index 667e0db..b7c5691 100644 --- a/src/math/distance/minkowski.rs +++ b/src/math/distance/minkowski.rs @@ -48,7 +48,7 @@ impl Distance, T> for Minkowski { for i in 0..x.len() { let d = (x[i] - y[i]).abs(); - dist = dist + d.powf(p_t); + dist += d.powf(p_t); } dist.powf(T::one() / p_t) diff --git a/src/math/distance/mod.rs b/src/math/distance/mod.rs index 0532e86..1219ec6 100644 --- a/src/math/distance/mod.rs +++ b/src/math/distance/mod.rs @@ -45,7 +45,7 @@ impl Distances { /// Minkowski distance, see [`Minkowski`](minkowski/index.html) /// * `p` - function order. Should be >= 1 pub fn minkowski(p: u16) -> minkowski::Minkowski { - minkowski::Minkowski { p: p } + minkowski::Minkowski { p } } /// Manhattan distance, see [`Manhattan`](manhattan/index.html) diff --git a/src/math/num.rs b/src/math/num.rs index 894e5a3..490623c 100644 --- a/src/math/num.rs +++ b/src/math/num.rs @@ -57,19 +57,19 @@ impl RealNumber for f64 { fn ln_1pe(self) -> f64 { if self > 15. { - return self; + self } else { - return self.exp().ln_1p(); + self.exp().ln_1p() } } fn sigmoid(self) -> f64 { if self < -40. { - return 0.; + 0. } else if self > 40. { - return 1.; + 1. } else { - return 1. / (1. + f64::exp(-self)); + 1. / (1. + f64::exp(-self)) } } @@ -98,19 +98,19 @@ impl RealNumber for f32 { fn ln_1pe(self) -> f32 { if self > 15. { - return self; + self } else { - return self.exp().ln_1p(); + self.exp().ln_1p() } } fn sigmoid(self) -> f32 { if self < -40. { - return 0.; + 0. } else if self > 40. { - return 1.; + 1. } else { - return 1. / (1. + f32::exp(-self)); + 1. / (1. + f32::exp(-self)) } } diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs index 99e6cbd..571dd49 100644 --- a/src/metrics/auc.rs +++ b/src/metrics/auc.rs @@ -42,9 +42,9 @@ impl AUC { for i in 0..n { if y_true.get(i) == T::zero() { - neg = neg + T::one(); + neg += T::one(); } else if y_true.get(i) == T::one() { - pos = pos + T::one(); + pos += T::one(); } else { panic!( "AUC is only for binary classification. Invalid label: {}", @@ -79,7 +79,7 @@ impl AUC { let mut auc = T::zero(); for i in 0..n { if y_true.get(label_idx[i]) == T::one() { - auc = auc + rank[i]; + auc += rank[i]; } } diff --git a/src/metrics/cluster_helpers.rs b/src/metrics/cluster_helpers.rs index 76cd643..dd5bbb3 100644 --- a/src/metrics/cluster_helpers.rs +++ b/src/metrics/cluster_helpers.rs @@ -37,7 +37,7 @@ pub fn entropy(data: &Vec) -> Option { for &c in bincounts.values() { if c > 0 { let pi = T::from_usize(c).unwrap(); - entropy = entropy - (pi / sum) * (pi.ln() - sum.ln()); + entropy -= (pi / sum) * (pi.ln() - sum.ln()); } } @@ -89,9 +89,8 @@ pub fn mutual_info_score(contingency: &Vec>) -> T { let mut result = T::zero(); for i in 0..log_outer.len() { - result = result - + ((contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln)) - + contingency_nm[i] * log_outer[i]) + result += (contingency_nm[i] * (log_contingency_nm[i] - contingency_sum_ln)) + + contingency_nm[i] * log_outer[i] } result.max(T::zero()) diff --git a/src/metrics/mean_absolute_error.rs b/src/metrics/mean_absolute_error.rs index 3e5099e..a069335 100644 --- a/src/metrics/mean_absolute_error.rs +++ b/src/metrics/mean_absolute_error.rs @@ -43,7 +43,7 @@ impl MeanAbsoluteError { let n = y_true.len(); let mut ras = T::zero(); for i in 0..n { - ras = ras + (y_true.get(i) - y_pred.get(i)).abs(); + ras += (y_true.get(i) - y_pred.get(i)).abs(); } ras / T::from_usize(n).unwrap() diff --git a/src/metrics/mean_squared_error.rs b/src/metrics/mean_squared_error.rs index 816cc70..137c8e6 100644 --- a/src/metrics/mean_squared_error.rs +++ b/src/metrics/mean_squared_error.rs @@ -43,7 +43,7 @@ impl MeanSquareError { let n = y_true.len(); let mut rss = T::zero(); for i in 0..n { - rss = rss + (y_true.get(i) - y_pred.get(i)).square(); + rss += (y_true.get(i) - y_pred.get(i)).square(); } rss / T::from_usize(n).unwrap() diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 4fe199b..f49300d 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -101,7 +101,7 @@ impl ClassificationMetrics { /// F1 score, also known as balanced F-score or F-measure, see [F1](f1/index.html). pub fn f1(beta: T) -> f1::F1 { - f1::F1 { beta: beta } + f1::F1 { beta } } /// Area Under the Receiver Operating Characteristic Curve (ROC AUC), see [AUC](auc/index.html). diff --git a/src/metrics/r2.rs b/src/metrics/r2.rs index e689c6f..cbcf7e4 100644 --- a/src/metrics/r2.rs +++ b/src/metrics/r2.rs @@ -45,10 +45,10 @@ impl R2 { let mut mean = T::zero(); for i in 0..n { - mean = mean + y_true.get(i); + mean += y_true.get(i); } - mean = mean / T::from_usize(n).unwrap(); + mean /= T::from_usize(n).unwrap(); let mut ss_tot = T::zero(); let mut ss_res = T::zero(); @@ -56,8 +56,8 @@ impl R2 { for i in 0..n { let y_i = y_true.get(i); let f_i = y_pred.get(i); - ss_tot = ss_tot + (y_i - mean).square(); - ss_res = ss_res + (y_i - f_i).square(); + ss_tot += (y_i - mean).square(); + ss_res += (y_i - f_i).square(); } T::one() - (ss_res / ss_tot) diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index 49938cf..c53451d 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -127,7 +127,7 @@ impl BaseKFold for KFold { // initialise indices let mut indices: Vec = (0..n_samples).collect(); - if self.shuffle == true { + if self.shuffle { indices.shuffle(&mut thread_rng()); } // return a new array of given shape n_split, filled with each element of n_samples divided by n_splits. @@ -135,7 +135,7 @@ impl BaseKFold for KFold { // increment by one if odd for i in 0..(n_samples % self.n_splits) { - fold_sizes[i] = fold_sizes[i] + 1; + fold_sizes[i] += 1; } // generate the right array of arrays for test indices @@ -175,13 +175,13 @@ impl BaseKFold for KFold { .clone() .iter() .enumerate() - .filter(|&(idx, _)| test_index[idx] == false) + .filter(|&(idx, _)| !test_index[idx]) .map(|(idx, _)| idx) .collect::>(); // filter train indices out according to mask let test_index = indices .iter() .enumerate() - .filter(|&(idx, _)| test_index[idx] == true) + .filter(|&(idx, _)| test_index[idx]) .map(|(idx, _)| idx) .collect::>(); // filter tests indices out according to mask return_values.push((train_index, test_index)) diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index 3ad4297..135594a 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -78,7 +78,7 @@ impl, T>> PartialEq for KNNClassifier { || self.k != other.k || self.y.len() != other.y.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -139,7 +139,7 @@ impl, T>> KNNClassifier { } Ok(KNNClassifier { - classes: classes, + classes, y: yi, k: parameters.k, knn_algorithm: parameters.algorithm.fit(data, distance)?, @@ -166,13 +166,13 @@ impl, T>> KNNClassifier { let weights = self .weight .calc_weights(search_result.iter().map(|v| v.1).collect()); - let w_sum = weights.iter().map(|w| *w).sum(); + let w_sum = weights.iter().copied().sum(); let mut c = vec![T::zero(); self.classes.len()]; let mut max_c = T::zero(); let mut max_i = 0; for (r, w) in search_result.iter().zip(weights.iter()) { - c[self.y[r.0]] = c[self.y[r.0]] + (*w / w_sum); + c[self.y[r.0]] += *w / w_sum; if c[self.y[r.0]] > max_c { max_c = c[self.y[r.0]]; max_i = self.y[r.0]; diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index 0bf283f..b7c0f2d 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -76,7 +76,7 @@ impl Default for KNNRegressorParameters { impl, T>> PartialEq for KNNRegressor { fn eq(&self, other: &Self) -> bool { if self.k != other.k || self.y.len() != other.y.len() { - return false; + false } else { for i in 0..self.y.len() { if (self.y[i] - other.y[i]).abs() > T::epsilon() { @@ -151,10 +151,10 @@ impl, T>> KNNRegressor { let weights = self .weight .calc_weights(search_result.iter().map(|v| v.1).collect()); - let w_sum = weights.iter().map(|w| *w).sum(); + let w_sum = weights.iter().copied().sum(); for (r, w) in search_result.iter().zip(weights.iter()) { - result = result + self.y[r.0] * (*w / w_sum); + result += self.y[r.0] * (*w / w_sum); } Ok(result) diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs index c860084..9cc78ec 100644 --- a/src/optimization/first_order/gradient_descent.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -74,8 +74,8 @@ impl FirstOrderOptimizer for GradientDescent { let f_x = f(&x); OptimizerResult { - x: x, - f_x: f_x, + x, + f_x, iterations: iter, } } diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 84a405e..1f563c1 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -48,7 +48,7 @@ impl Kernels { /// Radial basis function kernel (Gaussian) pub fn rbf(gamma: T) -> RBFKernel { - RBFKernel { gamma: gamma } + RBFKernel { gamma } } /// Polynomial kernel @@ -57,9 +57,9 @@ impl Kernels { /// * `coef0` - independent term in kernel function pub fn polynomial(degree: T, gamma: T, coef0: T) -> PolynomialKernel { PolynomialKernel { - degree: degree, - gamma: gamma, - coef0: coef0, + degree, + gamma, + coef0, } } @@ -79,17 +79,14 @@ impl Kernels { /// * `gamma` - kernel coefficient /// * `coef0` - independent term in kernel function pub fn sigmoid(gamma: T, coef0: T) -> SigmoidKernel { - SigmoidKernel { - gamma: gamma, - coef0: coef0, - } + SigmoidKernel { gamma, coef0 } } /// Sigmoid kernel /// * `gamma` - kernel coefficient pub fn sigmoid_with_gamma(gamma: T) -> SigmoidKernel { SigmoidKernel { - gamma: gamma, + gamma, coef0: T::one(), } } diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 119b812..bac6e4e 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -173,9 +173,9 @@ impl, K: Kernel> SVC { let (n, _) = x.shape(); if n != y.len() { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let classes = y.unique(); @@ -204,11 +204,11 @@ impl, K: Kernel> SVC { let (support_vectors, weight, b) = optimizer.optimize(); Ok(SVC { - classes: classes, - kernel: kernel, + classes, + kernel, instances: support_vectors, w: weight, - b: b, + b, }) } @@ -251,7 +251,7 @@ impl, K: Kernel> PartialEq for SVC< || self.w.len() != other.w.len() || self.instances.len() != other.instances.len() { - return false; + false } else { for i in 0..self.w.len() { if (self.w[i] - other.w[i]).abs() > T::epsilon() { @@ -263,7 +263,7 @@ impl, K: Kernel> PartialEq for SVC< return false; } } - return true; + true } } } @@ -278,12 +278,12 @@ impl> SupportVector { }; SupportVector { index: i, - x: x, + x, grad: g, k: k_v, alpha: T::zero(), - cmin: cmin, - cmax: cmax, + cmin, + cmax, } } } @@ -291,7 +291,7 @@ impl> SupportVector { impl<'a, T: RealNumber, M: Matrix, K: Kernel> Cache<'a, T, M, K> { fn new(kernel: &'a K) -> Cache<'a, T, M, K> { Cache { - kernel: kernel, + kernel, data: HashMap::new(), phantom: PhantomData, } @@ -326,8 +326,8 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, let (n, _) = x.shape(); Optimizer { - x: x, - y: y, + x, + y, parameters: ¶meters, svmin: 0, svmax: 0, @@ -335,7 +335,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, gmax: T::min_value(), tau: T::from_f64(1e-12).unwrap(), sv: Vec::with_capacity(n), - kernel: kernel, + kernel, recalculate_minmax_grad: true, } } @@ -389,10 +389,11 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, if self.process(i, self.x.get_row(i), self.y.get(i), cache) { cp += 1; } - } else if self.y.get(i) == -T::one() && cn < few { - if self.process(i, self.x.get_row(i), self.y.get(i), cache) { - cn += 1; - } + } else if self.y.get(i) == -T::one() + && cn < few + && self.process(i, self.x.get_row(i), self.y.get(i), cache) + { + cn += 1; } if cp >= few && cn >= few { @@ -420,10 +421,10 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.find_min_max_gradient(); - if self.gmin < self.gmax { - if (y > T::zero() && g < self.gmin) || (y < T::zero() && g > self.gmax) { - return false; - } + if self.gmin < self.gmax + && ((y > T::zero() && g < self.gmin) || (y < T::zero() && g > self.gmax)) + { + return false; } for v in cache_values { @@ -494,13 +495,12 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, let mut idxs_to_drop: HashSet = HashSet::new(); self.sv.retain(|v| { - if v.alpha == T::zero() { - if (v.grad >= gmax && T::zero() >= v.cmax) - || (v.grad <= gmin && T::zero() <= v.cmin) - { - idxs_to_drop.insert(v.index); - return false; - } + if v.alpha == T::zero() + && ((v.grad >= gmax && T::zero() >= v.cmax) + || (v.grad <= gmin && T::zero() <= v.cmin)) + { + idxs_to_drop.insert(v.index); + return false; }; true }); @@ -647,7 +647,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.update(idx_1, idx_2, step, cache); - return self.gmax - self.gmin > tol; + self.gmax - self.gmin > tol } None => false, } diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 61feb80..36f308a 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -160,9 +160,9 @@ impl, K: Kernel> SVR { let (n, _) = x.shape(); if n != y.len() { - return Err(Failed::fit(&format!( - "Number of rows of X doesn't match number of rows of Y" - ))); + return Err(Failed::fit( + &"Number of rows of X doesn\'t match number of rows of Y".to_string(), + )); } let optimizer = Optimizer::new(x, y, &kernel, ¶meters); @@ -170,10 +170,10 @@ impl, K: Kernel> SVR { let (support_vectors, weight, b) = optimizer.smo(); Ok(SVR { - kernel: kernel, + kernel, instances: support_vectors, w: weight, - b: b, + b, }) } @@ -198,7 +198,7 @@ impl, K: Kernel> SVR { f += self.w[i] * self.kernel.apply(&x, &self.instances[i]); } - return f; + f } } @@ -208,7 +208,7 @@ impl, K: Kernel> PartialEq for SVR< || self.w.len() != other.w.len() || self.instances.len() != other.instances.len() { - return false; + false } else { for i in 0..self.w.len() { if (self.w[i] - other.w[i]).abs() > T::epsilon() { @@ -220,7 +220,7 @@ impl, K: Kernel> PartialEq for SVR< return false; } } - return true; + true } } } @@ -230,7 +230,7 @@ impl> SupportVector { let k_v = k.apply(&x, &x); SupportVector { index: i, - x: x, + x, grad: [eps + y, eps - y], k: k_v, alpha: [T::zero(), T::zero()], @@ -270,7 +270,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, gmaxindex: 0, tau: T::from_f64(1e-12).unwrap(), sv: support_vectors, - kernel: kernel, + kernel, } } @@ -392,11 +392,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v2].alpha[j] = T::zero(); self.sv[v1].alpha[i] = diff; } - } else { - if self.sv[v1].alpha[i] < T::zero() { - self.sv[v1].alpha[i] = T::zero(); - self.sv[v2].alpha[j] = -diff; - } + } else if self.sv[v1].alpha[i] < T::zero() { + self.sv[v1].alpha[i] = T::zero(); + self.sv[v2].alpha[j] = -diff; } if diff > T::zero() { @@ -404,11 +402,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v1].alpha[i] = self.c; self.sv[v2].alpha[j] = self.c - diff; } - } else { - if self.sv[v2].alpha[j] > self.c { - self.sv[v2].alpha[j] = self.c; - self.sv[v1].alpha[i] = self.c + diff; - } + } else if self.sv[v2].alpha[j] > self.c { + self.sv[v2].alpha[j] = self.c; + self.sv[v1].alpha[i] = self.c + diff; } } else { let delta = (self.sv[v1].grad[i] - self.sv[v2].grad[j]) / curv; @@ -421,11 +417,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v1].alpha[i] = self.c; self.sv[v2].alpha[j] = sum - self.c; } - } else { - if self.sv[v2].alpha[j] < T::zero() { - self.sv[v2].alpha[j] = T::zero(); - self.sv[v1].alpha[i] = sum; - } + } else if self.sv[v2].alpha[j] < T::zero() { + self.sv[v2].alpha[j] = T::zero(); + self.sv[v1].alpha[i] = sum; } if sum > self.c { @@ -433,11 +427,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.sv[v2].alpha[j] = self.c; self.sv[v1].alpha[i] = sum - self.c; } - } else { - if self.sv[v1].alpha[i] < T::zero() { - self.sv[v1].alpha[i] = T::zero(); - self.sv[v2].alpha[j] = sum; - } + } else if self.sv[v1].alpha[i] < T::zero() { + self.sv[v1].alpha[i] = T::zero(); + self.sv[v2].alpha[j] = sum; } } diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 25704e6..b30fb2d 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -126,7 +126,7 @@ impl PartialEq for DecisionTreeClassifier { || self.num_classes != other.num_classes || self.nodes.len() != other.nodes.len() { - return false; + false } else { for i in 0..self.classes.len() { if (self.classes[i] - other.classes[i]).abs() > T::epsilon() { @@ -138,7 +138,7 @@ impl PartialEq for DecisionTreeClassifier { return false; } } - return true; + true } } } @@ -174,8 +174,8 @@ impl Default for DecisionTreeClassifierParameters { impl Node { fn new(index: usize, output: usize) -> Self { Node { - index: index, - output: output, + index, + output, split_feature: 0, split_value: Option::None, split_score: Option::None, @@ -206,7 +206,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: us for i in 0..count.len() { if count[i] > 0 { let p = T::from(count[i]).unwrap() / T::from(n).unwrap(); - impurity = impurity - p * p; + impurity -= p * p; } } } @@ -215,7 +215,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: us for i in 0..count.len() { if count[i] > 0 { let p = T::from(count[i]).unwrap() / T::from(n).unwrap(); - impurity = impurity - p * p.log2(); + impurity -= p * p.log2(); } } } @@ -229,7 +229,7 @@ fn impurity(criterion: &SplitCriterion, count: &Vec, n: us } } - return impurity; + impurity } impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { @@ -242,14 +242,14 @@ impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { level: u16, ) -> Self { NodeVisitor { - x: x, - y: y, + x, + y, node: node_id, - samples: samples, - order: order, + samples, + order, true_child_output: 0, false_child_output: 0, - level: level, + level, phantom: PhantomData, } } @@ -266,7 +266,7 @@ pub(in crate) fn which_max(x: &Vec) -> usize { } } - return which; + which } impl DecisionTreeClassifier { @@ -325,10 +325,10 @@ impl DecisionTreeClassifier { } let mut tree = DecisionTreeClassifier { - nodes: nodes, - parameters: parameters, + nodes, + parameters, num_classes: k, - classes: classes, + classes, depth: 0, }; @@ -376,19 +376,18 @@ impl DecisionTreeClassifier { let node = &self.nodes[node_id]; if node.true_child == None && node.false_child == None { result = node.output; + } else if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) + { + queue.push_back(node.true_child.unwrap()); } else { - if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) { - queue.push_back(node.true_child.unwrap()); - } else { - queue.push_back(node.false_child.unwrap()); - } + queue.push_back(node.false_child.unwrap()); } } None => break, }; } - return result; + result } fn find_best_cutoff>( diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 0f88d4d..0d6da54 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -113,8 +113,8 @@ impl Default for DecisionTreeRegressorParameters { impl Node { fn new(index: usize, output: T) -> Self { Node { - index: index, - output: output, + index, + output, split_feature: 0, split_value: Option::None, split_score: Option::None, @@ -144,14 +144,14 @@ impl PartialEq for Node { impl PartialEq for DecisionTreeRegressor { fn eq(&self, other: &Self) -> bool { if self.depth != other.depth || self.nodes.len() != other.nodes.len() { - return false; + false } else { for i in 0..self.nodes.len() { if self.nodes[i] != other.nodes[i] { return false; } } - return true; + true } } } @@ -177,14 +177,14 @@ impl<'a, T: RealNumber, M: Matrix> NodeVisitor<'a, T, M> { level: u16, ) -> Self { NodeVisitor { - x: x, - y: y, + x, + y, node: node_id, - samples: samples, - order: order, + samples, + order, true_child_output: T::zero(), false_child_output: T::zero(), - level: level, + level, } } } @@ -221,7 +221,7 @@ impl DecisionTreeRegressor { let mut sum = T::zero(); for i in 0..y_ncols { n += samples[i]; - sum = sum + T::from(samples[i]).unwrap() * y_m.get(0, i); + sum += T::from(samples[i]).unwrap() * y_m.get(0, i); } let root = Node::new(0, sum / T::from(n).unwrap()); @@ -233,8 +233,8 @@ impl DecisionTreeRegressor { } let mut tree = DecisionTreeRegressor { - nodes: nodes, - parameters: parameters, + nodes, + parameters, depth: 0, }; @@ -282,19 +282,18 @@ impl DecisionTreeRegressor { let node = &self.nodes[node_id]; if node.true_child == None && node.false_child == None { result = node.output; + } else if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) + { + queue.push_back(node.true_child.unwrap()); } else { - if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) { - queue.push_back(node.true_child.unwrap()); - } else { - queue.push_back(node.false_child.unwrap()); - } + queue.push_back(node.false_child.unwrap()); } } None => break, }; } - return result; + result } fn find_best_cutoff>( @@ -348,8 +347,7 @@ impl DecisionTreeRegressor { if prevx.is_nan() || visitor.x.get(*i, j) == prevx { prevx = visitor.x.get(*i, j); true_count += visitor.samples[*i]; - true_sum = - true_sum + T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); + true_sum += T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); continue; } @@ -360,8 +358,7 @@ impl DecisionTreeRegressor { { prevx = visitor.x.get(*i, j); true_count += visitor.samples[*i]; - true_sum = - true_sum + T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); + true_sum += T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); continue; } @@ -384,7 +381,7 @@ impl DecisionTreeRegressor { } prevx = visitor.x.get(*i, j); - true_sum = true_sum + T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); + true_sum += T::from(visitor.samples[*i]).unwrap() * visitor.y.get(0, *i); true_count += visitor.samples[*i]; } } From ea5de9758a2d367cdefab2da4f8f8332787b937d Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 19:46:37 -0400 Subject: [PATCH 04/21] Add -Drust-2018-idioms to clippy --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dd616af..0f118da 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -40,4 +40,4 @@ jobs: command: rustup component add clippy - run: name: Run cargo clippy - command: cargo clippy + command: cargo clippy -- -Drust-2018-idioms From 54886ebd728d58ae9fc5bbebf21b0b7a594bcf4a Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 20:24:08 -0400 Subject: [PATCH 05/21] Fix rust-2018-idioms warnings --- src/cluster/dbscan.rs | 2 -- src/cluster/kmeans.rs | 2 -- src/ensemble/random_forest_classifier.rs | 2 -- src/ensemble/random_forest_regressor.rs | 1 - src/error/mod.rs | 4 ++-- src/linalg/mod.rs | 2 +- src/linalg/naive/dense_matrix.rs | 5 ++--- src/model_selection/mod.rs | 1 - src/optimization/first_order/gradient_descent.rs | 4 ++-- src/optimization/first_order/lbfgs.rs | 10 +++++----- src/optimization/first_order/mod.rs | 4 ++-- src/svm/svc.rs | 16 ++++++++-------- src/svm/svr.rs | 2 +- src/tree/decision_tree_classifier.rs | 6 +++--- src/tree/decision_tree_regressor.rs | 6 +++--- 15 files changed, 29 insertions(+), 38 deletions(-) diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 787d8d3..e595028 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -29,8 +29,6 @@ //! * ["A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise", Ester M., Kriegel HP., Sander J., Xu X.](http://faculty.marshall.usc.edu/gareth-james/ISL/) //! * ["Density-Based Clustering in Spatial Databases: The Algorithm GDBSCAN and its Applications", Sander J., Ester M., Kriegel HP., Xu X.](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.63.1629&rep=rep1&type=pdf) -extern crate rand; - use std::fmt::Debug; use std::iter::Sum; diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 0da8a72..26a4038 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -52,8 +52,6 @@ //! * ["An Introduction to Statistical Learning", James G., Witten D., Hastie T., Tibshirani R., 10.3.1 K-Means Clustering](http://faculty.marshall.usc.edu/gareth-james/ISL/) //! * ["k-means++: The Advantages of Careful Seeding", Arthur D., Vassilvitskii S.](http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf) -extern crate rand; - use rand::Rng; use std::fmt::Debug; use std::iter::Sum; diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index 0cfebf1..e1d462a 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -45,8 +45,6 @@ //! //! //! -extern crate rand; - use std::default::Default; use std::fmt::Debug; diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index c704a8f..36fa096 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -42,7 +42,6 @@ //! //! //! -extern crate rand; use std::default::Default; use std::fmt::Debug; diff --git a/src/error/mod.rs b/src/error/mod.rs index 679f685..1615290 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -82,7 +82,7 @@ impl PartialEq for Failed { } impl fmt::Display for FailedError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let failed_err_str = match self { FailedError::FitFailed => "Fit failed", FailedError::PredictFailed => "Predict failed", @@ -96,7 +96,7 @@ impl fmt::Display for FailedError { } impl fmt::Display for Failed { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}: {}", self.err, self.msg) } } diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index 09a9687..fc9d6c9 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -515,7 +515,7 @@ pub trait Matrix: { } -pub(crate) fn row_iter>(m: &M) -> RowIter { +pub(crate) fn row_iter>(m: &M) -> RowIter<'_, F, M> { RowIter { m, pos: 0, diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index c1ba650..aff0fa2 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -1,4 +1,3 @@ -extern crate num; use std::fmt; use std::fmt::Debug; use std::marker::PhantomData; @@ -197,7 +196,7 @@ pub struct DenseMatrixIterator<'a, T: RealNumber> { } impl fmt::Display for DenseMatrix { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut rows: Vec> = Vec::new(); for r in 0..self.nrows { rows.push( @@ -356,7 +355,7 @@ impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for De impl<'a, T: RealNumber + fmt::Debug + Deserialize<'a>> Visitor<'a> for DenseMatrixVisitor { type Value = DenseMatrix; - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("struct DenseMatrix") } diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index c53451d..d4908f6 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -8,7 +8,6 @@ //! your data. //! //! In SmartCore you can split your data into training and test datasets using `train_test_split` function. -extern crate rand; use crate::linalg::BaseVector; use crate::linalg::Matrix; diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs index 9cc78ec..d57896f 100644 --- a/src/optimization/first_order/gradient_descent.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -25,8 +25,8 @@ impl Default for GradientDescent { impl FirstOrderOptimizer for GradientDescent { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &'a F, - df: &'a DF, + f: &'a F<'_, T, X>, + df: &'a DF<'_, X>, x0: &X, ls: &'a LS, ) -> OptimizerResult { diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs index b63f617..5dedfe6 100644 --- a/src/optimization/first_order/lbfgs.rs +++ b/src/optimization/first_order/lbfgs.rs @@ -100,8 +100,8 @@ impl LBFGS { fn update_state<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &'a F, - df: &'a DF, + f: &'a F<'_, T, X>, + df: &'a DF<'_, X>, ls: &'a LS, state: &mut LBFGSState, ) { @@ -162,7 +162,7 @@ impl LBFGS { g_converged || x_converged || state.counter_f_tol > self.successive_f_tol } - fn update_hessian<'a, X: Matrix>(&self, _: &'a DF, state: &mut LBFGSState) { + fn update_hessian<'a, X: Matrix>(&self, _: &'a DF<'_, X>, state: &mut LBFGSState) { state.dg = state.x_df.sub(&state.x_df_prev); let rho_iteration = T::one() / state.dx.dot(&state.dg); if !rho_iteration.is_infinite() { @@ -198,8 +198,8 @@ struct LBFGSState> { impl FirstOrderOptimizer for LBFGS { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &F, - df: &'a DF, + f: &F<'_, T, X>, + df: &'a DF<'_, X>, x0: &X, ls: &'a LS, ) -> OptimizerResult { diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs index d1c628f..f2e476f 100644 --- a/src/optimization/first_order/mod.rs +++ b/src/optimization/first_order/mod.rs @@ -12,8 +12,8 @@ use crate::optimization::{DF, F}; pub trait FirstOrderOptimizer { fn optimize<'a, X: Matrix, LS: LineSearchMethod>( &self, - f: &F, - df: &'a DF, + f: &F<'_, T, X>, + df: &'a DF<'_, X>, x0: &X, ls: &'a LS, ) -> OptimizerResult; diff --git a/src/svm/svc.rs b/src/svm/svc.rs index bac6e4e..62a9e01 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -378,7 +378,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, (support_vectors, w, b) } - fn initialize(&mut self, cache: &mut Cache) { + fn initialize(&mut self, cache: &mut Cache<'_, T, M, K>) { let (n, _) = self.x.shape(); let few = 5; let mut cp = 0; @@ -402,7 +402,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, } } - fn process(&mut self, i: usize, x: M::RowVector, y: T, cache: &mut Cache) -> bool { + fn process(&mut self, i: usize, x: M::RowVector, y: T, cache: &mut Cache<'_, T, M, K>) -> bool { for j in 0..self.sv.len() { if self.sv[j].index == i { return true; @@ -445,13 +445,13 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, true } - fn reprocess(&mut self, tol: T, cache: &mut Cache) -> bool { + fn reprocess(&mut self, tol: T, cache: &mut Cache<'_, T, M, K>) -> bool { let status = self.smo(None, None, tol, cache); self.clean(cache); status } - fn finish(&mut self, cache: &mut Cache) { + fn finish(&mut self, cache: &mut Cache<'_, T, M, K>) { let mut max_iter = self.sv.len(); while self.smo(None, None, self.parameters.tol, cache) && max_iter > 0 { @@ -486,7 +486,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, self.recalculate_minmax_grad = false } - fn clean(&mut self, cache: &mut Cache) { + fn clean(&mut self, cache: &mut Cache<'_, T, M, K>) { self.find_min_max_gradient(); let gmax = self.gmax; @@ -520,7 +520,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, &mut self, idx_1: Option, idx_2: Option, - cache: &mut Cache, + cache: &mut Cache<'_, T, M, K>, ) -> Option<(usize, usize, T)> { match (idx_1, idx_2) { (None, None) => { @@ -614,7 +614,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, idx_1: Option, idx_2: Option, tol: T, - cache: &mut Cache, + cache: &mut Cache<'_, T, M, K>, ) -> bool { match self.select_pair(idx_1, idx_2, cache) { Some((idx_1, idx_2, k_v_12)) => { @@ -653,7 +653,7 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, } } - fn update(&mut self, v1: usize, v2: usize, step: T, cache: &mut Cache) { + fn update(&mut self, v1: usize, v2: usize, step: T, cache: &mut Cache<'_, T, M, K>) { self.sv[v1].alpha -= step; self.sv[v2].alpha += step; diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 36f308a..5d007d7 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -469,7 +469,7 @@ impl Cache { } } - fn get Vec>(&self, i: usize, or: F) -> Ref> { + fn get Vec>(&self, i: usize, or: F) -> Ref<'_, Vec> { if self.data[i].borrow().is_none() { self.data[i].replace(Some(or())); } diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index b30fb2d..353c1bd 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -334,7 +334,7 @@ impl DecisionTreeClassifier { let mut visitor = NodeVisitor::::new(0, samples, &order, &x, &yi, 1); - let mut visitor_queue: LinkedList> = LinkedList::new(); + let mut visitor_queue: LinkedList> = LinkedList::new(); if tree.find_best_cutoff(&mut visitor, mtry) { visitor_queue.push_back(visitor); @@ -392,7 +392,7 @@ impl DecisionTreeClassifier { fn find_best_cutoff>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, mtry: usize, ) -> bool { let (n_rows, n_attr) = visitor.x.shape(); @@ -455,7 +455,7 @@ impl DecisionTreeClassifier { fn find_best_split>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, n: usize, count: &Vec, false_count: &mut Vec, diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 0d6da54..39f3eb8 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -240,7 +240,7 @@ impl DecisionTreeRegressor { let mut visitor = NodeVisitor::::new(0, samples, &order, &x, &y_m, 1); - let mut visitor_queue: LinkedList> = LinkedList::new(); + let mut visitor_queue: LinkedList> = LinkedList::new(); if tree.find_best_cutoff(&mut visitor, mtry) { visitor_queue.push_back(visitor); @@ -298,7 +298,7 @@ impl DecisionTreeRegressor { fn find_best_cutoff>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, mtry: usize, ) -> bool { let (_, n_attr) = visitor.x.shape(); @@ -332,7 +332,7 @@ impl DecisionTreeRegressor { fn find_best_split>( &mut self, - visitor: &mut NodeVisitor, + visitor: &mut NodeVisitor<'_, T, M>, n: usize, sum: T, parent_gain: T, From 8a2da00665df708b883765edce7529717c55f831 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 20:58:47 -0400 Subject: [PATCH 06/21] Fail in case of clippy warning --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0f118da..069c56d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -40,4 +40,4 @@ jobs: command: rustup component add clippy - run: name: Run cargo clippy - command: cargo clippy -- -Drust-2018-idioms + command: cargo clippy -- -Drust-2018-idioms -Dwarnings From 4d75af67033f81eaf005b8319f044d48ea439d60 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 20:59:27 -0400 Subject: [PATCH 07/21] Allow temporally the warnings that are currently failing --- src/lib.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 083b95f..687becf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,6 +64,22 @@ //! let y_hat = knn.predict(&x).unwrap(); //! ``` +#![allow( + clippy::or_fun_call, + clippy::needless_range_loop, + clippy::ptr_arg, + clippy::len_without_is_empty, + clippy::extra_unused_lifetimes, + clippy::map_entry, + clippy::comparison_chain, + clippy::type_complexity, + clippy::needless_lifetimes, + clippy::too_many_arguments, + clippy::unnecessary_mut_passed, + clippy::let_and_return, + clippy::many_single_char_names, + clippy::tabs_in_doc_comments +)] /// Various algorithms and helper methods that are used elsewhere in SmartCore pub mod algorithm; /// Algorithms for clustering of unlabeled data From 43584e14e57ed131104c9dd71c61c3fd9f78fc27 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 23:15:50 -0400 Subject: [PATCH 08/21] Fix clippy::or_fun_call --- src/ensemble/random_forest_classifier.rs | 6 +++--- src/lib.rs | 1 - src/metrics/cluster_hcv.rs | 4 ++-- src/svm/svc.rs | 8 ++++++-- src/tree/decision_tree_classifier.rs | 5 +++-- src/tree/decision_tree_regressor.rs | 5 +++-- 6 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index e1d462a..011b0ba 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -137,13 +137,13 @@ impl RandomForestClassifier { yi[i] = classes.iter().position(|c| yc == *c).unwrap(); } - let mtry = parameters.m.unwrap_or( + let mtry = parameters.m.unwrap_or_else(|| { (T::from(num_attributes).unwrap()) .sqrt() .floor() .to_usize() - .unwrap(), - ); + .unwrap() + }); let classes = y_m.unique(); let k = classes.len(); diff --git a/src/lib.rs b/src/lib.rs index 687becf..2142c8b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,7 +65,6 @@ //! ``` #![allow( - clippy::or_fun_call, clippy::needless_range_loop, clippy::ptr_arg, clippy::len_without_is_empty, diff --git a/src/metrics/cluster_hcv.rs b/src/metrics/cluster_hcv.rs index bdefc8d..29a9db2 100644 --- a/src/metrics/cluster_hcv.rs +++ b/src/metrics/cluster_hcv.rs @@ -24,8 +24,8 @@ impl HCVScore { let contingency = contingency_matrix(&labels_true, &labels_pred); let mi: T = mutual_info_score(&contingency); - let homogeneity = entropy_c.map(|e| mi / e).unwrap_or(T::one()); - let completeness = entropy_k.map(|e| mi / e).unwrap_or(T::one()); + let homogeneity = entropy_c.map(|e| mi / e).unwrap_or_else(T::one); + let completeness = entropy_k.map(|e| mi / e).unwrap_or_else(T::one); let v_measure_score = if homogeneity + completeness == T::zero() { T::zero() diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 62a9e01..f2d518b 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -561,7 +561,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, ( idx_1, idx_2, - k_v_12.unwrap_or(self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x)), + k_v_12.unwrap_or_else(|| { + self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x) + }), ) }) } @@ -597,7 +599,9 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Optimizer<'a, ( idx_1, idx_2, - k_v_12.unwrap_or(self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x)), + k_v_12.unwrap_or_else(|| { + self.kernel.apply(&self.sv[idx_1].x, &self.sv[idx_2].x) + }), ) }) } diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 353c1bd..9fe1b1a 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -376,7 +376,8 @@ impl DecisionTreeClassifier { let node = &self.nodes[node_id]; if node.true_child == None && node.false_child == None { result = node.output; - } else if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) + } else if x.get(row, node.split_feature) + <= node.split_value.unwrap_or_else(T::nan) { queue.push_back(node.true_child.unwrap()); } else { @@ -529,7 +530,7 @@ impl DecisionTreeClassifier { for i in 0..n { if visitor.samples[i] > 0 { if visitor.x.get(i, self.nodes[visitor.node].split_feature) - <= self.nodes[visitor.node].split_value.unwrap_or(T::nan()) + <= self.nodes[visitor.node].split_value.unwrap_or_else(T::nan) { true_samples[i] = visitor.samples[i]; tc += true_samples[i]; diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 39f3eb8..c30c9e2 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -282,7 +282,8 @@ impl DecisionTreeRegressor { let node = &self.nodes[node_id]; if node.true_child == None && node.false_child == None { result = node.output; - } else if x.get(row, node.split_feature) <= node.split_value.unwrap_or(T::nan()) + } else if x.get(row, node.split_feature) + <= node.split_value.unwrap_or_else(T::nan) { queue.push_back(node.true_child.unwrap()); } else { @@ -401,7 +402,7 @@ impl DecisionTreeRegressor { for i in 0..n { if visitor.samples[i] > 0 { if visitor.x.get(i, self.nodes[visitor.node].split_feature) - <= self.nodes[visitor.node].split_value.unwrap_or(T::nan()) + <= self.nodes[visitor.node].split_value.unwrap_or_else(T::nan) { true_samples[i] = visitor.samples[i]; tc += true_samples[i]; From 513d916580f72ca87418fb6a62bc2ef3898a89b2 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 23:20:22 -0400 Subject: [PATCH 09/21] Fix clippy::tabs_in_doc_comments --- src/lib.rs | 1 - src/math/distance/mod.rs | 2 +- src/neighbors/mod.rs | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2142c8b..85aa3b8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,7 +77,6 @@ clippy::unnecessary_mut_passed, clippy::let_and_return, clippy::many_single_char_names, - clippy::tabs_in_doc_comments )] /// Various algorithms and helper methods that are used elsewhere in SmartCore pub mod algorithm; diff --git a/src/math/distance/mod.rs b/src/math/distance/mod.rs index 1219ec6..696b5ff 100644 --- a/src/math/distance/mod.rs +++ b/src/math/distance/mod.rs @@ -4,7 +4,7 @@ //! Formally, the distance can be any metric measure that is defined as \\( d(x, y) \geq 0\\) and follows three conditions: //! 1. \\( d(x, y) = 0 \\) if and only \\( x = y \\), positive definiteness //! 1. \\( d(x, y) = d(y, x) \\), symmetry -//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality +//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality //! //! for all \\(x, y, z \in Z \\) //! diff --git a/src/neighbors/mod.rs b/src/neighbors/mod.rs index 6d542f6..be1ad4d 100644 --- a/src/neighbors/mod.rs +++ b/src/neighbors/mod.rs @@ -10,7 +10,7 @@ //! and follows three conditions: //! 1. \\( d(x, y) = 0 \\) if and only \\( x = y \\), positive definiteness //! 1. \\( d(x, y) = d(y, x) \\), symmetry -//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality +//! 1. \\( d(x, y) \leq d(x, z) + d(z, y) \\), subadditivity or triangle inequality //! //! for all \\(x, y, z \in Z \\) //! From b780e0c289080526a940c1d06fbe337cd81ec8b4 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 23:22:18 -0400 Subject: [PATCH 10/21] Fix clippy::unnecessary_mut_passed --- src/algorithm/neighbour/bbd_tree.rs | 4 ++-- src/lib.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/algorithm/neighbour/bbd_tree.rs b/src/algorithm/neighbour/bbd_tree.rs index 632da86..85e6628 100644 --- a/src/algorithm/neighbour/bbd_tree.rs +++ b/src/algorithm/neighbour/bbd_tree.rs @@ -134,7 +134,7 @@ impl BBDTree { return self.filter( self.nodes[node].lower.unwrap(), centroids, - &mut new_candidates, + &new_candidates, newk, sums, counts, @@ -142,7 +142,7 @@ impl BBDTree { ) + self.filter( self.nodes[node].upper.unwrap(), centroids, - &mut new_candidates, + &new_candidates, newk, sums, counts, diff --git a/src/lib.rs b/src/lib.rs index 85aa3b8..80da506 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,7 +74,6 @@ clippy::type_complexity, clippy::needless_lifetimes, clippy::too_many_arguments, - clippy::unnecessary_mut_passed, clippy::let_and_return, clippy::many_single_char_names, )] From dd2864abe78426554d4b3217b01140139fc2bb6e Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 23:23:55 -0400 Subject: [PATCH 11/21] Fix clippy::extra_unused_lifetimes --- src/lib.rs | 3 +-- src/optimization/line_search.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 80da506..b0bf26c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,14 +68,13 @@ clippy::needless_range_loop, clippy::ptr_arg, clippy::len_without_is_empty, - clippy::extra_unused_lifetimes, clippy::map_entry, clippy::comparison_chain, clippy::type_complexity, clippy::needless_lifetimes, clippy::too_many_arguments, clippy::let_and_return, - clippy::many_single_char_names, + clippy::many_single_char_names )] /// Various algorithms and helper methods that are used elsewhere in SmartCore pub mod algorithm; diff --git a/src/optimization/line_search.rs b/src/optimization/line_search.rs index 3481c87..e6a3b80 100644 --- a/src/optimization/line_search.rs +++ b/src/optimization/line_search.rs @@ -2,7 +2,7 @@ use crate::optimization::FunctionOrder; use num_traits::Float; pub trait LineSearchMethod { - fn search<'a>( + fn search( &self, f: &(dyn Fn(T) -> T), df: &(dyn Fn(T) -> T), From 0c35adf76aaba8b56745d2bac964e2ffd73d59f3 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 23:26:22 -0400 Subject: [PATCH 12/21] Fix clippy::let_and_return --- src/lib.rs | 1 - src/linalg/naive/dense_matrix.rs | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b0bf26c..0df22b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,7 +73,6 @@ clippy::type_complexity, clippy::needless_lifetimes, clippy::too_many_arguments, - clippy::let_and_return, clippy::many_single_char_names )] /// Various algorithms and helper methods that are used elsewhere in SmartCore diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index aff0fa2..02bb8b6 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -29,8 +29,7 @@ impl BaseVector for Vec { } fn to_vec(&self) -> Vec { - let v = self.clone(); - v + self.clone() } fn zeros(len: usize) -> Self { From 3c1969bdf508eef642a97aa9c97e5d899fcf4225 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Sun, 8 Nov 2020 23:30:08 -0400 Subject: [PATCH 13/21] Fix clippy::needless_lifetimes --- src/lib.rs | 1 - src/linalg/naive/dense_matrix.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0df22b0..c85596e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,6 @@ clippy::map_entry, clippy::comparison_chain, clippy::type_complexity, - clippy::needless_lifetimes, clippy::too_many_arguments, clippy::many_single_char_names )] diff --git a/src/linalg/naive/dense_matrix.rs b/src/linalg/naive/dense_matrix.rs index 02bb8b6..7ba28bf 100644 --- a/src/linalg/naive/dense_matrix.rs +++ b/src/linalg/naive/dense_matrix.rs @@ -305,7 +305,7 @@ impl DenseMatrix { /// Creates new column vector (_1xN_ matrix) from a vector. /// * `values` - values to initialize the matrix. - pub fn iter<'a>(&'a self) -> DenseMatrixIterator<'a, T> { + pub fn iter(&self) -> DenseMatrixIterator<'_, T> { DenseMatrixIterator { cur_c: 0, cur_r: 0, From 5e887634db987137f3691a563b7a932855f3508b Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Mon, 9 Nov 2020 00:02:22 -0400 Subject: [PATCH 14/21] Fix clippy::comparison_chain --- src/lib.rs | 1 - src/linalg/lu.rs | 11 +++-- src/linear/logistic_regression.rs | 69 ++++++++++++++++--------------- 3 files changed, 41 insertions(+), 40 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c85596e..8c97bf7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,6 @@ clippy::ptr_arg, clippy::len_without_is_empty, clippy::map_entry, - clippy::comparison_chain, clippy::type_complexity, clippy::too_many_arguments, clippy::many_single_char_names diff --git a/src/linalg/lu.rs b/src/linalg/lu.rs index cbe195f..bfc7fff 100644 --- a/src/linalg/lu.rs +++ b/src/linalg/lu.rs @@ -33,6 +33,7 @@ //! #![allow(non_snake_case)] +use std::cmp::Ordering; use std::fmt::Debug; use std::marker::PhantomData; @@ -78,12 +79,10 @@ impl> LU { for i in 0..n_rows { for j in 0..n_cols { - if i > j { - L.set(i, j, self.LU.get(i, j)); - } else if i == j { - L.set(i, j, T::one()); - } else { - L.set(i, j, T::zero()); + match i.cmp(&j) { + Ordering::Greater => L.set(i, j, self.LU.get(i, j)), + Ordering::Equal => L.set(i, j, T::one()), + Ordering::Less => L.set(i, j, T::zero()), } } } diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index ec90af1..796caed 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -52,6 +52,7 @@ //! //! //! +use std::cmp::Ordering; use std::fmt::Debug; use std::marker::PhantomData; @@ -231,48 +232,50 @@ impl> LogisticRegression { yi[i] = classes.iter().position(|c| yc == *c).unwrap(); } - if k < 2 { - Err(Failed::fit(&format!( + match k.cmp(&2) { + Ordering::Less => Err(Failed::fit(&format!( "incorrect number of classes: {}. Should be >= 2.", k - ))) - } else if k == 2 { - let x0 = M::zeros(1, num_attributes + 1); + ))), + Ordering::Greater => { + let x0 = M::zeros(1, (num_attributes + 1) * k); - let objective = BinaryObjectiveFunction { - x, - y: yi, - phantom: PhantomData, - }; + let objective = MultiClassObjectiveFunction { + x, + y: yi, + k, + phantom: PhantomData, + }; - let result = LogisticRegression::minimize(x0, objective); + let result = LogisticRegression::minimize(x0, objective); - Ok(LogisticRegression { - weights: result.x, - classes, - num_attributes, - num_classes: k, - }) - } else { - let x0 = M::zeros(1, (num_attributes + 1) * k); + let weights = result.x.reshape(k, num_attributes + 1); - let objective = MultiClassObjectiveFunction { - x, - y: yi, - k, - phantom: PhantomData, - }; + Ok(LogisticRegression { + weights, + classes, + num_attributes, + num_classes: k, + }) + } + Ordering::Equal => { + let x0 = M::zeros(1, num_attributes + 1); - let result = LogisticRegression::minimize(x0, objective); + let objective = BinaryObjectiveFunction { + x, + y: yi, + phantom: PhantomData, + }; - let weights = result.x.reshape(k, num_attributes + 1); + let result = LogisticRegression::minimize(x0, objective); - Ok(LogisticRegression { - weights, - classes, - num_attributes, - num_classes: k, - }) + Ok(LogisticRegression { + weights: result.x, + classes, + num_attributes, + num_classes: k, + }) + } } } From 3d4d5f64f6ebcd9adf037442778639a7b6cbd00c Mon Sep 17 00:00:00 2001 From: morenol Date: Mon, 9 Nov 2020 15:54:27 -0400 Subject: [PATCH 15/21] feat: add Naive Bayes and CategoricalNB (#15) * feat: Implement Naive Bayes classifier * Implement CategoricalNB --- src/lib.rs | 2 + src/naive_bayes/categorical.rs | 232 +++++++++++++++++++++++++++++++++ src/naive_bayes/mod.rs | 69 ++++++++++ 3 files changed, 303 insertions(+) create mode 100644 src/naive_bayes/categorical.rs create mode 100644 src/naive_bayes/mod.rs diff --git a/src/lib.rs b/src/lib.rs index 083b95f..966d5ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,6 +85,8 @@ pub mod math; /// Functions for assessing prediction error. pub mod metrics; pub mod model_selection; +/// Supervised learning algorithms based on applying the Bayes theorem with the independence assumptions between predictors +pub mod naive_bayes; /// Supervised neighbors-based learning methods pub mod neighbors; pub(crate) mod optimization; diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs new file mode 100644 index 0000000..f948aeb --- /dev/null +++ b/src/naive_bayes/categorical.rs @@ -0,0 +1,232 @@ +use crate::error::Failed; +use crate::linalg::BaseVector; +use crate::linalg::Matrix; +use crate::math::num::RealNumber; +use crate::naive_bayes::{BaseNaiveBayes, NBDistribution}; +use serde::{Deserialize, Serialize}; + +/// Naive Bayes classifier for categorical features +struct CategoricalNBDistribution { + class_labels: Vec, + class_probabilities: Vec, + coef: Vec>>, + feature_categories: Vec>, +} + +impl> NBDistribution for CategoricalNBDistribution { + fn prior(&self, class_index: usize) -> T { + if class_index >= self.class_labels.len() { + T::zero() + } else { + self.class_probabilities[class_index] + } + } + + fn conditional_probability(&self, class_index: usize, j: &M::RowVector) -> T { + if class_index < self.class_labels.len() { + let mut prob = T::one(); + for feature in 0..j.len() { + let value = j.get(feature); + match self.feature_categories[feature] + .iter() + .position(|&t| t == value) + { + Some(_i) => prob *= self.coef[class_index][feature][_i], + None => return T::zero(), + } + } + prob + } else { + T::zero() + } + } + + fn classes(&self) -> &Vec { + &self.class_labels + } +} + +impl CategoricalNBDistribution { + /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features. + /// * `x` - training data. + /// * `y` - vector with target values (classes) of length N. + /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). + pub fn fit>(x: &M, y: &M::RowVector, alpha: T) -> Result { + if alpha < T::zero() { + return Err(Failed::fit(&format!( + "alpha should be >= 0, alpha=[{}]", + alpha + ))); + } + + let (n_samples, n_features) = x.shape(); + let y_samples = y.len(); + if y_samples != n_samples { + return Err(Failed::fit(&format!( + "Size of x should equal size of y; |x|=[{}], |y|=[{}]", + n_samples, y_samples + ))); + } + + if n_samples == 0 { + return Err(Failed::fit(&format!( + "Size of x and y should greater than 0; |x|=[{}]", + n_samples + ))); + } + + let mut y_sorted = y.to_vec(); + y_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut class_labels = Vec::with_capacity(y.len()); + class_labels.push(y_sorted[0]); + let mut classes_count = Vec::with_capacity(y.len()); + let mut current_count = T::one(); + for idx in 1..y_samples { + if y_sorted[idx] == y_sorted[idx - 1] { + current_count += T::one(); + } else { + classes_count.push(current_count); + class_labels.push(y_sorted[idx]); + current_count = T::one() + } + classes_count.push(current_count); + } + + let mut feature_categories: Vec> = Vec::with_capacity(n_features); + + for feature in 0..n_features { + let feature_types = x.get_col_as_vec(feature).unique(); + feature_categories.push(feature_types); + } + let mut coef: Vec>> = Vec::with_capacity(class_labels.len()); + for (label, label_count) in class_labels.iter().zip(classes_count.iter()) { + let mut coef_i: Vec> = Vec::with_capacity(n_features); + for (feature_index, feature_options) in + feature_categories.iter().enumerate().take(n_features) + { + let col = x + .get_col_as_vec(feature_index) + .iter() + .enumerate() + .filter(|(i, _j)| y.get(*i) == *label) + .map(|(_, j)| *j) + .collect::>(); + let mut feat_count: Vec = Vec::with_capacity(feature_options.len()); + for k in feature_options.iter() { + let feat_k_count = col.iter().filter(|&v| v == k).count(); + feat_count.push(feat_k_count); + } + + let coef_i_j = feat_count + .iter() + .map(|c| { + (T::from(*c).unwrap() + alpha) + / (T::from(*label_count).unwrap() + + T::from(feature_options.len()).unwrap() * alpha) + }) + .collect::>(); + coef_i.push(coef_i_j); + } + coef.push(coef_i); + } + let class_probabilities = classes_count + .into_iter() + .map(|count| count / T::from(n_samples).unwrap()) + .collect::>(); + + Ok(Self { + class_labels, + class_probabilities, + coef, + feature_categories, + }) + } +} + +/// `CategoricalNB` parameters. Use `Default::default()` for default values. +#[derive(Serialize, Deserialize, Debug)] +pub struct CategoricalNBParameters { + /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). + pub alpha: T, +} + +impl CategoricalNBParameters { + /// Create CategoricalNBParameters with specific paramaters. + pub fn new(alpha: T) -> Result { + if alpha > T::zero() { + Ok(Self { alpha }) + } else { + Err(Failed::fit(&format!( + "alpha should be >= 0, alpha=[{}]", + alpha + ))) + } + } +} +impl Default for CategoricalNBParameters { + fn default() -> Self { + Self { alpha: T::one() } + } +} + +/// CategoricalNB implements the categorical naive Bayes algorithm for categorically distributed data. +pub struct CategoricalNB> { + inner: BaseNaiveBayes>, +} + +impl> CategoricalNB { + /// Fits CategoricalNB with given data + /// * `x` - training data of size NxM where N is the number of samples and M is the number of + /// features. + /// * `y` - vector with target values (classes) of length N. + /// * `parameters` - additional parameters like alpha for smoothing + pub fn fit( + x: &M, + y: &M::RowVector, + parameters: CategoricalNBParameters, + ) -> Result { + let alpha = parameters.alpha; + let distribution = CategoricalNBDistribution::fit(x, y, alpha)?; + let inner = BaseNaiveBayes::fit(distribution)?; + Ok(Self { inner }) + } + + /// Estimates the class labels for the provided data. + /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// Returns a vector of size N with class estimates. + pub fn predict(&self, x: &M) -> Result { + self.inner.predict(x) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::naive::dense_matrix::DenseMatrix; + + #[test] + fn run_base_naive_bayes() { + let x = DenseMatrix::from_2d_array(&[ + &[0., 2., 1., 0.], + &[0., 2., 1., 1.], + &[1., 2., 1., 0.], + &[2., 1., 1., 0.], + &[2., 0., 0., 0.], + &[2., 0., 0., 1.], + &[1., 0., 0., 1.], + &[0., 1., 1., 0.], + &[0., 0., 0., 0.], + &[2., 1., 0., 0.], + &[0., 1., 0., 1.], + &[1., 1., 1., 1.], + &[1., 2., 0., 0.], + &[2., 1., 1., 1.], + ]); + let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.]; + + let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); + let x_test = DenseMatrix::from_2d_array(&[&[0., 2., 1., 0.], &[2., 2., 0., 0.]]); + let y_hat = cnb.predict(&x_test).unwrap(); + assert_eq!(y_hat, vec![0., 1.]); + } +} diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs new file mode 100644 index 0000000..e9ab792 --- /dev/null +++ b/src/naive_bayes/mod.rs @@ -0,0 +1,69 @@ +use crate::error::Failed; +use crate::linalg::BaseVector; +use crate::linalg::Matrix; +use crate::math::num::RealNumber; +use std::marker::PhantomData; + +/// Distribution used in the Naive Bayes classifier. +pub(crate) trait NBDistribution> { + /// Prior of class at the given index. + fn prior(&self, class_index: usize) -> T; + + /// Conditional probability of sample j given class in the specified index. + fn conditional_probability(&self, class_index: usize, j: &M::RowVector) -> T; + + /// Possible classes of the distribution. + fn classes(&self) -> &Vec; +} + +/// Base struct for the Naive Bayes classifier. +pub(crate) struct BaseNaiveBayes, D: NBDistribution> { + distribution: D, + _phantom_t: PhantomData, + _phantom_m: PhantomData, +} + +impl, D: NBDistribution> BaseNaiveBayes { + /// Fits NB classifier to a given NBdistribution. + /// * `distribution` - NBDistribution of the training data + pub fn fit(distribution: D) -> Result { + Ok(Self { + distribution, + _phantom_t: PhantomData, + _phantom_m: PhantomData, + }) + } + + /// Estimates the class labels for the provided data. + /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// Returns a vector of size N with class estimates. + pub fn predict(&self, x: &M) -> Result { + let y_classes = self.distribution.classes(); + let (rows, _) = x.shape(); + let predictions = (0..rows) + .map(|row_index| { + let row = x.get_row(row_index); + let (prediction, _probability) = y_classes + .iter() + .enumerate() + .map(|(class_index, class)| { + ( + class, + self.distribution.conditional_probability(class_index, &row) + * self.distribution.prior(class_index), + ) + }) + .max_by(|(_, p1), (_, p2)| p1.partial_cmp(p2).unwrap()) + .unwrap(); + *prediction + }) + .collect::>(); + let mut y_hat = M::RowVector::zeros(rows); + for (i, prediction) in predictions.iter().enumerate().take(rows) { + y_hat.set(i, *prediction); + } + Ok(y_hat) + } +} +mod categorical; +pub use categorical::{CategoricalNB, CategoricalNBParameters}; From c756496b710a962f632061b2e9c153488aeaefb7 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Mon, 9 Nov 2020 00:21:02 -0400 Subject: [PATCH 16/21] Fix clippy::len_without_is_empty --- src/lib.rs | 1 - src/linalg/mod.rs | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 8c97bf7..4e87301 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,6 @@ #![allow( clippy::needless_range_loop, clippy::ptr_arg, - clippy::len_without_is_empty, clippy::map_entry, clippy::type_complexity, clippy::too_many_arguments, diff --git a/src/linalg/mod.rs b/src/linalg/mod.rs index fc9d6c9..896d718 100644 --- a/src/linalg/mod.rs +++ b/src/linalg/mod.rs @@ -76,6 +76,11 @@ pub trait BaseVector: Clone + Debug { /// Get number of elevemnt in the vector fn len(&self) -> usize; + /// Returns true if the vector is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Return a vector with the elements of the one-dimensional array. fn to_vec(&self) -> Vec; From d620f225ee167dff86cf85601cee918b4c2ff5d7 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Tue, 10 Nov 2020 00:20:26 -0400 Subject: [PATCH 17/21] Fix new warnings after rustup update --- src/dataset/generator.rs | 2 +- src/linear/logistic_regression.rs | 6 ++++-- src/model_selection/mod.rs | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/dataset/generator.rs b/src/dataset/generator.rs index 2514134..e0b2939 100644 --- a/src/dataset/generator.rs +++ b/src/dataset/generator.rs @@ -49,7 +49,7 @@ pub fn make_blobs( /// Make a large circle containing a smaller circle in 2d. pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset { - if factor >= 1.0 || factor < 0.0 { + if !(0.0..1.0).contains(&factor) { panic!("'factor' has to be between 0 and 1."); } diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 796caed..022942c 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -328,8 +328,10 @@ impl> LogisticRegression { let df = |g: &mut M, w: &M| objective.df(g, w); - let mut ls: Backtracking = Default::default(); - ls.order = FunctionOrder::THIRD; + let ls: Backtracking = Backtracking { + order: FunctionOrder::THIRD, + ..Default::default() + }; let optimizer: LBFGS = Default::default(); optimizer.optimize(&f, &df, &x0, &ls) diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index d4908f6..ddcd9d4 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -110,7 +110,7 @@ pub struct KFold { impl Default for KFold { fn default() -> KFold { KFold { - n_splits: 3 as usize, + n_splits: 3_usize, shuffle: true, } } From 18df9c758ced915d67134395b00b62daa5f0f596 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Tue, 10 Nov 2020 00:36:54 -0400 Subject: [PATCH 18/21] Fix clippy::map_entry --- src/lib.rs | 1 - src/svm/svc.rs | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4e87301..97c953e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,6 @@ #![allow( clippy::needless_range_loop, clippy::ptr_arg, - clippy::map_entry, clippy::type_complexity, clippy::too_many_arguments, clippy::many_single_char_names diff --git a/src/svm/svc.rs b/src/svm/svc.rs index f2d518b..4fd70df 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -300,11 +300,12 @@ impl<'a, T: RealNumber, M: Matrix, K: Kernel> Cache<'a, T, M fn get(&mut self, i: &SupportVector, j: &SupportVector) -> T { let idx_i = i.index; let idx_j = j.index; - if !self.data.contains_key(&(idx_i, idx_j)) { - let v = self.kernel.apply(&i.x, &j.x); - self.data.insert((idx_i, idx_j), v); - } - *self.data.get(&(idx_i, idx_j)).unwrap() + #[allow(clippy::or_fun_call)] + let entry = self + .data + .entry((idx_i, idx_j)) + .or_insert(self.kernel.apply(&i.x, &j.x)); + *entry } fn insert(&mut self, key: (usize, usize), value: T) { From 126b306681382a42f647bc101ce9ef0ed00822e2 Mon Sep 17 00:00:00 2001 From: morenol Date: Tue, 10 Nov 2020 20:50:41 -0400 Subject: [PATCH 19/21] Update .circleci/config.yml Co-authored-by: VolodymyrOrlov --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 069c56d..17da167 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -40,4 +40,4 @@ jobs: command: rustup component add clippy - run: name: Run cargo clippy - command: cargo clippy -- -Drust-2018-idioms -Dwarnings + command: cargo clippy --all-features -- -Drust-2018-idioms -Dwarnings From 85d2ecd1c97c00d53d5e06b69f115d158ee4e40d Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Tue, 10 Nov 2020 21:10:21 -0400 Subject: [PATCH 20/21] Fix clippy errors after --all-features was enabled --- src/linalg/nalgebra_bindings.rs | 26 +++++++++++--------------- src/linalg/ndarray_bindings.rs | 32 ++++++++++++++------------------ src/model_selection/mod.rs | 8 ++++---- 3 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/linalg/nalgebra_bindings.rs b/src/linalg/nalgebra_bindings.rs index e0b885b..4c8120d 100644 --- a/src/linalg/nalgebra_bindings.rs +++ b/src/linalg/nalgebra_bindings.rs @@ -64,7 +64,7 @@ impl BaseVector for MatrixMN { } fn to_vec(&self) -> Vec { - self.row(0).iter().map(|v| *v).collect() + self.row(0).iter().copied().collect() } fn zeros(len: usize) -> Self { @@ -112,7 +112,7 @@ impl BaseVector for MatrixMN { let mut norm = T::zero(); for xi in self.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -174,7 +174,7 @@ impl BaseVector for MatrixMN { } fn unique(&self) -> Vec { - let mut result: Vec = self.iter().map(|v| *v).collect(); + let mut result: Vec = self.iter().copied().collect(); result.sort_by(|a, b| a.partial_cmp(b).unwrap()); result.dedup(); result @@ -199,7 +199,7 @@ impl Vec { - self.row(row).iter().map(|v| *v).collect() + self.row(row).iter().copied().collect() } fn get_row(&self, row: usize) -> Self::RowVector { @@ -207,22 +207,18 @@ impl) { - let mut r = 0; - for e in self.row(row).iter() { + for (r, e) in self.row(row).iter().enumerate() { result[r] = *e; - r += 1; } } fn get_col_as_vec(&self, col: usize) -> Vec { - self.column(col).iter().map(|v| *v).collect() + self.column(col).iter().copied().collect() } fn copy_col_as_vec(&self, col: usize, result: &mut Vec) { - let mut r = 0; - for e in self.column(col).iter() { - result[r] = *e; - r += 1; + for (c, e) in self.column(col).iter().enumerate() { + result[c] = *e; } } @@ -368,7 +364,7 @@ impl Vec { - let mut result: Vec = self.iter().map(|v| *v).collect(); + let mut result: Vec = self.iter().copied().collect(); result.sort_by(|a, b| a.partial_cmp(b).unwrap()); result.dedup(); result diff --git a/src/linalg/ndarray_bindings.rs b/src/linalg/ndarray_bindings.rs index 00c9745..958123a 100644 --- a/src/linalg/ndarray_bindings.rs +++ b/src/linalg/ndarray_bindings.rs @@ -117,7 +117,7 @@ impl BaseVector for ArrayBase, Ix let mut norm = T::zero(); for xi in self.iter() { - norm = norm + xi.abs().powf(p); + norm += xi.abs().powf(p); } norm.powf(T::one() / p) @@ -125,19 +125,19 @@ impl BaseVector for ArrayBase, Ix } fn div_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] / x; + self[pos] /= x; } fn mul_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] * x; + self[pos] *= x; } fn add_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] + x; + self[pos] += x; } fn sub_element_mut(&mut self, pos: usize, x: T) { - self[pos] = self[pos] - x; + self[pos] -= x; } fn approximate_eq(&self, other: &Self, error: T) -> bool { @@ -204,10 +204,8 @@ impl) { - let mut r = 0; - for e in self.row(row).iter() { + for (r, e) in self.row(row).iter().enumerate() { result[r] = *e; - r += 1; } } @@ -216,10 +214,8 @@ impl) { - let mut r = 0; - for e in self.column(col).iter() { - result[r] = *e; - r += 1; + for (c, e) in self.column(col).iter().enumerate() { + result[c] = *e; } } @@ -347,7 +343,7 @@ impl = DenseMatrix::rand(23, 100); let train_test_splits = k.split(&x); - assert_eq!(train_test_splits[0].1.len(), 12 as usize); - assert_eq!(train_test_splits[0].0.len(), 11 as usize); - assert_eq!(train_test_splits[1].0.len(), 12 as usize); - assert_eq!(train_test_splits[1].1.len(), 11 as usize); + assert_eq!(train_test_splits[0].1.len(), 12_usize); + assert_eq!(train_test_splits[0].0.len(), 11_usize); + assert_eq!(train_test_splits[1].0.len(), 12_usize); + assert_eq!(train_test_splits[1].1.len(), 11_usize); } #[test] From f46d3ba94c5a0bf3b938c8adf4923cf53bd80c62 Mon Sep 17 00:00:00 2001 From: Luis Moreno Date: Tue, 10 Nov 2020 21:12:48 -0400 Subject: [PATCH 21/21] Address feedback --- src/lib.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 97c953e..49e106f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,10 @@ +#![allow( + clippy::needless_range_loop, + clippy::ptr_arg, + clippy::type_complexity, + clippy::too_many_arguments, + clippy::many_single_char_names +)] #![warn(missing_docs)] #![warn(missing_doc_code_examples)] @@ -64,13 +71,6 @@ //! let y_hat = knn.predict(&x).unwrap(); //! ``` -#![allow( - clippy::needless_range_loop, - clippy::ptr_arg, - clippy::type_complexity, - clippy::too_many_arguments, - clippy::many_single_char_names -)] /// Various algorithms and helper methods that are used elsewhere in SmartCore pub mod algorithm; /// Algorithms for clustering of unlabeled data