Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
13bb222ca7 | ||
|
|
2d7c055154 | ||
|
|
545ed6ce2b | ||
|
|
8939ed93b9 | ||
|
|
9cd7348403 | ||
|
|
bf65fe3753 | ||
|
|
074cfaf14f | ||
|
|
393cf15534 | ||
|
|
d52830a818 | ||
|
|
80c406b37d | ||
|
|
50e040a7a2 | ||
|
|
8765bd2173 | ||
|
|
0e1bf6ce7f | ||
|
|
d15ea43975 |
@@ -37,6 +37,8 @@ $ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213
|
||||
```
|
||||
* find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file.
|
||||
|
||||
* Please take a look at the output of a profiler to spot the most evident performance problems, see [this guide about using a profiler](http://www.codeofview.com/fix-rs/2017/01/24/how-to-optimize-rust-programs-on-linux/).
|
||||
|
||||
## Issue Report Process
|
||||
|
||||
1. Go to the project's issues.
|
||||
|
||||
@@ -41,4 +41,4 @@ jobs:
|
||||
- name: Upload to codecov.io
|
||||
uses: codecov/codecov-action@v2
|
||||
with:
|
||||
fail_ci_if_error: true
|
||||
fail_ci_if_error: false
|
||||
|
||||
+1
-1
@@ -2,7 +2,7 @@
|
||||
name = "smartcore"
|
||||
description = "Machine Learning in Rust."
|
||||
homepage = "https://smartcorelib.org"
|
||||
version = "0.3.1"
|
||||
version = "0.3.2"
|
||||
authors = ["smartcore Developers"]
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -179,6 +179,21 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2<T>> FastPair<'a, T, M> {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Return order dissimilarities from closest to furthest
|
||||
///
|
||||
#[allow(dead_code)]
|
||||
pub fn ordered_pairs(&self) -> std::vec::IntoIter<&PairwiseDistance<T>> {
|
||||
// improvement: implement this to return `impl Iterator<Item = &PairwiseDistance<T>>`
|
||||
// need to implement trait `Iterator` for `Vec<&PairwiseDistance<T>>`
|
||||
let mut distances = self
|
||||
.distances
|
||||
.values()
|
||||
.collect::<Vec<&PairwiseDistance<T>>>();
|
||||
distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
distances.into_iter()
|
||||
}
|
||||
|
||||
//
|
||||
// Compute distances from input to all other points in data-structure.
|
||||
// input is the row index of the sample matrix
|
||||
@@ -590,4 +605,39 @@ mod tests_fastpair {
|
||||
|
||||
assert_eq!(closest, min_dissimilarity);
|
||||
}
|
||||
|
||||
#[test]
fn fastpair_ordered_pairs() {
    // Checks that `ordered_pairs` yields distances in non-decreasing order.
    let x = DenseMatrix::<f64>::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[4.7, 3.2, 1.3, 0.2],
        &[4.6, 3.1, 1.5, 0.2],
        &[5.0, 3.6, 1.4, 0.2],
        &[5.4, 3.9, 1.7, 0.4],
        &[4.9, 3.1, 1.5, 0.1],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
        &[6.9, 3.1, 4.9, 1.5],
        &[5.5, 2.3, 4.0, 1.3],
        &[6.5, 2.8, 4.6, 1.5],
        &[4.6, 3.4, 1.4, 0.3],
        &[5.0, 3.4, 1.5, 0.2],
        &[4.4, 2.9, 1.4, 0.2],
    ]);
    let fastpair = FastPair::new(&x).unwrap();

    // `None` means "no pair seen yet"; this replaces the former `-1.0`
    // sentinel, which relied on distances never being negative.
    let mut previous: Option<f64> = None;
    for pair in fastpair.ordered_pairs() {
        let current = pair.distance.unwrap();
        if let Some(prev) = previous {
            assert!(current >= prev);
        }
        previous = Some(current);
    }
}
|
||||
}
|
||||
|
||||
+177
-1
@@ -62,7 +62,7 @@ use serde::{Deserialize, Serialize};
|
||||
use crate::algorithm::neighbour::bbd_tree::BBDTree;
|
||||
use crate::api::{Predictor, UnsupervisedEstimator};
|
||||
use crate::error::Failed;
|
||||
use crate::linalg::basic::arrays::{Array1, Array2};
|
||||
use crate::linalg::basic::arrays::{Array1, Array2, Array};
|
||||
use crate::metrics::distance::euclidian::*;
|
||||
use crate::numbers::basenum::Number;
|
||||
use crate::rand_custom::get_rng_impl;
|
||||
@@ -322,6 +322,109 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
|
||||
})
|
||||
}
|
||||
|
||||
/// Fit algorithm to _NxM_ matrix where _N_ is number of samples and _M_ is number of features.
|
||||
/// * `data` - training instances to cluster
|
||||
/// * `parameters` - cluster parameters
|
||||
/// * `centroids` - starting centroids
|
||||
pub fn fit_with_centroids(
|
||||
data: &X,
|
||||
parameters: KMeansParameters,
|
||||
centroids: Vec<Vec<f64>>,
|
||||
) -> Result<KMeans<TX, TY, X, Y>, Failed> {
|
||||
|
||||
// TODO: reuse existing methods in `crate::metrics`
|
||||
fn euclidean_distance(point1: &Vec<f64>, point2: &Vec<f64>) -> f64 {
|
||||
let mut dist = 0.0;
|
||||
for i in 0..point1.len() {
|
||||
dist += (point1[i] - point2[i]).powi(2);
|
||||
}
|
||||
dist.sqrt()
|
||||
}
|
||||
|
||||
fn closest_centroid(point: &Vec<f64>, centroids: &Vec<Vec<f64>>) -> usize {
|
||||
let mut closest_idx = 0;
|
||||
let mut closest_dist = std::f64::MAX;
|
||||
for (i, centroid) in centroids.iter().enumerate() {
|
||||
let dist = euclidean_distance(point, centroid);
|
||||
if dist < closest_dist {
|
||||
closest_dist = dist;
|
||||
closest_idx = i;
|
||||
}
|
||||
}
|
||||
closest_idx
|
||||
}
|
||||
|
||||
let bbd = BBDTree::new(data);
|
||||
|
||||
if centroids.len() != parameters.k {
|
||||
return Err(Failed::fit(&format!(
|
||||
"number of centroids ({}) must be equal to k ({})",
|
||||
centroids.len(),
|
||||
parameters.k
|
||||
)));
|
||||
}
|
||||
|
||||
let mut y = vec![0; data.shape().0];
|
||||
for i in 0..data.shape().0 {
|
||||
y[i] = closest_centroid(
|
||||
&Vec::from_iterator(data.get_row(i).iterator(0).map(|e| e.to_f64().unwrap()),
|
||||
data.shape().1), ¢roids
|
||||
);
|
||||
}
|
||||
|
||||
let mut size = vec![0; parameters.k];
|
||||
let mut new_centroids = vec![vec![0f64; data.shape().1]; parameters.k];
|
||||
|
||||
for i in 0..data.shape().0 {
|
||||
size[y[i]] += 1;
|
||||
}
|
||||
|
||||
for i in 0..data.shape().0 {
|
||||
for j in 0..data.shape().1 {
|
||||
new_centroids[y[i]][j] += data.get((i, j)).to_f64().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..parameters.k {
|
||||
for j in 0..data.shape().1 {
|
||||
new_centroids[i][j] /= size[i] as f64;
|
||||
}
|
||||
}
|
||||
|
||||
let mut sums = vec![vec![0f64; data.shape().1]; parameters.k];
|
||||
let mut distortion = std::f64::MAX;
|
||||
|
||||
for _ in 1..=parameters.max_iter {
|
||||
let dist = bbd.clustering(&new_centroids, &mut sums, &mut size, &mut y);
|
||||
for i in 0..parameters.k {
|
||||
if size[i] > 0 {
|
||||
for j in 0..data.shape().1 {
|
||||
new_centroids[i][j] = sums[i][j] / size[i] as f64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if distortion <= dist {
|
||||
break;
|
||||
} else {
|
||||
distortion = dist;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(KMeans {
|
||||
k: parameters.k,
|
||||
_y: y,
|
||||
size,
|
||||
_distortion: distortion,
|
||||
centroids: new_centroids,
|
||||
_phantom_tx: PhantomData,
|
||||
_phantom_ty: PhantomData,
|
||||
_phantom_x: PhantomData,
|
||||
_phantom_y: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/// Predict clusters for `x`
|
||||
/// * `x` - matrix with new data to transform of size _KxM_ , where _K_ is number of new samples and _M_ is number of features.
|
||||
pub fn predict(&self, x: &X) -> Result<Y, Failed> {
|
||||
@@ -417,6 +520,7 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>> KMeans<TX, TY, X, Y>
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
use crate::algorithm::neighbour::fastpair;
|
||||
|
||||
#[cfg_attr(
|
||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||
@@ -503,6 +607,78 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn fit_with_centroids_predict() {
    // Seeds k-means with the midpoints of the closest pairs found by FastPair and
    // checks that predicting the training data reproduces the fitted labels.
    let x = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[4.7, 3.2, 1.3, 0.2],
        &[4.6, 3.1, 1.5, 0.2],
        &[5.0, 3.6, 1.4, 0.2],
        &[5.4, 3.9, 1.7, 0.4],
        &[4.6, 3.4, 1.4, 0.3],
        &[5.0, 3.4, 1.5, 0.2],
        &[4.4, 2.9, 1.4, 0.2],
        &[4.9, 3.1, 1.5, 0.1],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
        &[6.9, 3.1, 4.9, 1.5],
        &[5.5, 2.3, 4.0, 1.3],
        &[6.5, 2.8, 4.6, 1.5],
        &[5.7, 2.8, 4.5, 1.3],
        &[6.3, 3.3, 4.7, 1.6],
        &[4.9, 2.4, 3.3, 1.0],
        &[6.6, 2.9, 4.6, 1.3],
        &[5.2, 2.7, 3.9, 1.4],
    ]);

    let parameters = KMeansParameters {
        k: 3,
        max_iter: 50,
        ..Default::default()
    };

    // compute pairs
    let fastpair = fastpair::FastPair::new(&x).unwrap();

    // compute centroids for the k closest pairs; as before, the closest pair
    // fills the last slot and the k-th closest fills the first one
    let k = parameters.k;
    let mut centroids = vec![vec![0f64; x.shape().1]; k];
    for (slot, p) in centroids.iter_mut().rev().zip(fastpair.ordered_pairs()) {
        let node = x.get_row(p.node);
        let neighbour = x.get_row(p.neighbour.unwrap());
        // element-wise midpoint of the two rows: pair the features up with `zip`
        // (nesting the two loops would emit a cross product of M*M values)
        *slot = node
            .iterator(0)
            .zip(neighbour.iterator(0))
            .map(|(a, b)| (a + b) * 0.5f64)
            .collect();
    }

    let kmeans = KMeans::fit_with_centroids(&x, parameters, centroids).unwrap();

    let y: Vec<usize> = kmeans.predict(&x).unwrap();

    assert_eq!(y.len(), kmeans._y.len());
    for (predicted, fitted) in y.iter().zip(kmeans._y.iter()) {
        assert_eq!(predicted, fitted);
    }
}
|
||||
|
||||
#[cfg_attr(
|
||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||
wasm_bindgen_test::wasm_bindgen_test
|
||||
|
||||
@@ -1570,7 +1570,7 @@ pub trait Array2<T: Debug + Display + Copy + Sized>: MutArrayView2<T> + Sized +
|
||||
mean
|
||||
}
|
||||
|
||||
/// copy coumn as a vector
|
||||
/// copy column as a vector
|
||||
fn copy_col_as_vec(&self, col: usize, result: &mut Vec<T>) {
|
||||
for (r, result_r) in result.iter_mut().enumerate().take(self.shape().0) {
|
||||
*result_r = *self.get((r, col));
|
||||
|
||||
@@ -431,9 +431,9 @@ impl<T: Number + RealNumber> SVDDecomposable<T> for DenseMatrix<T> {}
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixView<'a, T> {
|
||||
fn get(&self, pos: (usize, usize)) -> &T {
|
||||
if self.column_major {
|
||||
&self.values[(pos.0 + pos.1 * self.stride)]
|
||||
&self.values[pos.0 + pos.1 * self.stride]
|
||||
} else {
|
||||
&self.values[(pos.0 * self.stride + pos.1)]
|
||||
&self.values[pos.0 * self.stride + pos.1]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -495,9 +495,9 @@ impl<'a, T: Debug + Display + Copy + Sized> ArrayView1<T> for DenseMatrixView<'a
|
||||
impl<'a, T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrixMutView<'a, T> {
|
||||
fn get(&self, pos: (usize, usize)) -> &T {
|
||||
if self.column_major {
|
||||
&self.values[(pos.0 + pos.1 * self.stride)]
|
||||
&self.values[pos.0 + pos.1 * self.stride]
|
||||
} else {
|
||||
&self.values[(pos.0 * self.stride + pos.1)]
|
||||
&self.values[pos.0 * self.stride + pos.1]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -519,9 +519,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray<T, (usize, usize)>
|
||||
{
|
||||
fn set(&mut self, pos: (usize, usize), x: T) {
|
||||
if self.column_major {
|
||||
self.values[(pos.0 + pos.1 * self.stride)] = x;
|
||||
self.values[pos.0 + pos.1 * self.stride] = x;
|
||||
} else {
|
||||
self.values[(pos.0 * self.stride + pos.1)] = x;
|
||||
self.values[pos.0 * self.stride + pos.1] = x;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,25 @@ pub struct VecView<'a, T: Debug + Display + Copy + Sized> {
|
||||
ptr: &'a [T],
|
||||
}
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for &[T] {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
&self[i]
|
||||
}
|
||||
|
||||
fn shape(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() > 0
|
||||
}
|
||||
|
||||
fn iterator<'b>(&'b self, axis: u8) -> Box<dyn Iterator<Item = &'b T> + 'b> {
|
||||
assert!(axis == 0, "For one dimensional array `axis` should == 0");
|
||||
Box::new(self.iter())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> Array<T, usize> for Vec<T> {
|
||||
fn get(&self, i: usize) -> &T {
|
||||
&self[i]
|
||||
@@ -46,6 +65,7 @@ impl<T: Debug + Display + Copy + Sized> MutArray<T, usize> for Vec<T> {
|
||||
}
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for Vec<T> {}
|
||||
impl<T: Debug + Display + Copy + Sized> ArrayView1<T> for &[T] {}
|
||||
|
||||
impl<T: Debug + Display + Copy + Sized> MutArrayView1<T> for Vec<T> {}
|
||||
|
||||
|
||||
@@ -283,9 +283,7 @@ mod tests {
|
||||
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
|
||||
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
|
||||
];
|
||||
for ((train, test), (expected_train, expected_test)) in
|
||||
k.split(&x).into_iter().zip(expected)
|
||||
{
|
||||
for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
|
||||
assert_eq!(test, expected_test);
|
||||
assert_eq!(train, expected_train);
|
||||
}
|
||||
@@ -307,9 +305,7 @@ mod tests {
|
||||
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
|
||||
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
|
||||
];
|
||||
for ((train, test), (expected_train, expected_test)) in
|
||||
k.split(&x).into_iter().zip(expected)
|
||||
{
|
||||
for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
|
||||
assert_eq!(test.len(), expected_test.len());
|
||||
assert_eq!(train.len(), expected_train.len());
|
||||
}
|
||||
|
||||
+3
-9
@@ -83,7 +83,7 @@ where
|
||||
Matrix: Array2<T>,
|
||||
{
|
||||
let csv_text = read_string_from_source(source)?;
|
||||
let rows: Vec<Vec<T>> = extract_row_vectors_from_csv_text::<T, RowVector, Matrix>(
|
||||
let rows: Vec<Vec<T>> = extract_row_vectors_from_csv_text(
|
||||
&csv_text,
|
||||
&definition,
|
||||
detect_row_format(&csv_text, &definition)?,
|
||||
@@ -103,12 +103,7 @@ where
|
||||
|
||||
/// Given a string containing the contents of a csv file, extract its value
|
||||
/// into row-vectors.
|
||||
fn extract_row_vectors_from_csv_text<
|
||||
'a,
|
||||
T: Number + RealNumber + std::str::FromStr,
|
||||
RowVector: Array1<T>,
|
||||
Matrix: Array2<T>,
|
||||
>(
|
||||
fn extract_row_vectors_from_csv_text<'a, T: Number + RealNumber + std::str::FromStr>(
|
||||
csv_text: &'a str,
|
||||
definition: &'a CSVDefinition<'_>,
|
||||
row_format: CSVRowFormat<'_>,
|
||||
@@ -305,12 +300,11 @@ mod tests {
|
||||
}
|
||||
mod extract_row_vectors_from_csv_text {
|
||||
use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat};
|
||||
use crate::linalg::basic::matrix::DenseMatrix;
|
||||
|
||||
#[test]
|
||||
fn read_default_csv() {
|
||||
assert_eq!(
|
||||
extract_row_vectors_from_csv_text::<f64, Vec<_>, DenseMatrix<_>>(
|
||||
extract_row_vectors_from_csv_text::<f64>(
|
||||
"column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0",
|
||||
&CSVDefinition::default(),
|
||||
CSVRowFormat {
|
||||
|
||||
+55
-64
@@ -322,19 +322,26 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
|
||||
let (n, _) = x.shape();
|
||||
let mut y_hat: Vec<TX> = Array1::zeros(n);
|
||||
|
||||
let mut row = Vec::with_capacity(n);
|
||||
for i in 0..n {
|
||||
let row_pred: TX =
|
||||
self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n));
|
||||
row.clear();
|
||||
row.extend(x.get_row(i).iterator(0).copied());
|
||||
let row_pred: TX = self.predict_for_row(&row);
|
||||
y_hat.set(i, row_pred);
|
||||
}
|
||||
|
||||
Ok(y_hat)
|
||||
}
|
||||
|
||||
fn predict_for_row(&self, x: Vec<TX>) -> TX {
|
||||
fn predict_for_row(&self, x: &[TX]) -> TX {
|
||||
let mut f = self.b.unwrap();
|
||||
|
||||
let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
for i in 0..self.instances.as_ref().unwrap().len() {
|
||||
let xj: Vec<_> = self.instances.as_ref().unwrap()[i]
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect();
|
||||
f += self.w.as_ref().unwrap()[i]
|
||||
* TX::from(
|
||||
self.parameters
|
||||
@@ -343,13 +350,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX> + 'a, Y: Array
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&self.instances.as_ref().unwrap()[i]
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
)
|
||||
.apply(&xi, &xj)
|
||||
.unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
@@ -472,14 +473,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
let tol = self.parameters.tol;
|
||||
let good_enough = TX::from_i32(1000).unwrap();
|
||||
|
||||
let mut x = Vec::with_capacity(n);
|
||||
for _ in 0..self.parameters.epoch {
|
||||
for i in self.permutate(n) {
|
||||
self.process(
|
||||
i,
|
||||
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
|
||||
*self.y.get(i),
|
||||
&mut cache,
|
||||
);
|
||||
x.clear();
|
||||
x.extend(self.x.get_row(i).iterator(0).take(n).copied());
|
||||
self.process(i, &x, *self.y.get(i), &mut cache);
|
||||
loop {
|
||||
self.reprocess(tol, &mut cache);
|
||||
self.find_min_max_gradient();
|
||||
@@ -511,24 +510,17 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
let mut cp = 0;
|
||||
let mut cn = 0;
|
||||
|
||||
let mut x = Vec::with_capacity(n);
|
||||
for i in self.permutate(n) {
|
||||
x.clear();
|
||||
x.extend(self.x.get_row(i).iterator(0).take(n).copied());
|
||||
if *self.y.get(i) == TY::one() && cp < few {
|
||||
if self.process(
|
||||
i,
|
||||
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
|
||||
*self.y.get(i),
|
||||
cache,
|
||||
) {
|
||||
if self.process(i, &x, *self.y.get(i), cache) {
|
||||
cp += 1;
|
||||
}
|
||||
} else if *self.y.get(i) == TY::from(-1).unwrap()
|
||||
&& cn < few
|
||||
&& self.process(
|
||||
i,
|
||||
Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n),
|
||||
*self.y.get(i),
|
||||
cache,
|
||||
)
|
||||
&& self.process(i, &x, *self.y.get(i), cache)
|
||||
{
|
||||
cn += 1;
|
||||
}
|
||||
@@ -539,7 +531,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
}
|
||||
}
|
||||
|
||||
fn process(&mut self, i: usize, x: Vec<TX>, y: TY, cache: &mut Cache<TX, TY, X, Y>) -> bool {
|
||||
fn process(&mut self, i: usize, x: &[TX], y: TY, cache: &mut Cache<TX, TY, X, Y>) -> bool {
|
||||
for j in 0..self.sv.len() {
|
||||
if self.sv[j].index == i {
|
||||
return true;
|
||||
@@ -551,15 +543,14 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
let mut cache_values: Vec<((usize, usize), TX)> = Vec::new();
|
||||
|
||||
for v in self.sv.iter() {
|
||||
let xi: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let xj: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let k = self
|
||||
.parameters
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
)
|
||||
.apply(&xi, &xj)
|
||||
.unwrap();
|
||||
cache_values.push(((i, v.index), TX::from(k).unwrap()));
|
||||
g -= v.alpha * k;
|
||||
@@ -578,7 +569,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
cache.insert(v.0, v.1.to_f64().unwrap());
|
||||
}
|
||||
|
||||
let x_f64 = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let x_f64: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let k_v = self
|
||||
.parameters
|
||||
.kernel
|
||||
@@ -701,8 +692,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
let km = sv1.k;
|
||||
let gm = sv1.grad;
|
||||
let mut best = 0f64;
|
||||
let xi: Vec<_> = sv1.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
for i in 0..self.sv.len() {
|
||||
let v = &self.sv[i];
|
||||
let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let z = v.grad - gm;
|
||||
let k = cache.get(
|
||||
sv1,
|
||||
@@ -711,10 +704,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
)
|
||||
.apply(&xi, &xj)
|
||||
.unwrap(),
|
||||
);
|
||||
let mut curv = km + v.k - 2f64 * k;
|
||||
@@ -732,6 +722,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
}
|
||||
}
|
||||
|
||||
let xi: Vec<_> = self.sv[idx_1]
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
idx_2.map(|idx_2| {
|
||||
(
|
||||
idx_1,
|
||||
@@ -742,16 +738,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&self.sv[idx_1]
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
&xi,
|
||||
&self.sv[idx_2]
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.unwrap()
|
||||
}),
|
||||
@@ -765,8 +757,11 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
let km = sv2.k;
|
||||
let gm = sv2.grad;
|
||||
let mut best = 0f64;
|
||||
|
||||
let xi: Vec<_> = sv2.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
for i in 0..self.sv.len() {
|
||||
let v = &self.sv[i];
|
||||
let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let z = gm - v.grad;
|
||||
let k = cache.get(
|
||||
sv2,
|
||||
@@ -775,10 +770,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&v.x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
)
|
||||
.apply(&xi, &xj)
|
||||
.unwrap(),
|
||||
);
|
||||
let mut curv = km + v.k - 2f64 * k;
|
||||
@@ -797,6 +789,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
}
|
||||
}
|
||||
|
||||
let xj: Vec<_> = self.sv[idx_2]
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect();
|
||||
|
||||
idx_1.map(|idx_1| {
|
||||
(
|
||||
idx_1,
|
||||
@@ -811,12 +809,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
&self.sv[idx_2]
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
.collect::<Vec<_>>(),
|
||||
&xj,
|
||||
)
|
||||
.unwrap()
|
||||
}),
|
||||
@@ -835,12 +829,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
.collect::<Vec<_>>(),
|
||||
&self.sv[idx_2]
|
||||
.x
|
||||
.iter()
|
||||
.map(|e| e.to_f64().unwrap())
|
||||
.collect(),
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.unwrap(),
|
||||
)),
|
||||
@@ -895,7 +889,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
self.sv[v1].alpha -= step.to_f64().unwrap();
|
||||
self.sv[v2].alpha += step.to_f64().unwrap();
|
||||
|
||||
let xi_v1: Vec<_> = self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let xi_v2: Vec<_> = self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
for i in 0..self.sv.len() {
|
||||
let xj: Vec<_> = self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
let k2 = cache.get(
|
||||
&self.sv[v2],
|
||||
&self.sv[i],
|
||||
@@ -903,10 +900,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
)
|
||||
.apply(&xi_v2, &xj)
|
||||
.unwrap(),
|
||||
);
|
||||
let k1 = cache.get(
|
||||
@@ -916,10 +910,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2<TX>, Y: Array1<TY>
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
)
|
||||
.apply(&xi_v1, &xj)
|
||||
.unwrap(),
|
||||
);
|
||||
self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1);
|
||||
|
||||
+7
-9
@@ -248,19 +248,20 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
|
||||
|
||||
let mut y_hat: Vec<T> = Vec::<T>::zeros(n);
|
||||
|
||||
let mut x_i = Vec::with_capacity(n);
|
||||
for i in 0..n {
|
||||
y_hat.set(
|
||||
i,
|
||||
self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)),
|
||||
);
|
||||
x_i.clear();
|
||||
x_i.extend(x.get_row(i).iterator(0).copied());
|
||||
y_hat.set(i, self.predict_for_row(&x_i));
|
||||
}
|
||||
|
||||
Ok(y_hat)
|
||||
}
|
||||
|
||||
pub(crate) fn predict_for_row(&self, x: Vec<T>) -> T {
|
||||
pub(crate) fn predict_for_row(&self, x: &[T]) -> T {
|
||||
let mut f = self.b;
|
||||
|
||||
let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect();
|
||||
for i in 0..self.instances.as_ref().unwrap().len() {
|
||||
f += self.w.as_ref().unwrap()[i]
|
||||
* T::from(
|
||||
@@ -270,10 +271,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2<T>, Y: Array1<T>> SVR<'
|
||||
.kernel
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.apply(
|
||||
&x.iter().map(|e| e.to_f64().unwrap()).collect(),
|
||||
&self.instances.as_ref().unwrap()[i],
|
||||
)
|
||||
.apply(&xi, &self.instances.as_ref().unwrap()[i])
|
||||
.unwrap(),
|
||||
)
|
||||
.unwrap()
|
||||
|
||||
Reference in New Issue
Block a user