1 Commits

Author SHA1 Message Date
dependabot[bot]
ff9679c970 Update rand requirement from 0.8.5 to 0.9.2
Updates the requirements on [rand](https://github.com/rust-random/rand) to permit the latest version.
- [Release notes](https://github.com/rust-random/rand/releases)
- [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-random/rand/compare/0.8.5...rand_core-0.9.2)

---
updated-dependencies:
- dependency-name: rand
  dependency-version: 0.9.2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-19 17:02:08 +00:00
2 changed files with 7 additions and 191 deletions
+1 -1
View File
@@ -25,7 +25,7 @@ cfg-if = "1.0.0"
ndarray = { version = "0.15", optional = true }
num-traits = "0.2.12"
num = "0.4"
rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
rand = { version = "0.9.2", default-features = false, features = ["small_rng"] }
rand_distr = { version = "0.4", optional = true }
serde = { version = "1", features = ["derive"], optional = true }
ordered-float = "5.1.0"
+6 -190
View File
@@ -1,7 +1,6 @@
//! # K Nearest Neighbors Regressor with Feature Sparsing
//! # K Nearest Neighbors Regressor
//!
//! Regressor that predicts estimated values as a function of k nearest neighbours.
//! Now supports feature sparsing - the ability to consider only a subset of features during prediction.
//!
//! `KNNRegressor` relies on 2 backend algorithms to speedup KNN queries:
//! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
@@ -30,10 +29,6 @@
//!
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = knn.predict(&x).unwrap();
//!
//! // Predict using only features at indices 0
//! let feature_indices = vec![0];
//! let y_hat_sparse = knn.predict_sparse(&x, &feature_indices).unwrap();
//! ```
//!
//! variable `y_hat` will hold predicted value
@@ -82,13 +77,12 @@ pub struct KNNRegressorParameters<T: Number, D: Distance<Vec<T>>> {
pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
{
y: Option<Y>,
x: Option<X>, // Store training data for sparse feature prediction
knn_algorithm: Option<KNNAlgorithm<TX, D>>,
distance: Option<D>, // Store distance function for sparse prediction
weight: Option<KNNWeightFunction>,
k: Option<usize>,
_phantom_tx: PhantomData<TX>,
_phantom_ty: PhantomData<TY>,
_phantom_x: PhantomData<X>,
}
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
@@ -98,20 +92,12 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
self.y.as_ref().unwrap()
}
/// Borrows the stored training matrix.
///
/// Panics (via `unwrap`) when the `x` field is `None`.
fn x(&self) -> &X {
self.x.as_ref().unwrap()
}
/// Borrows the fitted nearest-neighbour search structure.
///
/// Panics with "Missing parameter: KNNAlgorithm" when the field is `None`.
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
self.knn_algorithm
.as_ref()
.expect("Missing parameter: KNNAlgorithm")
}
/// Borrows the stored distance function.
///
/// Panics with "Missing parameter: distance" when the field is `None`.
fn distance(&self) -> &D {
self.distance.as_ref().expect("Missing parameter: distance")
}
/// Borrows the stored neighbour weighting function.
///
/// Panics with "Missing parameter: weight" when the field is `None`.
fn weight(&self) -> &KNNWeightFunction {
self.weight.as_ref().expect("Missing parameter: weight")
}
@@ -190,13 +176,12 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
fn new() -> Self {
Self {
y: Option::None,
x: Option::None,
knn_algorithm: Option::None,
distance: Option::None,
weight: Option::None,
k: Option::None,
_phantom_tx: PhantomData,
_phantom_ty: PhantomData,
_phantom_x: PhantomData,
}
}
@@ -246,17 +231,16 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
)));
}
let knn_algo = parameters.algorithm.fit(data, parameters.distance.clone())?;
let knn_algo = parameters.algorithm.fit(data, parameters.distance)?;
Ok(KNNRegressor {
y: Some(y.clone()),
x: Some(x.clone()),
k: Some(parameters.k),
knn_algorithm: Some(knn_algo),
distance: Some(parameters.distance),
weight: Some(parameters.weight),
_phantom_tx: PhantomData,
_phantom_ty: PhantomData,
_phantom_x: PhantomData,
})
}
@@ -278,45 +262,6 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
Ok(result)
}
/// Predict the target for the provided data using only specified features.
///
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
/// * `feature_indices` - indices of features to consider (e.g., [0, 2, 4] to use only features at positions 0, 2, and 4)
///
/// Returns a vector of size N with estimates.
///
/// # Errors
///
/// Returns `Failed` when `feature_indices` is empty, or when any index is not a valid
/// column of both `x` and the stored training matrix.
pub fn predict_sparse(&self, x: &X, feature_indices: &[usize]) -> Result<Y, Failed> {
    if feature_indices.is_empty() {
        return Err(Failed::predict("feature_indices cannot be empty"));
    }

    let (n_samples, n_features) = x.shape();
    // The same indices are later used to read the stored training rows, so an index has
    // to be valid for both matrices; otherwise that read would panic instead of
    // surfacing an error to the caller.
    let (_, n_train_features) = self.x().shape();
    let n_usable = n_features.min(n_train_features);

    if let Some(&idx) = feature_indices.iter().find(|&&idx| idx >= n_usable) {
        // Report the column count rather than `count - 1`: the subtraction underflows
        // `usize` when a matrix has zero columns.
        return Err(Failed::predict(&format!(
            "Feature index {} out of bounds (feature count: {})",
            idx, n_usable
        )));
    }

    let mut result = Y::zeros(n_samples);
    // One scratch buffer, overwritten for every query row.
    let mut row_vec = vec![TX::zero(); feature_indices.len()];

    for (i, row) in x.row_iter().enumerate() {
        // Extract only the specified features
        for (slot, &feat_idx) in row_vec.iter_mut().zip(feature_indices.iter()) {
            *slot = *row.get(feat_idx);
        }
        result.set(i, self.predict_for_row_sparse(&row_vec, feature_indices)?);
    }

    Ok(result)
}
fn predict_for_row(&self, row: &Vec<TX>) -> Result<TY, Failed> {
let search_result = self.knn_algorithm().find(row, self.k.unwrap())?;
let mut result = TY::zero();
@@ -332,50 +277,6 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
Ok(result)
}
/// Estimates the target for a single query row that has already been reduced to the
/// columns listed in `feature_indices`.
///
/// Every stored training row is reduced to the same columns and compared with `row`
/// using the stored distance function; the `k` closest rows are then combined using
/// the stored weight function, with weights normalised by their sum.
fn predict_for_row_sparse(
    &self,
    row: &Vec<TX>,
    feature_indices: &[usize],
) -> Result<TY, Failed> {
    let train = self.x();
    let n_train = train.shape().0;
    let neighbours_wanted = self.k.unwrap();

    // (training row index, distance to the query) for every training row.
    let mut ranked: Vec<(usize, f64)> = (0..n_train)
        .map(|sample| {
            let stored = train.get_row(sample);
            let projected: Vec<TX> = feature_indices
                .iter()
                .map(|&feature| *stored.get(feature))
                .collect();
            (sample, self.distance().distance(row, &projected))
        })
        .collect();

    // Stable sort: rows at equal distance keep their training order. Incomparable
    // distances are treated as equal.
    ranked.sort_by(|lhs, rhs| {
        lhs.1
            .partial_cmp(&rhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    ranked.truncate(neighbours_wanted);

    let neighbour_distances: Vec<f64> = ranked.iter().map(|&(_, dist)| dist).collect();
    let weights = self.weight().calc_weights(neighbour_distances);
    let total_weight: f64 = weights.iter().copied().sum();

    let mut estimate = TY::zero();
    for (&(sample, _), w) in ranked.iter().zip(weights.iter()) {
        estimate += *self.y().get(sample) * TY::from_f64(*w / total_weight).unwrap();
    }

    Ok(estimate)
}
}
#[cfg(test)]
@@ -431,91 +332,6 @@ mod tests {
}
}
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn knn_predict_sparse() {
    // Five training samples described by three features each.
    let train_x = DenseMatrix::from_2d_array(&[
        &[1., 2., 10.],
        &[3., 4., 20.],
        &[5., 6., 30.],
        &[7., 8., 40.],
        &[9., 10., 50.],
    ])
    .unwrap();
    let train_y: Vec<f64> = vec![1., 2., 3., 4., 5.];

    let model = KNNRegressor::fit(&train_x, &train_y, Default::default()).unwrap();

    // Queries whose third column is far away from anything seen during training.
    let queries = DenseMatrix::from_2d_array(&[
        &[1., 2., 999.],
        &[5., 6., 999.],
    ])
    .unwrap();

    // Restrict the comparison to the first two columns so the third is ignored.
    let selected = [0usize, 1];
    let predictions = model.predict_sparse(&queries, &selected).unwrap();

    assert_eq!(2, predictions.len());

    // Each estimate must land within 1.0 of the listed reference value.
    let references = [2.0, 3.0];
    for (got, want) in predictions.iter().zip(references.iter()) {
        assert!((got - want).abs() < 1.0);
    }
}
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn knn_predict_sparse_single_feature() {
    // The three columns live on very different scales.
    let train_x = DenseMatrix::from_2d_array(&[
        &[1., 100., 1000.],
        &[2., 200., 2000.],
        &[3., 300., 3000.],
        &[4., 400., 4000.],
        &[5., 500., 5000.],
    ])
    .unwrap();
    let train_y: Vec<f64> = vec![1., 2., 3., 4., 5.];

    let model = KNNRegressor::fit(&train_x, &train_y, Default::default()).unwrap();

    let query = DenseMatrix::from_2d_array(&[&[1.5, 999., 9999.]]).unwrap();

    // Only column 0 takes part in the neighbour search.
    let estimate = model.predict_sparse(&query, &[0]).unwrap();

    assert_eq!(1, estimate.len());
    assert!((estimate[0] - 1.5).abs() < 1.0);
}
#[cfg_attr(
    all(target_arch = "wasm32", not(target_os = "wasi")),
    wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn knn_predict_sparse_invalid_indices() {
    let train_x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.]]).unwrap();
    let train_y: Vec<f64> = vec![1., 2.];
    let model = KNNRegressor::fit(&train_x, &train_y, Default::default()).unwrap();

    let query = DenseMatrix::from_2d_array(&[&[1., 2.]]).unwrap();

    // A column index beyond the matrix width must be rejected.
    assert!(model.predict_sparse(&query, &[5]).is_err());

    // An empty selection must be rejected as well.
    assert!(model.predict_sparse(&query, &[]).is_err());
}
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test::wasm_bindgen_test
@@ -534,4 +350,4 @@ mod tests {
assert_eq!(knn, deserialized_knn);
}
}
}