Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f53cb36b9d | ||
|
|
c57a4370ba | ||
|
|
78f18505b1 | ||
|
|
58a8624fa9 |
+1
-1
@@ -2,7 +2,7 @@
|
|||||||
name = "smartcore"
|
name = "smartcore"
|
||||||
description = "Machine Learning in Rust."
|
description = "Machine Learning in Rust."
|
||||||
homepage = "https://smartcorelib.org"
|
homepage = "https://smartcorelib.org"
|
||||||
version = "0.4.8"
|
version = "0.4.9"
|
||||||
authors = ["smartcore Developers"]
|
authors = ["smartcore Developers"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
|
|||||||
+1
-1
@@ -166,7 +166,7 @@ pub struct LassoSearchParameters {
|
|||||||
/// The maximum number of iterations
|
/// The maximum number of iterations
|
||||||
pub max_iter: Vec<usize>,
|
pub max_iter: Vec<usize>,
|
||||||
#[cfg_attr(feature = "serde", serde(default))]
|
#[cfg_attr(feature = "serde", serde(default))]
|
||||||
/// The maximum number of iterations
|
/// If false, force the intercept parameter (beta_0) to be zero.
|
||||||
pub fit_intercept: Vec<bool>,
|
pub fit_intercept: Vec<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
let lambda = lambda.max(T::epsilon());
|
let lambda = lambda.max(T::epsilon());
|
||||||
|
|
||||||
//parameters
|
//parameters
|
||||||
|
let max_ls_iter = 100;
|
||||||
let pcgmaxi = 5000;
|
let pcgmaxi = 5000;
|
||||||
let min_pcgtol = T::from_f64(0.1).unwrap();
|
let min_pcgtol = T::from_f64(0.1).unwrap();
|
||||||
let eta = T::from_f64(1E-3).unwrap();
|
let eta = T::from_f64(1E-3).unwrap();
|
||||||
@@ -68,7 +69,6 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
y.to_owned()
|
y.to_owned()
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut max_ls_iter = 100;
|
|
||||||
let mut pitr = 0;
|
let mut pitr = 0;
|
||||||
let mut w = Vec::zeros(p);
|
let mut w = Vec::zeros(p);
|
||||||
let mut neww = w.clone();
|
let mut neww = w.clone();
|
||||||
@@ -170,7 +170,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
s = T::one();
|
s = T::one();
|
||||||
let gdx = grad.dot(&dxu);
|
let gdx = grad.dot(&dxu);
|
||||||
|
|
||||||
let lsiter = 0;
|
let mut lsiter = 0;
|
||||||
while lsiter < max_ls_iter {
|
while lsiter < max_ls_iter {
|
||||||
for i in 0..p {
|
for i in 0..p {
|
||||||
neww[i] = w[i] + s * dx[i];
|
neww[i] = w[i] + s * dx[i];
|
||||||
@@ -195,7 +195,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
s = beta * s;
|
s = beta * s;
|
||||||
max_ls_iter += 1;
|
lsiter += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if lsiter == max_ls_iter {
|
if lsiter == max_ls_iter {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
//! # K Nearest Neighbors Regressor
|
//! # K Nearest Neighbors Regressor with Feature Sparsing
|
||||||
//!
|
//!
|
||||||
//! Regressor that predicts estimated values as a function of k nearest neighbours.
|
//! Regressor that predicts estimated values as a function of k nearest neighbours.
|
||||||
|
//! Now supports feature sparsing - the ability to consider only a subset of features during prediction.
|
||||||
//!
|
//!
|
||||||
//! `KNNRegressor` relies on 2 backend algorithms to speedup KNN queries:
|
//! `KNNRegressor` relies on 2 backend algorithms to speedup KNN queries:
|
||||||
//! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
|
//! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
|
||||||
@@ -29,6 +30,10 @@
|
|||||||
//!
|
//!
|
||||||
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
//! let y_hat = knn.predict(&x).unwrap();
|
//! let y_hat = knn.predict(&x).unwrap();
|
||||||
|
//!
|
||||||
|
//! // Predict using only features at indices 0
|
||||||
|
//! let feature_indices = vec![0];
|
||||||
|
//! let y_hat_sparse = knn.predict_sparse(&x, &feature_indices).unwrap();
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! The variable `y_hat` will hold the predicted values.
|
//! The variable `y_hat` will hold the predicted values.
|
||||||
@@ -77,12 +82,13 @@ pub struct KNNRegressorParameters<T: Number, D: Distance<Vec<T>>> {
|
|||||||
pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
||||||
{
|
{
|
||||||
y: Option<Y>,
|
y: Option<Y>,
|
||||||
|
x: Option<X>, // Store training data for sparse feature prediction
|
||||||
knn_algorithm: Option<KNNAlgorithm<TX, D>>,
|
knn_algorithm: Option<KNNAlgorithm<TX, D>>,
|
||||||
|
distance: Option<D>, // Store distance function for sparse prediction
|
||||||
weight: Option<KNNWeightFunction>,
|
weight: Option<KNNWeightFunction>,
|
||||||
k: Option<usize>,
|
k: Option<usize>,
|
||||||
_phantom_tx: PhantomData<TX>,
|
_phantom_tx: PhantomData<TX>,
|
||||||
_phantom_ty: PhantomData<TY>,
|
_phantom_ty: PhantomData<TY>,
|
||||||
_phantom_x: PhantomData<X>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
||||||
@@ -92,12 +98,20 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
self.y.as_ref().unwrap()
|
self.y.as_ref().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn x(&self) -> &X {
|
||||||
|
self.x.as_ref().unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
|
fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
|
||||||
self.knn_algorithm
|
self.knn_algorithm
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.expect("Missing parameter: KNNAlgorithm")
|
.expect("Missing parameter: KNNAlgorithm")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn distance(&self) -> &D {
|
||||||
|
self.distance.as_ref().expect("Missing parameter: distance")
|
||||||
|
}
|
||||||
|
|
||||||
fn weight(&self) -> &KNNWeightFunction {
|
fn weight(&self) -> &KNNWeightFunction {
|
||||||
self.weight.as_ref().expect("Missing parameter: weight")
|
self.weight.as_ref().expect("Missing parameter: weight")
|
||||||
}
|
}
|
||||||
@@ -176,12 +190,13 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
y: Option::None,
|
y: Option::None,
|
||||||
|
x: Option::None,
|
||||||
knn_algorithm: Option::None,
|
knn_algorithm: Option::None,
|
||||||
|
distance: Option::None,
|
||||||
weight: Option::None,
|
weight: Option::None,
|
||||||
k: Option::None,
|
k: Option::None,
|
||||||
_phantom_tx: PhantomData,
|
_phantom_tx: PhantomData,
|
||||||
_phantom_ty: PhantomData,
|
_phantom_ty: PhantomData,
|
||||||
_phantom_x: PhantomData,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -231,16 +246,17 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let knn_algo = parameters.algorithm.fit(data, parameters.distance)?;
|
let knn_algo = parameters.algorithm.fit(data, parameters.distance.clone())?;
|
||||||
|
|
||||||
Ok(KNNRegressor {
|
Ok(KNNRegressor {
|
||||||
y: Some(y.clone()),
|
y: Some(y.clone()),
|
||||||
|
x: Some(x.clone()),
|
||||||
k: Some(parameters.k),
|
k: Some(parameters.k),
|
||||||
knn_algorithm: Some(knn_algo),
|
knn_algorithm: Some(knn_algo),
|
||||||
|
distance: Some(parameters.distance),
|
||||||
weight: Some(parameters.weight),
|
weight: Some(parameters.weight),
|
||||||
_phantom_tx: PhantomData,
|
_phantom_tx: PhantomData,
|
||||||
_phantom_ty: PhantomData,
|
_phantom_ty: PhantomData,
|
||||||
_phantom_x: PhantomData,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -262,6 +278,45 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Predict the target for the provided data using only specified features.
|
||||||
|
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
|
||||||
|
/// * `feature_indices` - indices of features to consider (e.g., [0, 2, 4] to use only features at positions 0, 2, and 4)
|
||||||
|
///
|
||||||
|
/// Returns a vector of size N with estimates.
|
||||||
|
pub fn predict_sparse(&self, x: &X, feature_indices: &[usize]) -> Result<Y, Failed> {
|
||||||
|
let (n_samples, n_features) = x.shape();
|
||||||
|
|
||||||
|
// Validate feature indices
|
||||||
|
for &idx in feature_indices {
|
||||||
|
if idx >= n_features {
|
||||||
|
return Err(Failed::predict(&format!(
|
||||||
|
"Feature index {} out of bounds (max: {})",
|
||||||
|
idx,
|
||||||
|
n_features - 1
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if feature_indices.is_empty() {
|
||||||
|
return Err(Failed::predict(
|
||||||
|
"feature_indices cannot be empty"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut result = Y::zeros(n_samples);
|
||||||
|
|
||||||
|
let mut row_vec = vec![TX::zero(); feature_indices.len()];
|
||||||
|
for (i, row) in x.row_iter().enumerate() {
|
||||||
|
// Extract only the specified features
|
||||||
|
for (j, &feat_idx) in feature_indices.iter().enumerate() {
|
||||||
|
row_vec[j] = *row.get(feat_idx);
|
||||||
|
}
|
||||||
|
result.set(i, self.predict_for_row_sparse(&row_vec, feature_indices)?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
fn predict_for_row(&self, row: &Vec<TX>) -> Result<TY, Failed> {
|
fn predict_for_row(&self, row: &Vec<TX>) -> Result<TY, Failed> {
|
||||||
let search_result = self.knn_algorithm().find(row, self.k.unwrap())?;
|
let search_result = self.knn_algorithm().find(row, self.k.unwrap())?;
|
||||||
let mut result = TY::zero();
|
let mut result = TY::zero();
|
||||||
@@ -277,6 +332,50 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
|
|||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn predict_for_row_sparse(
|
||||||
|
&self,
|
||||||
|
row: &Vec<TX>,
|
||||||
|
feature_indices: &[usize],
|
||||||
|
) -> Result<TY, Failed> {
|
||||||
|
let training_data = self.x();
|
||||||
|
let (n_training_samples, _) = training_data.shape();
|
||||||
|
let k = self.k.unwrap();
|
||||||
|
|
||||||
|
// Manually compute distances using only specified features
|
||||||
|
let mut distances: Vec<(usize, f64)> = Vec::with_capacity(n_training_samples);
|
||||||
|
|
||||||
|
for i in 0..n_training_samples {
|
||||||
|
let train_row = training_data.get_row(i);
|
||||||
|
|
||||||
|
// Extract sparse features from training data
|
||||||
|
let mut train_sparse = Vec::with_capacity(feature_indices.len());
|
||||||
|
for &feat_idx in feature_indices {
|
||||||
|
train_sparse.push(*train_row.get(feat_idx));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute distance using only selected features
|
||||||
|
let dist = self.distance().distance(row, &train_sparse);
|
||||||
|
distances.push((i, dist));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by distance and take k nearest
|
||||||
|
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
let k_nearest: Vec<(usize, f64)> = distances.into_iter().take(k).collect();
|
||||||
|
|
||||||
|
// Compute weighted prediction
|
||||||
|
let mut result = TY::zero();
|
||||||
|
let weights = self
|
||||||
|
.weight()
|
||||||
|
.calc_weights(k_nearest.iter().map(|v| v.1).collect());
|
||||||
|
let w_sum: f64 = weights.iter().copied().sum();
|
||||||
|
|
||||||
|
for (neighbor, w) in k_nearest.iter().zip(weights.iter()) {
|
||||||
|
result += *self.y().get(neighbor.0) * TY::from_f64(*w / w_sum).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -332,6 +431,91 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn knn_predict_sparse() {
|
||||||
|
// Training data with 3 features
|
||||||
|
let x = DenseMatrix::from_2d_array(&[
|
||||||
|
&[1., 2., 10.],
|
||||||
|
&[3., 4., 20.],
|
||||||
|
&[5., 6., 30.],
|
||||||
|
&[7., 8., 40.],
|
||||||
|
&[9., 10., 50.],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
|
||||||
|
|
||||||
|
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
|
// Test data
|
||||||
|
let x_test = DenseMatrix::from_2d_array(&[
|
||||||
|
&[1., 2., 999.], // Third feature is very different
|
||||||
|
&[5., 6., 999.],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Predict using only first two features (ignore the third)
|
||||||
|
let feature_indices = vec![0, 1];
|
||||||
|
let y_hat_sparse = knn.predict_sparse(&x_test, &feature_indices).unwrap();
|
||||||
|
|
||||||
|
// Should get good predictions since we're ignoring the mismatched third feature
|
||||||
|
assert_eq!(2, Vec::len(&y_hat_sparse));
|
||||||
|
assert!((y_hat_sparse[0] - 2.0).abs() < 1.0); // Should be close to 1-2
|
||||||
|
assert!((y_hat_sparse[1] - 3.0).abs() < 1.0); // Should be close to 3
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn knn_predict_sparse_single_feature() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[
|
||||||
|
&[1., 100., 1000.],
|
||||||
|
&[2., 200., 2000.],
|
||||||
|
&[3., 300., 3000.],
|
||||||
|
&[4., 400., 4000.],
|
||||||
|
&[5., 500., 5000.],
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
|
||||||
|
|
||||||
|
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
|
||||||
|
let x_test = DenseMatrix::from_2d_array(&[&[1.5, 999., 9999.]]).unwrap();
|
||||||
|
|
||||||
|
// Use only first feature
|
||||||
|
let y_hat = knn.predict_sparse(&x_test, &[0]).unwrap();
|
||||||
|
|
||||||
|
// Should predict based on first feature only
|
||||||
|
assert_eq!(1, Vec::len(&y_hat));
|
||||||
|
assert!((y_hat[0] - 1.5).abs() < 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(
|
||||||
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
)]
|
||||||
|
#[test]
|
||||||
|
fn knn_predict_sparse_invalid_indices() {
|
||||||
|
let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.]]).unwrap();
|
||||||
|
let y: Vec<f64> = vec![1., 2.];
|
||||||
|
|
||||||
|
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
|
||||||
|
let x_test = DenseMatrix::from_2d_array(&[&[1., 2.]]).unwrap();
|
||||||
|
|
||||||
|
// Index out of bounds
|
||||||
|
let result = knn.predict_sparse(&x_test, &[5]);
|
||||||
|
assert!(result.is_err());
|
||||||
|
|
||||||
|
// Empty indices
|
||||||
|
let result = knn.predict_sparse(&x_test, &[]);
|
||||||
|
assert!(result.is_err());
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
all(target_arch = "wasm32", not(target_os = "wasi")),
|
all(target_arch = "wasm32", not(target_os = "wasi")),
|
||||||
wasm_bindgen_test::wasm_bindgen_test
|
wasm_bindgen_test::wasm_bindgen_test
|
||||||
|
|||||||
Reference in New Issue
Block a user