allow for sparse predictions

bump version to 0.4.9
fix LASSO (#346 )
2026-02-09 13:25:50 +01:00 · 2026-01-09 06:14:44 +00:00 · 2025-12-05 17:49:07 +09:00 · 2025-11-29 02:54:35 +00:00
5 changed files with 196 additions and 12 deletions
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [Unreleased]
+## [0.4.8] - 2025-11-29
 - WARNING: Breaking changes!
 - `LassoParameters` and `LassoSearchParameters` have a new field `fit_intercept`. When it is set to false, the `beta_0` term in the formula will be forced to zero, and `intercept` field in `Lasso` will be set to `None`.

@@ -2,7 +2,7 @@
 name = "smartcore"
 description = "Machine Learning in Rust."
 homepage = "https://smartcorelib.org"
-version = "0.4.7"
+version = "0.4.9"
 authors = ["smartcore Developers"]
 edition = "2021"
 license = "Apache-2.0"
@@ -166,7 +166,7 @@ pub struct LassoSearchParameters {
    /// The maximum number of iterations
    pub max_iter: Vec<usize>,
    #[cfg_attr(feature = "serde", serde(default))]
-    /// The maximum number of iterations
+    /// If false, force the intercept parameter (beta_0) to be zero.
    pub fit_intercept: Vec<bool>,
 }

@@ -53,6 +53,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
        let lambda = lambda.max(T::epsilon());

        //parameters
+        let max_ls_iter = 100;
        let pcgmaxi = 5000;
        let min_pcgtol = T::from_f64(0.1).unwrap();
        let eta = T::from_f64(1E-3).unwrap();
@@ -68,7 +69,6 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
            y.to_owned()
        };

-        let mut max_ls_iter = 100;
        let mut pitr = 0;
        let mut w = Vec::zeros(p);
        let mut neww = w.clone();
@@ -170,7 +170,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
            s = T::one();
            let gdx = grad.dot(&dxu);

-            let lsiter = 0;
+            let mut lsiter = 0;
            while lsiter < max_ls_iter {
                for i in 0..p {
                    neww[i] = w[i] + s * dx[i];
@@ -195,7 +195,7 @@ impl<T: FloatNumber, X: Array2<T>> InteriorPointOptimizer<T, X> {
                    }
                }
                s = beta * s;
-                max_ls_iter += 1;
+                lsiter += 1;
            }

            if lsiter == max_ls_iter {
@@ -1,6 +1,7 @@
-//! # K Nearest Neighbors Regressor
+//! # K Nearest Neighbors Regressor with Feature Sparsing
 //!
 //! Regressor that predicts estimated values as a function of k nearest neighbours.
+//! It also supports sparse prediction: `predict_sparse` considers only a chosen subset of features when predicting.
 //!
 //! `KNNRegressor` relies on 2 backend algorithms to speedup KNN queries:
 //! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
@@ -29,6 +30,10 @@
 //!
 //! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
 //! let y_hat = knn.predict(&x).unwrap();
+//!
+//! // Predict using only the feature at index 0
+//! let feature_indices = vec![0];
+//! let y_hat_sparse = knn.predict_sparse(&x, &feature_indices).unwrap();
 //! ```
 //!
 //! variable `y_hat` will hold predicted value
@@ -77,12 +82,13 @@ pub struct KNNRegressorParameters<T: Number, D: Distance<Vec<T>>> {
 pub struct KNNRegressor<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
 {
    y: Option<Y>,
+    x: Option<X>, // Store training data for sparse feature prediction
    knn_algorithm: Option<KNNAlgorithm<TX, D>>,
+    distance: Option<D>, // Store distance function for sparse prediction
    weight: Option<KNNWeightFunction>,
    k: Option<usize>,
    _phantom_tx: PhantomData<TX>,
    _phantom_ty: PhantomData<TY>,
-    _phantom_x: PhantomData<X>,
 }

 impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
@@ -92,12 +98,20 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
        self.y.as_ref().unwrap()
    }

+    /// Returns a reference to the training feature matrix held by the model.
+    ///
+    /// # Panics
+    /// Panics with "Missing parameter: x" when the `x` field is `None`.
+    fn x(&self) -> &X {
+        self.x.as_ref().expect("Missing parameter: x")
+    }
+
    fn knn_algorithm(&self) -> &KNNAlgorithm<TX, D> {
        self.knn_algorithm
            .as_ref()
            .expect("Missing parameter: KNNAlgorithm")
    }

+    /// Returns a reference to the distance function held by the model.
+    ///
+    /// # Panics
+    /// Panics with "Missing parameter: distance" when the `distance` field is `None`.
+    fn distance(&self) -> &D {
+        let stored = self.distance.as_ref();
+        stored.expect("Missing parameter: distance")
+    }
+
    fn weight(&self) -> &KNNWeightFunction {
        self.weight.as_ref().expect("Missing parameter: weight")
    }
@@ -176,12 +190,13 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
    fn new() -> Self {
        Self {
            y: Option::None,
+            x: Option::None,
            knn_algorithm: Option::None,
+            distance: Option::None,
            weight: Option::None,
            k: Option::None,
            _phantom_tx: PhantomData,
            _phantom_ty: PhantomData,
-            _phantom_x: PhantomData,
        }
    }

@@ -231,16 +246,17 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
            )));
        }

-        let knn_algo = parameters.algorithm.fit(data, parameters.distance)?;
+        let knn_algo = parameters.algorithm.fit(data, parameters.distance.clone())?;

        Ok(KNNRegressor {
            y: Some(y.clone()),
+            x: Some(x.clone()),
            k: Some(parameters.k),
            knn_algorithm: Some(knn_algo),
+            distance: Some(parameters.distance),
            weight: Some(parameters.weight),
            _phantom_tx: PhantomData,
            _phantom_ty: PhantomData,
-            _phantom_x: PhantomData,
        })
    }

@@ -262,6 +278,45 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>
        Ok(result)
    }

+    /// Predict the target for the provided data using only the specified features.
+    ///
+    /// For every row of `x` the selected columns are copied into a dense vector, which is
+    /// then compared against the same columns of the stored training data.
+    ///
+    /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
+    /// * `feature_indices` - indices of features to consider (e.g., [0, 2, 4] to use only features at positions 0, 2, and 4)
+    ///
+    /// Returns a vector of size N with estimates.
+    ///
+    /// # Errors
+    /// Returns an error when `feature_indices` is empty, or when any index is not a valid
+    /// column of both `x` and the stored training data.
+    pub fn predict_sparse(&self, x: &X, feature_indices: &[usize]) -> Result<Y, Failed> {
+        if feature_indices.is_empty() {
+            return Err(Failed::predict("feature_indices cannot be empty"));
+        }
+
+        let (n_samples, n_features) = x.shape();
+        let (_, n_training_features) = self.x().shape();
+        // Each index is read from the query rows here and from the training rows in
+        // `predict_for_row_sparse`, so it must be valid for the narrower of the two matrices.
+        let usable_features = n_features.min(n_training_features);
+
+        if let Some(&idx) = feature_indices
+            .iter()
+            .find(|&&idx| idx >= usable_features)
+        {
+            // Report the count rather than `count - 1`, which underflows for zero columns.
+            return Err(Failed::predict(&format!(
+                "Feature index {} out of bounds (number of usable features: {})",
+                idx, usable_features
+            )));
+        }
+
+        let mut result = Y::zeros(n_samples);
+
+        // Scratch vector holding the selected features of the current query row.
+        let mut row_vec = vec![TX::zero(); feature_indices.len()];
+        for (i, row) in x.row_iter().enumerate() {
+            for (slot, &feat_idx) in row_vec.iter_mut().zip(feature_indices) {
+                *slot = *row.get(feat_idx);
+            }
+            result.set(i, self.predict_for_row_sparse(&row_vec, feature_indices)?);
+        }
+
+        Ok(result)
+    }
+
    fn predict_for_row(&self, row: &Vec<TX>) -> Result<TY, Failed> {
        let search_result = self.knn_algorithm().find(row, self.k.unwrap())?;
        let mut result = TY::zero();
@@ -277,6 +332,50 @@ impl<TX: Number, TY: Number, X: Array2<TX>, Y: Array1<TY>, D: Distance<Vec<TX>>>

        Ok(result)
    }
+
+    /// Predicts the target for one query row from its `k` nearest training samples,
+    /// measuring distance on the selected feature columns only.
+    ///
+    /// * `row` - query values already restricted to `feature_indices`, in the same order.
+    /// * `feature_indices` - columns of the stored training data to compare against;
+    ///   every index must be a valid training column.
+    ///
+    /// Returns the weighted average of the selected neighbours' targets, with weights
+    /// produced by the model's weight function and normalised by their sum.
+    fn predict_for_row_sparse(
+        &self,
+        row: &Vec<TX>,
+        feature_indices: &[usize],
+    ) -> Result<TY, Failed> {
+        let training_data = self.x();
+        let (n_training_samples, _) = training_data.shape();
+        let k = self.k.unwrap();
+
+        // Scan every training sample and record (sample index, distance on selected features).
+        let mut distances: Vec<(usize, f64)> = Vec::with_capacity(n_training_samples);
+        // One scratch buffer is reused for all training rows instead of allocating per row.
+        let mut train_sparse: Vec<TX> = Vec::with_capacity(feature_indices.len());
+
+        for i in 0..n_training_samples {
+            let train_row = training_data.get_row(i);
+
+            train_sparse.clear();
+            train_sparse.extend(
+                feature_indices
+                    .iter()
+                    .map(|&feat_idx| *train_row.get(feat_idx)),
+            );
+
+            distances.push((i, self.distance().distance(row, &train_sparse)));
+        }
+
+        // `total_cmp` stays a total order even if a distance is NaN; mapping incomparable
+        // pairs to `Equal` does not, and `sort_by` may panic on an inconsistent comparator.
+        // The sort is stable, so equally distant samples keep their training order.
+        distances.sort_by(|a, b| a.1.total_cmp(&b.1));
+        distances.truncate(k);
+
+        // Compute weighted prediction
+        let weights = self
+            .weight()
+            .calc_weights(distances.iter().map(|v| v.1).collect());
+        let w_sum: f64 = weights.iter().copied().sum();
+
+        let mut result = TY::zero();
+        for (neighbor, w) in distances.iter().zip(weights.iter()) {
+            result += *self.y().get(neighbor.0) * TY::from_f64(*w / w_sum).unwrap();
+        }
+
+        Ok(result)
+    }
 }

 #[cfg(test)]
@@ -332,6 +431,91 @@ mod tests {
        }
    }

+    #[cfg_attr(
+        all(target_arch = "wasm32", not(target_os = "wasi")),
+        wasm_bindgen_test::wasm_bindgen_test
+    )]
+    #[test]
+    fn knn_predict_sparse() {
+        // Five training samples with three features each; targets follow the row order.
+        let train_x = DenseMatrix::from_2d_array(&[
+            &[1., 2., 10.],
+            &[3., 4., 20.],
+            &[5., 6., 30.],
+            &[7., 8., 40.],
+            &[9., 10., 50.],
+        ])
+        .unwrap();
+        let train_y: Vec<f64> = vec![1., 2., 3., 4., 5.];
+
+        let model = KNNRegressor::fit(&train_x, &train_y, Default::default()).unwrap();
+
+        // The queries reuse the first two features of training rows 0 and 2, while the
+        // third feature (999) is far from every training value.
+        let queries = DenseMatrix::from_2d_array(&[&[1., 2., 999.], &[5., 6., 999.]]).unwrap();
+
+        // Restrict the prediction to the first two features so the third one is ignored.
+        let selected = vec![0, 1];
+        let predictions = model.predict_sparse(&queries, &selected).unwrap();
+
+        // One estimate per query row, each within 1.0 of the value expected for that row.
+        assert_eq!(2, Vec::len(&predictions));
+        for (predicted, expected) in predictions.iter().zip([2.0, 3.0]) {
+            assert!((*predicted - expected).abs() < 1.0);
+        }
+    }
+
+    #[cfg_attr(
+        all(target_arch = "wasm32", not(target_os = "wasi")),
+        wasm_bindgen_test::wasm_bindgen_test
+    )]
+    #[test]
+    fn knn_predict_sparse_single_feature() {
+        // Each feature column lives on a different scale (1s, 100s, 1000s).
+        let train_x = DenseMatrix::from_2d_array(&[
+            &[1., 100., 1000.],
+            &[2., 200., 2000.],
+            &[3., 300., 3000.],
+            &[4., 400., 4000.],
+            &[5., 500., 5000.],
+        ])
+        .unwrap();
+        let train_y: Vec<f64> = vec![1., 2., 3., 4., 5.];
+
+        let model = KNNRegressor::fit(&train_x, &train_y, Default::default()).unwrap();
+
+        // Only the first value of the query is near the training data.
+        let query = DenseMatrix::from_2d_array(&[&[1.5, 999., 9999.]]).unwrap();
+
+        // Select feature 0 alone, so the other two columns play no part.
+        let prediction = model.predict_sparse(&query, &[0]).unwrap();
+
+        assert_eq!(1, Vec::len(&prediction));
+        let error = (prediction[0] - 1.5).abs();
+        assert!(error < 1.0);
+    }
+
+    #[cfg_attr(
+        all(target_arch = "wasm32", not(target_os = "wasi")),
+        wasm_bindgen_test::wasm_bindgen_test
+    )]
+    #[test]
+    fn knn_predict_sparse_invalid_indices() {
+        // Two samples with two features each are enough to exercise the validation paths.
+        let train_x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.]]).unwrap();
+        let train_y: Vec<f64> = vec![1., 2.];
+        let model = KNNRegressor::fit(&train_x, &train_y, Default::default()).unwrap();
+
+        let query = DenseMatrix::from_2d_array(&[&[1., 2.]]).unwrap();
+
+        // Index 5 does not exist in a two-feature matrix.
+        assert!(model.predict_sparse(&query, &[5]).is_err());
+
+        // An empty selection is rejected as well.
+        assert!(model.predict_sparse(&query, &[]).is_err());
+    }
+
    #[cfg_attr(
        all(target_arch = "wasm32", not(target_os = "wasi")),
        wasm_bindgen_test::wasm_bindgen_test
@@ -350,4 +534,4 @@ mod tests {

        assert_eq!(knn, deserialized_knn);
    }
-}
+}
Author	SHA1	Message	Date
Konstantin Hirschfeld	f53cb36b9d	allow for sparse predictions CI / tests (map[os:macos target:aarch64-apple-darwin]) (push) Has been cancelled Details CI / tests (map[os:ubuntu target:i686-unknown-linux-gnu]) (push) Has been cancelled Details CI / tests (map[os:ubuntu target:wasm32-unknown-unknown]) (push) Has been cancelled Details CI / tests (map[os:ubuntu target:x86_64-unknown-linux-gnu]) (push) Has been cancelled Details CI / tests (map[os:windows target:i686-pc-windows-msvc]) (push) Has been cancelled Details CI / tests (map[os:windows target:x86_64-pc-windows-msvc]) (push) Has been cancelled Details CI / check_features (, map[os:ubuntu]) (push) Has been cancelled Details CI / check_features (--features datasets, map[os:ubuntu]) (push) Has been cancelled Details CI / check_features (--features serde, map[os:ubuntu]) (push) Has been cancelled Details Coverage / coverage (push) Has been cancelled Details Lint checks / lint (push) Has been cancelled Details	2026-02-09 13:25:50 +01:00
Lorenzo Mec-iS	c57a4370ba	bump version to 0.4.9	2026-01-09 06:14:44 +00:00
Georeth Chow	78f18505b1	fix LASSO (#346 ) * fix lasso doc typo * fix lasso optimizer bug	2025-12-05 17:49:07 +09:00
Lorenzo	58a8624fa9	v0.4.8 (#345 )	2025-11-29 02:54:35 +00:00