Merge pull request #1 from smartcorelib/development

update
This commit is contained in:
zEqueue
2021-02-26 10:47:50 +08:00
committed by GitHub
75 changed files with 6166 additions and 798 deletions
+18 -2
View File
@@ -6,6 +6,8 @@ workflows:
jobs:
- build
- clippy
- coverage
jobs:
build:
docker:
@@ -21,10 +23,10 @@ jobs:
command: cargo fmt -- --check
- run:
name: Stable Build
command: cargo build --features "nalgebra-bindings ndarray-bindings"
command: cargo build --all-features
- run:
name: Test
command: cargo test --features "nalgebra-bindings ndarray-bindings"
command: cargo test --all-features
- save_cache:
key: project-cache
paths:
@@ -41,3 +43,17 @@ jobs:
- run:
name: Run cargo clippy
command: cargo clippy --all-features -- -Drust-2018-idioms -Dwarnings
coverage:
machine: true
steps:
- checkout
- run:
name: Generate report
command: >
docker run --security-opt seccomp=unconfined -v $PWD:/volume
xd009642/tarpaulin:latest-nightly cargo tarpaulin -v --ciserver circle-ci
--out Lcov --all-features -- --test-threads 1
- run:
name: Upload
command: bash <(curl -s https://codecov.io/bash) -Z -f
+9 -5
View File
@@ -2,7 +2,7 @@
name = "smartcore"
description = "The most advanced machine learning library in rust."
homepage = "https://smartcorelib.org"
version = "0.1.0"
version = "0.2.0"
authors = ["SmartCore Developers"]
edition = "2018"
license = "Apache-2.0"
@@ -19,14 +19,13 @@ nalgebra-bindings = ["nalgebra"]
datasets = []
[dependencies]
ndarray = { version = "0.13", optional = true }
nalgebra = { version = "0.22.0", optional = true }
ndarray = { version = "0.14", optional = true }
nalgebra = { version = "0.23.0", optional = true }
num-traits = "0.2.12"
num = "0.3.0"
rand = "0.7.3"
rand_distr = "0.3.0"
serde = { version = "1.0.115", features = ["derive"] }
serde_derive = "1.0.115"
serde = { version = "1.0.115", features = ["derive"], optional = true }
[dev-dependencies]
criterion = "0.3"
@@ -36,3 +35,8 @@ bincode = "1.3.1"
[[bench]]
name = "distance"
harness = false
[[bench]]
name = "naive_bayes"
harness = false
required-features = ["ndarray-bindings", "nalgebra-bindings"]
+73
View File
@@ -0,0 +1,73 @@
use criterion::BenchmarkId;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use nalgebra::DMatrix;
use ndarray::Array2;
use smartcore::linalg::naive::dense_matrix::DenseMatrix;
use smartcore::linalg::BaseMatrix;
use smartcore::linalg::BaseVector;
use smartcore::naive_bayes::gaussian::GaussianNB;
/// Benchmarks `GaussianNB::fit` over a grid of dataset sizes
/// (100/1000/10000 samples x 10/100/1000 features) built from random
/// dense matrices, registering one parameterized benchmark per pair.
pub fn gaussian_naive_bayes_fit_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("GaussianNB::fit");

    let sample_counts = [100_usize, 1000_usize, 10000_usize];
    let feature_counts = [10_usize, 100_usize, 1000_usize];

    for &n_samples in sample_counts.iter() {
        for &n_features in feature_counts.iter() {
            // Random training matrix plus a synthetic label vector
            // (row index divided by 5, mirroring the original labelling).
            let x = DenseMatrix::<f64>::rand(n_samples, n_features);
            let y: Vec<f64> = (0..n_samples)
                .map(|i| (i % n_samples / 5_usize) as f64)
                .collect();

            let id = BenchmarkId::from_parameter(format!(
                "n_samples: {}, n_features: {}",
                n_samples, n_features
            ));
            group.bench_with_input(id, &n_samples, |b, _| {
                b.iter(|| {
                    GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
                })
            });
        }
    }

    group.finish();
}
/// Compares `GaussianNB::fit` throughput across the three matrix backends
/// (smartcore's `DenseMatrix`, ndarray's `Array2`, nalgebra's `DMatrix`)
/// on an identically-sized 10000x500 random dataset with 25 label classes.
pub fn gaussian_naive_matrix_datastructure(c: &mut Criterion) {
    let mut group = c.benchmark_group("GaussianNB");
    // 10000 labels cycling through 25 classes.
    let classes = (0..10000).map(|i| (i % 25) as f64).collect::<Vec<f64>>();

    group.bench_function("DenseMatrix", |b| {
        let x = DenseMatrix::<f64>::rand(10000, 500);
        let y = <DenseMatrix<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
        b.iter(|| {
            GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
        })
    });

    group.bench_function("ndarray", |b| {
        let x = Array2::<f64>::rand(10000, 500);
        let y = <Array2<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
        b.iter(|| {
            GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
        })
    });

    // Fixed benchmark label: was the typo "ndalgebra", which mislabels the
    // nalgebra backend in criterion reports and baselines.
    group.bench_function("nalgebra", |b| {
        let x = DMatrix::<f64>::rand(10000, 500);
        let y = <DMatrix<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
        b.iter(|| {
            GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
        })
    });

    // Finish explicitly, matching gaussian_naive_bayes_fit_benchmark.
    group.finish();
}
// Register both benchmark functions with criterion and generate the
// harness entry point (`main`) for `cargo bench`.
criterion_group!(
    benches,
    gaussian_naive_bayes_fit_benchmark,
    gaussian_naive_matrix_datastructure
);
criterion_main!(benches);
+15 -18
View File
@@ -44,10 +44,7 @@ impl<T: RealNumber> BBDTree<T> {
let (n, _) = data.shape();
let mut index = vec![0; n];
for i in 0..n {
index[i] = i;
}
let index = (0..n).collect::<Vec<_>>();
let mut tree = BBDTree {
nodes,
@@ -64,7 +61,7 @@ impl<T: RealNumber> BBDTree<T> {
pub(in crate) fn clustering(
&self,
centroids: &Vec<Vec<T>>,
centroids: &[Vec<T>],
sums: &mut Vec<Vec<T>>,
counts: &mut Vec<usize>,
membership: &mut Vec<usize>,
@@ -92,8 +89,8 @@ impl<T: RealNumber> BBDTree<T> {
fn filter(
&self,
node: usize,
centroids: &Vec<Vec<T>>,
candidates: &Vec<usize>,
centroids: &[Vec<T>],
candidates: &[usize],
k: usize,
sums: &mut Vec<Vec<T>>,
counts: &mut Vec<usize>,
@@ -117,15 +114,15 @@ impl<T: RealNumber> BBDTree<T> {
let mut new_candidates = vec![0; k];
let mut newk = 0;
for i in 0..k {
for candidate in candidates.iter().take(k) {
if !BBDTree::prune(
&self.nodes[node].center,
&self.nodes[node].radius,
centroids,
closest,
candidates[i],
*candidate,
) {
new_candidates[newk] = candidates[i];
new_candidates[newk] = *candidate;
newk += 1;
}
}
@@ -166,9 +163,9 @@ impl<T: RealNumber> BBDTree<T> {
}
fn prune(
center: &Vec<T>,
radius: &Vec<T>,
centroids: &Vec<Vec<T>>,
center: &[T],
radius: &[T],
centroids: &[Vec<T>],
best_index: usize,
test_index: usize,
) -> bool {
@@ -285,8 +282,8 @@ impl<T: RealNumber> BBDTree<T> {
}
let mut mean = vec![T::zero(); d];
for i in 0..d {
mean[i] = node.sum[i] / T::from(node.count).unwrap();
for (i, mean_i) in mean.iter_mut().enumerate().take(d) {
*mean_i = node.sum[i] / T::from(node.count).unwrap();
}
node.cost = BBDTree::node_cost(&self.nodes[node.lower.unwrap()], &mean)
@@ -295,11 +292,11 @@ impl<T: RealNumber> BBDTree<T> {
self.add_node(node)
}
fn node_cost(node: &BBDTreeNode<T>, center: &Vec<T>) -> T {
fn node_cost(node: &BBDTreeNode<T>, center: &[T]) -> T {
let d = center.len();
let mut scatter = T::zero();
for i in 0..d {
let x = (node.sum[i] / T::from(node.count).unwrap()) - center[i];
for (i, center_i) in center.iter().enumerate().take(d) {
let x = (node.sum[i] / T::from(node.count).unwrap()) - *center_i;
scatter += x * x;
}
node.cost + T::from(node.count).unwrap() * scatter
+11 -5
View File
@@ -6,6 +6,7 @@
//! use smartcore::algorithm::neighbour::cover_tree::*;
//! use smartcore::math::distance::Distance;
//!
//! #[derive(Clone)]
//! struct SimpleDistance {} // Our distance function
//!
//! impl Distance<i32, f64> for SimpleDistance {
@@ -23,6 +24,7 @@
//! ```
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::heap_select::HeapSelection;
@@ -31,7 +33,8 @@ use crate::math::distance::Distance;
use crate::math::num::RealNumber;
/// Implements Cover Tree algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct CoverTree<T, F: RealNumber, D: Distance<T, F>> {
base: F,
inv_log_base: F,
@@ -55,7 +58,8 @@ impl<T, F: RealNumber, D: Distance<T, F>> PartialEq for CoverTree<T, F, D> {
}
}
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct Node<F: RealNumber> {
idx: usize,
max_dist: F,
@@ -64,7 +68,7 @@ struct Node<F: RealNumber> {
scale: i64,
}
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug)]
struct DistanceSet<F: RealNumber> {
idx: usize,
dist: Vec<F>,
@@ -436,7 +440,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
}
}
fn max(&self, distance_set: &Vec<DistanceSet<F>>) -> F {
fn max(&self, distance_set: &[DistanceSet<F>]) -> F {
let mut max = F::zero();
for n in distance_set {
if max < n.dist[n.dist.len() - 1] {
@@ -453,7 +457,8 @@ mod tests {
use super::*;
use crate::math::distance::Distances;
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
struct SimpleDistance {}
impl Distance<i32, f64> for SimpleDistance {
@@ -499,6 +504,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
+6 -1
View File
@@ -5,6 +5,7 @@
//! use smartcore::algorithm::neighbour::linear_search::*;
//! use smartcore::math::distance::Distance;
//!
//! #[derive(Clone)]
//! struct SimpleDistance {} // Our distance function
//!
//! impl Distance<i32, f64> for SimpleDistance {
@@ -21,6 +22,7 @@
//!
//! ```
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::cmp::{Ordering, PartialOrd};
use std::marker::PhantomData;
@@ -31,7 +33,8 @@ use crate::math::distance::Distance;
use crate::math::num::RealNumber;
/// Implements Linear Search algorithm, see [KNN algorithms](../index.html)
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct LinearKNNSearch<T, F: RealNumber, D: Distance<T, F>> {
distance: D,
data: Vec<T>,
@@ -137,6 +140,8 @@ mod tests {
use super::*;
use crate::math::distance::Distances;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
struct SimpleDistance {}
impl Distance<i32, f64> for SimpleDistance {
+6 -2
View File
@@ -1,3 +1,4 @@
#![allow(clippy::ptr_arg)]
//! # Nearest Neighbors Search Algorithms and Data Structures
//!
//! Nearest neighbor search is a basic computational tool that is particularly relevant to machine learning,
@@ -34,6 +35,7 @@ use crate::algorithm::neighbour::linear_search::LinearKNNSearch;
use crate::error::Failed;
use crate::math::distance::Distance;
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
pub(crate) mod bbd_tree;
@@ -44,7 +46,8 @@ pub mod linear_search;
/// Both, KNN classifier and regressor benefits from underlying search algorithms that helps to speed up queries.
/// `KNNAlgorithmName` maintains a list of supported search algorithms, see [KNN algorithms](../algorithm/neighbour/index.html)
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub enum KNNAlgorithmName {
/// Heap Search algorithm, see [`LinearSearch`](../algorithm/neighbour/linear_search/index.html)
LinearSearch,
@@ -52,7 +55,8 @@ pub enum KNNAlgorithmName {
CoverTree,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub(crate) enum KNNAlgorithm<T: RealNumber, D: Distance<Vec<T>, T>> {
LinearSearch(LinearKNNSearch<Vec<T>, T, D>),
CoverTree(CoverTree<Vec<T>, T, D>),
+43
View File
@@ -0,0 +1,43 @@
//! # Common Interfaces and API
//!
//! This module provides interfaces and uniform API with simple conventions
//! that are used in other modules for supervised and unsupervised learning.
use crate::error::Failed;
/// An estimator for unsupervised learning that provides the method `fit` to learn a model from unlabeled data.
pub trait UnsupervisedEstimator<X, P> {
    /// Fit a model to a training dataset, estimating the model's parameters.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `parameters` - hyperparameters of the algorithm
    fn fit(x: &X, parameters: P) -> Result<Self, Failed>
    where
        Self: Sized,
        P: Clone;
}
/// An estimator for supervised learning that provides the method `fit` to learn a model from data and training values.
pub trait SupervisedEstimator<X, Y, P> {
    /// Fit a model to a training dataset, estimating the model's parameters.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - target training values of size _N_.
    /// * `parameters` - hyperparameters of the algorithm
    fn fit(x: &X, y: &Y, parameters: P) -> Result<Self, Failed>
    where
        Self: Sized,
        P: Clone;
}
/// Implements the method `predict` that estimates target values from new data.
pub trait Predictor<X, Y> {
    /// Estimate target values from new data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    fn predict(&self, x: &X) -> Result<Y, Failed>;
}
/// Implements the method `transform` that filters or modifies input data.
pub trait Transformer<X> {
    /// Transform data by modifying or filtering it, returning data of the same type.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    fn transform(&self, x: &X) -> Result<X, Failed>;
}
+126 -41
View File
@@ -1,6 +1,20 @@
//! # DBSCAN Clustering
//!
//! DBSCAN - Density-Based Spatial Clustering of Applications with Noise.
//! DBSCAN stands for density-based spatial clustering of applications with noise. This algorithms is good for arbitrary shaped clusters and clusters with noise.
//! The main idea behind DBSCAN is that a point belongs to a cluster if it is close to many points from that cluster. There are two key parameters of DBSCAN:
//!
//! * `eps`, the maximum distance that specifies a neighborhood. Two points are considered to be neighbors if the distance between them are less than or equal to `eps`.
//! * `min_samples`, minimum number of data points that defines a cluster.
//!
//! Based on these two parameters, points are classified as core point, border point, or outlier:
//!
//! * A point is a core point if there are at least `min_samples` number of points, including the point itself in its vicinity.
//! * A point is a border point if it is reachable from a core point and there are less than `min_samples` number of points within its surrounding area.
//! * All points not reachable from any other point are outliers or noise points.
//!
//! The algorithm starts from picking up an arbitrarily point in the dataset.
//! If there are at least `min_samples` points within a radius of `eps` to the point then we consider all these points to be part of the same cluster.
//! The clusters are then expanded by recursively repeating the neighborhood calculation for each neighboring point.
//!
//! Example:
//!
@@ -15,11 +29,8 @@
//! let blobs = generator::make_blobs(100, 2, 3);
//! let x = DenseMatrix::from_vec(blobs.num_samples, blobs.num_features, &blobs.data);
//! // Fit the algorithm and predict cluster labels
//! let labels = DBSCAN::fit(&x, Distances::euclidian(), DBSCANParameters{
//! min_samples: 5,
//! eps: 3.0,
//! algorithm: KNNAlgorithmName::CoverTree
//! }).and_then(|dbscan| dbscan.predict(&x));
//! let labels = DBSCAN::fit(&x, DBSCANParameters::default().with_eps(3.0)).
//! and_then(|dbscan| dbscan.predict(&x));
//!
//! println!("{:?}", labels);
//! ```
@@ -32,17 +43,21 @@
use std::fmt::Debug;
use std::iter::Sum;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
use crate::api::{Predictor, UnsupervisedEstimator};
use crate::error::Failed;
use crate::linalg::{row_iter, Matrix};
use crate::math::distance::Distance;
use crate::math::distance::euclidian::Euclidian;
use crate::math::distance::{Distance, Distances};
use crate::math::num::RealNumber;
use crate::tree::decision_tree_classifier::which_max;
/// DBSCAN clustering algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
cluster_labels: Vec<i16>,
num_classes: usize,
@@ -52,15 +67,48 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
#[derive(Debug, Clone)]
/// DBSCAN clustering algorithm parameters
pub struct DBSCANParameters<T: RealNumber> {
/// Maximum number of iterations of the k-means algorithm for a single run.
pub struct DBSCANParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub distance: D,
/// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
pub min_samples: usize,
/// The number of samples in a neighborhood for a point to be considered as a core point.
/// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
pub eps: T,
/// KNN algorithm to use.
pub algorithm: KNNAlgorithmName,
}
// Builder-style setters for DBSCANParameters; each consumes `self` and
// returns the updated parameters, so calls can be chained from `default()`.
impl<T: RealNumber, D: Distance<Vec<T>, T>> DBSCANParameters<T, D> {
    /// A function that defines a distance between each pair of points in the training data.
    /// This function should extend the [`Distance`](../../math/distance/trait.Distance.html) trait.
    /// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
    ///
    /// Note: unlike the other setters this changes the distance type parameter,
    /// so it returns `DBSCANParameters<T, DD>` rather than `Self`.
    pub fn with_distance<DD: Distance<Vec<T>, T>>(self, distance: DD) -> DBSCANParameters<T, DD> {
        DBSCANParameters {
            distance,
            min_samples: self.min_samples,
            eps: self.eps,
            algorithm: self.algorithm,
        }
    }
    /// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
    pub fn with_min_samples(mut self, min_samples: usize) -> Self {
        self.min_samples = min_samples;
        self
    }
    /// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
    pub fn with_eps(mut self, eps: T) -> Self {
        self.eps = eps;
        self
    }
    /// KNN algorithm to use.
    pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
        self.algorithm = algorithm;
        self
    }
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for DBSCAN<T, D> {
fn eq(&self, other: &Self) -> bool {
self.cluster_labels.len() == other.cluster_labels.len()
@@ -70,9 +118,10 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for DBSCAN<T, D> {
}
}
impl<T: RealNumber> Default for DBSCANParameters<T> {
impl<T: RealNumber> Default for DBSCANParameters<T, Euclidian> {
fn default() -> Self {
DBSCANParameters {
distance: Distances::euclidian(),
min_samples: 5,
eps: T::half(),
algorithm: KNNAlgorithmName::CoverTree,
@@ -80,6 +129,22 @@ impl<T: RealNumber> Default for DBSCANParameters<T> {
}
}
// Exposes DBSCAN through the common estimator API by delegating to the
// inherent `DBSCAN::fit` implementation.
impl<T: RealNumber + Sum, M: Matrix<T>, D: Distance<Vec<T>, T>>
    UnsupervisedEstimator<M, DBSCANParameters<T, D>> for DBSCAN<T, D>
{
    fn fit(x: &M, parameters: DBSCANParameters<T, D>) -> Result<Self, Failed> {
        DBSCAN::fit(x, parameters)
    }
}

// Exposes DBSCAN through the common `Predictor` API by delegating to the
// inherent `predict` method.
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
    for DBSCAN<T, D>
{
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber + Sum, D: Distance<Vec<T>, T>> DBSCAN<T, D> {
/// Fit algorithm to _NxM_ matrix where _N_ is number of samples and _M_ is number of features.
/// * `data` - training instances to cluster
@@ -87,8 +152,7 @@ impl<T: RealNumber + Sum, D: Distance<Vec<T>, T>> DBSCAN<T, D> {
/// * `parameters` - cluster parameters
pub fn fit<M: Matrix<T>>(
x: &M,
distance: D,
parameters: DBSCANParameters<T>,
parameters: DBSCANParameters<T, D>,
) -> Result<DBSCAN<T, D>, Failed> {
if parameters.min_samples < 1 {
return Err(Failed::fit(&"Invalid minPts".to_string()));
@@ -99,37 +163,60 @@ impl<T: RealNumber + Sum, D: Distance<Vec<T>, T>> DBSCAN<T, D> {
}
let mut k = 0;
let unassigned = -2;
let queued = -2;
let outlier = -1;
let undefined = -3;
let n = x.shape().0;
let mut y = vec![unassigned; n];
let mut y = vec![undefined; n];
let algo = parameters.algorithm.fit(row_iter(x).collect(), distance)?;
let algo = parameters
.algorithm
.fit(row_iter(x).collect(), parameters.distance)?;
for (i, e) in row_iter(x).enumerate() {
if y[i] == unassigned {
if y[i] == undefined {
let mut neighbors = algo.find_radius(&e, parameters.eps)?;
if neighbors.len() < parameters.min_samples {
y[i] = outlier;
} else {
y[i] = k;
for j in 0..neighbors.len() {
if y[neighbors[j].0] == unassigned {
y[neighbors[j].0] = k;
let mut secondary_neighbors =
algo.find_radius(neighbors[j].2, parameters.eps)?;
if secondary_neighbors.len() >= parameters.min_samples {
neighbors.append(&mut secondary_neighbors);
}
}
if y[neighbors[j].0] == outlier {
y[neighbors[j].0] = k;
if y[neighbors[j].0] == undefined {
y[neighbors[j].0] = queued;
}
}
while !neighbors.is_empty() {
let neighbor = neighbors.pop().unwrap();
let index = neighbor.0;
if y[index] == outlier {
y[index] = k;
}
if y[index] == undefined || y[index] == queued {
y[index] = k;
let secondary_neighbors =
algo.find_radius(neighbor.2, parameters.eps)?;
if secondary_neighbors.len() >= parameters.min_samples {
for j in 0..secondary_neighbors.len() {
let label = y[secondary_neighbors[j].0];
if label == undefined {
y[secondary_neighbors[j].0] = queued;
}
if label == undefined || label == outlier {
neighbors.push(secondary_neighbors[j]);
}
}
}
}
}
k += 1;
}
}
@@ -178,8 +265,8 @@ impl<T: RealNumber + Sum, D: Distance<Vec<T>, T>> DBSCAN<T, D> {
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg(feature = "serde")]
use crate::math::distance::euclidian::Euclidian;
use crate::math::distance::Distances;
#[test]
fn fit_predict_dbscan() {
@@ -187,12 +274,12 @@ mod tests {
&[1.0, 2.0],
&[1.1, 2.1],
&[0.9, 1.9],
&[1.2, 1.2],
&[1.2, 2.2],
&[0.8, 1.8],
&[2.0, 1.0],
&[2.1, 1.1],
&[2.2, 1.2],
&[1.9, 0.9],
&[2.2, 1.2],
&[1.8, 0.8],
&[3.0, 5.0],
]);
@@ -201,12 +288,9 @@ mod tests {
let dbscan = DBSCAN::fit(
&x,
Distances::euclidian(),
DBSCANParameters {
min_samples: 5,
eps: 1.0,
algorithm: KNNAlgorithmName::CoverTree,
},
DBSCANParameters::default()
.with_eps(0.5)
.with_min_samples(2),
)
.unwrap();
@@ -216,6 +300,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
@@ -240,7 +325,7 @@ mod tests {
&[5.2, 2.7, 3.9, 1.4],
]);
let dbscan = DBSCAN::fit(&x, Distances::euclidian(), Default::default()).unwrap();
let dbscan = DBSCAN::fit(&x, Default::default()).unwrap();
let deserialized_dbscan: DBSCAN<f64, Euclidian> =
serde_json::from_str(&serde_json::to_string(&dbscan).unwrap()).unwrap();
+54 -22
View File
@@ -43,7 +43,7 @@
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//!
//! let kmeans = KMeans::fit(&x, 2, Default::default()).unwrap(); // Fit to data, 2 clusters
//! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters
//! let y_hat = kmeans.predict(&x).unwrap(); // use the same points for prediction
//! ```
//!
@@ -56,16 +56,19 @@ use rand::Rng;
use std::fmt::Debug;
use std::iter::Sum;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::bbd_tree::BBDTree;
use crate::api::{Predictor, UnsupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::distance::euclidian::*;
use crate::math::num::RealNumber;
/// K-Means clustering algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KMeans<T: RealNumber> {
k: usize,
y: Vec<usize>,
@@ -101,30 +104,58 @@ impl<T: RealNumber> PartialEq for KMeans<T> {
#[derive(Debug, Clone)]
/// K-Means clustering algorithm parameters
pub struct KMeansParameters {
/// Number of clusters.
pub k: usize,
/// Maximum number of iterations of the k-means algorithm for a single run.
pub max_iter: usize,
}
// Builder-style setters for KMeansParameters; each consumes `self` and
// returns it, so calls can be chained from `KMeansParameters::default()`.
impl KMeansParameters {
    /// Number of clusters.
    pub fn with_k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }
    /// Maximum number of iterations of the k-means algorithm for a single run.
    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
        self.max_iter = max_iter;
        self
    }
}
impl Default for KMeansParameters {
fn default() -> Self {
KMeansParameters { max_iter: 100 }
KMeansParameters {
k: 2,
max_iter: 100,
}
}
}
// Exposes KMeans through the common estimator API by delegating to the
// inherent `KMeans::fit` implementation.
impl<T: RealNumber + Sum, M: Matrix<T>> UnsupervisedEstimator<M, KMeansParameters> for KMeans<T> {
    fn fit(x: &M, parameters: KMeansParameters) -> Result<Self, Failed> {
        KMeans::fit(x, parameters)
    }
}

// Exposes KMeans through the common `Predictor` API by delegating to the
// inherent `predict` method.
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for KMeans<T> {
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber + Sum> KMeans<T> {
/// Fit algorithm to _NxM_ matrix where _N_ is number of samples and _M_ is number of features.
/// * `data` - training instances to cluster
/// * `k` - number of clusters
/// * `parameters` - cluster parameters
pub fn fit<M: Matrix<T>>(
data: &M,
k: usize,
parameters: KMeansParameters,
) -> Result<KMeans<T>, Failed> {
pub fn fit<M: Matrix<T>>(data: &M, parameters: KMeansParameters) -> Result<KMeans<T>, Failed> {
let bbd = BBDTree::new(data);
if k < 2 {
return Err(Failed::fit(&format!("invalid number of clusters: {}", k)));
if parameters.k < 2 {
return Err(Failed::fit(&format!(
"invalid number of clusters: {}",
parameters.k
)));
}
if parameters.max_iter == 0 {
@@ -137,9 +168,9 @@ impl<T: RealNumber + Sum> KMeans<T> {
let (n, d) = data.shape();
let mut distortion = T::max_value();
let mut y = KMeans::kmeans_plus_plus(data, k);
let mut size = vec![0; k];
let mut centroids = vec![vec![T::zero(); d]; k];
let mut y = KMeans::kmeans_plus_plus(data, parameters.k);
let mut size = vec![0; parameters.k];
let mut centroids = vec![vec![T::zero(); d]; parameters.k];
for i in 0..n {
size[y[i]] += 1;
@@ -151,16 +182,16 @@ impl<T: RealNumber + Sum> KMeans<T> {
}
}
for i in 0..k {
for i in 0..parameters.k {
for j in 0..d {
centroids[i][j] /= T::from(size[i]).unwrap();
}
}
let mut sums = vec![vec![T::zero(); d]; k];
let mut sums = vec![vec![T::zero(); d]; parameters.k];
for _ in 1..=parameters.max_iter {
let dist = bbd.clustering(&centroids, &mut sums, &mut size, &mut y);
for i in 0..k {
for i in 0..parameters.k {
if size[i] > 0 {
for j in 0..d {
centroids[i][j] = T::from(sums[i][j]).unwrap() / T::from(size[i]).unwrap();
@@ -176,7 +207,7 @@ impl<T: RealNumber + Sum> KMeans<T> {
}
Ok(KMeans {
k,
k: parameters.k,
y,
size,
distortion,
@@ -272,10 +303,10 @@ mod tests {
fn invalid_k() {
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
assert!(KMeans::fit(&x, 0, Default::default()).is_err());
assert!(KMeans::fit(&x, KMeansParameters::default().with_k(0)).is_err());
assert_eq!(
"Fit failed: invalid number of clusters: 1",
KMeans::fit(&x, 1, Default::default())
KMeans::fit(&x, KMeansParameters::default().with_k(1))
.unwrap_err()
.to_string()
);
@@ -306,7 +337,7 @@ mod tests {
&[5.2, 2.7, 3.9, 1.4],
]);
let kmeans = KMeans::fit(&x, 2, Default::default()).unwrap();
let kmeans = KMeans::fit(&x, Default::default()).unwrap();
let y = kmeans.predict(&x).unwrap();
@@ -316,6 +347,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
@@ -340,7 +372,7 @@ mod tests {
&[5.2, 2.7, 3.9, 1.4],
]);
let kmeans = KMeans::fit(&x, 2, Default::default()).unwrap();
let kmeans = KMeans::fit(&x, Default::default()).unwrap();
let deserialized_kmeans: KMeans<f64> =
serde_json::from_str(&serde_json::to_string(&kmeans).unwrap()).unwrap();
+49 -3
View File
@@ -59,8 +59,6 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32,
let linspace_out = linspace(0.0, 2.0 * std::f32::consts::PI, num_samples_out);
let linspace_in = linspace(0.0, 2.0 * std::f32::consts::PI, num_samples_in);
println!("{:?}", linspace_out);
println!("{:?}", linspace_in);
let noise = Normal::new(0.0, noise).unwrap();
let mut rng = rand::thread_rng();
@@ -90,6 +88,43 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32,
}
}
/// Make two interleaving half circles in 2d
///
/// Splits `num_samples` between two half-moons sampled along [0, pi] and
/// jitters every coordinate with Gaussian noise of standard deviation `noise`.
/// The first moon gets label 0.0, the second (shifted and mirrored) gets 1.0.
pub fn make_moons(num_samples: usize, noise: f32) -> Dataset<f32, f32> {
    // Split the requested sample count between the two moons.
    let samples_outer = num_samples / 2;
    let samples_inner = num_samples - samples_outer;

    let angles_outer = linspace(0.0, std::f32::consts::PI, samples_outer);
    let angles_inner = linspace(0.0, std::f32::consts::PI, samples_inner);

    let gaussian = Normal::new(0.0, noise).unwrap();
    let mut rng = rand::thread_rng();

    // Features are stored row-major as (x, y) pairs.
    let mut data: Vec<f32> = Vec::with_capacity(num_samples * 2);
    let mut target: Vec<f32> = Vec::with_capacity(num_samples);

    // First moon: points on the unit half circle, label 0.
    for angle in angles_outer {
        data.push(angle.cos() + gaussian.sample(&mut rng));
        data.push(angle.sin() + gaussian.sample(&mut rng));
        target.push(0.0);
    }

    // Second moon: mirrored half circle shifted right by 1 and down by 0.5, label 1.
    for angle in angles_inner {
        data.push(1.0 - angle.cos() + gaussian.sample(&mut rng));
        data.push(1.0 - angle.sin() + gaussian.sample(&mut rng) - 0.5);
        target.push(1.0);
    }

    Dataset {
        data,
        target,
        num_samples,
        num_features: 2,
        feature_names: (0..2).map(|n| n.to_string()).collect(),
        target_names: vec!["label".to_string()],
        description: "Two interleaving half circles in 2d".to_string(),
    }
}
fn linspace(start: f32, stop: f32, num: usize) -> Vec<f32> {
let div = num as f32;
let delta = stop - start;
@@ -117,7 +152,18 @@ mod tests {
#[test]
fn test_make_circles() {
let dataset = make_circles(10, 0.5, 0.05);
println!("{:?}", dataset.as_matrix());
assert_eq!(
dataset.data.len(),
dataset.num_features * dataset.num_samples
);
assert_eq!(dataset.target.len(), dataset.num_samples);
assert_eq!(dataset.num_features, 2);
assert_eq!(dataset.num_samples, 10);
}
#[test]
fn test_make_moons() {
let dataset = make_moons(10, 0.05);
assert_eq!(
dataset.data.len(),
dataset.num_features * dataset.num_samples
+1 -1
View File
@@ -83,7 +83,7 @@ pub(crate) fn deserialize_data(
) -> Result<(Vec<f32>, Vec<f32>, usize, usize), io::Error> {
// read the same file back into a Vec of bytes
let (num_samples, num_features) = {
let mut buffer = [0u8; 8];
let mut buffer = [0u8; if cfg!(target_arch = "wasm32") { 4 } else { 8 }];
buffer.copy_from_slice(&bytes[0..8]);
let num_features = usize::from_le_bytes(buffer);
buffer.copy_from_slice(&bytes[8..16]);
+1
View File
@@ -13,3 +13,4 @@
/// PCA is a popular approach for deriving a low-dimensional set of features from a large set of variables.
pub mod pca;
pub mod svd;
+83 -27
View File
@@ -37,7 +37,7 @@
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//!
//! let pca = PCA::fit(&iris, 2, Default::default()).unwrap(); // Reduce number of features to 2
//! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2
//!
//! let iris_reduced = pca.transform(&iris).unwrap();
//!
@@ -47,14 +47,17 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Transformer, UnsupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
/// Principal components analysis algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct PCA<T: RealNumber, M: Matrix<T>> {
eigenvectors: M,
eigenvalues: Vec<T>,
@@ -83,38 +86,70 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for PCA<T, M> {
#[derive(Debug, Clone)]
/// PCA parameters
pub struct PCAParameters {
/// Number of components to keep.
pub n_components: usize,
/// By default, covariance matrix is used to compute principal components.
/// Enable this flag if you want to use correlation matrix instead.
pub use_correlation_matrix: bool,
}
impl PCAParameters {
/// Number of components to keep.
pub fn with_n_components(mut self, n_components: usize) -> Self {
self.n_components = n_components;
self
}
/// By default, covariance matrix is used to compute principal components.
/// Enable this flag if you want to use correlation matrix instead.
pub fn with_use_correlation_matrix(mut self, use_correlation_matrix: bool) -> Self {
self.use_correlation_matrix = use_correlation_matrix;
self
}
}
impl Default for PCAParameters {
fn default() -> Self {
PCAParameters {
n_components: 2,
use_correlation_matrix: false,
}
}
}
impl<T: RealNumber, M: Matrix<T>> UnsupervisedEstimator<M, PCAParameters> for PCA<T, M> {
fn fit(x: &M, parameters: PCAParameters) -> Result<Self, Failed> {
PCA::fit(x, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>> Transformer<M> for PCA<T, M> {
fn transform(&self, x: &M) -> Result<M, Failed> {
self.transform(x)
}
}
impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
/// Fits PCA to your data.
/// * `data` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
/// * `n_components` - number of components to keep.
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
pub fn fit(
data: &M,
n_components: usize,
parameters: PCAParameters,
) -> Result<PCA<T, M>, Failed> {
pub fn fit(data: &M, parameters: PCAParameters) -> Result<PCA<T, M>, Failed> {
let (m, n) = data.shape();
if parameters.n_components > n {
return Err(Failed::fit(&format!(
"Number of components, n_components should be <= number of attributes ({})",
n
)));
}
let mu = data.column_mean();
let mut x = data.clone();
for c in 0..n {
for (c, mu_c) in mu.iter().enumerate().take(n) {
for r in 0..m {
x.sub_element_mut(r, c, mu[c]);
x.sub_element_mut(r, c, *mu_c);
}
}
@@ -124,8 +159,8 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
if m > n && !parameters.use_correlation_matrix {
let svd = x.svd()?;
eigenvalues = svd.s;
for i in 0..eigenvalues.len() {
eigenvalues[i] = eigenvalues[i] * eigenvalues[i];
for eigenvalue in &mut eigenvalues {
*eigenvalue = *eigenvalue * (*eigenvalue);
}
eigenvectors = svd.V;
@@ -149,8 +184,8 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
if parameters.use_correlation_matrix {
let mut sd = vec![T::zero(); n];
for i in 0..n {
sd[i] = cov.get(i, i).sqrt();
for (i, sd_i) in sd.iter_mut().enumerate().take(n) {
*sd_i = cov.get(i, i).sqrt();
}
for i in 0..n {
@@ -166,9 +201,9 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
eigenvectors = evd.V;
for i in 0..n {
for (i, sd_i) in sd.iter().enumerate().take(n) {
for j in 0..n {
eigenvectors.div_element_mut(i, j, sd[i]);
eigenvectors.div_element_mut(i, j, *sd_i);
}
}
} else {
@@ -180,17 +215,17 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
}
}
let mut projection = M::zeros(n_components, n);
let mut projection = M::zeros(parameters.n_components, n);
for i in 0..n {
for j in 0..n_components {
for j in 0..parameters.n_components {
projection.set(j, i, eigenvectors.get(i, j));
}
}
let mut pmu = vec![T::zero(); n_components];
for k in 0..n {
for i in 0..n_components {
pmu[i] += projection.get(i, k) * mu[k];
let mut pmu = vec![T::zero(); parameters.n_components];
for (k, mu_k) in mu.iter().enumerate().take(n) {
for (i, pmu_i) in pmu.iter_mut().enumerate().take(parameters.n_components) {
*pmu_i += projection.get(i, k) * (*mu_k);
}
}
@@ -224,6 +259,11 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
}
Ok(x_transformed)
}
/// Get a projection matrix
pub fn components(&self) -> &M {
&self.projection
}
}
#[cfg(test)]
@@ -286,6 +326,22 @@ mod tests {
])
}
#[test]
fn pca_components() {
let us_arrests = us_arrests_data();
let expected = DenseMatrix::from_2d_array(&[
&[0.0417, 0.0448],
&[0.9952, 0.0588],
&[0.0463, 0.9769],
&[0.0752, 0.2007],
]);
let pca = PCA::fit(&us_arrests, Default::default()).unwrap();
assert!(expected.approximate_eq(&pca.components().abs(), 0.4));
}
#[test]
fn decompose_covariance() {
let us_arrests = us_arrests_data();
@@ -377,7 +433,7 @@ mod tests {
302.04806302399646,
];
let pca = PCA::fit(&us_arrests, 4, Default::default()).unwrap();
let pca = PCA::fit(&us_arrests, PCAParameters::default().with_n_components(4)).unwrap();
assert!(pca
.eigenvectors
@@ -488,10 +544,9 @@ mod tests {
let pca = PCA::fit(
&us_arrests,
4,
PCAParameters {
use_correlation_matrix: true,
},
PCAParameters::default()
.with_n_components(4)
.with_use_correlation_matrix(true),
)
.unwrap();
@@ -512,6 +567,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let iris = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
@@ -536,7 +592,7 @@ mod tests {
&[5.2, 2.7, 3.9, 1.4],
]);
let pca = PCA::fit(&iris, 4, Default::default()).unwrap();
let pca = PCA::fit(&iris, Default::default()).unwrap();
let deserialized_pca: PCA<f64, DenseMatrix<f64>> =
serde_json::from_str(&serde_json::to_string(&pca).unwrap()).unwrap();
+263
View File
@@ -0,0 +1,263 @@
//! # Dimensionality reduction using SVD
//!
//! Similar to [`PCA`](../pca/index.html), SVD is a technique that can be used to reduce the number of input variables _p_ to a smaller number _k_, while preserving
//! the most important structure or relationships between the variables observed in the data.
//!
//! Contrary to PCA, SVD does not center the data before computing the singular value decomposition.
//!
//! Example:
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::decomposition::svd::*;
//!
//! // Iris data
//! let iris = DenseMatrix::from_2d_array(&[
//! &[5.1, 3.5, 1.4, 0.2],
//! &[4.9, 3.0, 1.4, 0.2],
//! &[4.7, 3.2, 1.3, 0.2],
//! &[4.6, 3.1, 1.5, 0.2],
//! &[5.0, 3.6, 1.4, 0.2],
//! &[5.4, 3.9, 1.7, 0.4],
//! &[4.6, 3.4, 1.4, 0.3],
//! &[5.0, 3.4, 1.5, 0.2],
//! &[4.4, 2.9, 1.4, 0.2],
//! &[4.9, 3.1, 1.5, 0.1],
//! &[7.0, 3.2, 4.7, 1.4],
//! &[6.4, 3.2, 4.5, 1.5],
//! &[6.9, 3.1, 4.9, 1.5],
//! &[5.5, 2.3, 4.0, 1.3],
//! &[6.5, 2.8, 4.6, 1.5],
//! &[5.7, 2.8, 4.5, 1.3],
//! &[6.3, 3.3, 4.7, 1.6],
//! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//!
//! let svd = SVD::fit(&iris, SVDParameters::default().
//! with_n_components(2)).unwrap(); // Reduce number of features to 2
//!
//! let iris_reduced = svd.transform(&iris).unwrap();
//!
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Transformer, UnsupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
/// Fitted SVD model. Holds the projection matrix that `transform` applies to
/// reduce the dimensionality of new data.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct SVD<T: RealNumber, M: Matrix<T>> {
    // p x k projection matrix: the first k right singular vectors (columns of V).
    components: M,
    // Zero-sized marker tying the element type T to the struct.
    phantom: PhantomData<T>,
}
impl<T: RealNumber, M: Matrix<T>> PartialEq for SVD<T, M> {
    /// Two fitted models are equal when their projection matrices agree
    /// element-wise within an absolute tolerance of 1e-8.
    fn eq(&self, other: &Self) -> bool {
        self.components
            .approximate_eq(&other.components, T::from_f64(1e-8).unwrap())
    }
}
#[derive(Debug, Clone)]
/// SVD parameters. Build with `SVDParameters::default()` and customize via the
/// `with_*` setter methods.
pub struct SVDParameters {
    /// Number of components (right singular vectors) to keep. Defaults to 2.
    pub n_components: usize,
}
impl Default for SVDParameters {
    /// Defaults to keeping 2 components.
    fn default() -> Self {
        SVDParameters { n_components: 2 }
    }
}
impl SVDParameters {
    /// Sets the number of components to keep.
    pub fn with_n_components(self, n_components: usize) -> Self {
        Self { n_components }
    }
}
impl<T: RealNumber, M: Matrix<T>> UnsupervisedEstimator<M, SVDParameters> for SVD<T, M> {
    /// Adapter so SVD can be used through the common `UnsupervisedEstimator`
    /// API; delegates to the inherent `SVD::fit`.
    fn fit(x: &M, parameters: SVDParameters) -> Result<Self, Failed> {
        SVD::fit(x, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Transformer<M> for SVD<T, M> {
    /// Adapter for the common `Transformer` API; delegates to the inherent
    /// `SVD::transform`.
    fn transform(&self, x: &M) -> Result<M, Failed> {
        self.transform(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> SVD<T, M> {
    /// Fits SVD to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `parameters` - algorithm parameters, use `Default::default()` to set parameters to default values.
    pub fn fit(x: &M, parameters: SVDParameters) -> Result<SVD<T, M>, Failed> {
        let (_, p) = x.shape();
        // Keeping all p components would not reduce dimensionality, and the
        // decomposition yields at most p right singular vectors.
        if parameters.n_components >= p {
            return Err(Failed::fit(&format!(
                "Number of components, n_components should be < number of attributes ({})",
                p
            )));
        }
        let svd = x.svd()?;
        // The projection is the first n_components columns of V.
        let components = svd.V.slice(0..p, 0..parameters.n_components);
        Ok(SVD {
            components,
            phantom: PhantomData,
        })
    }
    /// Run dimensionality reduction for `x`
    /// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
    pub fn transform(&self, x: &M) -> Result<M, Failed> {
        let (n, p) = x.shape();
        let (p_c, k) = self.components.shape();
        // x must have the same number of features the model was fitted on.
        if p_c != p {
            return Err(Failed::transform(&format!(
                "Can not transform a {}x{} matrix into {}x{} matrix, incorrect input dimensions",
                n, p, n, k
            )));
        }
        Ok(x.matmul(&self.components))
    }
    /// Get a projection matrix
    pub fn components(&self) -> &M {
        &self.components
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[test]
fn svd_decompose() {
// https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/USArrests.html
let x = DenseMatrix::from_2d_array(&[
&[13.2, 236.0, 58.0, 21.2],
&[10.0, 263.0, 48.0, 44.5],
&[8.1, 294.0, 80.0, 31.0],
&[8.8, 190.0, 50.0, 19.5],
&[9.0, 276.0, 91.0, 40.6],
&[7.9, 204.0, 78.0, 38.7],
&[3.3, 110.0, 77.0, 11.1],
&[5.9, 238.0, 72.0, 15.8],
&[15.4, 335.0, 80.0, 31.9],
&[17.4, 211.0, 60.0, 25.8],
&[5.3, 46.0, 83.0, 20.2],
&[2.6, 120.0, 54.0, 14.2],
&[10.4, 249.0, 83.0, 24.0],
&[7.2, 113.0, 65.0, 21.0],
&[2.2, 56.0, 57.0, 11.3],
&[6.0, 115.0, 66.0, 18.0],
&[9.7, 109.0, 52.0, 16.3],
&[15.4, 249.0, 66.0, 22.2],
&[2.1, 83.0, 51.0, 7.8],
&[11.3, 300.0, 67.0, 27.8],
&[4.4, 149.0, 85.0, 16.3],
&[12.1, 255.0, 74.0, 35.1],
&[2.7, 72.0, 66.0, 14.9],
&[16.1, 259.0, 44.0, 17.1],
&[9.0, 178.0, 70.0, 28.2],
&[6.0, 109.0, 53.0, 16.4],
&[4.3, 102.0, 62.0, 16.5],
&[12.2, 252.0, 81.0, 46.0],
&[2.1, 57.0, 56.0, 9.5],
&[7.4, 159.0, 89.0, 18.8],
&[11.4, 285.0, 70.0, 32.1],
&[11.1, 254.0, 86.0, 26.1],
&[13.0, 337.0, 45.0, 16.1],
&[0.8, 45.0, 44.0, 7.3],
&[7.3, 120.0, 75.0, 21.4],
&[6.6, 151.0, 68.0, 20.0],
&[4.9, 159.0, 67.0, 29.3],
&[6.3, 106.0, 72.0, 14.9],
&[3.4, 174.0, 87.0, 8.3],
&[14.4, 279.0, 48.0, 22.5],
&[3.8, 86.0, 45.0, 12.8],
&[13.2, 188.0, 59.0, 26.9],
&[12.7, 201.0, 80.0, 25.5],
&[3.2, 120.0, 80.0, 22.9],
&[2.2, 48.0, 32.0, 11.2],
&[8.5, 156.0, 63.0, 20.7],
&[4.0, 145.0, 73.0, 26.2],
&[5.7, 81.0, 39.0, 9.3],
&[2.6, 53.0, 66.0, 10.8],
&[6.8, 161.0, 60.0, 15.6],
]);
let expected = DenseMatrix::from_2d_array(&[
&[243.54655757, -18.76673788],
&[268.36802004, -33.79304302],
&[305.93972467, -15.39087376],
&[197.28420365, -11.66808306],
&[293.43187394, 1.91163633],
]);
let svd = SVD::fit(&x, Default::default()).unwrap();
let x_transformed = svd.transform(&x).unwrap();
assert_eq!(svd.components.shape(), (x.shape().1, 2));
assert!(x_transformed
.slice(0..5, 0..2)
.approximate_eq(&expected, 1e-4));
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let iris = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2],
&[4.7, 3.2, 1.3, 0.2],
&[4.6, 3.1, 1.5, 0.2],
&[5.0, 3.6, 1.4, 0.2],
&[5.4, 3.9, 1.7, 0.4],
&[4.6, 3.4, 1.4, 0.3],
&[5.0, 3.4, 1.5, 0.2],
&[4.4, 2.9, 1.4, 0.2],
&[4.9, 3.1, 1.5, 0.1],
&[7.0, 3.2, 4.7, 1.4],
&[6.4, 3.2, 4.5, 1.5],
&[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5],
&[5.7, 2.8, 4.5, 1.3],
&[6.3, 3.3, 4.7, 1.6],
&[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4],
]);
let svd = SVD::fit(&iris, Default::default()).unwrap();
let deserialized_svd: SVD<f64, DenseMatrix<f64>> =
serde_json::from_str(&serde_json::to_string(&svd).unwrap()).unwrap();
assert_eq!(svd, deserialized_svd);
}
}
+67 -10
View File
@@ -9,7 +9,7 @@
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::ensemble::random_forest_classifier::*;
//! use smartcore::ensemble::random_forest_classifier::RandomForestClassifier;
//!
//! // Iris dataset
//! let x = DenseMatrix::from_2d_array(&[
@@ -49,8 +49,10 @@ use std::default::Default;
use std::fmt::Debug;
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
@@ -60,7 +62,8 @@ use crate::tree::decision_tree_classifier::{
/// Parameters of the Random Forest algorithm.
/// Some parameters here are passed directly into base estimator.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RandomForestClassifierParameters {
/// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub criterion: SplitCriterion,
@@ -77,13 +80,47 @@ pub struct RandomForestClassifierParameters {
}
/// Random Forest Classifier
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct RandomForestClassifier<T: RealNumber> {
parameters: RandomForestClassifierParameters,
trees: Vec<DecisionTreeClassifier<T>>,
classes: Vec<T>,
}
impl RandomForestClassifierParameters {
    /// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_criterion(self, criterion: SplitCriterion) -> Self {
        Self { criterion, ..self }
    }
    /// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_max_depth(self, max_depth: u16) -> Self {
        Self {
            max_depth: Some(max_depth),
            ..self
        }
    }
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_leaf(self, min_samples_leaf: usize) -> Self {
        Self {
            min_samples_leaf,
            ..self
        }
    }
    /// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
    pub fn with_min_samples_split(self, min_samples_split: usize) -> Self {
        Self {
            min_samples_split,
            ..self
        }
    }
    /// The number of trees in the forest.
    pub fn with_n_trees(self, n_trees: u16) -> Self {
        Self { n_trees, ..self }
    }
    /// Number of random sample of predictors to use as split candidates.
    pub fn with_m(self, m: usize) -> Self {
        Self { m: Some(m), ..self }
    }
}
impl<T: RealNumber> PartialEq for RandomForestClassifier<T> {
fn eq(&self, other: &Self) -> bool {
if self.classes.len() != other.classes.len() || self.trees.len() != other.trees.len() {
@@ -117,6 +154,25 @@ impl Default for RandomForestClassifierParameters {
}
}
impl<T: RealNumber, M: Matrix<T>>
    SupervisedEstimator<M, M::RowVector, RandomForestClassifierParameters>
    for RandomForestClassifier<T>
{
    /// Adapter for the common `SupervisedEstimator` API; delegates to the
    /// inherent `RandomForestClassifier::fit`.
    fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: RandomForestClassifierParameters,
    ) -> Result<Self, Failed> {
        RandomForestClassifier::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestClassifier<T> {
    /// Adapter for the common `Predictor` API; delegates to the inherent
    /// `RandomForestClassifier::predict`.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber> RandomForestClassifier<T> {
/// Build a forest of trees from the training set.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -132,9 +188,9 @@ impl<T: RealNumber> RandomForestClassifier<T> {
let mut yi: Vec<usize> = vec![0; y_ncols];
let classes = y_m.unique();
for i in 0..y_ncols {
for (i, yi_i) in yi.iter_mut().enumerate().take(y_ncols) {
let yc = y_m.get(0, i);
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
*yi_i = classes.iter().position(|c| yc == *c).unwrap();
}
let mtry = parameters.m.unwrap_or_else(|| {
@@ -192,22 +248,22 @@ impl<T: RealNumber> RandomForestClassifier<T> {
which_max(&result)
}
fn sample_with_replacement(y: &Vec<usize>, num_classes: usize) -> Vec<usize> {
fn sample_with_replacement(y: &[usize], num_classes: usize) -> Vec<usize> {
let mut rng = rand::thread_rng();
let class_weight = vec![1.; num_classes];
let nrows = y.len();
let mut samples = vec![0; nrows];
for l in 0..num_classes {
for (l, class_weight_l) in class_weight.iter().enumerate().take(num_classes) {
let mut n_samples = 0;
let mut index: Vec<usize> = Vec::new();
for i in 0..nrows {
if y[i] == l {
for (i, y_i) in y.iter().enumerate().take(nrows) {
if *y_i == l {
index.push(i);
n_samples += 1;
}
}
let size = ((n_samples as f64) / class_weight[l]) as usize;
let size = ((n_samples as f64) / *class_weight_l) as usize;
for _ in 0..size {
let xi: usize = rng.gen_range(0, n_samples);
samples[index[xi]] += 1;
@@ -269,6 +325,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
+54 -2
View File
@@ -47,8 +47,10 @@ use std::default::Default;
use std::fmt::Debug;
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
@@ -56,7 +58,8 @@ use crate::tree::decision_tree_regressor::{
DecisionTreeRegressor, DecisionTreeRegressorParameters,
};
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Parameters of the Random Forest Regressor
/// Some parameters here are passed directly into base estimator.
pub struct RandomForestRegressorParameters {
@@ -73,12 +76,41 @@ pub struct RandomForestRegressorParameters {
}
/// Random Forest Regressor
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct RandomForestRegressor<T: RealNumber> {
parameters: RandomForestRegressorParameters,
trees: Vec<DecisionTreeRegressor<T>>,
}
impl RandomForestRegressorParameters {
    /// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub fn with_max_depth(mut self, max_depth: u16) -> Self {
        self.max_depth = Some(max_depth);
        self
    }
    /// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
        self.min_samples_leaf = min_samples_leaf;
        self
    }
    /// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
    pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
        self.min_samples_split = min_samples_split;
        self
    }
    /// The number of trees in the forest.
    pub fn with_n_trees(mut self, n_trees: usize) -> Self {
        self.n_trees = n_trees;
        self
    }
    /// Number of random sample of predictors to use as split candidates.
    pub fn with_m(mut self, m: usize) -> Self {
        self.m = Some(m);
        self
    }
}
impl Default for RandomForestRegressorParameters {
fn default() -> Self {
RandomForestRegressorParameters {
@@ -106,6 +138,25 @@ impl<T: RealNumber> PartialEq for RandomForestRegressor<T> {
}
}
impl<T: RealNumber, M: Matrix<T>>
    SupervisedEstimator<M, M::RowVector, RandomForestRegressorParameters>
    for RandomForestRegressor<T>
{
    /// Adapter for the common `SupervisedEstimator` API; delegates to the
    /// inherent `RandomForestRegressor::fit`.
    fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: RandomForestRegressorParameters,
    ) -> Result<Self, Failed> {
        RandomForestRegressor::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestRegressor<T> {
    /// Adapter for the common `Predictor` API; delegates to the inherent
    /// `RandomForestRegressor::predict`.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber> RandomForestRegressor<T> {
/// Build a forest of trees from the training set.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -223,6 +274,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],
+6 -2
View File
@@ -2,17 +2,21 @@
use std::error::Error;
use std::fmt;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Generic error to be raised when something goes wrong.
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Failed {
err: FailedError,
msg: String,
}
/// Type of error
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
#[non_exhaustive]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug)]
pub enum FailedError {
/// Can't fit algorithm to data
FitFailed = 1,
+16 -19
View File
@@ -1,9 +1,8 @@
#![allow(
clippy::needless_range_loop,
clippy::ptr_arg,
clippy::type_complexity,
clippy::too_many_arguments,
clippy::many_single_char_names
clippy::many_single_char_names,
clippy::unnecessary_wraps
)]
#![warn(missing_docs)]
#![warn(missing_doc_code_examples)]
@@ -12,16 +11,11 @@
//!
//! Welcome to SmartCore, the most advanced machine learning library in Rust!
//!
//! In SmartCore you will find implementation of these ML algorithms:
//! * __Regression__: Linear Regression (OLS), Decision Tree Regressor, Random Forest Regressor, K Nearest Neighbors
//! * __Classification__: Logistic Regressor, Decision Tree Classifier, Random Forest Classifier, Supervised Nearest Neighbors (KNN)
//! * __Clustering__: K-Means
//! * __Matrix Decomposition__: PCA, LU, QR, SVD, EVD
//! * __Distance Metrics__: Euclidian, Minkowski, Manhattan, Hamming, Mahalanobis
//! * __Evaluation Metrics__: Accuracy, AUC, Recall, Precision, F1, Mean Absolute Error, Mean Squared Error, R2
//! SmartCore features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
//! as well as tools for model selection and model evaluation.
//!
//! Most of algorithms implemented in SmartCore operate on n-dimentional arrays. While you can use Rust vectors with all functions defined in this library
//! we do recommend to go with one of the popular linear algebra libraries available in Rust. At this moment we support these packages:
//! SmartCore is well integrated with a wide variety of libraries that provide support for large, multi-dimensional arrays and matrices. At this moment,
//! all Smartcore's algorithms work with ordinary Rust vectors, as well as matrices and vectors defined in these packages:
//! * [ndarray](https://docs.rs/ndarray)
//! * [nalgebra](https://docs.rs/nalgebra/)
//!
@@ -30,21 +24,21 @@
//! To start using SmartCore simply add the following to your Cargo.toml file:
//! ```ignore
//! [dependencies]
//! smartcore = "0.1.0"
//! smartcore = "0.2.0"
//! ```
//!
//! All ML algorithms in SmartCore are grouped into these generic categories:
//! All machine learning algorithms in SmartCore are grouped into these broad categories:
//! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data.
//! * [Martix Decomposition](decomposition/index.html), various methods for matrix decomposition.
//! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
//! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables
//! * [Ensemble Models](ensemble/index.html), variety of regression and classification ensemble models
//! * [Tree-based Models](tree/index.html), classification and regression trees
//! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression
//! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem
//! * [SVM](svm/index.html), support vector machines
//!
//! Each category is assigned to a separate module.
//!
//! For example, KNN classifier is defined in [smartcore::neighbors::knn_classifier](neighbors/knn_classifier/index.html). To train and run it using standard Rust vectors you will
//! run this code:
//! For example, you can use this code to fit a [K Nearest Neighbors classifier](neighbors/knn_classifier/index.html) to a dataset that is defined as standard Rust vector:
//!
//! ```
//! // DenseMatrix definition
@@ -65,7 +59,7 @@
//! let y = vec![2., 2., 2., 3., 3.];
//!
//! // Train classifier
//! let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
//!
//! // Predict classes
//! let y_hat = knn.predict(&x).unwrap();
@@ -73,6 +67,7 @@
/// Various algorithms and helper methods that are used elsewhere in SmartCore
pub mod algorithm;
pub mod api;
/// Algorithms for clustering of unlabeled data
pub mod cluster;
/// Various datasets
@@ -97,6 +92,8 @@ pub mod naive_bayes;
/// Supervised neighbors-based learning methods
pub mod neighbors;
pub(crate) mod optimization;
/// Preprocessing utilities
pub mod preprocessing;
/// Support Vector Machines
pub mod svm;
/// Supervised tree-based learning methods
+34 -34
View File
@@ -99,27 +99,27 @@ pub trait EVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec<T>) {
let (n, _) = V.shape();
for i in 0..n {
d[i] = V.get(n - 1, i);
for (i, d_i) in d.iter_mut().enumerate().take(n) {
*d_i = V.get(n - 1, i);
}
for i in (1..n).rev() {
let mut scale = T::zero();
let mut h = T::zero();
for k in 0..i {
scale += d[k].abs();
for d_k in d.iter().take(i) {
scale += d_k.abs();
}
if scale == T::zero() {
e[i] = d[i - 1];
for j in 0..i {
d[j] = V.get(i - 1, j);
for (j, d_j) in d.iter_mut().enumerate().take(i) {
*d_j = V.get(i - 1, j);
V.set(i, j, T::zero());
V.set(j, i, T::zero());
}
} else {
for k in 0..i {
d[k] /= scale;
h += d[k] * d[k];
for d_k in d.iter_mut().take(i) {
*d_k /= scale;
h += (*d_k) * (*d_k);
}
let mut f = d[i - 1];
let mut g = h.sqrt();
@@ -129,8 +129,8 @@ fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec
e[i] = scale * g;
h -= f * g;
d[i - 1] = f - g;
for j in 0..i {
e[j] = T::zero();
for e_j in e.iter_mut().take(i) {
*e_j = T::zero();
}
for j in 0..i {
@@ -170,16 +170,16 @@ fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec
V.set(i, i, T::one());
let h = d[i + 1];
if h != T::zero() {
for k in 0..=i {
d[k] = V.get(k, i + 1) / h;
for (k, d_k) in d.iter_mut().enumerate().take(i + 1) {
*d_k = V.get(k, i + 1) / h;
}
for j in 0..=i {
let mut g = T::zero();
for k in 0..=i {
g += V.get(k, i + 1) * V.get(k, j);
}
for k in 0..=i {
V.sub_element_mut(k, j, g * d[k]);
for (k, d_k) in d.iter().enumerate().take(i + 1) {
V.sub_element_mut(k, j, g * (*d_k));
}
}
}
@@ -187,8 +187,8 @@ fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec
V.set(k, i + 1, T::zero());
}
}
for j in 0..n {
d[j] = V.get(n - 1, j);
for (j, d_j) in d.iter_mut().enumerate().take(n) {
*d_j = V.get(n - 1, j);
V.set(n - 1, j, T::zero());
}
V.set(n - 1, n - 1, T::one());
@@ -238,8 +238,8 @@ fn tql2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec<
d[l + 1] = e[l] * (p + r);
let dl1 = d[l + 1];
let mut h = g - d[l];
for i in l + 2..n {
d[i] -= h;
for d_i in d.iter_mut().take(n).skip(l + 2) {
*d_i -= h;
}
f += h;
@@ -285,10 +285,10 @@ fn tql2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec<
for i in 0..n - 1 {
let mut k = i;
let mut p = d[i];
for j in i + 1..n {
if d[j] > p {
for (j, d_j) in d.iter().enumerate().take(n).skip(i + 1) {
if *d_j > p {
k = j;
p = d[j];
p = *d_j;
}
}
if k != i {
@@ -316,7 +316,7 @@ fn balance<T: RealNumber, M: BaseMatrix<T>>(A: &mut M) -> Vec<T> {
let mut done = false;
while !done {
done = true;
for i in 0..n {
for (i, scale_i) in scale.iter_mut().enumerate().take(n) {
let mut r = T::zero();
let mut c = T::zero();
for j in 0..n {
@@ -341,7 +341,7 @@ fn balance<T: RealNumber, M: BaseMatrix<T>>(A: &mut M) -> Vec<T> {
if (c + r) / f < t * s {
done = false;
g = T::one() / f;
scale[i] *= f;
*scale_i *= f;
for j in 0..n {
A.mul_element_mut(i, j, g);
}
@@ -360,7 +360,7 @@ fn elmhes<T: RealNumber, M: BaseMatrix<T>>(A: &mut M) -> Vec<usize> {
let (n, _) = A.shape();
let mut perm = vec![0; n];
for m in 1..n - 1 {
for (m, perm_m) in perm.iter_mut().enumerate().take(n - 1).skip(1) {
let mut x = T::zero();
let mut i = m;
for j in m..n {
@@ -369,7 +369,7 @@ fn elmhes<T: RealNumber, M: BaseMatrix<T>>(A: &mut M) -> Vec<usize> {
i = j;
}
}
perm[m] = i;
*perm_m = i;
if i != m {
for j in (m - 1)..n {
let swap = A.get(i, j);
@@ -402,7 +402,7 @@ fn elmhes<T: RealNumber, M: BaseMatrix<T>>(A: &mut M) -> Vec<usize> {
perm
}
fn eltran<T: RealNumber, M: BaseMatrix<T>>(A: &M, V: &mut M, perm: &Vec<usize>) {
fn eltran<T: RealNumber, M: BaseMatrix<T>>(A: &M, V: &mut M, perm: &[usize]) {
let (n, _) = A.shape();
for mp in (1..n - 1).rev() {
for k in mp + 1..n {
@@ -774,11 +774,11 @@ fn hqr2<T: RealNumber, M: BaseMatrix<T>>(A: &mut M, V: &mut M, d: &mut Vec<T>, e
}
}
fn balbak<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, scale: &Vec<T>) {
fn balbak<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, scale: &[T]) {
let (n, _) = V.shape();
for i in 0..n {
for (i, scale_i) in scale.iter().enumerate().take(n) {
for j in 0..n {
V.mul_element_mut(i, j, scale[i]);
V.mul_element_mut(i, j, *scale_i);
}
}
}
@@ -789,8 +789,8 @@ fn sort<T: RealNumber, M: BaseMatrix<T>>(d: &mut Vec<T>, e: &mut Vec<T>, V: &mut
for j in 1..n {
let real = d[j];
let img = e[j];
for k in 0..n {
temp[k] = V.get(k, j);
for (k, temp_k) in temp.iter_mut().enumerate().take(n) {
*temp_k = V.get(k, j);
}
let mut i = j as i32 - 1;
while i >= 0 {
@@ -806,8 +806,8 @@ fn sort<T: RealNumber, M: BaseMatrix<T>>(d: &mut Vec<T>, e: &mut Vec<T>, V: &mut
}
d[i as usize + 1] = real;
e[i as usize + 1] = img;
for k in 0..n {
V.set(k, i as usize + 1, temp[k]);
for (k, temp_k) in temp.iter().enumerate().take(n) {
V.set(k, i as usize + 1, *temp_k);
}
}
}
+28
View File
@@ -0,0 +1,28 @@
//! In this module you will find composite of matrix operations that are used elsewhere
//! for improved efficiency.
use crate::linalg::BaseMatrix;
use crate::math::num::RealNumber;
/// High order matrix operations.
pub trait HighOrderOperations<T: RealNumber>: BaseMatrix<T> {
    /// Y = AB
    ///
    /// Multiplies `self` by `b`, optionally treating either operand as
    /// transposed. When both are transposed, the identity
    /// (A^T)(B^T) = (BA)^T avoids forming either transpose explicitly.
    /// ```
    /// use smartcore::linalg::naive::dense_matrix::*;
    /// use smartcore::linalg::high_order::HighOrderOperations;
    ///
    /// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
    /// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]);
    /// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]);
    ///
    /// assert_eq!(a.ab(true, &b, false), expected);
    /// ```
    fn ab(&self, a_transpose: bool, b: &Self, b_transpose: bool) -> Self {
        if a_transpose {
            if b_transpose {
                b.matmul(self).transpose()
            } else {
                self.transpose().matmul(b)
            }
        } else if b_transpose {
            self.matmul(&b.transpose())
        } else {
            self.matmul(b)
        }
    }
}
+5 -8
View File
@@ -202,24 +202,21 @@ pub trait LUDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
fn lu_mut(mut self) -> Result<LU<T, Self>, Failed> {
let (m, n) = self.shape();
let mut piv = vec![0; m];
for i in 0..m {
piv[i] = i;
}
let mut piv = (0..m).collect::<Vec<_>>();
let mut pivsign = 1;
let mut LUcolj = vec![T::zero(); m];
for j in 0..n {
for i in 0..m {
LUcolj[i] = self.get(i, j);
for (i, LUcolj_i) in LUcolj.iter_mut().enumerate().take(m) {
*LUcolj_i = self.get(i, j);
}
for i in 0..m {
let kmax = usize::min(i, j);
let mut s = T::zero();
for k in 0..kmax {
s += self.get(i, k) * LUcolj[k];
for (k, LUcolj_k) in LUcolj.iter().enumerate().take(kmax) {
s += self.get(i, k) * (*LUcolj_k);
}
LUcolj[i] -= s;
+142 -2
View File
@@ -1,3 +1,4 @@
#![allow(clippy::wrong_self_convention)]
//! # Linear Algebra and Matrix Decomposition
//!
//! Most machine learning algorithms in SmartCore depend on linear algebra and matrix decomposition methods from this module.
@@ -36,6 +37,7 @@
pub mod cholesky;
/// The matrix is represented in terms of its eigenvalues and eigenvectors.
pub mod evd;
pub mod high_order;
/// Factors a matrix as the product of a lower triangular matrix and an upper triangular matrix.
pub mod lu;
/// Dense matrix with column-major order that wraps [Vec](https://doc.rust-lang.org/std/vec/struct.Vec.html).
@@ -59,9 +61,10 @@ use std::ops::Range;
use crate::math::num::RealNumber;
use cholesky::CholeskyDecomposableMatrix;
use evd::EVDDecomposableMatrix;
use high_order::HighOrderOperations;
use lu::LUDecomposableMatrix;
use qr::QRDecomposableMatrix;
use stats::MatrixStats;
use stats::{MatrixPreprocessing, MatrixStats};
use svd::SVDDecomposableMatrix;
/// Column or row vector
@@ -134,6 +137,66 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
/// Subtract `x` from single element of the vector, write result to original vector.
fn sub_element_mut(&mut self, pos: usize, x: T);
/// Subtract scalar `x` from every element, in place; returns `&self` for chaining.
fn sub_scalar_mut(&mut self, x: T) -> &Self {
    for i in 0..self.len() {
        self.set(i, self.get(i) - x);
    }
    self
}
/// Add scalar `x` to every element, in place; returns `&self` for chaining.
fn add_scalar_mut(&mut self, x: T) -> &Self {
    for i in 0..self.len() {
        self.set(i, self.get(i) + x);
    }
    self
}
/// Multiply every element by scalar `x`, in place; returns `&self` for chaining.
fn mul_scalar_mut(&mut self, x: T) -> &Self {
    for i in 0..self.len() {
        self.set(i, self.get(i) * x);
    }
    self
}
/// Subtract scalar
fn div_scalar_mut(&mut self, x: T) -> &Self {
for i in 0..self.len() {
self.set(i, self.get(i) / x);
}
self
}
/// Add vectors, element-wise
fn add_scalar(&self, x: T) -> Self {
let mut r = self.clone();
r.add_scalar_mut(x);
r
}
/// Subtract vectors, element-wise
fn sub_scalar(&self, x: T) -> Self {
let mut r = self.clone();
r.sub_scalar_mut(x);
r
}
/// Multiply vectors, element-wise
fn mul_scalar(&self, x: T) -> Self {
let mut r = self.clone();
r.mul_scalar_mut(x);
r
}
/// Divide vectors, element-wise
fn div_scalar(&self, x: T) -> Self {
let mut r = self.clone();
r.div_scalar_mut(x);
r
}
/// Add vectors, element-wise, overriding original vector with result.
fn add_mut(&mut self, other: &Self) -> &Self;
@@ -203,12 +266,28 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
sum += xi * xi;
}
mu /= div;
sum / div - mu * mu
sum / div - mu.powi(2)
}
/// Computes the standard deviation.
fn std(&self) -> T {
self.var().sqrt()
}
/// Copies content of `other` vector.
fn copy_from(&mut self, other: &Self);
/// Take elements from an array.
/// Returns a new vector of length `index.len()` whose `i`-th element is `self[index[i]]`.
/// Indices may repeat; each occurrence copies the element again.
/// NOTE(review): out-of-range indices are passed straight to `get` — behavior then depends on the implementor; confirm.
fn take(&self, index: &[usize]) -> Self {
let n = index.len();
let mut result = Self::zeros(n);
for (i, idx) in index.iter().enumerate() {
result.set(i, self.get(*idx));
}
result
}
}
/// Generic matrix type.
@@ -546,6 +625,32 @@ pub trait BaseMatrix<T: RealNumber>: Clone + Debug {
/// Calculates the covariance matrix
fn cov(&self) -> Self;
/// Take elements from an array along an axis.
/// * `axis == 0` — `index` selects rows; result is `index.len() x p`.
/// * any other `axis` — `index` selects columns; result is `n x index.len()`.
/// Indices may repeat, duplicating the selected row/column.
fn take(&self, index: &[usize], axis: u8) -> Self {
let (n, p) = self.shape();
// k is the length of the non-indexed dimension (columns when taking rows, and vice versa).
let k = match axis {
0 => p,
_ => n,
};
let mut result = match axis {
0 => Self::zeros(index.len(), p),
_ => Self::zeros(n, index.len()),
};
for (i, idx) in index.iter().enumerate() {
for j in 0..k {
match axis {
0 => result.set(i, j, self.get(*idx, j)),
_ => result.set(j, i, self.get(j, *idx)),
};
}
}
result
}
}
/// Generic matrix with additional mixins like various factorization methods.
@@ -557,6 +662,8 @@ pub trait Matrix<T: RealNumber>:
+ LUDecomposableMatrix<T>
+ CholeskyDecomposableMatrix<T>
+ MatrixStats<T>
+ MatrixPreprocessing<T>
+ HighOrderOperations<T>
+ PartialEq
+ Display
{
@@ -595,6 +702,8 @@ impl<'a, T: RealNumber, M: BaseMatrix<T>> Iterator for RowIter<'a, T, M> {
#[cfg(test)]
mod tests {
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::linalg::BaseMatrix;
use crate::linalg::BaseVector;
#[test]
@@ -617,4 +726,35 @@ mod tests {
assert!((m.var() - 1.25f64).abs() < std::f64::EPSILON);
}
#[test]
fn vec_take() {
let m = vec![1., 2., 3., 4., 5.];
assert_eq!(m.take(&vec!(0, 0, 4, 4)), vec![1., 1., 5., 5.]);
}
#[test]
fn take() {
let m = DenseMatrix::from_2d_array(&[
&[1.0, 2.0],
&[3.0, 4.0],
&[5.0, 6.0],
&[7.0, 8.0],
&[9.0, 10.0],
]);
let expected_0 = DenseMatrix::from_2d_array(&[&[3.0, 4.0], &[3.0, 4.0], &[7.0, 8.0]]);
let expected_1 = DenseMatrix::from_2d_array(&[
&[2.0, 1.0],
&[4.0, 3.0],
&[6.0, 5.0],
&[8.0, 7.0],
&[10.0, 9.0],
]);
assert_eq!(m.take(&vec!(1, 1, 3), 0), expected_0);
assert_eq!(m.take(&vec!(1, 0), 1), expected_1);
}
}
+126 -28
View File
@@ -1,17 +1,23 @@
#![allow(clippy::ptr_arg)]
use std::fmt;
use std::fmt::Debug;
#[cfg(feature = "serde")]
use std::marker::PhantomData;
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::de::{Deserializer, MapAccess, SeqAccess, Visitor};
#[cfg(feature = "serde")]
use serde::ser::{SerializeStruct, Serializer};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::cholesky::CholeskyDecomposableMatrix;
use crate::linalg::evd::EVDDecomposableMatrix;
use crate::linalg::high_order::HighOrderOperations;
use crate::linalg::lu::LUDecomposableMatrix;
use crate::linalg::qr::QRDecomposableMatrix;
use crate::linalg::stats::MatrixStats;
use crate::linalg::stats::{MatrixPreprocessing, MatrixStats};
use crate::linalg::svd::SVDDecomposableMatrix;
use crate::linalg::Matrix;
pub use crate::linalg::{BaseMatrix, BaseVector};
@@ -163,8 +169,8 @@ impl<T: RealNumber> BaseVector<T> for Vec<T> {
fn sum(&self) -> T {
let mut sum = T::zero();
for i in 0..self.len() {
sum += self[i];
for self_i in self.iter() {
sum += *self_i;
}
sum
}
@@ -175,6 +181,18 @@ impl<T: RealNumber> BaseVector<T> for Vec<T> {
result.dedup();
result
}
// Copies the contents of `other` into `self` element-by-element.
// Panics when the two vectors have different lengths, since a partial copy
// would silently corrupt data.
fn copy_from(&mut self, other: &Self) {
if self.len() != other.len() {
panic!(
"Can't copy vector of length {} into a vector of length {}.",
self.len(),
other.len()
);
}
self[..].clone_from_slice(&other[..]);
}
}
/// Column-major, dense matrix. See [Simple Dense Matrix](../index.html).
@@ -238,9 +256,9 @@ impl<T: RealNumber> DenseMatrix<T> {
nrows,
values: vec![T::zero(); ncols * nrows],
};
for row in 0..nrows {
for col in 0..ncols {
m.set(row, col, values[row][col]);
for (row_index, row) in values.iter().enumerate().take(nrows) {
for (col_index, value) in row.iter().enumerate().take(ncols) {
m.set(row_index, col_index, *value);
}
}
m
@@ -258,7 +276,7 @@ impl<T: RealNumber> DenseMatrix<T> {
/// * `nrows` - number of rows in new matrix.
/// * `ncols` - number of columns in new matrix.
/// * `values` - values to initialize the matrix.
pub fn from_vec(nrows: usize, ncols: usize, values: &Vec<T>) -> DenseMatrix<T> {
pub fn from_vec(nrows: usize, ncols: usize, values: &[T]) -> DenseMatrix<T> {
let mut m = DenseMatrix {
ncols,
nrows,
@@ -335,6 +353,7 @@ impl<'a, T: RealNumber> Iterator for DenseMatrixIterator<'a, T> {
}
}
#[cfg(feature = "serde")]
impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for DenseMatrix<T> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
@@ -420,6 +439,7 @@ impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for De
}
}
#[cfg(feature = "serde")]
impl<T: RealNumber + fmt::Debug + Serialize> Serialize for DenseMatrix<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -444,7 +464,40 @@ impl<T: RealNumber> LUDecomposableMatrix<T> for DenseMatrix<T> {}
impl<T: RealNumber> CholeskyDecomposableMatrix<T> for DenseMatrix<T> {}
impl<T: RealNumber> HighOrderOperations<T> for DenseMatrix<T> {
// Computes op(A) * op(B) where op(X) is X or Xᵀ according to the flags,
// without materializing any transposed matrix.
fn ab(&self, a_transpose: bool, b: &Self, b_transpose: bool) -> Self {
if !a_transpose && !b_transpose {
// Plain product: delegate to the existing matmul implementation.
self.matmul(b)
} else {
// d1 = inner dim contributed by op(A), d2 = rows of op(A),
// d3 = cols of op(B),                  d4 = inner dim contributed by op(B).
// The `_` arm covers (true, true) since (false, false) was handled above.
let (d1, d2, d3, d4) = match (a_transpose, b_transpose) {
(true, false) => (self.nrows, self.ncols, b.ncols, b.nrows),
(false, true) => (self.ncols, self.nrows, b.nrows, b.ncols),
_ => (self.nrows, self.ncols, b.nrows, b.ncols),
};
// Inner dimensions must agree: cols(op(A)) == rows(op(B)).
if d1 != d4 {
panic!("Can not multiply {}x{} by {}x{} matrices", d2, d1, d4, d3);
}
let mut result = Self::zeros(d2, d3);
for r in 0..d2 {
for c in 0..d3 {
let mut s = T::zero();
// Accumulate the dot product, reading A and B through the
// transposed index order dictated by the flags.
for i in 0..d1 {
match (a_transpose, b_transpose) {
(true, false) => s += self.get(i, r) * b.get(i, c),
(false, true) => s += self.get(r, i) * b.get(c, i),
_ => s += self.get(i, r) * b.get(c, i),
}
}
result.set(r, c, s);
}
}
result
}
}
}
impl<T: RealNumber> MatrixStats<T> for DenseMatrix<T> {}
impl<T: RealNumber> MatrixPreprocessing<T> for DenseMatrix<T> {}
impl<T: RealNumber> Matrix<T> for DenseMatrix<T> {}
@@ -509,8 +562,8 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
fn get_row(&self, row: usize) -> Self::RowVector {
let mut v = vec![T::zero(); self.ncols];
for c in 0..self.ncols {
v[c] = self.get(row, c);
for (c, v_c) in v.iter_mut().enumerate().take(self.ncols) {
*v_c = self.get(row, c);
}
v
@@ -518,29 +571,29 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
fn get_row_as_vec(&self, row: usize) -> Vec<T> {
let mut result = vec![T::zero(); self.ncols];
for c in 0..self.ncols {
result[c] = self.get(row, c);
for (c, result_c) in result.iter_mut().enumerate().take(self.ncols) {
*result_c = self.get(row, c);
}
result
}
fn copy_row_as_vec(&self, row: usize, result: &mut Vec<T>) {
for c in 0..self.ncols {
result[c] = self.get(row, c);
for (c, result_c) in result.iter_mut().enumerate().take(self.ncols) {
*result_c = self.get(row, c);
}
}
fn get_col_as_vec(&self, col: usize) -> Vec<T> {
let mut result = vec![T::zero(); self.nrows];
for r in 0..self.nrows {
result[r] = self.get(r, col);
for (r, result_r) in result.iter_mut().enumerate().take(self.nrows) {
*result_r = self.get(r, col);
}
result
}
fn copy_col_as_vec(&self, col: usize, result: &mut Vec<T>) {
for r in 0..self.nrows {
result[r] = self.get(r, col);
for (r, result_r) in result.iter_mut().enumerate().take(self.nrows) {
*result_r = self.get(r, col);
}
}
@@ -625,8 +678,8 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
}
fn dot(&self, other: &Self) -> T {
if self.nrows != 1 && other.nrows != 1 {
panic!("A and B should both be 1-dimentional vectors.");
if (self.nrows != 1 && other.nrows != 1) && (self.ncols != 1 && other.ncols != 1) {
panic!("A and B should both be either a row or a column vector.");
}
if self.nrows * self.ncols != other.nrows * other.ncols {
panic!("A and B should have the same size");
@@ -802,13 +855,13 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
let mut mean = vec![T::zero(); self.ncols];
for r in 0..self.nrows {
for c in 0..self.ncols {
mean[c] += self.get(r, c);
for (c, mean_c) in mean.iter_mut().enumerate().take(self.ncols) {
*mean_c += self.get(r, c);
}
}
for i in 0..mean.len() {
mean[i] /= T::from(self.nrows).unwrap();
for mean_i in mean.iter_mut() {
*mean_i /= T::from(self.nrows).unwrap();
}
mean
@@ -880,9 +933,7 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
);
}
for i in 0..self.values.len() {
self.values[i] = other.values[i];
}
self.values[..].clone_from_slice(&other.values[..]);
}
fn abs_mut(&mut self) -> &Self {
@@ -955,7 +1006,7 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
fn argmax(&self) -> Vec<usize> {
let mut res = vec![0usize; self.nrows];
for r in 0..self.nrows {
for (r, res_r) in res.iter_mut().enumerate().take(self.nrows) {
let mut max = T::neg_infinity();
let mut max_pos = 0usize;
for c in 0..self.ncols {
@@ -965,7 +1016,7 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
max_pos = c;
}
}
res[r] = max_pos;
*res_r = max_pos;
}
res
@@ -1017,6 +1068,14 @@ mod tests {
assert_eq!(32.0, BaseVector::dot(&v1, &v2));
}
#[test]
fn vec_copy_from() {
let mut v1 = vec![1., 2., 3.];
let v2 = vec![4., 5., 6.];
v1.copy_from(&v2);
assert_eq!(v1, v2);
}
#[test]
fn vec_approximate_eq() {
let a = vec![1., 2., 3.];
@@ -1064,6 +1123,12 @@ mod tests {
);
}
#[test]
fn col_matrix_to_row_vector() {
let m: DenseMatrix<f64> = BaseMatrix::zeros(10, 1);
assert_eq!(m.to_row_vector().len(), 10)
}
#[test]
fn iter() {
let vec = vec![1., 2., 3., 4., 5., 6.];
@@ -1114,6 +1179,29 @@ mod tests {
assert_eq!(result, expected);
}
#[test]
fn ab() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]);
let c = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
assert_eq!(
a.ab(false, &b, false),
DenseMatrix::from_2d_array(&[&[46., 52.], &[109., 124.]])
);
assert_eq!(
c.ab(true, &b, false),
DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]])
);
assert_eq!(
b.ab(false, &c, true),
DenseMatrix::from_2d_array(&[&[17., 39., 61.], &[23., 53., 83.,], &[29., 67., 105.]])
);
assert_eq!(
a.ab(true, &b, true),
DenseMatrix::from_2d_array(&[&[29., 39., 49.], &[40., 54., 68.,], &[51., 69., 87.]])
);
}
#[test]
fn dot() {
let a = DenseMatrix::from_array(1, 3, &[1., 2., 3.]);
@@ -1121,6 +1209,14 @@ mod tests {
assert_eq!(a.dot(&b), 32.);
}
#[test]
fn copy_from() {
let mut a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
let b = DenseMatrix::from_2d_array(&[&[7., 8.], &[9., 10.], &[11., 12.]]);
a.copy_from(&b);
assert_eq!(a, b);
}
#[test]
fn slice() {
let m = DenseMatrix::from_2d_array(&[
@@ -1216,6 +1312,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn to_from_json() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
let deserialized_a: DenseMatrix<f64> =
@@ -1224,6 +1321,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn to_from_bincode() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
let deserialized_a: DenseMatrix<f64> =
+35 -3
View File
@@ -44,9 +44,10 @@ use nalgebra::{DMatrix, Dynamic, Matrix, MatrixMN, RowDVector, Scalar, VecStorag
use crate::linalg::cholesky::CholeskyDecomposableMatrix;
use crate::linalg::evd::EVDDecomposableMatrix;
use crate::linalg::high_order::HighOrderOperations;
use crate::linalg::lu::LUDecomposableMatrix;
use crate::linalg::qr::QRDecomposableMatrix;
use crate::linalg::stats::MatrixStats;
use crate::linalg::stats::{MatrixPreprocessing, MatrixStats};
use crate::linalg::svd::SVDDecomposableMatrix;
use crate::linalg::Matrix as SmartCoreMatrix;
use crate::linalg::{BaseMatrix, BaseVector};
@@ -180,19 +181,24 @@ impl<T: RealNumber + 'static> BaseVector<T> for MatrixMN<T, U1, Dynamic> {
result.dedup();
result
}
fn copy_from(&mut self, other: &Self) {
Matrix::copy_from(self, other);
}
}
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
BaseMatrix<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
{
type RowVector = MatrixMN<T, U1, Dynamic>;
type RowVector = RowDVector<T>;
fn from_row_vector(vec: Self::RowVector) -> Self {
Matrix::from_rows(&[vec])
}
fn to_row_vector(self) -> Self::RowVector {
self.row(0).into_owned()
let (nrows, ncols) = self.shape();
self.reshape_generic(U1, Dynamic::new(nrows * ncols))
}
fn get(&self, row: usize, col: usize) -> T {
@@ -552,6 +558,16 @@ impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Su
{
}
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
MatrixPreprocessing<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
{
}
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
HighOrderOperations<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
{
}
impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
SmartCoreMatrix<T> for Matrix<T, Dynamic, Dynamic, VecStorage<T, Dynamic, Dynamic>>
{
@@ -563,6 +579,16 @@ mod tests {
use crate::linear::linear_regression::*;
use nalgebra::{DMatrix, Matrix2x3, RowDVector};
#[test]
fn vec_copy_from() {
let mut v1 = RowDVector::from_vec(vec![1., 2., 3.]);
let mut v2 = RowDVector::from_vec(vec![4., 5., 6.]);
v1.copy_from(&v2);
assert_eq!(v2, v1);
v2[0] = 10.0;
assert_ne!(v2, v1);
}
#[test]
fn vec_len() {
let v = RowDVector::from_vec(vec![1., 2., 3.]);
@@ -697,6 +723,12 @@ mod tests {
assert_eq!(m.to_row_vector(), expected);
}
#[test]
fn col_matrix_to_row_vector() {
let m: DMatrix<f64> = BaseMatrix::zeros(10, 1);
assert_eq!(m.to_row_vector().len(), 10)
}
#[test]
fn get_row_col_as_vec() {
let m = DMatrix::from_row_slice(3, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
+37 -6
View File
@@ -36,7 +36,7 @@
//! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
//! ]);
//!
//! let lr = LogisticRegression::fit(&x, &y).unwrap();
//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = lr.predict(&x).unwrap();
//! ```
use std::iter::Sum;
@@ -47,13 +47,14 @@ use std::ops::Range;
use std::ops::SubAssign;
use ndarray::ScalarOperand;
use ndarray::{s, stack, Array, ArrayBase, Axis, Ix1, Ix2, OwnedRepr};
use ndarray::{concatenate, s, Array, ArrayBase, Axis, Ix1, Ix2, OwnedRepr};
use crate::linalg::cholesky::CholeskyDecomposableMatrix;
use crate::linalg::evd::EVDDecomposableMatrix;
use crate::linalg::high_order::HighOrderOperations;
use crate::linalg::lu::LUDecomposableMatrix;
use crate::linalg::qr::QRDecomposableMatrix;
use crate::linalg::stats::MatrixStats;
use crate::linalg::stats::{MatrixPreprocessing, MatrixStats};
use crate::linalg::svd::SVDDecomposableMatrix;
use crate::linalg::Matrix;
use crate::linalg::{BaseMatrix, BaseVector};
@@ -175,6 +176,10 @@ impl<T: RealNumber + ScalarOperand> BaseVector<T> for ArrayBase<OwnedRepr<T>, Ix
result.dedup();
result
}
fn copy_from(&mut self, other: &Self) {
self.assign(&other);
}
}
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
@@ -245,11 +250,11 @@ impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssi
}
fn h_stack(&self, other: &Self) -> Self {
stack(Axis(1), &[self.view(), other.view()]).unwrap()
concatenate(Axis(1), &[self.view(), other.view()]).unwrap()
}
fn v_stack(&self, other: &Self) -> Self {
stack(Axis(0), &[self.view(), other.view()]).unwrap()
concatenate(Axis(0), &[self.view(), other.view()]).unwrap()
}
fn matmul(&self, other: &Self) -> Self {
@@ -502,6 +507,16 @@ impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssi
{
}
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
MatrixPreprocessing<T> for ArrayBase<OwnedRepr<T>, Ix2>
{
}
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum>
HighOrderOperations<T> for ArrayBase<OwnedRepr<T>, Ix2>
{
}
impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssign + Sum> Matrix<T>
for ArrayBase<OwnedRepr<T>, Ix2>
{
@@ -526,6 +541,16 @@ mod tests {
assert_eq!(5., BaseVector::get(&result, 1));
}
#[test]
fn vec_copy_from() {
let mut v1 = arr1(&[1., 2., 3.]);
let mut v2 = arr1(&[4., 5., 6.]);
v1.copy_from(&v2);
assert_eq!(v1, v2);
v2[0] = 10.0;
assert_ne!(v1, v2);
}
#[test]
fn vec_len() {
let v = arr1(&[1., 2., 3.]);
@@ -563,6 +588,12 @@ mod tests {
);
}
#[test]
fn col_matrix_to_row_vector() {
let m: Array2<f64> = BaseMatrix::zeros(10, 1);
assert_eq!(m.to_row_vector().len(), 10)
}
#[test]
fn add_mut() {
let mut a1 = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -886,7 +917,7 @@ mod tests {
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
]);
let lr = LogisticRegression::fit(&x, &y).unwrap();
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let y_hat = lr.predict(&x).unwrap();
+4 -4
View File
@@ -44,8 +44,8 @@ pub struct QR<T: RealNumber, M: BaseMatrix<T>> {
impl<T: RealNumber, M: BaseMatrix<T>> QR<T, M> {
pub(crate) fn new(QR: M, tau: Vec<T>) -> QR<T, M> {
let mut singular = false;
for j in 0..tau.len() {
if tau[j] == T::zero() {
for tau_elem in tau.iter() {
if *tau_elem == T::zero() {
singular = true;
break;
}
@@ -153,7 +153,7 @@ pub trait QRDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
let mut r_diagonal: Vec<T> = vec![T::zero(); n];
for k in 0..n {
for (k, r_diagonal_k) in r_diagonal.iter_mut().enumerate().take(n) {
let mut nrm = T::zero();
for i in k..m {
nrm = nrm.hypot(self.get(i, k));
@@ -179,7 +179,7 @@ pub trait QRDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
}
}
}
r_diagonal[k] = -nrm;
*r_diagonal_k = -nrm;
}
Ok(QR::new(self, r_diagonal))
+49 -8
View File
@@ -22,14 +22,14 @@ pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
let div = T::from_usize(m).unwrap();
for i in 0..n {
for (i, x_i) in x.iter_mut().enumerate().take(n) {
for j in 0..m {
x[i] += match axis {
*x_i += match axis {
0 => self.get(j, i),
_ => self.get(i, j),
};
}
x[i] /= div;
*x_i /= div;
}
x
@@ -49,7 +49,7 @@ pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
let div = T::from_usize(m).unwrap();
for i in 0..n {
for (i, x_i) in x.iter_mut().enumerate().take(n) {
let mut mu = T::zero();
let mut sum = T::zero();
for j in 0..m {
@@ -61,7 +61,7 @@ pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
sum += a * a;
}
mu /= div;
x[i] = sum / div - mu * mu;
*x_i = sum / div - mu.powi(2);
}
x
@@ -76,15 +76,15 @@ pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
_ => self.shape().0,
};
for i in 0..n {
x[i] = x[i].sqrt();
for x_i in x.iter_mut().take(n) {
*x_i = x_i.sqrt();
}
x
}
/// standardize values by removing the mean and scaling to unit variance
fn scale_mut(&mut self, mean: &Vec<T>, std: &Vec<T>, axis: u8) {
fn scale_mut(&mut self, mean: &[T], std: &[T], axis: u8) {
let (n, m) = match axis {
0 => {
let (n, m) = self.shape();
@@ -104,6 +104,47 @@ pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
}
}
/// Defines baseline implementations for various matrix processing functions
pub trait MatrixPreprocessing<T: RealNumber>: BaseMatrix<T> {
    /// Each element of the matrix greater than the threshold becomes 1, while values less than or equal to the threshold become 0
    /// ```
    /// use smartcore::linalg::naive::dense_matrix::*;
    /// use crate::smartcore::linalg::stats::MatrixPreprocessing;
    /// let mut a = DenseMatrix::from_array(2, 3, &[0., 2., 3., -5., -6., -7.]);
    /// let expected = DenseMatrix::from_array(2, 3, &[0., 1., 1., 0., 0., 0.]);
    /// a.binarize_mut(0.);
    ///
    /// assert_eq!(a, expected);
    /// ```
    fn binarize_mut(&mut self, threshold: T) {
        let (nrows, ncols) = self.shape();
        for r in 0..nrows {
            for c in 0..ncols {
                // Map every entry to one or zero based on the threshold test.
                let bit = if self.get(r, c) > threshold {
                    T::one()
                } else {
                    T::zero()
                };
                self.set(r, c, bit);
            }
        }
    }
    /// Returns new matrix where elements are binarized according to a given threshold.
    /// ```
    /// use smartcore::linalg::naive::dense_matrix::*;
    /// use crate::smartcore::linalg::stats::MatrixPreprocessing;
    /// let a = DenseMatrix::from_array(2, 3, &[0., 2., 3., -5., -6., -7.]);
    /// let expected = DenseMatrix::from_array(2, 3, &[0., 1., 1., 0., 0., 0.]);
    ///
    /// assert_eq!(a.binarize(0.), expected);
    /// ```
    fn binarize(&self, threshold: T) -> Self {
        let mut binarized = self.clone();
        binarized.binarize_mut(threshold);
        binarized
    }
}
#[cfg(test)]
mod tests {
use super::*;
+16 -16
View File
@@ -156,8 +156,8 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
let h = f * g - s;
U.set(i, l - 1, f - g);
for k in l - 1..n {
rv1[k] = U.get(i, k) / h;
for (k, rv1_k) in rv1.iter_mut().enumerate().take(n).skip(l - 1) {
*rv1_k = U.get(i, k) / h;
}
for j in l - 1..m {
@@ -166,8 +166,8 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
s += U.get(j, k) * U.get(i, k);
}
for k in l - 1..n {
U.add_element_mut(j, k, s * rv1[k]);
for (k, rv1_k) in rv1.iter().enumerate().take(n).skip(l - 1) {
U.add_element_mut(j, k, s * (*rv1_k));
}
}
@@ -365,11 +365,11 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
inc /= 3;
for i in inc..n {
let sw = w[i];
for k in 0..m {
su[k] = U.get(k, i);
for (k, su_k) in su.iter_mut().enumerate().take(m) {
*su_k = U.get(k, i);
}
for k in 0..n {
sv[k] = v.get(k, i);
for (k, sv_k) in sv.iter_mut().enumerate().take(n) {
*sv_k = v.get(k, i);
}
let mut j = i;
while w[j - inc] < sw {
@@ -386,11 +386,11 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
}
}
w[j] = sw;
for k in 0..m {
U.set(k, j, su[k]);
for (k, su_k) in su.iter().enumerate().take(m) {
U.set(k, j, *su_k);
}
for k in 0..n {
v.set(k, j, sv[k]);
for (k, sv_k) in sv.iter().enumerate().take(n) {
v.set(k, j, *sv_k);
}
}
if inc <= 1 {
@@ -454,7 +454,7 @@ impl<T: RealNumber, M: SVDDecomposableMatrix<T>> SVD<T, M> {
for k in 0..p {
let mut tmp = vec![T::zero(); self.n];
for j in 0..self.n {
for (j, tmp_j) in tmp.iter_mut().enumerate().take(self.n) {
let mut r = T::zero();
if self.s[j] > self.tol {
for i in 0..self.m {
@@ -462,13 +462,13 @@ impl<T: RealNumber, M: SVDDecomposableMatrix<T>> SVD<T, M> {
}
r /= self.s[j];
}
tmp[j] = r;
*tmp_j = r;
}
for j in 0..self.n {
let mut r = T::zero();
for jj in 0..self.n {
r += self.V.get(j, jj) * tmp[jj];
for (jj, tmp_jj) in tmp.iter().enumerate().take(self.n) {
r += self.V.get(j, jj) * (*tmp_jj);
}
b.set(j, k, r);
}
+146
View File
@@ -0,0 +1,146 @@
//! This is a generic solver for Ax = b type of equation
//!
//! for more information take a look at [this Wikipedia article](https://en.wikipedia.org/wiki/Biconjugate_gradient_method)
//! and [this paper](https://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf)
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
/// Generic iterative solver for linear systems Ax = b based on the
/// biconjugate gradient (BiCG) method with a diagonal (Jacobi) preconditioner.
/// Implementors may override `mat_vec_mul` / `mat_t_vec_mul` to exploit
/// matrix structure (e.g. sparsity) without forming A explicitly.
pub trait BiconjugateGradientSolver<T: RealNumber, M: Matrix<T>> {
    /// Solves Ax = b iteratively, writing the solution into `x`.
    ///
    /// * `a` - coefficient matrix A.
    /// * `b` - right-hand side, an n x 1 column vector.
    /// * `x` - initial guess on entry, solution on exit.
    /// * `tol` - convergence tolerance on the relative residual; must be > 0.
    /// * `max_iter` - iteration budget; must be > 0.
    ///
    /// Returns the final relative residual `|r| / |b|`, or `Failed` when the
    /// parameters are invalid.
    fn solve_mut(&self, a: &M, b: &M, x: &mut M, tol: T, max_iter: usize) -> Result<T, Failed> {
        if tol <= T::zero() {
            return Err(Failed::fit("tolerance should be > 0"));
        }
        if max_iter == 0 {
            return Err(Failed::fit("maximum number of iterations should be > 0"));
        }
        let (n, _) = b.shape();

        // r: residual, rr: shadow residual, z/zz: preconditioned (shadow) residuals.
        let mut r = M::zeros(n, 1);
        let mut rr = M::zeros(n, 1);
        let mut z = M::zeros(n, 1);
        let mut zz = M::zeros(n, 1);

        // r = b - A*x; the shadow residual starts equal to r.
        self.mat_vec_mul(a, x, &mut r);
        for j in 0..n {
            r.set(j, 0, b.get(j, 0) - r.get(j, 0));
            rr.set(j, 0, r.get(j, 0));
        }

        let bnrm = b.norm(T::two());
        self.solve_preconditioner(a, &r, &mut z);

        // p/pp: search directions for the primary and shadow systems.
        let mut p = M::zeros(n, 1);
        let mut pp = M::zeros(n, 1);
        let mut bkden = T::zero();

        let mut err = T::zero();
        // NOTE(review): `1..max_iter` runs at most max_iter - 1 iterations
        // (upper bound exclusive) — confirm whether `1..=max_iter` was intended.
        for iter in 1..max_iter {
            let mut bknum = T::zero();

            // With a diagonal preconditioner, solving the transposed
            // preconditioner system is identical to solving the plain one.
            self.solve_preconditioner(a, &rr, &mut zz);
            for j in 0..n {
                bknum += z.get(j, 0) * rr.get(j, 0);
            }
            if iter == 1 {
                // First step: directions are the preconditioned residuals.
                for j in 0..n {
                    p.set(j, 0, z.get(j, 0));
                    pp.set(j, 0, zz.get(j, 0));
                }
            } else {
                // Subsequent steps: conjugate update of both directions.
                let bk = bknum / bkden;
                for j in 0..n {
                    p.set(j, 0, bk * p.get(j, 0) + z.get(j, 0));
                    pp.set(j, 0, bk * pp.get(j, 0) + zz.get(j, 0));
                }
            }

            bkden = bknum;

            // Step length ak = (z . rr) / (A*p . pp).
            self.mat_vec_mul(a, &p, &mut z);
            let mut akden = T::zero();
            for j in 0..n {
                akden += z.get(j, 0) * pp.get(j, 0);
            }

            let ak = bknum / akden;
            self.mat_t_vec_mul(a, &pp, &mut zz);

            // Advance the solution and both residuals.
            for j in 0..n {
                x.set(j, 0, x.get(j, 0) + ak * p.get(j, 0));
                r.set(j, 0, r.get(j, 0) - ak * z.get(j, 0));
                rr.set(j, 0, rr.get(j, 0) - ak * zz.get(j, 0));
            }

            self.solve_preconditioner(a, &r, &mut z);

            // Convergence test on the relative residual.
            err = r.norm(T::two()) / bnrm;

            if err <= tol {
                break;
            }
        }

        Ok(err)
    }

    /// Applies the Jacobi preconditioner: x[i] = b[i] / A[i][i],
    /// falling back to x[i] = b[i] when the diagonal entry is zero.
    fn solve_preconditioner(&self, a: &M, b: &M, x: &mut M) {
        let diag = Self::diag(a);
        let n = diag.len();

        for (i, diag_i) in diag.iter().enumerate().take(n) {
            if *diag_i != T::zero() {
                x.set(i, 0, b.get(i, 0) / *diag_i);
            } else {
                x.set(i, 0, b.get(i, 0));
            }
        }
    }

    // y = Ax
    fn mat_vec_mul(&self, a: &M, x: &M, y: &mut M) {
        y.copy_from(&a.matmul(x));
    }

    // y = Atx
    fn mat_t_vec_mul(&self, a: &M, x: &M, y: &mut M) {
        y.copy_from(&a.ab(true, x, false));
    }

    /// Returns the main diagonal of `a` as a vector of length min(nrows, ncols).
    fn diag(a: &M) -> Vec<T> {
        let (nrows, ncols) = a.shape();
        let n = nrows.min(ncols);

        let mut d = Vec::with_capacity(n);
        for i in 0..n {
            d.push(a.get(i, i));
        }

        d
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
// Minimal solver: relies entirely on the trait's default methods.
pub struct BGSolver {}
impl<T: RealNumber, M: Matrix<T>> BiconjugateGradientSolver<T, M> for BGSolver {}
#[test]
fn bg_solver() {
// Symmetric 3x3 system with exact solution x = [1, 2, 3]:
// 25*1 + 15*2 - 5*3 = 40, 15*1 + 18*2 = 51, -5*1 + 11*3 = 28.
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]);
let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]);
// x starts at zero; solve_mut expects column vectors, hence the transposes.
let mut x = DenseMatrix::zeros(3, 1);
let solver = BGSolver {};
let err: f64 = solver
.solve_mut(&a, &b.transpose(), &mut x, 1e-6, 6)
.unwrap();
assert!(x.transpose().approximate_eq(&expected, 1e-4));
// The reported relative residual should be essentially zero at convergence.
assert!((err - 0.0).abs() < 1e-4);
}
}
+437
View File
@@ -0,0 +1,437 @@
#![allow(clippy::needless_range_loop)]
//! # Elastic Net
//!
//! Elastic net is an extension of [linear regression](../linear_regression/index.html) that adds regularization penalties to the loss function during training.
//! Just like in ordinary linear regression you assume a linear relationship between input variables and the target variable.
//! Unlike linear regression elastic net adds regularization penalties to the loss function during training.
//! In particular, the elastic net coefficient estimates \\(\beta\\) are the values that minimize
//!
//! \\[L(\alpha, \beta) = \vert \boldsymbol{y} - \boldsymbol{X}\beta\vert^2 + \lambda_1 \vert \beta \vert^2 + \lambda_2 \vert \beta \vert_1\\]
//!
//! where \\(\lambda_1 = \\alpha l_{1r}\\), \\(\lambda_2 = \\alpha (1 - l_{1r})\\) and \\(l_{1r}\\) is the l1 ratio, elastic net mixing parameter.
//!
//! In essence, elastic net combines both the [L1](../lasso/index.html) and [L2](../ridge_regression/index.html) penalties during training,
//! which can result in better performance than a model with either one or the other penalty on some problems.
//! The elastic net is particularly useful when the number of predictors (p) is much bigger than the number of observations (n).
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::linear::elastic_net::*;
//!
//! // Longley dataset (https://www.statsmodels.org/stable/datasets/generated/longley.html)
//! let x = DenseMatrix::from_2d_array(&[
//! &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
//! &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
//! &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
//! &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
//! &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
//! &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
//! &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
//! &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
//! &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
//! &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
//! &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
//! &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
//! &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
//! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
//! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
//! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
//! ]);
//!
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
//!
//! let y_hat = ElasticNet::fit(&x, &y, Default::default()).
//! and_then(|lr| lr.predict(&x)).unwrap();
//! ```
//!
//! ## References:
//!
//! * ["An Introduction to Statistical Learning", James G., Witten D., Hastie T., Tibshirani R., 6.2. Shrinkage Methods](http://faculty.marshall.usc.edu/gareth-james/ISL/)
//! * ["Regularization and variable selection via the elastic net", Hui Zou and Trevor Hastie](https://web.stanford.edu/~hastie/Papers/B67.2%20(2005)%20301-320%20Zou%20&%20Hastie.pdf)
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::linear::lasso_optimizer::InteriorPointOptimizer;
/// Elastic net parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct ElasticNetParameters<T: RealNumber> {
/// Regularization parameter (default: 1).
pub alpha: T,
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1 (default: 0.5).
/// For l1_ratio = 0 the penalty is an L2 penalty.
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
pub l1_ratio: T,
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation (default: true).
pub normalize: bool,
/// The tolerance for the optimization (default: 1e-4).
pub tol: T,
/// The maximum number of iterations (default: 1000).
pub max_iter: usize,
}
/// Elastic net
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
/// Fitted coefficient vector, one entry per feature.
coefficients: M,
/// Fitted intercept term.
intercept: T,
}
impl<T: RealNumber> ElasticNetParameters<T> {
    /// Sets the regularization strength `alpha`.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Sets the elastic net mixing parameter, with 0 <= l1_ratio <= 1.
    /// A value of 0 gives a pure L2 penalty and 1 a pure L1 penalty;
    /// anything in between mixes the two.
    pub fn with_l1_ratio(self, l1_ratio: T) -> Self {
        Self { l1_ratio, ..self }
    }
    /// Enables or disables normalization of the regressors X before fitting
    /// (subtract the mean and divide by the standard deviation).
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
    /// Sets the tolerance used by the optimizer.
    pub fn with_tol(self, tol: T) -> Self {
        Self { tol, ..self }
    }
    /// Sets the maximum number of optimizer iterations.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
}
// Default hyperparameters: alpha = 1, an even L1/L2 mix, input normalization
// enabled, and a 1e-4 tolerance over at most 1000 iterations.
impl<T: RealNumber> Default for ElasticNetParameters<T> {
fn default() -> Self {
ElasticNetParameters {
alpha: T::one(),
l1_ratio: T::half(),
normalize: true,
tol: T::from_f64(1e-4).unwrap(),
max_iter: 1000,
}
}
}
impl<T: RealNumber, M: Matrix<T>> PartialEq for ElasticNet<T, M> {
    /// Two fitted models are equal when their coefficient matrices match
    /// exactly and their intercepts differ by at most machine epsilon.
    fn eq(&self, other: &Self) -> bool {
        let intercepts_close = (self.intercept - other.intercept).abs() <= T::epsilon();
        intercepts_close && self.coefficients == other.coefficients
    }
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, ElasticNetParameters<T>>
    for ElasticNet<T, M>
{
    /// Trait adapter: delegates to [`ElasticNet::fit`].
    fn fit(x: &M, y: &M::RowVector, parameters: ElasticNetParameters<T>) -> Result<Self, Failed> {
        ElasticNet::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for ElasticNet<T, M> {
    /// Trait adapter: delegates to [`ElasticNet::predict`].
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> ElasticNet<T, M> {
    /// Fits elastic net regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - target values
    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
    pub fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: ElasticNetParameters<T>,
    ) -> Result<ElasticNet<T, M>, Failed> {
        let (n, p) = x.shape();
        // Validate hyperparameters up front, mirroring the checks done by `Lasso::fit`.
        if parameters.alpha < T::zero() {
            return Err(Failed::fit("alpha should be >= 0"));
        }
        // Out-of-range l1_ratio would make l2_reg negative and feed a negative
        // value to `sqrt` inside `augment_x_and_y`, producing NaNs.
        if parameters.l1_ratio < T::zero() || parameters.l1_ratio > T::one() {
            return Err(Failed::fit("l1_ratio should be in [0, 1]"));
        }
        if parameters.tol <= T::zero() {
            return Err(Failed::fit("tol should be > 0"));
        }
        if parameters.max_iter == 0 {
            return Err(Failed::fit("max_iter should be > 0"));
        }
        if y.len() != n {
            return Err(Failed::fit("Number of rows in X should = len(y)"));
        }
        let n_float = T::from_usize(n).unwrap();
        // Split the combined penalty into its L1 and L2 parts, scaled by n.
        let l1_reg = parameters.alpha * parameters.l1_ratio * n_float;
        let l2_reg = parameters.alpha * (T::one() - parameters.l1_ratio) * n_float;
        let y_mean = y.mean();
        let (w, b) = if parameters.normalize {
            let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;
            // The L2 part is folded into an augmented least-squares problem,
            // reducing the elastic net to a lasso problem that the
            // interior-point optimizer can solve.
            let (x, y, gamma) = Self::augment_x_and_y(&scaled_x, y, l2_reg);
            let mut optimizer = InteriorPointOptimizer::new(&x, p);
            let mut w =
                optimizer.optimize(&x, &y, l1_reg * gamma, parameters.max_iter, parameters.tol)?;
            // Undo both the augmentation scaling (gamma) and the column rescaling.
            for (i, col_std_i) in col_std.iter().enumerate().take(p) {
                w.set(i, 0, gamma * w.get(i, 0) / *col_std_i);
            }
            // Recover the intercept in the original (unscaled) feature space.
            let mut b = T::zero();
            for (i, col_mean_i) in col_mean.iter().enumerate().take(p) {
                b += w.get(i, 0) * *col_mean_i;
            }
            b = y_mean - b;
            (w, b)
        } else {
            let (x, y, gamma) = Self::augment_x_and_y(x, y, l2_reg);
            let mut optimizer = InteriorPointOptimizer::new(&x, p);
            let mut w =
                optimizer.optimize(&x, &y, l1_reg * gamma, parameters.max_iter, parameters.tol)?;
            for i in 0..p {
                w.set(i, 0, gamma * w.get(i, 0));
            }
            (w, y_mean)
        };
        Ok(ElasticNet {
            intercept: b,
            coefficients: w,
        })
    }
    /// Predict target values from `x`
    /// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
    pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        let (nrows, _) = x.shape();
        let mut y_hat = x.matmul(&self.coefficients);
        y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
        Ok(y_hat.transpose().to_row_vector())
    }
    /// Get estimates regression coefficients
    pub fn coefficients(&self) -> &M {
        &self.coefficients
    }
    /// Get estimate of intercept
    pub fn intercept(&self) -> T {
        self.intercept
    }
    /// Standardizes the columns of `x` to zero mean and unit standard deviation.
    /// Returns the scaled matrix together with the per-column means and
    /// standard deviations. Fails if any column is constant (zero std).
    fn rescale_x(x: &M) -> Result<(M, Vec<T>, Vec<T>), Failed> {
        let col_mean = x.mean(0);
        let col_std = x.std(0);
        for (i, col_std_i) in col_std.iter().enumerate() {
            if (*col_std_i - T::zero()).abs() < T::epsilon() {
                return Err(Failed::fit(&format!(
                    "Cannot rescale constant column {}",
                    i
                )));
            }
        }
        let mut scaled_x = x.clone();
        scaled_x.scale_mut(&col_mean, &col_std, 0);
        Ok((scaled_x, col_mean, col_std))
    }
    /// Rewrites the elastic net problem as an equivalent lasso problem by
    /// appending `p` synthetic rows that encode the L2 penalty: rows of `x`
    /// are scaled by `gamma = 1/sqrt(1 + l2_reg)` and a diagonal block of
    /// `gamma * sqrt(l2_reg)` is appended, with zeros padded onto `y`.
    fn augment_x_and_y(x: &M, y: &M::RowVector, l2_reg: T) -> (M, M::RowVector, T) {
        let (n, p) = x.shape();
        let gamma = T::one() / (T::one() + l2_reg).sqrt();
        let padding = gamma * l2_reg.sqrt();
        let mut y2 = M::RowVector::zeros(n + p);
        for i in 0..y.len() {
            y2.set(i, y.get(i));
        }
        let mut x2 = M::zeros(n + p, p);
        for j in 0..p {
            for i in 0..n {
                x2.set(i, j, gamma * x.get(i, j));
            }
            x2.set(j + n, j, padding);
        }
        (x2, y2, gamma)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;
    use crate::metrics::mean_absolute_error;
    // Fits the Longley macroeconomic dataset (16 observations, 6 predictors)
    // without normalization and checks the in-sample fit is in the right ballpark.
    #[test]
    fn elasticnet_longley() {
        let x = DenseMatrix::from_2d_array(&[
            &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
            &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
            &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
            &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
            &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
            &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
            &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
            &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
        let y: Vec<f64> = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];
        let y_hat = ElasticNet::fit(
            &x,
            &y,
            ElasticNetParameters {
                alpha: 1.0,
                l1_ratio: 0.5,
                normalize: false,
                tol: 1e-4,
                max_iter: 1000,
            },
        )
        .and_then(|lr| lr.predict(&x))
        .unwrap();
        // Loose bound: only checks the fit converged to something sensible.
        assert!(mean_absolute_error(&y_hat, &y) < 30.0);
    }
    // Fits a pure-L1 model (l1_ratio = 1) and a pure-L2 model (l1_ratio = 0)
    // on the same data; both should fit well, and the L1 model's first
    // coefficient is expected to dominate the other two.
    #[test]
    fn elasticnet_fit_predict1() {
        let x = DenseMatrix::from_2d_array(&[
            &[0.0, 1931.0, 1.2232755825400514],
            &[1.0, 1933.0, 1.1379726120972395],
            &[2.0, 1920.0, 1.4366265120543429],
            &[3.0, 1918.0, 1.206005737827858],
            &[4.0, 1934.0, 1.436613542400669],
            &[5.0, 1918.0, 1.1594588621640636],
            &[6.0, 1933.0, 1.19809994745985],
            &[7.0, 1918.0, 1.3396363871645678],
            &[8.0, 1931.0, 1.2535342096493207],
            &[9.0, 1933.0, 1.3101281563456293],
            &[10.0, 1922.0, 1.3585833349920762],
            &[11.0, 1930.0, 1.4830786699709897],
            &[12.0, 1916.0, 1.4919891143094546],
            &[13.0, 1915.0, 1.259655137451551],
            &[14.0, 1932.0, 1.3979191428724789],
            &[15.0, 1917.0, 1.3686634746782371],
            &[16.0, 1932.0, 1.381658454569724],
            &[17.0, 1918.0, 1.4054969025700674],
            &[18.0, 1929.0, 1.3271699396384906],
            &[19.0, 1915.0, 1.1373332337674806],
        ]);
        let y: Vec<f64> = vec![
            1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42,
            10.2, 7.92, 7.62, 8.06, 9.06, 9.29,
        ];
        let l1_model = ElasticNet::fit(
            &x,
            &y,
            ElasticNetParameters {
                alpha: 1.0,
                l1_ratio: 1.0,
                normalize: true,
                tol: 1e-4,
                max_iter: 1000,
            },
        )
        .unwrap();
        let l2_model = ElasticNet::fit(
            &x,
            &y,
            ElasticNetParameters {
                alpha: 1.0,
                l1_ratio: 0.0,
                normalize: true,
                tol: 1e-4,
                max_iter: 1000,
            },
        )
        .unwrap();
        let mae_l1 = mean_absolute_error(&l1_model.predict(&x).unwrap(), &y);
        let mae_l2 = mean_absolute_error(&l2_model.predict(&x).unwrap(), &y);
        assert!(mae_l1 < 2.0);
        assert!(mae_l2 < 2.0);
        // The L1 fit is expected to put most weight on the first feature.
        assert!(l1_model.coefficients().get(0, 0) > l1_model.coefficients().get(1, 0));
        assert!(l1_model.coefficients().get(0, 0) > l1_model.coefficients().get(2, 0));
    }
    // Round-trips a fitted model through serde_json and checks equality.
    #[test]
    #[cfg(feature = "serde")]
    fn serde() {
        let x = DenseMatrix::from_2d_array(&[
            &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
            &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
            &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
            &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
            &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
            &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
            &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
            &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
        let y = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];
        let lr = ElasticNet::fit(&x, &y, Default::default()).unwrap();
        let deserialized_lr: ElasticNet<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
        assert_eq!(lr, deserialized_lr);
    }
}
+311
View File
@@ -0,0 +1,311 @@
//! # Lasso
//!
//! [Linear regression](../linear_regression/index.html) is the standard algorithm for predicting a quantitative response \\(y\\) on the basis of a linear combination of explanatory variables \\(X\\)
//! that assumes that there is approximately a linear relationship between \\(X\\) and \\(y\\).
//! Lasso is an extension to linear regression that adds L1 regularization term to the loss function during training.
//!
//! Similar to [ridge regression](../ridge_regression/index.html), the lasso shrinks the coefficient estimates towards zero. However, in the case of the lasso, the l1 penalty has the effect of
//! forcing some of the coefficient estimates to be exactly equal to zero when the tuning parameter \\(\alpha\\) is sufficiently large.
//!
//! Lasso coefficient estimates solve the problem:
//!
//! \\[\underset{\beta}{minimize} \space \space \sum_{i=1}^n \left( y_i - \beta_0 - \sum_{j=1}^p \beta_jx_{ij} \right)^2 + \alpha \sum_{j=1}^p \lVert \beta_j \rVert_1\\]
//!
//! This problem is solved with an interior-point method that is comparable to coordinate descent in solving large problems with modest accuracy,
//! but is able to solve them with high accuracy with relatively small additional computational cost.
//!
//! ## References:
//!
//! * ["An Introduction to Statistical Learning", James G., Witten D., Hastie T., Tibshirani R., 6.2. Shrinkage Methods](http://faculty.marshall.usc.edu/gareth-james/ISL/)
//! * ["An Interior-Point Method for Large-Scale l1-Regularized Least Squares", K. Koh, M. Lustig, S. Boyd, D. Gorinevsky](https://web.stanford.edu/~boyd/papers/pdf/l1_ls.pdf)
//! * [Simple Matlab Solver for l1-regularized Least Squares Problems](https://web.stanford.edu/~boyd/l1_ls/)
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::linear::lasso_optimizer::InteriorPointOptimizer;
use crate::math::num::RealNumber;
/// Lasso regression parameters
///
/// Defaults (via [`Default`]): `alpha = 1`, `normalize = true`,
/// `tol = 1e-4`, `max_iter = 1000`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LassoParameters<T: RealNumber> {
    /// Controls the strength of the penalty to the loss function.
    pub alpha: T,
    /// If true the regressors X will be normalized before regression
    /// by subtracting the mean and dividing by the standard deviation.
    pub normalize: bool,
    /// The tolerance for the optimization
    pub tol: T,
    /// The maximum number of iterations
    pub max_iter: usize,
}
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Lasso regressor
///
/// A fitted lasso model produced by [`Lasso::fit`].
pub struct Lasso<T: RealNumber, M: Matrix<T>> {
    /// Estimated regression coefficients, a _px1_ column matrix (one row per feature).
    coefficients: M,
    /// Estimated intercept (bias) term.
    intercept: T,
}
impl<T: RealNumber> LassoParameters<T> {
    /// Sets the regularization strength and returns the updated parameters.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Enables or disables standardization of the regressors X (subtract the
    /// column mean and divide by the column standard deviation) before fitting.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
    /// Sets the tolerance for the optimization.
    pub fn with_tol(self, tol: T) -> Self {
        Self { tol, ..self }
    }
    /// Sets the maximum number of iterations.
    pub fn with_max_iter(self, max_iter: usize) -> Self {
        Self { max_iter, ..self }
    }
}
impl<T: RealNumber> Default for LassoParameters<T> {
    /// Default parameters: `alpha = 1`, `normalize = true`,
    /// `tol = 1e-4`, `max_iter = 1000`.
    fn default() -> Self {
        Self {
            alpha: T::one(),
            normalize: true,
            tol: T::from_f64(1e-4).unwrap(),
            max_iter: 1000,
        }
    }
}
impl<T: RealNumber, M: Matrix<T>> PartialEq for Lasso<T, M> {
    /// Two fitted models are equal when their coefficient matrices match
    /// exactly and their intercepts differ by at most machine epsilon.
    fn eq(&self, other: &Self) -> bool {
        let intercepts_close = (self.intercept - other.intercept).abs() <= T::epsilon();
        intercepts_close && self.coefficients == other.coefficients
    }
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, LassoParameters<T>>
    for Lasso<T, M>
{
    /// Trait adapter: delegates to [`Lasso::fit`].
    fn fit(x: &M, y: &M::RowVector, parameters: LassoParameters<T>) -> Result<Self, Failed> {
        Lasso::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
    /// Trait adapter: delegates to [`Lasso::predict`].
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> Lasso<T, M> {
    /// Fits Lasso regression to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `y` - target values
    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
    pub fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: LassoParameters<T>,
    ) -> Result<Lasso<T, M>, Failed> {
        let (n, p) = x.shape();
        // The check rejects n <= p, i.e. strictly more rows than columns are
        // required; the message now states that requirement correctly.
        if n <= p {
            return Err(Failed::fit(
                "Number of rows in X should be greater than number of columns in X",
            ));
        }
        if parameters.alpha < T::zero() {
            return Err(Failed::fit("alpha should be >= 0"));
        }
        if parameters.tol <= T::zero() {
            return Err(Failed::fit("tol should be > 0"));
        }
        if parameters.max_iter == 0 {
            return Err(Failed::fit("max_iter should be > 0"));
        }
        if y.len() != n {
            return Err(Failed::fit("Number of rows in X should = len(y)"));
        }
        // Scale the penalty by n so it matches the unnormalized squared loss.
        let l1_reg = parameters.alpha * T::from_usize(n).unwrap();
        let (w, b) = if parameters.normalize {
            let (scaled_x, col_mean, col_std) = Self::rescale_x(x)?;
            let mut optimizer = InteriorPointOptimizer::new(&scaled_x, p);
            let mut w =
                optimizer.optimize(&scaled_x, y, l1_reg, parameters.max_iter, parameters.tol)?;
            // Map coefficients back to the original (unscaled) feature space.
            for (j, col_std_j) in col_std.iter().enumerate().take(p) {
                w.set(j, 0, w.get(j, 0) / *col_std_j);
            }
            // Recover the intercept from the column means.
            let mut b = T::zero();
            for (i, col_mean_i) in col_mean.iter().enumerate().take(p) {
                b += w.get(i, 0) * *col_mean_i;
            }
            b = y.mean() - b;
            (w, b)
        } else {
            let mut optimizer = InteriorPointOptimizer::new(x, p);
            let w = optimizer.optimize(x, y, l1_reg, parameters.max_iter, parameters.tol)?;
            (w, y.mean())
        };
        Ok(Lasso {
            intercept: b,
            coefficients: w,
        })
    }
    /// Predict target values from `x`
    /// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
    pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        let (nrows, _) = x.shape();
        let mut y_hat = x.matmul(&self.coefficients);
        y_hat.add_mut(&M::fill(nrows, 1, self.intercept));
        Ok(y_hat.transpose().to_row_vector())
    }
    /// Get estimates regression coefficients
    pub fn coefficients(&self) -> &M {
        &self.coefficients
    }
    /// Get estimate of intercept
    pub fn intercept(&self) -> T {
        self.intercept
    }
    /// Standardizes the columns of `x` to zero mean and unit standard deviation.
    /// Returns the scaled matrix together with the per-column means and
    /// standard deviations. Fails if any column is constant (zero std).
    fn rescale_x(x: &M) -> Result<(M, Vec<T>, Vec<T>), Failed> {
        let col_mean = x.mean(0);
        let col_std = x.std(0);
        for (i, col_std_i) in col_std.iter().enumerate() {
            if (*col_std_i - T::zero()).abs() < T::epsilon() {
                return Err(Failed::fit(&format!(
                    "Cannot rescale constant column {}",
                    i
                )));
            }
        }
        let mut scaled_x = x.clone();
        scaled_x.scale_mut(&col_mean, &col_std, 0);
        Ok((scaled_x, col_mean, col_std))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;
    use crate::metrics::mean_absolute_error;
    // Fits the Longley macroeconomic dataset with both default (normalized)
    // and unnormalized parameters; both fits should be close in-sample.
    #[test]
    fn lasso_fit_predict() {
        let x = DenseMatrix::from_2d_array(&[
            &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
            &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
            &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
            &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
            &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
            &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
            &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
            &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
        let y: Vec<f64> = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];
        let y_hat = Lasso::fit(&x, &y, Default::default())
            .and_then(|lr| lr.predict(&x))
            .unwrap();
        assert!(mean_absolute_error(&y_hat, &y) < 2.0);
        // Same data, weaker penalty, no normalization.
        let y_hat = Lasso::fit(
            &x,
            &y,
            LassoParameters {
                alpha: 0.1,
                normalize: false,
                tol: 1e-4,
                max_iter: 1000,
            },
        )
        .and_then(|lr| lr.predict(&x))
        .unwrap();
        assert!(mean_absolute_error(&y_hat, &y) < 2.0);
    }
    // Round-trips a fitted model through serde_json and checks equality.
    #[test]
    #[cfg(feature = "serde")]
    fn serde() {
        let x = DenseMatrix::from_2d_array(&[
            &[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
            &[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
            &[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
            &[284.599, 335.1, 165.0, 110.929, 1950., 61.187],
            &[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[346.999, 193.2, 359.4, 113.270, 1952., 63.639],
            &[365.385, 187.0, 354.7, 115.094, 1953., 64.989],
            &[363.112, 357.8, 335.0, 116.219, 1954., 63.761],
            &[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[419.180, 282.2, 285.7, 118.734, 1956., 67.857],
            &[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[444.546, 468.1, 263.7, 121.950, 1958., 66.513],
            &[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
        ]);
        let y = vec![
            83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
            114.2, 115.7, 116.9,
        ];
        let lr = Lasso::fit(&x, &y, Default::default()).unwrap();
        let deserialized_lr: Lasso<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
        assert_eq!(lr, deserialized_lr);
    }
}
+255
View File
@@ -0,0 +1,255 @@
//! An Interior-Point Method for Large-Scale l1-Regularized Least Squares
//!
//! This is a specialized interior-point method for solving large-scale l1-regularized LSPs that uses the
//! preconditioned conjugate gradients algorithm to compute the search direction.
//!
//! The interior-point method can solve large sparse problems, with a million variables and observations, in a few tens of minutes on a PC.
//! It can efficiently solve large dense problems, that arise in sparse signal recovery with orthogonal transforms, by exploiting fast algorithms for these transforms.
//!
//! ## References:
//! * ["An Interior-Point Method for Large-Scale l1-Regularized Least Squares", K. Koh, M. Lustig, S. Boyd, D. Gorinevsky](https://web.stanford.edu/~boyd/papers/pdf/l1_ls.pdf)
//! * [Simple Matlab Solver for l1-regularized Least Squares Problems](https://web.stanford.edu/~boyd/l1_ls/)
//!
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::linear::bg_solver::BiconjugateGradientSolver;
use crate::math::num::RealNumber;
/// Interior-point solver state for l1-regularized least squares.
///
/// `ata` caches the Gram-matrix product computed once in `new`
/// (presumably `A^T * A` given the `ab(true, a, false)` call — confirm
/// against the `Matrix::ab` contract). The vectors `d1`, `d2`, `prb`, `prs`
/// are per-column diagonal/preconditioner data rebuilt on every Newton
/// iteration of `optimize`.
pub struct InteriorPointOptimizer<T: RealNumber, M: Matrix<T>> {
    ata: M,
    d1: Vec<T>,
    d2: Vec<T>,
    prb: Vec<T>,
    prs: Vec<T>,
}
impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
    /// Precomputes the Gram matrix of `a` and allocates the per-column
    /// workspace for an `n`-column problem.
    pub fn new(a: &M, n: usize) -> InteriorPointOptimizer<T, M> {
        InteriorPointOptimizer {
            ata: a.ab(true, a, false),
            d1: vec![T::zero(); n],
            d2: vec![T::zero(); n],
            prb: vec![T::zero(); n],
            prs: vec![T::zero(); n],
        }
    }
    /// Solves `min ||x*w - y||^2 + lambda * ||w||_1` with a truncated-Newton
    /// interior-point method (Koh, Lustig, Boyd, Gorinevsky). Returns the
    /// `p x 1` coefficient matrix, or an error if the backtracking line
    /// search fails to make progress.
    pub fn optimize(
        &mut self,
        x: &M,
        y: &M::RowVector,
        lambda: T,
        max_iter: usize,
        tol: T,
    ) -> Result<M, Failed> {
        let (n, p) = x.shape();
        let p_f64 = T::from_usize(p).unwrap();
        let lambda = lambda.max(T::epsilon());
        // parameters of the interior-point method
        let pcgmaxi = 5000;
        let min_pcgtol = T::from_f64(0.1).unwrap();
        let eta = T::from_f64(1E-3).unwrap();
        let alpha = T::from_f64(0.01).unwrap();
        let beta = T::from_f64(0.5).unwrap();
        let gamma = T::from_f64(-0.25).unwrap();
        let mu = T::two();
        // Center the targets and reshape into an n x 1 column matrix.
        let y = M::from_row_vector(y.sub_scalar(y.mean())).transpose();
        // BUG FIX: `max_ls_iter` is the fixed line-search bound; the mutable
        // counter is `lsiter` below. Previously the bound itself was
        // incremented while the counter stayed at 0, so the loop could only
        // exit via `break` and the failure branch was unreachable.
        let max_ls_iter = 100;
        let mut pitr = 0;
        let mut w = M::zeros(p, 1);
        let mut neww = w.clone();
        let mut u = M::ones(p, 1);
        let mut newu = u.clone();
        // f holds the inequality constraints [w - u, -w - u]; feasible iff < 0.
        let mut f = M::fill(p, 2, -T::one());
        let mut newf = f.clone();
        let mut q1 = vec![T::zero(); p];
        let mut q2 = vec![T::zero(); p];
        let mut dx = M::zeros(p, 1);
        let mut du = M::zeros(p, 1);
        let mut dxu = M::zeros(2 * p, 1);
        let mut grad = M::zeros(2 * p, 1);
        let mut nu = M::zeros(n, 1);
        // NOTE(review): the dual objective accumulator starts at zero;
        // confirm against the reference l1_ls solver whether -inf is intended.
        let mut dobj = T::zero();
        let mut s = T::infinity();
        let mut t = T::one()
            .max(T::one() / lambda)
            .min(T::two() * p_f64 / T::from(1e-3).unwrap());
        for ntiter in 0..max_iter {
            // Residual z = x*w - y and dual point nu = 2*z.
            let mut z = x.matmul(&w);
            for i in 0..n {
                z.set(i, 0, z.get(i, 0) - y.get(i, 0));
                nu.set(i, 0, T::two() * z.get(i, 0));
            }
            // CALCULATE DUALITY GAP
            let xnu = x.ab(true, &nu, false);
            let max_xnu = xnu.norm(T::infinity());
            if max_xnu > lambda {
                // Scale nu back into the dual-feasible region.
                let lnu = lambda / max_xnu;
                nu.mul_scalar_mut(lnu);
            }
            let pobj = z.dot(&z) + lambda * w.norm(T::one());
            dobj = dobj.max(gamma * nu.dot(&nu) - nu.dot(&y));
            let gap = pobj - dobj;
            // STOPPING CRITERION
            if gap / dobj < tol {
                break;
            }
            // UPDATE t
            if s >= T::half() {
                t = t.max((T::two() * p_f64 * mu / gap).min(mu * t));
            }
            // CALCULATE NEWTON STEP
            for i in 0..p {
                let q1i = T::one() / (u.get(i, 0) + w.get(i, 0));
                let q2i = T::one() / (u.get(i, 0) - w.get(i, 0));
                q1[i] = q1i;
                q2[i] = q2i;
                self.d1[i] = (q1i * q1i + q2i * q2i) / t;
                self.d2[i] = (q1i * q1i - q2i * q2i) / t;
            }
            // Gradient of the barrier-augmented objective in (w, u).
            let mut gradphi = x.ab(true, &z, false);
            for i in 0..p {
                let g1 = T::two() * gradphi.get(i, 0) - (q1[i] - q2[i]) / t;
                let g2 = lambda - (q1[i] + q2[i]) / t;
                gradphi.set(i, 0, g1);
                grad.set(i, 0, -g1);
                grad.set(i + p, 0, -g2);
            }
            // Preconditioner diagonals for the PCG solve.
            for i in 0..p {
                self.prb[i] = T::two() + self.d1[i];
                self.prs[i] = self.prb[i] * self.d1[i] - self.d2[i].powi(2);
            }
            let normg = grad.norm2();
            let mut pcgtol = min_pcgtol.min(eta * gap / T::one().min(normg));
            if ntiter != 0 && pitr == 0 {
                pcgtol *= min_pcgtol;
            }
            let error = self.solve_mut(x, &grad, &mut dxu, pcgtol, pcgmaxi)?;
            if error > pcgtol {
                pitr = pcgmaxi;
            }
            for i in 0..p {
                dx.set(i, 0, dxu.get(i, 0));
                du.set(i, 0, dxu.get(i + p, 0));
            }
            // BACKTRACKING LINE SEARCH
            let phi = z.dot(&z) + lambda * u.sum() - Self::sumlogneg(&f) / t;
            s = T::one();
            let gdx = grad.dot(&dxu);
            let mut lsiter = 0;
            while lsiter < max_ls_iter {
                for i in 0..p {
                    neww.set(i, 0, w.get(i, 0) + s * dx.get(i, 0));
                    newu.set(i, 0, u.get(i, 0) + s * du.get(i, 0));
                    newf.set(i, 0, neww.get(i, 0) - newu.get(i, 0));
                    newf.set(i, 1, -neww.get(i, 0) - newu.get(i, 0));
                }
                // Accept the step only if it stays strictly feasible and
                // satisfies the Armijo sufficient-decrease condition.
                if newf.max() < T::zero() {
                    let mut newz = x.matmul(&neww);
                    for i in 0..n {
                        newz.set(i, 0, newz.get(i, 0) - y.get(i, 0));
                    }
                    let newphi = newz.dot(&newz) + lambda * newu.sum() - Self::sumlogneg(&newf) / t;
                    if newphi - phi <= alpha * s * gdx {
                        break;
                    }
                }
                s = beta * s;
                lsiter += 1;
            }
            if lsiter == max_ls_iter {
                return Err(Failed::fit(
                    "Exceeded maximum number of iteration for interior point optimizer",
                ));
            }
            w.copy_from(&neww);
            u.copy_from(&newu);
            f.copy_from(&newf);
        }
        Ok(w)
    }
    /// Sum of log(-f_ij) over the constraint matrix; the log-barrier term.
    fn sumlogneg(f: &M) -> T {
        let (n, _) = f.shape();
        let mut sum = T::zero();
        for i in 0..n {
            sum += (-f.get(i, 0)).ln();
            sum += (-f.get(i, 1)).ln();
        }
        sum
    }
}
// FIX: the lifetime parameter `'a` on this impl was not used by the trait,
// the self type, or any predicate — an unconstrained lifetime parameter
// (rustc error E0207). It is removed.
impl<T: RealNumber, M: Matrix<T>> BiconjugateGradientSolver<T, M>
    for InteriorPointOptimizer<T, M>
{
    /// Applies the inverse of the 2x2 block-diagonal preconditioner
    /// P = [[prb, d2], [d2, d1]] per column, using the precomputed
    /// determinants `prs[i] = prb[i]*d1[i] - d2[i]^2`.
    fn solve_preconditioner(&self, a: &M, b: &M, x: &mut M) {
        let (_, p) = a.shape();
        for i in 0..p {
            x.set(
                i,
                0,
                (self.d1[i] * b.get(i, 0) - self.d2[i] * b.get(i + p, 0)) / self.prs[i],
            );
            x.set(
                i + p,
                0,
                (-self.d2[i] * b.get(i, 0) + self.prb[i] * b.get(i + p, 0)) / self.prs[i],
            );
        }
    }
    /// Computes y = H * x where H is the (2p x 2p) Hessian-like operator
    /// [[2*A^T*A + D1, D2], [D2, D1]], using the cached Gram matrix.
    fn mat_vec_mul(&self, _: &M, x: &M, y: &mut M) {
        let (_, p) = self.ata.shape();
        let atax = self.ata.matmul(&x.slice(0..p, 0..1));
        for i in 0..p {
            y.set(
                i,
                0,
                T::two() * atax.get(i, 0) + self.d1[i] * x.get(i, 0) + self.d2[i] * x.get(i + p, 0),
            );
            y.set(
                i + p,
                0,
                self.d2[i] * x.get(i, 0) + self.d1[i] * x.get(i + p, 0),
            );
        }
    }
    /// The operator is symmetric, so the transposed product is identical.
    fn mat_t_vec_mul(&self, a: &M, x: &M, y: &mut M) {
        self.mat_vec_mul(a, x, y);
    }
}
+38 -6
View File
@@ -45,9 +45,9 @@
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
//!
//! let lr = LinearRegression::fit(&x, &y, LinearRegressionParameters {
//! solver: LinearRegressionSolverName::QR, // or SVD
//! }).unwrap();
//! let lr = LinearRegression::fit(&x, &y,
//! LinearRegressionParameters::default().
//! with_solver(LinearRegressionSolverName::QR)).unwrap();
//!
//! let y_hat = lr.predict(&x).unwrap();
//! ```
@@ -62,13 +62,16 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable.
pub enum LinearRegressionSolverName {
/// QR decomposition, see [QR](../../linalg/qr/index.html)
@@ -78,20 +81,30 @@ pub enum LinearRegressionSolverName {
}
/// Linear Regression parameters
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LinearRegressionParameters {
/// Solver to use for estimation of regression coefficients.
pub solver: LinearRegressionSolverName,
}
/// Linear Regression
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct LinearRegression<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: T,
solver: LinearRegressionSolverName,
}
impl LinearRegressionParameters {
/// Solver to use for estimation of regression coefficients.
pub fn with_solver(mut self, solver: LinearRegressionSolverName) -> Self {
self.solver = solver;
self
}
}
impl Default for LinearRegressionParameters {
fn default() -> Self {
LinearRegressionParameters {
@@ -107,6 +120,24 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for LinearRegression<T, M> {
}
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, LinearRegressionParameters>
for LinearRegression<T, M>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: LinearRegressionParameters,
) -> Result<Self, Failed> {
LinearRegression::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for LinearRegression<T, M> {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
/// Fits Linear Regression to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -220,6 +251,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+186 -25
View File
@@ -40,7 +40,7 @@
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
//! ];
//!
//! let lr = LogisticRegression::fit(&x, &y).unwrap();
//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
//!
//! let y_hat = lr.predict(&x).unwrap();
//! ```
@@ -54,10 +54,11 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::cmp::Ordering;
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
@@ -66,8 +67,27 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
use crate::optimization::line_search::Backtracking;
use crate::optimization::FunctionOrder;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Solver options for Logistic regression. Right now only LBFGS solver is supported.
pub enum LogisticRegressionSolverName {
/// Limited-memory Broyden–Fletcher–Goldfarb–Shanno method, see [LBFGS paper](http://users.iems.northwestern.edu/~nocedal/lbfgsb.html)
LBFGS,
}
/// Logistic Regression parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LogisticRegressionParameters<T: RealNumber> {
/// Solver to use for estimation of regression coefficients.
pub solver: LogisticRegressionSolverName,
/// Regularization parameter.
pub alpha: T,
}
/// Logistic Regression
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct LogisticRegression<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: M,
@@ -94,7 +114,29 @@ trait ObjectiveFunction<T: RealNumber, M: Matrix<T>> {
struct BinaryObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
x: &'a M,
y: Vec<usize>,
phantom: PhantomData<&'a T>,
alpha: T,
}
impl<T: RealNumber> LogisticRegressionParameters<T> {
/// Solver to use for estimation of regression coefficients.
pub fn with_solver(mut self, solver: LogisticRegressionSolverName) -> Self {
self.solver = solver;
self
}
/// Regularization parameter.
pub fn with_alpha(mut self, alpha: T) -> Self {
self.alpha = alpha;
self
}
}
impl<T: RealNumber> Default for LogisticRegressionParameters<T> {
fn default() -> Self {
LogisticRegressionParameters {
solver: LogisticRegressionSolverName::LBFGS,
alpha: T::zero(),
}
}
}
impl<T: RealNumber, M: Matrix<T>> PartialEq for LogisticRegression<T, M> {
@@ -121,13 +163,22 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
{
fn f(&self, w_bias: &M) -> T {
let mut f = T::zero();
let (n, _) = self.x.shape();
let (n, p) = self.x.shape();
for i in 0..n {
let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i);
f += wx.ln_1pe() - (T::from(self.y[i]).unwrap()) * wx;
}
if self.alpha > T::zero() {
let mut w_squared = T::zero();
for i in 0..p {
let w = w_bias.get(0, i);
w_squared += w * w;
}
f += T::half() * self.alpha * w_squared;
}
f
}
@@ -145,6 +196,13 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
}
g.set(0, p, g.get(0, p) - dyi);
}
if self.alpha > T::zero() {
for i in 0..p {
let w = w_bias.get(0, i);
g.set(0, i, g.get(0, i) + self.alpha * w);
}
}
}
}
@@ -152,7 +210,7 @@ struct MultiClassObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
x: &'a M,
y: Vec<usize>,
k: usize,
phantom: PhantomData<&'a T>,
alpha: T,
}
impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
@@ -174,6 +232,17 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
f -= prob.get(0, self.y[i]).ln();
}
if self.alpha > T::zero() {
let mut w_squared = T::zero();
for i in 0..self.k {
for j in 0..p {
let wi = w_bias.get(0, i * (p + 1) + j);
w_squared += wi * wi;
}
}
f += T::half() * self.alpha * w_squared;
}
f
}
@@ -204,6 +273,35 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
g.set(0, j * (p + 1) + p, g.get(0, j * (p + 1) + p) - yi);
}
}
if self.alpha > T::zero() {
for i in 0..self.k {
for j in 0..p {
let pos = i * (p + 1);
let wi = w.get(0, pos + j);
g.set(0, pos + j, g.get(0, pos + j) + self.alpha * wi);
}
}
}
}
}
impl<T: RealNumber, M: Matrix<T>>
SupervisedEstimator<M, M::RowVector, LogisticRegressionParameters<T>>
for LogisticRegression<T, M>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: LogisticRegressionParameters<T>,
) -> Result<Self, Failed> {
LogisticRegression::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for LogisticRegression<T, M> {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
@@ -211,7 +309,12 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
/// Fits Logistic Regression to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
/// * `y` - target class values
pub fn fit(x: &M, y: &M::RowVector) -> Result<LogisticRegression<T, M>, Failed> {
/// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
pub fn fit(
x: &M,
y: &M::RowVector,
parameters: LogisticRegressionParameters<T>,
) -> Result<LogisticRegression<T, M>, Failed> {
let y_m = M::from_row_vector(y.clone());
let (x_nrows, num_attributes) = x.shape();
let (_, y_nrows) = y_m.shape();
@@ -228,9 +331,9 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
let mut yi: Vec<usize> = vec![0; y_nrows];
for i in 0..y_nrows {
for (i, yi_i) in yi.iter_mut().enumerate().take(y_nrows) {
let yc = y_m.get(0, i);
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
*yi_i = classes.iter().position(|c| yc == *c).unwrap();
}
match k.cmp(&2) {
@@ -244,7 +347,7 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
let objective = BinaryObjectiveFunction {
x,
y: yi,
phantom: PhantomData,
alpha: parameters.alpha,
};
let result = LogisticRegression::minimize(x0, objective);
@@ -266,7 +369,7 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
x,
y: yi,
k,
phantom: PhantomData,
alpha: parameters.alpha,
};
let result = LogisticRegression::minimize(x0, objective);
@@ -289,13 +392,13 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
let n = x.shape().0;
let mut result = M::zeros(1, n);
if self.num_classes == 2 {
let y_hat: Vec<T> = x.matmul(&self.coefficients.transpose()).get_col_as_vec(0);
let y_hat: Vec<T> = x.ab(false, &self.coefficients, true).get_col_as_vec(0);
let intercept = self.intercept.get(0, 0);
for i in 0..n {
for (i, y_hat_i) in y_hat.iter().enumerate().take(n) {
result.set(
0,
i,
self.classes[if (y_hat[i] + intercept).sigmoid() > T::half() {
self.classes[if (*y_hat_i + intercept).sigmoid() > T::half() {
1
} else {
0
@@ -310,8 +413,8 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
}
}
let class_idxs = y_hat.argmax();
for i in 0..n {
result.set(0, i, self.classes[class_idxs[i]]);
for (i, class_i) in class_idxs.iter().enumerate().take(n) {
result.set(0, i, self.classes[*class_i]);
}
}
Ok(result.to_row_vector())
@@ -373,9 +476,9 @@ mod tests {
let objective = MultiClassObjectiveFunction {
x: &x,
y,
y: y.clone(),
k: 3,
phantom: PhantomData,
alpha: 0.0,
};
let mut g: DenseMatrix<f64> = DenseMatrix::zeros(1, 9);
@@ -396,6 +499,24 @@ mod tests {
]));
assert!((f - 408.0052230582765).abs() < std::f64::EPSILON);
let objective_reg = MultiClassObjectiveFunction {
x: &x,
y: y.clone(),
k: 3,
alpha: 1.0,
};
let f = objective_reg.f(&DenseMatrix::row_vector_from_array(&[
1., 2., 3., 4., 5., 6., 7., 8., 9.,
]));
assert!((f - 487.5052).abs() < 1e-4);
objective_reg.df(
&mut g,
&DenseMatrix::row_vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]),
);
assert!((g.get(0, 0).abs() - 32.0).abs() < 1e-4);
}
#[test]
@@ -422,8 +543,8 @@ mod tests {
let objective = BinaryObjectiveFunction {
x: &x,
y,
phantom: PhantomData,
y: y.clone(),
alpha: 0.0,
};
let mut g: DenseMatrix<f64> = DenseMatrix::zeros(1, 3);
@@ -438,6 +559,20 @@ mod tests {
let f = objective.f(&DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
assert!((f - 59.76994756647412).abs() < std::f64::EPSILON);
let objective_reg = BinaryObjectiveFunction {
x: &x,
y: y.clone(),
alpha: 1.0,
};
let f = objective_reg.f(&DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
assert!((f - 62.2699).abs() < 1e-4);
objective_reg.df(&mut g, &DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
assert!((g.get(0, 0) - 27.0511).abs() < 1e-4);
assert!((g.get(0, 1) - 12.239).abs() < 1e-4);
assert!((g.get(0, 2) - 3.8693).abs() < 1e-4);
}
#[test]
@@ -461,7 +596,7 @@ mod tests {
]);
let y: Vec<f64> = vec![0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.];
let lr = LogisticRegression::fit(&x, &y).unwrap();
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
assert_eq!(lr.coefficients().shape(), (3, 2));
assert_eq!(lr.intercept().shape(), (3, 1));
@@ -484,11 +619,20 @@ mod tests {
let x = DenseMatrix::from_vec(15, 4, &blobs.data);
let y = blobs.target;
let lr = LogisticRegression::fit(&x, &y).unwrap();
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let y_hat = lr.predict(&x).unwrap();
assert!(accuracy(&y_hat, &y) > 0.9);
let lr_reg = LogisticRegression::fit(
&x,
&y,
LogisticRegressionParameters::default().with_alpha(10.0),
)
.unwrap();
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
}
#[test]
@@ -498,14 +642,24 @@ mod tests {
let x = DenseMatrix::from_vec(20, 4, &blobs.data);
let y = blobs.target;
let lr = LogisticRegression::fit(&x, &y).unwrap();
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let y_hat = lr.predict(&x).unwrap();
assert!(accuracy(&y_hat, &y) > 0.9);
let lr_reg = LogisticRegression::fit(
&x,
&y,
LogisticRegressionParameters::default().with_alpha(10.0),
)
.unwrap();
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[1., -5.],
@@ -526,7 +680,7 @@ mod tests {
]);
let y: Vec<f64> = vec![0., 0., 1., 1., 2., 1., 1., 0., 0., 2., 1., 1., 0., 0., 1.];
let lr = LogisticRegression::fit(&x, &y).unwrap();
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let deserialized_lr: LogisticRegression<f64, DenseMatrix<f64>> =
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();
@@ -562,7 +716,13 @@ mod tests {
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let lr = LogisticRegression::fit(&x, &y).unwrap();
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let lr_reg = LogisticRegression::fit(
&x,
&y,
LogisticRegressionParameters::default().with_alpha(1.0),
)
.unwrap();
let y_hat = lr.predict(&x).unwrap();
@@ -573,5 +733,6 @@ mod tests {
.sum();
assert!(error <= 1.0);
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
}
}
+4
View File
@@ -20,6 +20,10 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
pub(crate) mod bg_solver;
pub mod elastic_net;
pub mod lasso;
pub(crate) mod lasso_optimizer;
pub mod linear_regression;
pub mod logistic_regression;
pub mod ridge_regression;
+53 -14
View File
@@ -45,11 +45,8 @@
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
//!
//! let y_hat = RidgeRegression::fit(&x, &y, RidgeRegressionParameters {
//! solver: RidgeRegressionSolverName::Cholesky,
//! alpha: 0.1,
//! normalize: true
//! }).and_then(|lr| lr.predict(&x)).unwrap();
//! let y_hat = RidgeRegression::fit(&x, &y, RidgeRegressionParameters::default().with_alpha(0.1)).
//! and_then(|lr| lr.predict(&x)).unwrap();
//! ```
//!
//! ## References:
@@ -61,14 +58,17 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable.
pub enum RidgeRegressionSolverName {
/// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html)
@@ -78,7 +78,8 @@ pub enum RidgeRegressionSolverName {
}
/// Ridge Regression parameters
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RidgeRegressionParameters<T: RealNumber> {
/// Solver to use for estimation of regression coefficients.
pub solver: RidgeRegressionSolverName,
@@ -90,13 +91,32 @@ pub struct RidgeRegressionParameters<T: RealNumber> {
}
/// Ridge regression
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct RidgeRegression<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: T,
solver: RidgeRegressionSolverName,
}
// Builder-style setters: each consumes the parameters value and returns an
// updated copy, so calls can be chained off `Default::default()`.
impl<T: RealNumber> RidgeRegressionParameters<T> {
    /// Regularization parameter.
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Solver to use for estimation of regression coefficients.
    pub fn with_solver(self, solver: RidgeRegressionSolverName) -> Self {
        Self { solver, ..self }
    }
    /// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
    pub fn with_normalize(self, normalize: bool) -> Self {
        Self { normalize, ..self }
    }
}
impl<T: RealNumber> Default for RidgeRegressionParameters<T> {
fn default() -> Self {
RidgeRegressionParameters {
@@ -114,6 +134,24 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for RidgeRegression<T, M> {
}
}
// `SupervisedEstimator` facade: lets generic code (e.g. cross-validation
// helpers) train a `RidgeRegression` without naming the concrete type.
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, RidgeRegressionParameters<T>>
    for RidgeRegression<T, M>
{
    // Pure delegation to the inherent `RidgeRegression::fit`.
    fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: RidgeRegressionParameters<T>,
    ) -> Result<Self, Failed> {
        RidgeRegression::fit(x, y, parameters)
    }
}
// `Predictor` facade: forwards to the inherent `predict` method so the model
// can be used through the common estimator API.
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RidgeRegression<T, M> {
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
/// Fits ridge regression to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -155,14 +193,14 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
RidgeRegressionSolverName::SVD => x_t_x.svd_solve_mut(x_t_y)?,
};
for i in 0..p {
w.set(i, 0, w.get(i, 0) / col_std[i]);
for (i, col_std_i) in col_std.iter().enumerate().take(p) {
w.set(i, 0, w.get(i, 0) / *col_std_i);
}
let mut b = T::zero();
for i in 0..p {
b += w.get(i, 0) * col_mean[i];
for (i, col_mean_i) in col_mean.iter().enumerate().take(p) {
b += w.get(i, 0) * *col_mean_i;
}
let b = y.mean() - b;
@@ -196,8 +234,8 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
let col_mean = x.mean(0);
let col_std = x.std(0);
for i in 0..col_std.len() {
if (col_std[i] - T::zero()).abs() < T::epsilon() {
for (i, col_std_i) in col_std.iter().enumerate() {
if (*col_std_i - T::zero()).abs() < T::epsilon() {
return Err(Failed::fit(&format!(
"Cannot rescale constant column {}",
i
@@ -292,6 +330,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+4 -2
View File
@@ -18,6 +18,7 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -25,12 +26,13 @@ use crate::math::num::RealNumber;
use super::Distance;
/// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Euclidian {}
impl Euclidian {
#[inline]
pub(crate) fn squared_distance<T: RealNumber>(x: &Vec<T>, y: &Vec<T>) -> T {
pub(crate) fn squared_distance<T: RealNumber>(x: &[T], y: &[T]) -> T {
if x.len() != y.len() {
panic!("Input vector sizes are different.");
}
+3 -1
View File
@@ -19,6 +19,7 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -26,7 +27,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// While comparing two integer-valued vectors of equal length, Hamming distance is the number of bit positions in which the two bits are different
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Hamming {}
impl<T: PartialEq, F: RealNumber> Distance<Vec<T>, F> for Hamming {
+3 -1
View File
@@ -44,6 +44,7 @@
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -52,7 +53,8 @@ use super::Distance;
use crate::linalg::Matrix;
/// Mahalanobis distance.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Mahalanobis<T: RealNumber, M: Matrix<T>> {
/// covariance matrix of the dataset
pub sigma: M,
+3 -1
View File
@@ -17,6 +17,7 @@
//! ```
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -24,7 +25,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// Manhattan distance
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Manhattan {}
impl<T: RealNumber> Distance<Vec<T>, T> for Manhattan {
+3 -1
View File
@@ -21,6 +21,7 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -28,7 +29,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// Defines the Minkowski distance of order `p`
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Minkowski {
/// order, integer
pub p: u16,
+1 -1
View File
@@ -28,7 +28,7 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;
/// Distance metric, a function that calculates distance between two points
pub trait Distance<T, F: RealNumber> {
pub trait Distance<T, F: RealNumber>: Clone {
/// Calculates distance between _a_ and _b_
fn distance(&self, a: &T, b: &T) -> F;
}
+3 -1
View File
@@ -16,13 +16,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Accuracy metric.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Accuracy {}
impl Accuracy {
+5 -3
View File
@@ -20,6 +20,7 @@
//! * ["The ROC-AUC and the Mann-Whitney U-test", Haupt, J.](https://johaupt.github.io/roc-auc/model%20evaluation/Area_under_ROC_curve.html)
#![allow(non_snake_case)]
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
@@ -27,7 +28,8 @@ use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Area Under the Receiver Operating Characteristic Curve (ROC AUC)
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct AUC {}
impl AUC {
@@ -68,8 +70,8 @@ impl AUC {
j += 1;
}
let r = T::from_usize(i + 1 + j).unwrap() / T::two();
for k in i..j {
rank[k] = r;
for rank_k in rank.iter_mut().take(j).skip(i) {
*rank_k = r;
}
i = j - 1;
}
+3 -1
View File
@@ -1,10 +1,12 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::metrics::cluster_helpers::*;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Homogeneity, completeness and V-Measure scores.
pub struct HCVScore {}
+5 -4
View File
@@ -1,3 +1,4 @@
#![allow(clippy::ptr_arg)]
use std::collections::HashMap;
use crate::math::num::RealNumber;
@@ -23,7 +24,7 @@ pub fn contingency_matrix<T: RealNumber>(
contingency_matrix
}
pub fn entropy<T: RealNumber>(data: &Vec<T>) -> Option<T> {
pub fn entropy<T: RealNumber>(data: &[T]) -> Option<T> {
let mut bincounts = HashMap::with_capacity(data.len());
for e in data.iter() {
@@ -44,17 +45,17 @@ pub fn entropy<T: RealNumber>(data: &Vec<T>) -> Option<T> {
Some(entropy)
}
pub fn mutual_info_score<T: RealNumber>(contingency: &Vec<Vec<usize>>) -> T {
pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
let mut contingency_sum = 0;
let mut pi = vec![0; contingency.len()];
let mut pj = vec![0; contingency[0].len()];
let (mut nzx, mut nzy, mut nz_val) = (Vec::new(), Vec::new(), Vec::new());
for r in 0..contingency.len() {
for c in 0..contingency[0].len() {
for (c, pj_c) in pj.iter_mut().enumerate().take(contingency[0].len()) {
contingency_sum += contingency[r][c];
pi[r] += contingency[r][c];
pj[c] += contingency[r][c];
*pj_c += contingency[r][c];
if contingency[r][c] > 0 {
nzx.push(r);
nzy.push(c);
+3 -1
View File
@@ -18,6 +18,7 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
@@ -26,7 +27,8 @@ use crate::metrics::precision::Precision;
use crate::metrics::recall::Recall;
/// F-measure
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct F1<T: RealNumber> {
/// a positive real factor
pub beta: T,
+3 -1
View File
@@ -18,12 +18,14 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Mean Absolute Error
pub struct MeanAbsoluteError {}
+3 -1
View File
@@ -18,12 +18,14 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Mean Squared Error
pub struct MeanSquareError {}
+1 -1
View File
@@ -42,7 +42,7 @@
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
//! ];
//!
//! let lr = LogisticRegression::fit(&x, &y).unwrap();
//! let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
//!
//! let y_hat = lr.predict(&x).unwrap();
//!
+3 -1
View File
@@ -18,13 +18,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Precision metric.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Precision {}
impl Precision {
+3 -1
View File
@@ -18,13 +18,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Coefficient of Determination (R2)
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct R2 {}
impl R2 {
+3 -1
View File
@@ -18,13 +18,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Recall metric.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Recall {}
impl Recall {
+269
View File
@@ -0,0 +1,269 @@
//! # KFold
//!
//! Defines k-fold cross validator.
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::model_selection::BaseKFold;
use rand::seq::SliceRandom;
use rand::thread_rng;
/// K-Folds cross-validator: splits the samples into `n_splits` consecutive
/// (optionally shuffled) folds; each fold is used once as the test set while
/// the remaining folds form the training set.
pub struct KFold {
    /// Number of folds. Must be at least 2 (validated in `split`).
    pub n_splits: usize, // cannot exceed std::usize::MAX
    /// Whether to shuffle the data before splitting into batches
    pub shuffle: bool,
}
impl KFold {
    /// Builds the per-fold lists of test-row indices.
    ///
    /// The row indices of `x` (shuffled in place when `self.shuffle` is set)
    /// are carved into `n_splits` consecutive chunks; the first
    /// `n_samples % n_splits` chunks receive one extra element so every
    /// sample lands in exactly one fold.
    fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>> {
        let n_samples = x.shape().0;

        // Row indices, optionally shuffled.
        let mut indices: Vec<usize> = (0..n_samples).collect();
        if self.shuffle {
            indices.shuffle(&mut thread_rng());
        }

        let base_size = n_samples / self.n_splits;
        let leftover = n_samples % self.n_splits;

        // Slice `indices` into consecutive chunks of the computed sizes.
        let mut folds: Vec<Vec<usize>> = Vec::with_capacity(self.n_splits);
        let mut start = 0;
        for fold in 0..self.n_splits {
            let size = if fold < leftover {
                base_size + 1
            } else {
                base_size
            };
            folds.push(indices[start..start + size].to_vec());
            start += size;
        }
        folds
    }

    /// Converts each fold of test indices into a boolean mask over all rows:
    /// `mask[i]` is `true` iff row `i` belongs to that fold's test set.
    fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>> {
        let n_samples = x.shape().0;
        self.test_indices(x)
            .into_iter()
            .map(|fold| {
                let mut mask = vec![false; n_samples];
                for i in fold {
                    mask[i] = true;
                }
                mask
            })
            .collect()
    }
}
impl Default for KFold {
    // Default configuration: 3 folds with shuffling enabled.
    fn default() -> KFold {
        let n_splits = 3;
        let shuffle = true;
        KFold { n_splits, shuffle }
    }
}
impl KFold {
/// Number of folds. Must be at least 2.
pub fn with_n_splits(mut self, n_splits: usize) -> Self {
self.n_splits = n_splits;
self
}
/// Whether to shuffle the data before splitting into batches
pub fn with_shuffle(mut self, shuffle: bool) -> Self {
self.shuffle = shuffle;
self
}
}
/// An iterator over indices that split data into training and test set.
/// Each item is one fold's `(train_indices, test_indices)` pair.
pub struct KFoldIter {
    // All row positions, 0..n_samples.
    indices: Vec<usize>,
    // One boolean test mask per remaining fold; folds are popped from the back.
    test_indices: Vec<Vec<bool>>,
}

impl Iterator for KFoldIter {
    type Item = (Vec<usize>, Vec<usize>);

    // Pops the next fold's test mask and partitions the row positions into
    // `(train, test)` according to that mask. Returns `None` once every fold
    // has been consumed.
    fn next(&mut self) -> Option<(Vec<usize>, Vec<usize>)> {
        let mask = self.test_indices.pop()?;
        let mut train_index = Vec::new();
        let mut test_index = Vec::new();
        for (pos, _) in self.indices.iter().enumerate() {
            if mask[pos] {
                test_index.push(pos);
            } else {
                train_index.push(pos);
            }
        }
        Some((train_index, test_index))
    }
}
// `BaseKFold` contract implementation for `KFold`.
impl BaseKFold for KFold {
    type Output = KFoldIter;

    // Reports the configured number of folds.
    fn n_splits(&self) -> usize {
        self.n_splits
    }

    // Produces the iterator of `(train, test)` index pairs over the rows of
    // `x`. Panics when fewer than 2 splits are configured.
    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output {
        assert!(
            self.n_splits >= 2,
            "Number of splits is too small: {}",
            self.n_splits
        );

        // Masks are reversed because `KFoldIter` pops them from the back and
        // folds must come out in their original order.
        let mut test_indices = self.test_masks(x);
        test_indices.reverse();

        KFoldIter {
            indices: (0..x.shape().0).collect(),
            test_indices,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;

    #[test]
    fn run_kfold_return_test_indices_simple() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(33, 100);
        let test_indices = k.test_indices(&x);
        // 33 rows split evenly into three folds of 11.
        assert_eq!(test_indices[0], (0..11).collect::<Vec<usize>>());
        assert_eq!(test_indices[1], (11..22).collect::<Vec<usize>>());
        assert_eq!(test_indices[2], (22..33).collect::<Vec<usize>>());
    }

    #[test]
    fn run_kfold_return_test_indices_odd() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(34, 100);
        let test_indices = k.test_indices(&x);
        // 34 = 12 + 11 + 11: the remainder goes to the leading fold.
        assert_eq!(test_indices[0], (0..12).collect::<Vec<usize>>());
        assert_eq!(test_indices[1], (12..23).collect::<Vec<usize>>());
        assert_eq!(test_indices[2], (23..34).collect::<Vec<usize>>());
    }

    #[test]
    fn run_kfold_return_test_mask_simple() {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let test_masks = k.test_masks(&x);
        // Fold 0 tests the first half of the rows, fold 1 the second half.
        assert!(test_masks[0][0..11].iter().all(|t| *t));
        assert!(test_masks[0][11..22].iter().all(|t| !*t));
        assert!(test_masks[1][0..11].iter().all(|t| !*t));
        assert!(test_masks[1][11..22].iter().all(|t| *t));
    }

    #[test]
    fn run_kfold_return_split_simple() {
        let k = KFold {
            n_splits: 2,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
        let train_test_splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
        assert_eq!(train_test_splits[0].1, (0..11).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[0].0, (11..22).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[1].0, (0..11).collect::<Vec<usize>>());
        assert_eq!(train_test_splits[1].1, (11..22).collect::<Vec<usize>>());
    }

    #[test]
    fn run_kfold_return_split_simple_shuffle() {
        let k = KFold {
            n_splits: 2,
            ..KFold::default()
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(23, 100);
        let train_test_splits: Vec<(Vec<usize>, Vec<usize>)> = k.split(&x).collect();
        // With shuffling only the fold sizes are deterministic.
        assert_eq!(train_test_splits[0].1.len(), 12_usize);
        assert_eq!(train_test_splits[0].0.len(), 11_usize);
        assert_eq!(train_test_splits[1].0.len(), 12_usize);
        assert_eq!(train_test_splits[1].1.len(), 11_usize);
    }

    #[test]
    fn numpy_parity_test() {
        let k = KFold {
            n_splits: 3,
            shuffle: false,
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        // Expected folds mirror numpy/sklearn KFold output for n=10, k=3.
        let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
            (vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
            (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
            (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
        ];
        // `split` already returns an iterator; no `.into_iter()` needed.
        for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
            assert_eq!(test, expected_test);
            assert_eq!(train, expected_train);
        }
    }

    #[test]
    fn numpy_parity_test_shuffle() {
        let k = KFold {
            n_splits: 3,
            ..KFold::default()
        };
        let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
        let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
            (vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
            (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
            (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
        ];
        // Shuffled folds are random, so only the sizes are comparable.
        for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) {
            assert_eq!(test.len(), expected_test.len());
            assert_eq!(train.len(), expected_train.len());
        }
    }
}
+352 -244
View File
@@ -1,29 +1,140 @@
//! # Model Selection methods
//!
//! In statistics and machine learning we usually split our data into multiple subsets: training data and testing data (and sometimes to validate),
//! and fit our model on the train data, in order to make predictions on the test data. We do that to avoid overfitting or underfitting model to our data.
//! In statistics and machine learning we usually split our data into two sets: one for training and the other one for testing.
//! We fit our model to the training data, in order to make predictions on the test data. We do that to avoid overfitting or underfitting model to our data.
//! Overfitting is bad because the model we trained fits the training data too well and can't make any inferences on new data.
//! Underfitting is bad because the model is undertrained and does not fit the training data well.
//! Splitting data into multiple subsets helps to find the right combination of hyperparameters, estimate model performance and choose the right model for
//! your data.
//! Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for
//! the data.
//!
//! In SmartCore you can split your data into training and test datasets using `train_test_split` function.
//! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
//!
//! ```
//! use crate::smartcore::linalg::BaseMatrix;
//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
//! use smartcore::model_selection::train_test_split;
//!
//! //Iris data
//! let x = DenseMatrix::from_2d_array(&[
//! &[5.1, 3.5, 1.4, 0.2],
//! &[4.9, 3.0, 1.4, 0.2],
//! &[4.7, 3.2, 1.3, 0.2],
//! &[4.6, 3.1, 1.5, 0.2],
//! &[5.0, 3.6, 1.4, 0.2],
//! &[5.4, 3.9, 1.7, 0.4],
//! &[4.6, 3.4, 1.4, 0.3],
//! &[5.0, 3.4, 1.5, 0.2],
//! &[4.4, 2.9, 1.4, 0.2],
//! &[4.9, 3.1, 1.5, 0.1],
//! &[7.0, 3.2, 4.7, 1.4],
//! &[6.4, 3.2, 4.5, 1.5],
//! &[6.9, 3.1, 4.9, 1.5],
//! &[5.5, 2.3, 4.0, 1.3],
//! &[6.5, 2.8, 4.6, 1.5],
//! &[5.7, 2.8, 4.5, 1.3],
//! &[6.3, 3.3, 4.7, 1.6],
//! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//! let y: Vec<f64> = vec![
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
//! ];
//!
//! let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2, true);
//!
//! println!("X train: {:?}, y train: {}, X test: {:?}, y test: {}",
//! x_train.shape(), y_train.len(), x_test.shape(), y_test.len());
//! ```
//!
//! When we partition the available data into two disjoint sets, we drastically reduce the number of samples that can be used for training.
//!
//! One way to solve this problem is to use k-fold cross-validation. With k-fold validation, the dataset is split into k disjoint sets.
//! A model is trained using k - 1 of the folds, and the resulting model is validated on the remaining portion of the data.
//!
//! The simplest way to run cross-validation is to use the [cross_val_score](./fn.cross_validate.html) helper function on your estimator and the dataset.
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
//! use smartcore::model_selection::{KFold, cross_validate};
//! use smartcore::metrics::accuracy;
//! use smartcore::linear::logistic_regression::LogisticRegression;
//!
//! //Iris data
//! let x = DenseMatrix::from_2d_array(&[
//! &[5.1, 3.5, 1.4, 0.2],
//! &[4.9, 3.0, 1.4, 0.2],
//! &[4.7, 3.2, 1.3, 0.2],
//! &[4.6, 3.1, 1.5, 0.2],
//! &[5.0, 3.6, 1.4, 0.2],
//! &[5.4, 3.9, 1.7, 0.4],
//! &[4.6, 3.4, 1.4, 0.3],
//! &[5.0, 3.4, 1.5, 0.2],
//! &[4.4, 2.9, 1.4, 0.2],
//! &[4.9, 3.1, 1.5, 0.1],
//! &[7.0, 3.2, 4.7, 1.4],
//! &[6.4, 3.2, 4.5, 1.5],
//! &[6.9, 3.1, 4.9, 1.5],
//! &[5.5, 2.3, 4.0, 1.3],
//! &[6.5, 2.8, 4.6, 1.5],
//! &[5.7, 2.8, 4.5, 1.3],
//! &[6.3, 3.3, 4.7, 1.6],
//! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//! let y: Vec<f64> = vec![
//! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
//! ];
//!
//! let cv = KFold::default().with_n_splits(3);
//!
//! let results = cross_validate(LogisticRegression::fit, //estimator
//! &x, &y, //data
//! Default::default(), //hyperparameters
//! cv, //cross validation split
//! &accuracy).unwrap(); //metric
//!
//! println!("Training accuracy: {}, test accuracy: {}",
//! results.mean_test_score(), results.mean_train_score());
//! ```
//!
//! The function [cross_val_predict](./fn.cross_val_predict.html) has a similar interface to `cross_val_score`,
//! but instead of test error it calculates predictions for all samples in the test set.
use crate::api::Predictor;
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use rand::seq::SliceRandom;
use rand::thread_rng;
use rand::Rng;
pub(crate) mod kfold;
pub use kfold::{KFold, KFoldIter};
/// An interface for the K-Folds cross-validator
pub trait BaseKFold {
    /// An iterator over `(train_indices, test_indices)` pairs, one per fold.
    type Output: Iterator<Item = (Vec<usize>, Vec<usize>)>;
    /// Returns an iterator over the folds: each item is a tuple containing the
    /// training set indices and the testing set indices for that split.
    fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Self::Output;
    /// Returns the number of splits
    fn n_splits(&self) -> usize;
}
/// Splits data into 2 disjoint datasets.
/// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
/// * `y` - target values, should be of size _M_
/// * `y` - target values, should be of size _N_
/// * `test_size`, (0, 1] - the proportion of the dataset to include in the test split.
/// * `shuffle`, - whether or not to shuffle the data before splitting
pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
x: &M,
y: &M::RowVector,
test_size: f32,
shuffle: bool,
) -> (M, M, M::RowVector, M::RowVector) {
if x.shape().0 != y.len() {
panic!(
@@ -38,155 +149,131 @@ pub fn train_test_split<T: RealNumber, M: Matrix<T>>(
}
let n = y.len();
let m = x.shape().1;
let mut rng = rand::thread_rng();
let mut n_test = 0;
let mut index = vec![false; n];
let n_test = ((n as f32) * test_size) as usize;
for i in 0..n {
let p_test: f32 = rng.gen();
if p_test <= test_size {
index[i] = true;
n_test += 1;
}
if n_test < 1 {
panic!("number of sample is too small {}", n);
}
let n_train = n - n_test;
let mut indices: Vec<usize> = (0..n).collect();
let mut x_train = M::zeros(n_train, m);
let mut x_test = M::zeros(n_test, m);
let mut y_train = M::RowVector::zeros(n_train);
let mut y_test = M::RowVector::zeros(n_test);
let mut r_train = 0;
let mut r_test = 0;
for r in 0..n {
if index[r] {
//sample belongs to test
for c in 0..m {
x_test.set(r_test, c, x.get(r, c));
y_test.set(r_test, y.get(r));
}
r_test += 1;
} else {
for c in 0..m {
x_train.set(r_train, c, x.get(r, c));
y_train.set(r_train, y.get(r));
}
r_train += 1;
}
if shuffle {
indices.shuffle(&mut thread_rng());
}
let x_train = x.take(&indices[n_test..n], 0);
let x_test = x.take(&indices[0..n_test], 0);
let y_train = y.take(&indices[n_test..n]);
let y_test = y.take(&indices[0..n_test]);
(x_train, x_test, y_train, y_test)
}
///
/// KFold Cross-Validation
///
pub trait BaseKFold {
/// Returns integer indices corresponding to test sets
fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>>;
/// Returns masksk corresponding to test sets
fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>>;
/// Return a tuple containing the the training set indices for that split and
/// the testing set indices for that split.
fn split<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<(Vec<usize>, Vec<usize>)>;
/// Cross validation results.
#[derive(Clone, Debug)]
pub struct CrossValidationResult<T: RealNumber> {
/// Vector with test scores on each cv split
pub test_score: Vec<T>,
/// Vector with training scores on each cv split
pub train_score: Vec<T>,
}
// NOTE(review): the legacy in-file `KFold` struct and its (brace-broken) `Default`
// impl were diff residue; `KFold` now lives in the `kfold` module and is
// re-exported at the top of this file (`pub use kfold::{KFold, KFoldIter};`).
impl<T: RealNumber> CrossValidationResult<T> {
    /// Mean of the test scores over all cross-validation folds.
    pub fn mean_test_score(&self) -> T {
        let n_folds = T::from_usize(self.test_score.len()).unwrap();
        self.test_score.sum() / n_folds
    }
    /// Mean of the training scores over all cross-validation folds.
    pub fn mean_train_score(&self) -> T {
        let n_folds = T::from_usize(self.train_score.len()).unwrap();
        self.train_score.sum() / n_folds
    }
}
///
/// Abstract class for all KFold functionalities
///
impl BaseKFold for KFold {
fn test_indices<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<usize>> {
// number of samples (rows) in the matrix
let n_samples: usize = x.shape().0;
/// Evaluate an estimator by cross-validation using given metric.
/// * `fit_estimator` - a `fit` function of an estimator
/// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
/// * `y` - target values, should be of size _N_
/// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
/// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
/// * `score` - a metric to use for evaluation, see [metrics](../metrics/index.html)
pub fn cross_validate<T, M, H, E, K, F, S>(
fit_estimator: F,
x: &M,
y: &M::RowVector,
parameters: H,
cv: K,
score: S,
) -> Result<CrossValidationResult<T>, Failed>
where
T: RealNumber,
M: Matrix<T>,
H: Clone,
E: Predictor<M, M::RowVector>,
K: BaseKFold,
F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
S: Fn(&M::RowVector, &M::RowVector) -> T,
{
let k = cv.n_splits();
let mut test_score = Vec::with_capacity(k);
let mut train_score = Vec::with_capacity(k);
// initialise indices
let mut indices: Vec<usize> = (0..n_samples).collect();
if self.shuffle {
indices.shuffle(&mut thread_rng());
}
// return a new array of given shape n_split, filled with each element of n_samples divided by n_splits.
let mut fold_sizes = vec![n_samples / self.n_splits; self.n_splits];
for (train_idx, test_idx) in cv.split(x) {
let train_x = x.take(&train_idx, 0);
let train_y = y.take(&train_idx);
let test_x = x.take(&test_idx, 0);
let test_y = y.take(&test_idx);
// increment by one if odd
for i in 0..(n_samples % self.n_splits) {
fold_sizes[i] += 1;
}
let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
// generate the right array of arrays for test indices
let mut return_values: Vec<Vec<usize>> = Vec::with_capacity(self.n_splits);
let mut current: usize = 0;
for fold_size in fold_sizes.drain(..) {
let stop = current + fold_size;
return_values.push(indices[current..stop].to_vec());
current = stop
}
return_values
train_score.push(score(&train_y, &estimator.predict(&train_x)?));
test_score.push(score(&test_y, &estimator.predict(&test_x)?));
}
fn test_masks<T: RealNumber, M: Matrix<T>>(&self, x: &M) -> Vec<Vec<bool>> {
let mut return_values: Vec<Vec<bool>> = Vec::with_capacity(self.n_splits);
for test_index in self.test_indices(x).drain(..) {
// init mask
let mut test_mask = vec![false; x.shape().0];
// set mask's indices to true according to test indices
for i in test_index {
test_mask[i] = true; // can be implemented with map()
}
return_values.push(test_mask);
Ok(CrossValidationResult {
test_score,
train_score,
})
}
/// Generate cross-validated estimates for each input data point.
/// The data is split according to the cv parameter. Each sample belongs to exactly one test set, and its prediction is computed with an estimator fitted on the corresponding training set.
/// * `fit_estimator` - a `fit` function of an estimator
/// * `x` - features, matrix of size _NxM_ where _N_ is number of samples and _M_ is number of attributes.
/// * `y` - target values, should be of size _N_
/// * `parameters` - parameters of selected estimator. Use `Default::default()` for default parameters.
/// * `cv` - the cross-validation splitting strategy, should be an instance of [`BaseKFold`](./trait.BaseKFold.html)
///
/// Returns a vector of out-of-fold predictions, one per input sample, or the
/// first `Failed` error raised by `fit_estimator` or `predict`.
pub fn cross_val_predict<T, M, H, E, K, F>(
    fit_estimator: F,
    x: &M,
    y: &M::RowVector,
    parameters: H,
    cv: K,
) -> Result<M::RowVector, Failed>
where
    T: RealNumber,
    M: Matrix<T>,
    H: Clone,
    E: Predictor<M, M::RowVector>,
    K: BaseKFold,
    F: Fn(&M, &M::RowVector, H) -> Result<E, Failed>,
{
    let mut y_hat = M::RowVector::zeros(y.len());
    for (train_idx, test_idx) in cv.split(x) {
        let train_x = x.take(&train_idx, 0);
        let train_y = y.take(&train_idx);
        let test_x = x.take(&test_idx, 0);
        let estimator = fit_estimator(&train_x, &train_y, parameters.clone())?;
        let y_test_hat = estimator.predict(&test_x)?;
        // Each sample appears in exactly one test fold, so every slot of
        // `y_hat` is written exactly once.
        for (i, &idx) in test_idx.iter().enumerate() {
            y_hat.set(idx, y_test_hat.get(i));
        }
    }
    Ok(y_hat)
}
#[cfg(test)]
@@ -194,14 +281,17 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::{accuracy, mean_absolute_error};
use crate::model_selection::kfold::KFold;
use crate::neighbors::knn_regressor::KNNRegressor;
#[test]
fn run_train_test_split() {
let n = 100;
let x: DenseMatrix<f64> = DenseMatrix::rand(100, 3);
let y = vec![0f64; 100];
let n = 123;
let x: DenseMatrix<f64> = DenseMatrix::rand(n, 3);
let y = vec![0f64; n];
let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2);
let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2, true);
assert!(
x_train.shape().0 > (n as f64 * 0.65) as usize
@@ -215,126 +305,144 @@ mod tests {
assert_eq!(x_test.shape().0, y_test.len());
}
#[test]
fn run_kfold_return_test_indices_simple() {
let k = KFold {
n_splits: 3,
shuffle: false,
};
let x: DenseMatrix<f64> = DenseMatrix::rand(33, 100);
let test_indices = k.test_indices(&x);
#[derive(Clone)]
struct NoParameters {}
assert_eq!(test_indices[0], (0..11).collect::<Vec<usize>>());
assert_eq!(test_indices[1], (11..22).collect::<Vec<usize>>());
assert_eq!(test_indices[2], (22..33).collect::<Vec<usize>>());
#[test]
fn test_cross_validate_biased() {
struct BiasedEstimator {}
impl BiasedEstimator {
fn fit<M: Matrix<f32>>(
_: &M,
_: &M::RowVector,
_: NoParameters,
) -> Result<BiasedEstimator, Failed> {
Ok(BiasedEstimator {})
}
}
impl<M: Matrix<f32>> Predictor<M, M::RowVector> for BiasedEstimator {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
let (n, _) = x.shape();
Ok(M::RowVector::zeros(n))
}
}
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2],
&[4.7, 3.2, 1.3, 0.2],
&[4.6, 3.1, 1.5, 0.2],
&[5.0, 3.6, 1.4, 0.2],
&[5.4, 3.9, 1.7, 0.4],
&[4.6, 3.4, 1.4, 0.3],
&[5.0, 3.4, 1.5, 0.2],
&[4.4, 2.9, 1.4, 0.2],
&[4.9, 3.1, 1.5, 0.1],
&[7.0, 3.2, 4.7, 1.4],
&[6.4, 3.2, 4.5, 1.5],
&[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5],
&[5.7, 2.8, 4.5, 1.3],
&[6.3, 3.3, 4.7, 1.6],
&[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4],
]);
let y = vec![
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let cv = KFold {
n_splits: 5,
..KFold::default()
};
let results =
cross_validate(BiasedEstimator::fit, &x, &y, NoParameters {}, cv, &accuracy).unwrap();
assert_eq!(0.4, results.mean_test_score());
assert_eq!(0.4, results.mean_train_score());
}
#[test]
fn run_kfold_return_test_indices_odd() {
let k = KFold {
n_splits: 3,
shuffle: false,
};
let x: DenseMatrix<f64> = DenseMatrix::rand(34, 100);
let test_indices = k.test_indices(&x);
fn test_cross_validate_knn() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
&[284.599, 335.1, 165., 110.929, 1950., 61.187],
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
&[346.999, 193.2, 359.4, 113.27, 1952., 63.639],
&[365.385, 187., 354.7, 115.094, 1953., 64.989],
&[363.112, 357.8, 335., 116.219, 1954., 63.761],
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
&[419.18, 282.2, 285.7, 118.734, 1956., 67.857],
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
&[444.546, 468.1, 263.7, 121.95, 1958., 66.513],
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]);
let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9,
];
assert_eq!(test_indices[0], (0..12).collect::<Vec<usize>>());
assert_eq!(test_indices[1], (12..23).collect::<Vec<usize>>());
assert_eq!(test_indices[2], (23..34).collect::<Vec<usize>>());
let cv = KFold {
n_splits: 5,
..KFold::default()
};
let results = cross_validate(
KNNRegressor::fit,
&x,
&y,
Default::default(),
cv,
&mean_absolute_error,
)
.unwrap();
assert!(results.mean_test_score() < 15.0);
assert!(results.mean_train_score() < results.mean_test_score());
}
#[test]
fn run_kfold_return_test_mask_simple() {
let k = KFold {
n_splits: 2,
shuffle: false,
};
let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
let test_masks = k.test_masks(&x);
fn test_cross_val_predict_knn() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
&[284.599, 335.1, 165., 110.929, 1950., 61.187],
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
&[346.999, 193.2, 359.4, 113.27, 1952., 63.639],
&[365.385, 187., 354.7, 115.094, 1953., 64.989],
&[363.112, 357.8, 335., 116.219, 1954., 63.761],
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
&[419.18, 282.2, 285.7, 118.734, 1956., 67.857],
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
&[444.546, 468.1, 263.7, 121.95, 1958., 66.513],
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]);
let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9,
];
for t in &test_masks[0][0..11] {
// TODO: this can be prob done better
assert_eq!(*t, true)
}
for t in &test_masks[0][11..22] {
assert_eq!(*t, false)
}
for t in &test_masks[1][0..11] {
assert_eq!(*t, false)
}
for t in &test_masks[1][11..22] {
assert_eq!(*t, true)
}
}
#[test]
fn run_kfold_return_split_simple() {
let k = KFold {
n_splits: 2,
shuffle: false,
};
let x: DenseMatrix<f64> = DenseMatrix::rand(22, 100);
let train_test_splits = k.split(&x);
assert_eq!(train_test_splits[0].1, (0..11).collect::<Vec<usize>>());
assert_eq!(train_test_splits[0].0, (11..22).collect::<Vec<usize>>());
assert_eq!(train_test_splits[1].0, (0..11).collect::<Vec<usize>>());
assert_eq!(train_test_splits[1].1, (11..22).collect::<Vec<usize>>());
}
#[test]
fn run_kfold_return_split_simple_shuffle() {
let k = KFold {
let cv = KFold {
n_splits: 2,
..KFold::default()
};
let x: DenseMatrix<f64> = DenseMatrix::rand(23, 100);
let train_test_splits = k.split(&x);
assert_eq!(train_test_splits[0].1.len(), 12_usize);
assert_eq!(train_test_splits[0].0.len(), 11_usize);
assert_eq!(train_test_splits[1].0.len(), 12_usize);
assert_eq!(train_test_splits[1].1.len(), 11_usize);
}
let y_hat = cross_val_predict(KNNRegressor::fit, &x, &y, Default::default(), cv).unwrap();
#[test]
fn numpy_parity_test() {
let k = KFold {
n_splits: 3,
shuffle: false,
};
let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
(vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
];
for ((train, test), (expected_train, expected_test)) in
k.split(&x).into_iter().zip(expected)
{
assert_eq!(test, expected_test);
assert_eq!(train, expected_train);
}
}
#[test]
fn numpy_parity_test_shuffle() {
let k = KFold {
n_splits: 3,
..KFold::default()
};
let x: DenseMatrix<f64> = DenseMatrix::rand(10, 4);
let expected: Vec<(Vec<usize>, Vec<usize>)> = vec![
(vec![4, 5, 6, 7, 8, 9], vec![0, 1, 2, 3]),
(vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]),
(vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]),
];
for ((train, test), (expected_train, expected_test)) in
k.split(&x).into_iter().zip(expected)
{
assert_eq!(test.len(), expected_test.len());
assert_eq!(train.len(), expected_train.len());
}
assert!(mean_absolute_error(&y, &y_hat) < 10.0);
}
}
+480
View File
@@ -0,0 +1,480 @@
//! # Bernoulli Naive Bayes
//!
//! Bernoulli Naive Bayes classifier is a variant of [Naive Bayes](../index.html) for the data that is distributed according to multivariate Bernoulli distribution.
//! It is used for discrete data with binary features. One example of a binary feature is a word that occurs in the text or not.
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::naive_bayes::bernoulli::BernoulliNB;
//!
//! // Training data points are:
//! // Chinese Beijing Chinese (class: China)
//! // Chinese Chinese Shanghai (class: China)
//! // Chinese Macao (class: China)
//! // Tokyo Japan Chinese (class: Japan)
//! let x = DenseMatrix::<f64>::from_2d_array(&[
//! &[1., 1., 0., 0., 0., 0.],
//! &[0., 1., 0., 0., 1., 0.],
//! &[0., 1., 0., 1., 0., 0.],
//! &[0., 1., 1., 0., 0., 1.],
//! ]);
//! let y = vec![0., 0., 0., 1.];
//!
//! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
//!
//! // Testing data point is:
//! // Chinese Chinese Chinese Tokyo Japan
//! let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 1., 1., 0., 0., 1.]]);
//! let y_hat = nb.predict(&x_test).unwrap();
//! ```
//!
//! ## References:
//!
//! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13 ](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::row_iter;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for Bernoulli features
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct BernoulliNBDistribution<T: RealNumber> {
    /// class labels known to the classifier
    class_labels: Vec<T>,
    /// number of training samples observed in each class
    class_count: Vec<usize>,
    /// probability of each class
    class_priors: Vec<T>,
    /// Number of samples encountered for each (class, feature)
    feature_count: Vec<Vec<usize>>,
    /// smoothed log-probability of features per class, indexed `[class][feature]`
    feature_log_prob: Vec<Vec<T>>,
    /// Number of features of each sample
    n_features: usize,
}
impl<T: RealNumber> PartialEq for BernoulliNBDistribution<T> {
    /// Two distributions are equal when all counting fields match exactly and
    /// the log-probability tables match within `T::epsilon()`.
    fn eq(&self, other: &Self) -> bool {
        self.class_labels == other.class_labels
            && self.class_count == other.class_count
            && self.class_priors == other.class_priors
            && self.feature_count == other.feature_count
            && self.n_features == other.n_features
            && self
                .feature_log_prob
                .iter()
                .zip(other.feature_log_prob.iter())
                .all(|(a, b)| a.approximate_eq(b, T::epsilon()))
    }
}
impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for BernoulliNBDistribution<T> {
    /// Prior probability of the class at `class_index`.
    fn prior(&self, class_index: usize) -> T {
        self.class_priors[class_index]
    }

    /// Log-likelihood of the binary sample `j` under class `class_index`.
    /// A feature equal to 1 contributes `log P(f|c)`; any other value
    /// contributes `log(1 - P(f|c))`.
    fn log_likelihood(&self, class_index: usize, j: &M::RowVector) -> T {
        let log_probs = &self.feature_log_prob[class_index];
        let mut total = T::zero();
        for feature in 0..j.len() {
            total += if j.get(feature) == T::one() {
                log_probs[feature]
            } else {
                (T::one() - log_probs[feature].exp()).ln()
            };
        }
        total
    }

    /// All class labels known to this distribution.
    fn classes(&self) -> &Vec<T> {
        &self.class_labels
    }
}
/// `BernoulliNB` parameters. Use `Default::default()` for default values.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct BernoulliNBParameters<T: RealNumber> {
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing). Defaults to 1.
    pub alpha: T,
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    pub priors: Option<Vec<T>>,
    /// Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors. Defaults to `Some(0)`.
    pub binarize: Option<T>,
}
impl<T: RealNumber> BernoulliNBParameters<T> {
    /// Sets the additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Sets the prior probabilities of the classes; when provided, the priors are
    /// not adjusted according to the data.
    pub fn with_priors(self, priors: Vec<T>) -> Self {
        Self {
            priors: Some(priors),
            ..self
        }
    }
    /// Sets the threshold for binarizing (mapping to booleans) the sample features.
    pub fn with_binarize(self, binarize: T) -> Self {
        Self {
            binarize: Some(binarize),
            ..self
        }
    }
}
impl<T: RealNumber> Default for BernoulliNBParameters<T> {
fn default() -> Self {
Self {
alpha: T::one(),
priors: None,
binarize: Some(T::zero()),
}
}
}
impl<T: RealNumber> BernoulliNBDistribution<T> {
    /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data; every element must already be binarized to 0.0 or 1.0
    /// (binarization is applied by `BernoulliNB::fit` before calling this).
    /// * `y` - vector with target values (classes) of length N.
    /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter; must not be negative.
    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    /// priors are adjusted according to the data.
    ///
    /// Returns `Failed` when the input sizes disagree, the data is empty,
    /// `alpha` is negative, or the matrix contains non-binary values.
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
        alpha: T,
        priors: Option<Vec<T>>,
    ) -> Result<Self, Failed> {
        let (n_samples, n_features) = x.shape();
        let y_samples = y.len();
        if y_samples != n_samples {
            return Err(Failed::fit(&format!(
                "Size of x should equal size of y; |x|=[{}], |y|=[{}]",
                n_samples, y_samples
            )));
        }
        if n_samples == 0 {
            return Err(Failed::fit(&format!(
                "Size of x and y should greater than 0; |x|=[{}]",
                n_samples
            )));
        }
        if alpha < T::zero() {
            return Err(Failed::fit(&format!(
                "Alpha should be greater than 0; |alpha|=[{}]",
                alpha
            )));
        }
        let y = y.to_vec();
        // Map labels to dense class indices: `indices[i]` is the position of
        // sample i's label inside `class_labels`.
        let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
        let mut class_count = vec![0_usize; class_labels.len()];
        for class_index in indices.iter() {
            class_count[*class_index] += 1;
        }
        // Use the user-supplied priors when given; otherwise estimate them as
        // the empirical class frequencies.
        let class_priors = if let Some(class_priors) = priors {
            if class_priors.len() != class_labels.len() {
                return Err(Failed::fit(
                    "Size of priors provided does not match the number of classes of the data.",
                ));
            }
            class_priors
        } else {
            class_count
                .iter()
                .map(|&c| T::from(c).unwrap() / T::from(n_samples).unwrap())
                .collect()
        };
        // feature_in_class_counter[class][feature] = number of samples of `class`
        // in which `feature` equals 1; summing the raw values only works because
        // the input is restricted to 0/1 (enforced via `to_usize` below).
        let mut feature_in_class_counter = vec![vec![0_usize; n_features]; class_labels.len()];
        for (row, class_index) in row_iter(x).zip(indices) {
            for (idx, row_i) in row.iter().enumerate().take(n_features) {
                feature_in_class_counter[class_index][idx] +=
                    row_i.to_usize().ok_or_else(|| {
                        Failed::fit(&format!(
                            "Elements of the matrix should be 1.0 or 0.0 |found|=[{}]",
                            row_i
                        ))
                    })?;
            }
        }
        // Smoothed log P(feature=1 | class) = ln((count + alpha) / (class_count + 2*alpha));
        // the factor 2 accounts for the two possible values of a Bernoulli feature.
        let feature_log_prob = feature_in_class_counter
            .iter()
            .enumerate()
            .map(|(class_index, feature_count)| {
                feature_count
                    .iter()
                    .map(|&count| {
                        ((T::from(count).unwrap() + alpha)
                            / (T::from(class_count[class_index]).unwrap() + alpha * T::two()))
                        .ln()
                    })
                    .collect()
            })
            .collect();
        Ok(Self {
            class_labels,
            class_priors,
            class_count,
            feature_count: feature_in_class_counter,
            feature_log_prob,
            n_features,
        })
    }
}
/// BernoulliNB implements the naive Bayes algorithm for data that follows a
/// multivariate Bernoulli distribution (binary/boolean features).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct BernoulliNB<T: RealNumber, M: Matrix<T>> {
    // Generic naive Bayes machinery parameterized by the Bernoulli distribution.
    inner: BaseNaiveBayes<T, M, BernoulliNBDistribution<T>>,
    // Threshold used to binarize inputs in both `fit` and `predict`; `None`
    // means inputs are assumed to be binary already.
    binarize: Option<T>,
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, BernoulliNBParameters<T>>
    for BernoulliNB<T, M>
{
    /// Delegates to the inherent `BernoulliNB::fit`.
    fn fit(x: &M, y: &M::RowVector, parameters: BernoulliNBParameters<T>) -> Result<Self, Failed> {
        BernoulliNB::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for BernoulliNB<T, M> {
    /// Delegates to the inherent `BernoulliNB::predict`.
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> BernoulliNB<T, M> {
/// Fits BernoulliNB with given data
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
/// features.
/// * `y` - vector with target values (classes) of length N.
/// * `parameters` - additional parameters like class priors, alpha for smoothing and
/// binarizing threshold.
pub fn fit(
x: &M,
y: &M::RowVector,
parameters: BernoulliNBParameters<T>,
) -> Result<Self, Failed> {
let distribution = if let Some(threshold) = parameters.binarize {
BernoulliNBDistribution::fit(
&(x.binarize(threshold)),
y,
parameters.alpha,
parameters.priors,
)?
} else {
BernoulliNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?
};
let inner = BaseNaiveBayes::fit(distribution)?;
Ok(Self {
inner,
binarize: parameters.binarize,
})
}
/// Estimates the class labels for the provided data.
/// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
/// Returns a vector of size N with class estimates.
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
if let Some(threshold) = self.binarize {
self.inner.predict(&(x.binarize(threshold)))
} else {
self.inner.predict(x)
}
}
/// Class labels known to the classifier.
/// Returns a vector of size n_classes.
pub fn classes(&self) -> &Vec<T> {
&self.inner.distribution.class_labels
}
/// Number of training samples observed in each class.
/// Returns a vector of size n_classes.
pub fn class_count(&self) -> &Vec<usize> {
&self.inner.distribution.class_count
}
/// Number of features of each sample
pub fn n_features(&self) -> usize {
self.inner.distribution.n_features
}
/// Number of samples encountered for each (class, feature)
/// Returns a 2d vector of shape (n_classes, n_features)
pub fn feature_count(&self) -> &Vec<Vec<usize>> {
&self.inner.distribution.feature_count
}
/// Empirical log probability of features given a class
pub fn feature_log_prob(&self) -> &Vec<Vec<T>> {
&self.inner.distribution.feature_log_prob
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;
    #[test]
    fn run_bernoulli_naive_bayes() {
        // Tests that BernoulliNB when alpha=1.0 gives the same values as
        // those given for the toy example in Manning, Raghavan, and
        // Schuetze's "Introduction to Information Retrieval" book:
        // https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
        // Training data points are:
        // Chinese Beijing Chinese (class: China)
        // Chinese Chinese Shanghai (class: China)
        // Chinese Macao (class: China)
        // Tokyo Japan Chinese (class: Japan)
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[1., 1., 0., 0., 0., 0.],
            &[0., 1., 0., 0., 1., 0.],
            &[0., 1., 0., 1., 0., 0.],
            &[0., 1., 1., 0., 0., 1.],
        ]);
        let y = vec![0., 0., 0., 1.];
        let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
        // Empirical priors: 3 of 4 samples are class 0, 1 of 4 is class 1.
        assert_eq!(bnb.inner.distribution.class_priors, &[0.75, 0.25]);
        // Laplace-smoothed log P(feature=1 | class), per class.
        assert_eq!(
            bnb.feature_log_prob(),
            &[
                &[
                    -0.916290731874155,
                    -0.2231435513142097,
                    -1.6094379124341003,
                    -0.916290731874155,
                    -0.916290731874155,
                    -1.6094379124341003
                ],
                &[
                    -1.0986122886681098,
                    -0.40546510810816444,
                    -0.40546510810816444,
                    -1.0986122886681098,
                    -1.0986122886681098,
                    -0.40546510810816444
                ]
            ]
        );
        // Testing data point is:
        // Chinese Chinese Chinese Tokyo Japan
        let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 1., 1., 0., 0., 1.]]);
        let y_hat = bnb.predict(&x_test).unwrap();
        assert_eq!(y_hat, &[1.]);
    }
    #[test]
    fn bernoulli_nb_scikit_parity() {
        // Cross-check counts, priors, log-probabilities and predictions
        // against scikit-learn's BernoulliNB on the same data.
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[2., 4., 0., 0., 2., 1., 2., 4., 2., 0.],
            &[3., 4., 0., 2., 1., 0., 1., 4., 0., 3.],
            &[1., 4., 2., 4., 1., 0., 1., 2., 3., 2.],
            &[0., 3., 3., 4., 1., 0., 3., 1., 1., 1.],
            &[0., 2., 1., 4., 3., 4., 1., 2., 3., 1.],
            &[3., 2., 4., 1., 3., 0., 2., 4., 0., 2.],
            &[3., 1., 3., 0., 2., 0., 4., 4., 3., 4.],
            &[2., 2., 2., 0., 1., 1., 2., 1., 0., 1.],
            &[3., 3., 2., 2., 0., 2., 3., 2., 2., 3.],
            &[4., 3., 4., 4., 4., 2., 2., 0., 1., 4.],
            &[3., 4., 2., 2., 1., 4., 4., 4., 1., 3.],
            &[3., 0., 1., 4., 4., 0., 0., 3., 2., 4.],
            &[2., 0., 3., 3., 1., 2., 0., 2., 4., 1.],
            &[2., 4., 0., 4., 2., 4., 1., 3., 1., 4.],
            &[0., 2., 2., 3., 4., 0., 4., 4., 4., 4.],
        ]);
        let y = vec![2., 2., 0., 0., 0., 2., 1., 1., 0., 1., 0., 0., 2., 0., 2.];
        let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
        let y_hat = bnb.predict(&x).unwrap();
        assert_eq!(bnb.classes(), &[0., 1., 2.]);
        assert_eq!(bnb.class_count(), &[7, 3, 5]);
        assert_eq!(bnb.n_features(), 10);
        assert_eq!(
            bnb.feature_count(),
            &[
                &[5, 6, 6, 7, 6, 4, 6, 7, 7, 7],
                &[3, 3, 3, 1, 3, 2, 3, 2, 2, 3],
                &[4, 4, 3, 4, 5, 2, 4, 5, 3, 4]
            ]
        );
        assert!(bnb
            .inner
            .distribution
            .class_priors
            .approximate_eq(&vec!(0.46, 0.2, 0.33), 1e-2));
        assert!(bnb.feature_log_prob()[1].approximate_eq(
            &vec![
                -0.22314355,
                -0.22314355,
                -0.22314355,
                -0.91629073,
                -0.22314355,
                -0.51082562,
                -0.22314355,
                -0.51082562,
                -0.51082562,
                -0.22314355
            ],
            1e-1
        ));
        assert!(y_hat.approximate_eq(
            &vec!(2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            1e-5
        ));
    }
    #[test]
    #[cfg(feature = "serde")]
    fn serde() {
        // Round-trip the fitted model through JSON and verify equality.
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[1., 1., 0., 0., 0., 0.],
            &[0., 1., 0., 0., 1., 0.],
            &[0., 1., 0., 1., 0., 0.],
            &[0., 1., 1., 0., 0., 1.],
        ]);
        let y = vec![0., 0., 0., 1.];
        let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
        let deserialized_bnb: BernoulliNB<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&bnb).unwrap()).unwrap();
        assert_eq!(bnb, deserialized_bnb);
    }
}
+250 -32
View File
@@ -1,16 +1,96 @@
//! # Categorical Naive Bayes
//!
//! Categorical Naive Bayes is a variant of [Naive Bayes](../index.html) for the categorically distributed data.
//! It assumes that each feature has its own categorical distribution.
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::naive_bayes::categorical::CategoricalNB;
//!
//! let x = DenseMatrix::from_2d_array(&[
//! &[3., 4., 0., 1.],
//! &[3., 0., 0., 1.],
//! &[4., 4., 1., 2.],
//! &[4., 2., 4., 3.],
//! &[4., 2., 4., 2.],
//! &[4., 1., 1., 0.],
//! &[1., 1., 1., 1.],
//! &[0., 4., 1., 0.],
//! &[0., 3., 2., 1.],
//! &[0., 3., 1., 1.],
//! &[3., 4., 0., 1.],
//! &[3., 4., 2., 4.],
//! &[0., 3., 1., 2.],
//! &[0., 4., 1., 2.],
//! ]);
//! let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.];
//!
//! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = nb.predict(&x).unwrap();
//! ```
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for categorical features
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct CategoricalNBDistribution<T: RealNumber> {
    /// number of training samples observed in each class
    class_count: Vec<usize>,
    /// class labels known to the classifier
    class_labels: Vec<T>,
    /// probability of each class
    class_priors: Vec<T>,
    /// Smoothed per-category log-probabilities summed by `log_likelihood`.
    /// NOTE(review): index order appears to be `[feature][class][category]`
    /// based on `log_likelihood` — confirm against `fit`.
    coefficients: Vec<Vec<Vec<T>>>,
    /// Number of features of each sample
    n_features: usize,
    /// Number of categories for each feature
    n_categories: Vec<usize>,
    /// Holds arrays of shape (n_classes, n_categories of respective feature)
    /// for each feature. Each array provides the number of samples
    /// encountered for each class and category of the specific feature.
    category_count: Vec<Vec<Vec<usize>>>,
}
impl<T: RealNumber> PartialEq for CategoricalNBDistribution<T> {
    /// Two distributions are equal when all exact fields match and every
    /// coefficient agrees within `T::epsilon()` (with identical nesting shape).
    fn eq(&self, other: &Self) -> bool {
        if self.class_labels != other.class_labels
            || self.class_priors != other.class_priors
            || self.n_features != other.n_features
            || self.n_categories != other.n_categories
            || self.class_count != other.class_count
            || self.coefficients.len() != other.coefficients.len()
        {
            return false;
        }
        self.coefficients
            .iter()
            .zip(other.coefficients.iter())
            .all(|(a, b)| {
                a.len() == b.len()
                    && a.iter().zip(b.iter()).all(|(a_i, b_i)| {
                        a_i.len() == b_i.len()
                            && a_i
                                .iter()
                                .zip(b_i.iter())
                                .all(|(a_i_j, b_i_j)| (*a_i_j - *b_i_j).abs() <= T::epsilon())
                    })
            })
    }
}
impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for CategoricalNBDistribution<T> {
@@ -27,8 +107,8 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for CategoricalNBDistribu
let mut likelihood = T::zero();
for feature in 0..j.len() {
let value = j.get(feature).floor().to_usize().unwrap();
if self.coefficients[class_index][feature].len() > value {
likelihood += self.coefficients[class_index][feature][value];
if self.coefficients[feature][class_index].len() > value {
likelihood += self.coefficients[feature][class_index][value];
} else {
return T::zero();
}
@@ -86,12 +166,12 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
let class_labels: Vec<T> = (0..*y_max + 1)
.map(|label| T::from(label).unwrap())
.collect();
let mut classes_count: Vec<T> = vec![T::zero(); class_labels.len()];
let mut class_count = vec![0_usize; class_labels.len()];
for elem in y.iter() {
classes_count[*elem] += T::one();
class_count[*elem] += 1;
}
let mut feature_categories: Vec<Vec<T>> = Vec::with_capacity(n_features);
let mut n_categories: Vec<usize> = Vec::with_capacity(n_features);
for feature in 0..n_features {
let feature_max = x
.get_col_as_vec(feature)
@@ -104,18 +184,15 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
feature
))
})?;
let feature_types = (0..feature_max + 1)
.map(|feat| T::from(feat).unwrap())
.collect();
feature_categories.push(feature_types);
n_categories.push(feature_max + 1);
}
let mut coefficients: Vec<Vec<Vec<T>>> = Vec::with_capacity(class_labels.len());
for (label, label_count) in class_labels.iter().zip(classes_count.iter()) {
let mut category_count: Vec<Vec<Vec<usize>>> = Vec::with_capacity(class_labels.len());
for (feature_index, &n_categories_i) in n_categories.iter().enumerate().take(n_features) {
let mut coef_i: Vec<Vec<T>> = Vec::with_capacity(n_features);
for (feature_index, feature_options) in
feature_categories.iter().enumerate().take(n_features)
{
let mut category_count_i: Vec<Vec<usize>> = Vec::with_capacity(n_features);
for (label, &label_count) in class_labels.iter().zip(class_count.iter()) {
let col = x
.get_col_as_vec(feature_index)
.iter()
@@ -123,57 +200,61 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
.filter(|(i, _j)| T::from(y[*i]).unwrap() == *label)
.map(|(_, j)| *j)
.collect::<Vec<T>>();
let mut feat_count: Vec<T> = vec![T::zero(); feature_options.len()];
let mut feat_count: Vec<usize> = vec![0_usize; n_categories_i];
for row in col.iter() {
let index = row.floor().to_usize().unwrap();
feat_count[index] += T::one();
feat_count[index] += 1;
}
let coef_i_j = feat_count
.iter()
.map(|c| {
((*c + alpha)
/ (*label_count + T::from(feature_options.len()).unwrap() * alpha))
((T::from(*c).unwrap() + alpha)
/ (T::from(label_count).unwrap()
+ T::from(n_categories_i).unwrap() * alpha))
.ln()
})
.collect::<Vec<T>>();
category_count_i.push(feat_count);
coef_i.push(coef_i_j);
}
category_count.push(category_count_i);
coefficients.push(coef_i);
}
let class_priors = classes_count
.into_iter()
.map(|count| count / T::from(n_samples).unwrap())
let class_priors = class_count
.iter()
.map(|&count| T::from(count).unwrap() / T::from(n_samples).unwrap())
.collect::<Vec<T>>();
Ok(Self {
class_count,
class_labels,
class_priors,
coefficients,
n_categories,
n_features,
category_count,
})
}
}
/// `CategoricalNB` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct CategoricalNBParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: T,
}
impl<T: RealNumber> CategoricalNBParameters<T> {
/// Create CategoricalNBParameters with specific paramaters.
pub fn new(alpha: T) -> Result<Self, Failed> {
if alpha > T::zero() {
Ok(Self { alpha })
} else {
Err(Failed::fit(&format!(
"alpha should be >= 0, alpha=[{}]",
alpha
)))
}
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub fn with_alpha(mut self, alpha: T) -> Self {
self.alpha = alpha;
self
}
}
impl<T: RealNumber> Default for CategoricalNBParameters<T> {
fn default() -> Self {
Self { alpha: T::one() }
@@ -181,11 +262,30 @@ impl<T: RealNumber> Default for CategoricalNBParameters<T> {
}
/// CategoricalNB implements the categorical naive Bayes algorithm for categorically distributed data.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct CategoricalNB<T: RealNumber, M: Matrix<T>> {
inner: BaseNaiveBayes<T, M, CategoricalNBDistribution<T>>,
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, CategoricalNBParameters<T>>
    for CategoricalNB<T, M>
{
    /// Delegates to the inherent [`CategoricalNB::fit`].
    fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: CategoricalNBParameters<T>,
    ) -> Result<Self, Failed> {
        CategoricalNB::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for CategoricalNB<T, M> {
    /// Delegates to the inherent [`CategoricalNB::predict`].
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> CategoricalNB<T, M> {
/// Fits CategoricalNB with given data
/// * `x` - training data of size NxM where N is the number of samples and M is the number of
@@ -209,6 +309,41 @@ impl<T: RealNumber, M: Matrix<T>> CategoricalNB<T, M> {
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.inner.predict(x)
}
/// Class labels known to the classifier.
/// Returns a vector of size n_classes.
pub fn classes(&self) -> &Vec<T> {
&self.inner.distribution.class_labels
}
/// Number of training samples observed in each class.
/// Returns a vector of size n_classes.
pub fn class_count(&self) -> &Vec<usize> {
&self.inner.distribution.class_count
}
/// Number of features of each sample
pub fn n_features(&self) -> usize {
self.inner.distribution.n_features
}
/// Number of features of each sample
pub fn n_categories(&self) -> &Vec<usize> {
&self.inner.distribution.n_categories
}
/// Holds arrays of shape (n_classes, n_categories of respective feature)
/// for each feature. Each array provides the number of samples
/// encountered for each class and category of the specific feature.
pub fn category_count(&self) -> &Vec<Vec<Vec<usize>>> {
&self.inner.distribution.category_count
}
/// Holds arrays of shape (n_classes, n_categories of respective feature)
/// for each feature. Each array provides the empirical log probability
/// of categories given the respective feature and class, ``P(x_i|y)``.
pub fn feature_log_prob(&self) -> &Vec<Vec<Vec<T>>> {
&self.inner.distribution.coefficients
}
}
#[cfg(test)]
@@ -237,6 +372,60 @@ mod tests {
let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.];
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
// checking parity with scikit
assert_eq!(cnb.classes(), &[0., 1.]);
assert_eq!(cnb.class_count(), &[5, 9]);
assert_eq!(cnb.n_features(), 4);
assert_eq!(cnb.n_categories(), &[3, 3, 2, 2]);
assert_eq!(
cnb.category_count(),
&vec![
vec![vec![3, 0, 2], vec![2, 4, 3]],
vec![vec![1, 2, 2], vec![3, 4, 2]],
vec![vec![1, 4], vec![6, 3]],
vec![vec![2, 3], vec![6, 3]]
]
);
assert_eq!(
cnb.feature_log_prob(),
&vec![
vec![
vec![
-0.6931471805599453,
-2.0794415416798357,
-0.9808292530117262
],
vec![
-1.3862943611198906,
-0.8754687373538999,
-1.0986122886681098
]
],
vec![
vec![
-1.3862943611198906,
-0.9808292530117262,
-0.9808292530117262
],
vec![
-1.0986122886681098,
-0.8754687373538999,
-1.3862943611198906
]
],
vec![
vec![-1.252762968495368, -0.3364722366212129],
vec![-0.45198512374305727, -1.0116009116784799]
],
vec![
vec![-0.8472978603872037, -0.5596157879354228],
vec![-0.45198512374305727, -1.0116009116784799]
]
]
);
let x_test = DenseMatrix::from_2d_array(&[&[0., 2., 1., 0.], &[2., 2., 0., 0.]]);
let y_hat = cnb.predict(&x_test).unwrap();
assert_eq!(y_hat, vec![0., 1.]);
@@ -269,4 +458,33 @@ mod tests {
vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1.]
);
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::<f64>::from_2d_array(&[
&[3., 4., 0., 1.],
&[3., 0., 0., 1.],
&[4., 4., 1., 2.],
&[4., 2., 4., 3.],
&[4., 2., 4., 2.],
&[4., 1., 1., 0.],
&[1., 1., 1., 1.],
&[0., 4., 1., 0.],
&[0., 3., 2., 1.],
&[0., 3., 1., 1.],
&[3., 4., 0., 1.],
&[3., 4., 2., 4.],
&[0., 3., 1., 2.],
&[0., 4., 1., 2.],
]);
let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.];
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
let deserialized_cnb: CategoricalNB<f64, DenseMatrix<f64>> =
serde_json::from_str(&serde_json::to_string(&cnb).unwrap()).unwrap();
assert_eq!(cnb, deserialized_cnb);
}
}
+336
View File
@@ -0,0 +1,336 @@
//! # Gaussian Naive Bayes
//!
//! Gaussian Naive Bayes is a variant of [Naive Bayes](../index.html) for the data that follows Gaussian distribution and
//! it supports continuous valued features conforming to a normal distribution.
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::naive_bayes::gaussian::GaussianNB;
//!
//! let x = DenseMatrix::from_2d_array(&[
//! &[-1., -1.],
//! &[-2., -1.],
//! &[-3., -2.],
//! &[ 1., 1.],
//! &[ 2., 1.],
//! &[ 3., 2.],
//! ]);
//! let y = vec![1., 1., 1., 2., 2., 2.];
//!
//! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = nb.predict(&x).unwrap();
//! ```
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::row_iter;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Fitted per-class Gaussian distribution of the features: one normal
/// distribution (mean/variance) per (class, feature) pair.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
struct GaussianNBDistribution<T: RealNumber> {
    /// class labels known to the classifier
    class_labels: Vec<T>,
    /// number of training samples observed in each class
    class_count: Vec<usize>,
    /// probability of each class.
    class_priors: Vec<T>,
    /// variance of each feature per class
    var: Vec<Vec<T>>,
    /// mean of each feature per class
    theta: Vec<Vec<T>>,
}
impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for GaussianNBDistribution<T> {
    /// Prior probability of the class at `class_index`; zero for an
    /// out-of-range index.
    fn prior(&self, class_index: usize) -> T {
        if class_index < self.class_labels.len() {
            self.class_priors[class_index]
        } else {
            T::zero()
        }
    }

    /// Sum of per-feature Gaussian log densities of sample `j` under the
    /// class's fitted mean and variance.
    fn log_likelihood(&self, class_index: usize, j: &M::RowVector) -> T {
        (0..j.len()).fold(T::zero(), |acc, feature| {
            acc + self.calculate_log_probability(
                j.get(feature),
                self.theta[class_index][feature],
                self.var[class_index][feature],
            )
        })
    }

    /// Class labels known to the distribution.
    fn classes(&self) -> &Vec<T> {
        &self.class_labels
    }
}
/// `GaussianNB` parameters. Use `Default::default()` for default values.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone)]
pub struct GaussianNBParameters<T: RealNumber> {
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    /// (`None`, the default, means priors are estimated from class frequencies during fit).
    pub priors: Option<Vec<T>>,
}
impl<T: RealNumber> GaussianNBParameters<T> {
    /// Builder-style setter for fixed class priors; when set, priors are not
    /// adjusted according to the data.
    pub fn with_priors(self, priors: Vec<T>) -> Self {
        Self {
            priors: Some(priors),
        }
    }
}
impl<T: RealNumber> GaussianNBDistribution<T> {
    /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data.
    /// * `y` - vector with target values (classes) of length N.
    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    /// priors are adjusted according to the data.
    ///
    /// Returns an error when `x` and `y` disagree in size, the data is empty,
    /// or the provided `priors` do not match the number of classes.
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
        priors: Option<Vec<T>>,
    ) -> Result<Self, Failed> {
        let (n_samples, n_features) = x.shape();
        let y_samples = y.len();
        if y_samples != n_samples {
            return Err(Failed::fit(&format!(
                "Size of x should equal size of y; |x|=[{}], |y|=[{}]",
                n_samples, y_samples
            )));
        }

        if n_samples == 0 {
            // Fixed grammar of the error message ("should greater" -> "should be greater").
            return Err(Failed::fit(&format!(
                "Size of x and y should be greater than 0; |x|=[{}]",
                n_samples
            )));
        }

        let y = y.to_vec();
        let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
        // Group the rows of `x` by class and count samples per class.
        let mut class_count = vec![0_usize; class_labels.len()];
        let mut subdataset: Vec<Vec<Vec<T>>> = vec![vec![]; class_labels.len()];
        for (row, class_index) in row_iter(x).zip(indices.iter()) {
            class_count[*class_index] += 1;
            subdataset[*class_index].push(row);
        }

        // Use the user-supplied priors when given, otherwise estimate them
        // from the class frequencies in `y`.
        let class_priors = if let Some(class_priors) = priors {
            if class_priors.len() != class_labels.len() {
                return Err(Failed::fit(
                    "Size of priors provided does not match the number of classes of the data.",
                ));
            }
            class_priors
        } else {
            class_count
                .iter()
                .map(|&c| T::from(c).unwrap() / T::from(n_samples).unwrap())
                .collect()
        };

        // Materialize each per-class group as a matrix so that columnwise
        // statistics can be computed through the Matrix trait.
        let subdataset: Vec<M> = subdataset
            .into_iter()
            .map(|v| {
                let mut m = M::zeros(v.len(), n_features);
                for (row_i, v_i) in v.iter().enumerate() {
                    for (col_j, v_i_j) in v_i.iter().enumerate().take(n_features) {
                        m.set(row_i, col_j, *v_i_j);
                    }
                }
                m
            })
            .collect();

        // Per-class, per-feature variance and mean (columnwise, axis 0).
        let (var, theta): (Vec<Vec<T>>, Vec<Vec<T>>) = subdataset
            .iter()
            .map(|data| (data.var(0), data.mean(0)))
            .unzip();

        Ok(Self {
            class_labels,
            class_count,
            class_priors,
            var,
            theta,
        })
    }

    /// Log of the normal probability density of `value` for a Gaussian with
    /// the given `mean` and `variance`.
    fn calculate_log_probability(&self, value: T, mean: T, variance: T) -> T {
        let pi = T::from(std::f64::consts::PI).unwrap();
        -((value - mean).powf(T::two()) / (T::two() * variance))
            - (T::two() * pi).ln() / T::two()
            - (variance).ln() / T::two()
    }
}
/// GaussianNB implements the Gaussian naive Bayes algorithm for normally distributed,
/// continuous-valued data.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct GaussianNB<T: RealNumber, M: Matrix<T>> {
    /// Generic naive Bayes machinery parameterized with the Gaussian distribution.
    inner: BaseNaiveBayes<T, M, GaussianNBDistribution<T>>,
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, GaussianNBParameters<T>>
    for GaussianNB<T, M>
{
    /// Delegates to the inherent [`GaussianNB::fit`].
    fn fit(x: &M, y: &M::RowVector, parameters: GaussianNBParameters<T>) -> Result<Self, Failed> {
        GaussianNB::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for GaussianNB<T, M> {
    /// Delegates to the inherent [`GaussianNB::predict`].
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> GaussianNB<T, M> {
    /// Fits GaussianNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
    /// features.
    /// * `y` - vector with target values (classes) of length N.
    /// * `parameters` - additional parameters like class priors.
    ///
    /// Fails when `x` and `y` disagree in size, the data is empty, or the
    /// supplied priors do not match the number of classes.
    pub fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: GaussianNBParameters<T>,
    ) -> Result<Self, Failed> {
        let distribution = GaussianNBDistribution::fit(x, y, parameters.priors)?;
        let inner = BaseNaiveBayes::fit(distribution)?;
        Ok(Self { inner })
    }
    /// Estimates the class labels for the provided data.
    /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
    /// Returns a vector of size N with class estimates.
    pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.inner.predict(x)
    }
    /// Class labels known to the classifier.
    /// Returns a vector of size n_classes.
    pub fn classes(&self) -> &Vec<T> {
        &self.inner.distribution.class_labels
    }
    /// Number of training samples observed in each class.
    /// Returns a vector of size n_classes.
    pub fn class_count(&self) -> &Vec<usize> {
        &self.inner.distribution.class_count
    }
    /// Probability of each class
    /// Returns a vector of size n_classes.
    pub fn class_priors(&self) -> &Vec<T> {
        &self.inner.distribution.class_priors
    }
    /// Mean of each feature per class
    /// Returns a 2d vector of shape (n_classes, n_features).
    pub fn theta(&self) -> &Vec<Vec<T>> {
        &self.inner.distribution.theta
    }
    /// Variance of each feature per class
    /// Returns a 2d vector of shape (n_classes, n_features).
    pub fn var(&self) -> &Vec<Vec<T>> {
        &self.inner.distribution.var
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;
    #[test]
    fn run_gaussian_naive_bayes() {
        // Two symmetric classes: class 2 mirrors class 1 around the origin.
        let x = DenseMatrix::from_2d_array(&[
            &[-1., -1.],
            &[-2., -1.],
            &[-3., -2.],
            &[1., 1.],
            &[2., 1.],
            &[3., 2.],
        ]);
        let y = vec![1., 1., 1., 2., 2., 2.];
        let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
        let y_hat = gnb.predict(&x).unwrap();
        // Training points must be classified back into their own class.
        assert_eq!(y_hat, y);
        assert_eq!(gnb.classes(), &[1., 2.]);
        assert_eq!(gnb.class_count(), &[3, 3]);
        // Mirror symmetry implies identical per-class variances ...
        assert_eq!(
            gnb.var(),
            &[
                &[0.666666666666667, 0.22222222222222232],
                &[0.666666666666667, 0.22222222222222232]
            ]
        );
        assert_eq!(gnb.class_priors(), &[0.5, 0.5]);
        // ... and opposite per-class means.
        assert_eq!(
            gnb.theta(),
            &[&[-2., -1.3333333333333333], &[2., 1.3333333333333333]]
        );
    }
    #[test]
    fn run_gaussian_naive_bayes_with_priors() {
        let x = DenseMatrix::from_2d_array(&[
            &[-1., -1.],
            &[-2., -1.],
            &[-3., -2.],
            &[1., 1.],
            &[2., 1.],
            &[3., 2.],
        ]);
        let y = vec![1., 1., 1., 2., 2., 2.];
        let priors = vec![0.3, 0.7];
        let parameters = GaussianNBParameters::default().with_priors(priors.clone());
        let gnb = GaussianNB::fit(&x, &y, parameters).unwrap();
        // User-supplied priors must be kept verbatim, not re-estimated from the data.
        assert_eq!(gnb.class_priors(), &priors);
    }
    #[test]
    #[cfg(feature = "serde")]
    fn serde() {
        // Round-trip the fitted model through serde_json and compare via PartialEq.
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[-1., -1.],
            &[-2., -1.],
            &[-3., -2.],
            &[1., 1.],
            &[2., 1.],
            &[3., 2.],
        ]);
        let y = vec![1., 1., 1., 2., 2., 2.];
        let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
        let deserialized_gnb: GaussianNB<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&gnb).unwrap()).unwrap();
        assert_eq!(gnb, deserialized_gnb);
    }
}
+44 -3
View File
@@ -1,7 +1,45 @@
//! # Naive Bayes
//!
//! Naive Bayes (NB) is a simple but powerful machine learning algorithm.
//! Naive Bayes classifier is based on Bayes Theorem with an assumption of conditional independence
//! between every pair of features given the value of the class variable.
//!
//! Bayes theorem can be written as
//!
//! \\[ P(y | X) = \frac{P(y)P(X| y)}{P(X)} \\]
//!
//! where
//!
//! * \\(X = (x_1,...x_n)\\) represents the predictors.
//! * \\(P(y | X)\\) is the probability of class _y_ given the data X
//! * \\(P(X| y)\\) is the probability of data X given the class _y_.
//! * \\(P(y)\\) is the probability of class y. This is called the prior probability of y.
//! * \\(P(X)\\) is the probability of the data (regardless of the class value).
//!
//! The naive conditional independence assumption let us rewrite this equation as
//!
//! \\[ P(y | x_1,...x_n) = \frac{P(y)\prod_{i=1}^nP(x_i|y)}{P(x_1,...x_n)} \\]
//!
//!
//! The denominator can be removed since \\(P(x_1,...x_n)\\) is constant for all the entries in the dataset.
//!
//! \\[ P(y | x_1,...x_n) \propto P(y)\prod_{i=1}^nP(x_i|y) \\]
//!
//! To find class y from predictors X we use this equation
//!
//! \\[ y = \underset{y}{argmax} P(y)\prod_{i=1}^nP(x_i|y) \\]
//!
//! ## References:
//!
//! * ["Machine Learning: A Probabilistic Perspective", Kevin P. Murphy, 2012, Chapter 3 ](https://mitpress.mit.edu/books/machine-learning-1)
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
@@ -18,7 +56,8 @@ pub(crate) trait NBDistribution<T: RealNumber, M: Matrix<T>> {
}
/// Base struct for the Naive Bayes classifier.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub(crate) struct BaseNaiveBayes<T: RealNumber, M: Matrix<T>, D: NBDistribution<T, M>> {
distribution: D,
_phantom_t: PhantomData<T>,
@@ -64,5 +103,7 @@ impl<T: RealNumber, M: Matrix<T>, D: NBDistribution<T, M>> BaseNaiveBayes<T, M,
Ok(y_hat)
}
}
mod categorical;
pub use categorical::{CategoricalNB, CategoricalNBParameters};
pub mod bernoulli;
pub mod categorical;
pub mod gaussian;
pub mod multinomial;
+431
View File
@@ -0,0 +1,431 @@
//! # Multinomial Naive Bayes
//!
//! Multinomial Naive Bayes classifier is a variant of [Naive Bayes](../index.html) for the multinomially distributed data.
//! It is often used for discrete data with predictors representing the number of times an event was observed in a particular instance,
//! for example frequency of the words present in the document.
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::naive_bayes::multinomial::MultinomialNB;
//!
//! // Training data points are:
//! // Chinese Beijing Chinese (class: China)
//! // Chinese Chinese Shanghai (class: China)
//! // Chinese Macao (class: China)
//! // Tokyo Japan Chinese (class: Japan)
//! let x = DenseMatrix::<f64>::from_2d_array(&[
//! &[1., 2., 0., 0., 0., 0.],
//! &[0., 2., 0., 0., 1., 0.],
//! &[0., 1., 0., 1., 0., 0.],
//! &[0., 1., 1., 0., 0., 1.],
//! ]);
//! let y = vec![0., 0., 0., 1.];
//! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
//!
//! // Testing data point is:
//! // Chinese Chinese Chinese Tokyo Japan
//! let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 3., 1., 0., 0., 1.]]);
//! let y_hat = nb.predict(&x_test).unwrap();
//! ```
//!
//! ## References:
//!
//! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13 ](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::row_iter;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for Multinomial features
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
struct MultinomialNBDistribution<T: RealNumber> {
    /// class labels known to the classifier
    class_labels: Vec<T>,
    /// number of training samples observed in each class
    class_count: Vec<usize>,
    /// probability of each class
    class_priors: Vec<T>,
    /// Empirical log probability of features given a class,
    /// shape (n_classes, n_features), smoothed with alpha during fit
    feature_log_prob: Vec<Vec<T>>,
    /// Number of samples encountered for each (class, feature)
    feature_count: Vec<Vec<usize>>,
    /// Number of features of each sample
    n_features: usize,
}
impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribution<T> {
    /// Prior probability of the class at `class_index`.
    fn prior(&self, class_index: usize) -> T {
        self.class_priors[class_index]
    }

    /// Log likelihood of sample `j` under the class: the dot product of the
    /// sample's feature counts with the class's per-feature log probabilities.
    fn log_likelihood(&self, class_index: usize, j: &M::RowVector) -> T {
        let log_probs = &self.feature_log_prob[class_index];
        (0..j.len()).fold(T::zero(), |acc, feature| {
            acc + j.get(feature) * log_probs[feature]
        })
    }

    /// Class labels known to the distribution.
    fn classes(&self) -> &Vec<T> {
        &self.class_labels
    }
}
/// `MultinomialNB` parameters. Use `Default::default()` for default values.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct MultinomialNBParameters<T: RealNumber> {
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: T,
    /// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
    /// (`None` means priors are estimated from class frequencies during fit).
    pub priors: Option<Vec<T>>,
}
impl<T: RealNumber> MultinomialNBParameters<T> {
    /// Builder-style setter for the additive (Laplace/Lidstone) smoothing
    /// parameter (0 for no smoothing).
    pub fn with_alpha(self, alpha: T) -> Self {
        Self { alpha, ..self }
    }
    /// Builder-style setter for fixed class priors; when set, priors are not
    /// adjusted according to the data.
    pub fn with_priors(self, priors: Vec<T>) -> Self {
        Self {
            priors: Some(priors),
            ..self
        }
    }
}
impl<T: RealNumber> Default for MultinomialNBParameters<T> {
    /// Defaults: Laplace smoothing (`alpha = 1`) and priors estimated from the data.
    fn default() -> Self {
        Self {
            alpha: T::one(),
            priors: None,
        }
    }
}
impl<T: RealNumber> MultinomialNBDistribution<T> {
    /// Fits the distribution to a NxM matrix where N is number of samples and M is number of features.
    /// * `x` - training data.
    /// * `y` - vector with target values (classes) of length N.
    /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    /// priors are adjusted according to the data.
    ///
    /// Returns an error when `x` and `y` disagree in size, the data is empty,
    /// `alpha` is negative, or the provided `priors` do not match the number of classes.
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
        alpha: T,
        priors: Option<Vec<T>>,
    ) -> Result<Self, Failed> {
        let (n_samples, n_features) = x.shape();
        let y_samples = y.len();
        if y_samples != n_samples {
            return Err(Failed::fit(&format!(
                "Size of x should equal size of y; |x|=[{}], |y|=[{}]",
                n_samples, y_samples
            )));
        }

        if n_samples == 0 {
            // Fixed grammar of the error message ("should greater" -> "should be greater").
            return Err(Failed::fit(&format!(
                "Size of x and y should be greater than 0; |x|=[{}]",
                n_samples
            )));
        }
        // alpha == 0 is valid (no smoothing); only negative values are rejected.
        // The previous message ("should be greater than 0") contradicted the check.
        if alpha < T::zero() {
            return Err(Failed::fit(&format!(
                "Alpha should not be negative; |alpha|=[{}]",
                alpha
            )));
        }

        let y = y.to_vec();
        let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
        let mut class_count = vec![0_usize; class_labels.len()];
        for class_index in indices.iter() {
            class_count[*class_index] += 1;
        }

        // Use the user-supplied priors when given, otherwise estimate them
        // from the class frequencies in `y`.
        let class_priors = if let Some(class_priors) = priors {
            if class_priors.len() != class_labels.len() {
                return Err(Failed::fit(
                    "Size of priors provided does not match the number of classes of the data.",
                ));
            }
            class_priors
        } else {
            class_count
                .iter()
                .map(|&c| T::from(c).unwrap() / T::from(n_samples).unwrap())
                .collect()
        };

        // Accumulate per-(class, feature) event counts from the rows of `x`.
        let mut feature_in_class_counter = vec![vec![0_usize; n_features]; class_labels.len()];
        for (row, class_index) in row_iter(x).zip(indices) {
            for (idx, row_i) in row.iter().enumerate().take(n_features) {
                feature_in_class_counter[class_index][idx] +=
                    row_i.to_usize().ok_or_else(|| {
                        Failed::fit(&format!(
                            "Elements of the matrix should be convertible to usize |found|=[{}]",
                            row_i
                        ))
                    })?;
            }
        }

        // Smoothed log P(feature|class): ln((count + alpha) / (total + alpha * n_features)).
        let feature_log_prob = feature_in_class_counter
            .iter()
            .map(|feature_count| {
                let n_c: usize = feature_count.iter().sum();
                feature_count
                    .iter()
                    .map(|&count| {
                        ((T::from(count).unwrap() + alpha)
                            / (T::from(n_c).unwrap() + alpha * T::from(n_features).unwrap()))
                        .ln()
                    })
                    .collect()
            })
            .collect();

        Ok(Self {
            class_count,
            class_labels,
            class_priors,
            feature_log_prob,
            feature_count: feature_in_class_counter,
            n_features,
        })
    }
}
/// MultinomialNB implements the multinomial naive Bayes algorithm for multinomially distributed data.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct MultinomialNB<T: RealNumber, M: Matrix<T>> {
    /// Generic naive Bayes machinery parameterized with the multinomial distribution.
    inner: BaseNaiveBayes<T, M, MultinomialNBDistribution<T>>,
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, MultinomialNBParameters<T>>
    for MultinomialNB<T, M>
{
    /// Delegates to the inherent [`MultinomialNB::fit`].
    fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: MultinomialNBParameters<T>,
    ) -> Result<Self, Failed> {
        MultinomialNB::fit(x, y, parameters)
    }
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for MultinomialNB<T, M> {
    /// Delegates to the inherent [`MultinomialNB::predict`].
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}
impl<T: RealNumber, M: Matrix<T>> MultinomialNB<T, M> {
    /// Fits MultinomialNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
    /// features.
    /// * `y` - vector with target values (classes) of length N.
    /// * `parameters` - additional parameters like class priors and alpha for smoothing.
    pub fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: MultinomialNBParameters<T>,
    ) -> Result<Self, Failed> {
        let distribution =
            MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?;
        let inner = BaseNaiveBayes::fit(distribution)?;
        Ok(Self { inner })
    }
    /// Estimates the class labels for the provided data.
    /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
    /// Returns a vector of size N with class estimates.
    pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.inner.predict(x)
    }
    /// Class labels known to the classifier.
    /// Returns a vector of size n_classes.
    pub fn classes(&self) -> &Vec<T> {
        &self.inner.distribution.class_labels
    }
    /// Number of training samples observed in each class.
    /// Returns a vector of size n_classes.
    pub fn class_count(&self) -> &Vec<usize> {
        &self.inner.distribution.class_count
    }
    /// Empirical log probability of features given a class, P(x_i|y).
    /// Returns a 2d vector of shape (n_classes, n_features)
    pub fn feature_log_prob(&self) -> &Vec<Vec<T>> {
        &self.inner.distribution.feature_log_prob
    }
    /// Number of features of each sample
    pub fn n_features(&self) -> usize {
        self.inner.distribution.n_features
    }
    /// Number of samples encountered for each (class, feature)
    /// Returns a 2d vector of shape (n_classes, n_features)
    pub fn feature_count(&self) -> &Vec<Vec<usize>> {
        &self.inner.distribution.feature_count
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[test]
fn run_multinomial_naive_bayes() {
// Tests that MultinomialNB when alpha=1.0 gives the same values as
// those given for the toy example in Manning, Raghavan, and
// Schuetze's "Introduction to Information Retrieval" book:
// https://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
// Training data points are:
// Chinese Beijing Chinese (class: China)
// Chinese Chinese Shanghai (class: China)
// Chinese Macao (class: China)
// Tokyo Japan Chinese (class: Japan)
let x = DenseMatrix::<f64>::from_2d_array(&[
&[1., 2., 0., 0., 0., 0.],
&[0., 2., 0., 0., 1., 0.],
&[0., 1., 0., 1., 0., 0.],
&[0., 1., 1., 0., 0., 1.],
]);
let y = vec![0., 0., 0., 1.];
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
assert_eq!(mnb.classes(), &[0., 1.]);
assert_eq!(mnb.class_count(), &[3, 1]);
assert_eq!(mnb.inner.distribution.class_priors, &[0.75, 0.25]);
assert_eq!(
mnb.feature_log_prob(),
&[
&[
(1_f64 / 7_f64).ln(),
(3_f64 / 7_f64).ln(),
(1_f64 / 14_f64).ln(),
(1_f64 / 7_f64).ln(),
(1_f64 / 7_f64).ln(),
(1_f64 / 14_f64).ln()
],
&[
(1_f64 / 9_f64).ln(),
(2_f64 / 9_f64).ln(),
(2_f64 / 9_f64).ln(),
(1_f64 / 9_f64).ln(),
(1_f64 / 9_f64).ln(),
(2_f64 / 9_f64).ln()
]
]
);
// Testing data point is:
// Chinese Chinese Chinese Tokyo Japan
let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 3., 1., 0., 0., 1.]]);
let y_hat = mnb.predict(&x_test).unwrap();
assert_eq!(y_hat, &[0.]);
}
#[test]
fn multinomial_nb_scikit_parity() {
    // Parity check against scikit-learn's MultinomialNB on a small count
    // matrix: 15 samples x 10 features, 3 classes (labels 0., 1., 2.).
    let x = DenseMatrix::<f64>::from_2d_array(&[
        &[2., 4., 0., 0., 2., 1., 2., 4., 2., 0.],
        &[3., 4., 0., 2., 1., 0., 1., 4., 0., 3.],
        &[1., 4., 2., 4., 1., 0., 1., 2., 3., 2.],
        &[0., 3., 3., 4., 1., 0., 3., 1., 1., 1.],
        &[0., 2., 1., 4., 3., 4., 1., 2., 3., 1.],
        &[3., 2., 4., 1., 3., 0., 2., 4., 0., 2.],
        &[3., 1., 3., 0., 2., 0., 4., 4., 3., 4.],
        &[2., 2., 2., 0., 1., 1., 2., 1., 0., 1.],
        &[3., 3., 2., 2., 0., 2., 3., 2., 2., 3.],
        &[4., 3., 4., 4., 4., 2., 2., 0., 1., 4.],
        &[3., 4., 2., 2., 1., 4., 4., 4., 1., 3.],
        &[3., 0., 1., 4., 4., 0., 0., 3., 2., 4.],
        &[2., 0., 3., 3., 1., 2., 0., 2., 4., 1.],
        &[2., 4., 0., 4., 2., 4., 1., 3., 1., 4.],
        &[0., 2., 2., 3., 4., 0., 4., 4., 4., 4.],
    ]);
    let y = vec![2., 2., 0., 0., 0., 2., 1., 1., 0., 1., 0., 0., 2., 0., 2.];
    let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
    assert_eq!(nb.n_features(), 10);
    // Per-class feature counts: row i holds the per-feature sums over all
    // training samples of class i (classes sorted ascending: 0, 1, 2).
    assert_eq!(
        nb.feature_count(),
        &[
            &[12, 20, 11, 24, 12, 14, 13, 17, 13, 18],
            &[9, 6, 9, 4, 7, 3, 8, 5, 4, 9],
            &[10, 12, 9, 9, 11, 3, 9, 18, 10, 10]
        ]
    );
    let y_hat = nb.predict(&x).unwrap();
    // Class priors are the empirical class frequencies: 7/15, 3/15, 5/15.
    assert!(nb
        .inner
        .distribution
        .class_priors
        .approximate_eq(&vec!(0.46, 0.2, 0.33), 1e-2));
    // Smoothed log-probabilities for class 1; values match scikit-learn's
    // defaults (Laplace smoothing, alpha = 1: ln((count + 1) / (64 + 10))).
    assert!(nb.feature_log_prob()[1].approximate_eq(
        &vec![
            -2.00148,
            -2.35815494,
            -2.00148,
            -2.69462718,
            -2.22462355,
            -2.91777073,
            -2.10684052,
            -2.51230562,
            -2.69462718,
            -2.00148
        ],
        1e-5
    ));
    // Predictions on the training set (from scikit-learn); note they need
    // not match the training labels exactly (e.g. samples 6 and 11).
    assert!(y_hat.approximate_eq(
        &vec!(2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0),
        1e-5
    ));
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
    // Round-trip a fitted MultinomialNB through serde_json and check that
    // the deserialized model compares equal to the original.
    let x = DenseMatrix::<f64>::from_2d_array(&[
        &[1., 1., 0., 0., 0., 0.],
        &[0., 1., 0., 0., 1., 0.],
        &[0., 1., 0., 1., 0., 0.],
        &[0., 1., 1., 0., 0., 1.],
    ]);
    let y = vec![0., 0., 0., 1.];
    let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
    let deserialized_mnb: MultinomialNB<f64, DenseMatrix<f64>> =
        serde_json::from_str(&serde_json::to_string(&mnb).unwrap()).unwrap();
    assert_eq!(mnb, deserialized_mnb);
}
}
+84 -23
View File
@@ -25,35 +25,47 @@
//! &[9., 10.]]);
//! let y = vec![2., 2., 2., 3., 3.]; //your class labels
//!
//! let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
//! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = knn.predict(&x).unwrap();
//! ```
//!
//! variable `y_hat` will hold a vector with estimates of class labels
//!
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::{row_iter, Matrix};
use crate::math::distance::Distance;
use crate::math::distance::euclidian::Euclidian;
use crate::math::distance::{Distance, Distances};
use crate::math::num::RealNumber;
use crate::neighbors::KNNWeightFunction;
/// `KNNClassifier` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug)]
pub struct KNNClassifierParameters {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub distance: D,
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
pub algorithm: KNNAlgorithmName,
/// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
pub weight: KNNWeightFunction,
/// number of training samples to consider when estimating class for new point. Default value is 3.
pub k: usize,
/// this parameter is not used
t: PhantomData<T>,
}
/// K Nearest Neighbors Classifier
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KNNClassifier<T: RealNumber, D: Distance<Vec<T>, T>> {
classes: Vec<T>,
y: Vec<usize>,
@@ -62,12 +74,47 @@ pub struct KNNClassifier<T: RealNumber, D: Distance<Vec<T>, T>> {
k: usize,
}
impl Default for KNNClassifierParameters {
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifierParameters<T, D> {
/// number of training samples to consider when estimating class for new point. Default value is 3.
pub fn with_k(mut self, k: usize) -> Self {
self.k = k;
self
}
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub fn with_distance<DD: Distance<Vec<T>, T>>(
self,
distance: DD,
) -> KNNClassifierParameters<T, DD> {
KNNClassifierParameters {
distance,
algorithm: self.algorithm,
weight: self.weight,
k: self.k,
t: PhantomData,
}
}
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
self.algorithm = algorithm;
self
}
/// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
pub fn with_weight(mut self, weight: KNNWeightFunction) -> Self {
self.weight = weight;
self
}
}
impl<T: RealNumber> Default for KNNClassifierParameters<T, Euclidian> {
fn default() -> Self {
KNNClassifierParameters {
distance: Distances::euclidian(),
algorithm: KNNAlgorithmName::CoverTree,
weight: KNNWeightFunction::Uniform,
k: 3,
t: PhantomData,
}
}
}
@@ -95,19 +142,35 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNClassifier<T, D> {
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>>
SupervisedEstimator<M, M::RowVector, KNNClassifierParameters<T, D>> for KNNClassifier<T, D>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: KNNClassifierParameters<T, D>,
) -> Result<Self, Failed> {
KNNClassifier::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
for KNNClassifier<T, D>
{
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
/// Fits KNN classifier to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data
/// * `y` - vector with target values (classes) of length N
/// * `distance` - a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
/// * `parameters` - additional parameters like search algorithm and k
pub fn fit<M: Matrix<T>>(
x: &M,
y: &M::RowVector,
distance: D,
parameters: KNNClassifierParameters,
parameters: KNNClassifierParameters<T, D>,
) -> Result<KNNClassifier<T, D>, Failed> {
let y_m = M::from_row_vector(y.clone());
@@ -119,9 +182,9 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
let mut yi: Vec<usize> = vec![0; y_n];
let classes = y_m.unique();
for i in 0..y_n {
for (i, yi_i) in yi.iter_mut().enumerate().take(y_n) {
let yc = y_m.get(0, i);
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
*yi_i = classes.iter().position(|c| yc == *c).unwrap();
}
if x_n != y_n {
@@ -142,7 +205,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
classes,
y: yi,
k: parameters.k,
knn_algorithm: parameters.algorithm.fit(data, distance)?,
knn_algorithm: parameters.algorithm.fit(data, parameters.distance)?,
weight: parameters.weight,
})
}
@@ -187,14 +250,13 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNClassifier<T, D> {
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::math::distance::Distances;
#[test]
fn knn_fit_predict() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y = vec![2., 2., 2., 3., 3.];
let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
let y_hat = knn.predict(&x).unwrap();
assert_eq!(5, Vec::len(&y_hat));
assert_eq!(y.to_vec(), y_hat);
@@ -207,12 +269,10 @@ mod tests {
let knn = KNNClassifier::fit(
&x,
&y,
Distances::euclidian(),
KNNClassifierParameters {
k: 5,
algorithm: KNNAlgorithmName::LinearSearch,
weight: KNNWeightFunction::Distance,
},
KNNClassifierParameters::default()
.with_k(5)
.with_algorithm(KNNAlgorithmName::LinearSearch)
.with_weight(KNNWeightFunction::Distance),
)
.unwrap();
let y_hat = knn.predict(&DenseMatrix::from_2d_array(&[&[4.1]])).unwrap();
@@ -220,12 +280,13 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y = vec![2., 2., 2., 3., 3.];
let knn = KNNClassifier::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap();
let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap();
+84 -20
View File
@@ -27,35 +27,48 @@
//! &[5., 5.]]);
//! let y = vec![1., 2., 3., 4., 5.]; //your target values
//!
//! let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
//! let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
//! let y_hat = knn.predict(&x).unwrap();
//! ```
//!
//! variable `y_hat` will hold predicted value
//!
//!
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::{row_iter, BaseVector, Matrix};
use crate::math::distance::Distance;
use crate::math::distance::euclidian::Euclidian;
use crate::math::distance::{Distance, Distances};
use crate::math::num::RealNumber;
use crate::neighbors::KNNWeightFunction;
/// `KNNRegressor` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug)]
pub struct KNNRegressorParameters {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
distance: D,
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
pub algorithm: KNNAlgorithmName,
/// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
pub weight: KNNWeightFunction,
/// number of training samples to consider when estimating class for new point. Default value is 3.
pub k: usize,
/// this parameter is not used
t: PhantomData<T>,
}
/// K Nearest Neighbors Regressor
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KNNRegressor<T: RealNumber, D: Distance<Vec<T>, T>> {
y: Vec<T>,
knn_algorithm: KNNAlgorithm<T, D>,
@@ -63,12 +76,47 @@ pub struct KNNRegressor<T: RealNumber, D: Distance<Vec<T>, T>> {
k: usize,
}
impl Default for KNNRegressorParameters {
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressorParameters<T, D> {
/// number of training samples to consider when estimating class for new point. Default value is 3.
pub fn with_k(mut self, k: usize) -> Self {
self.k = k;
self
}
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub fn with_distance<DD: Distance<Vec<T>, T>>(
self,
distance: DD,
) -> KNNRegressorParameters<T, DD> {
KNNRegressorParameters {
distance,
algorithm: self.algorithm,
weight: self.weight,
k: self.k,
t: PhantomData,
}
}
/// backend search algorithm. See [`knn search algorithms`](../../algorithm/neighbour/index.html). `CoverTree` is default.
pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self {
self.algorithm = algorithm;
self
}
/// weighting function that is used to calculate estimated class value. Default function is `KNNWeightFunction::Uniform`.
pub fn with_weight(mut self, weight: KNNWeightFunction) -> Self {
self.weight = weight;
self
}
}
impl<T: RealNumber> Default for KNNRegressorParameters<T, Euclidian> {
fn default() -> Self {
KNNRegressorParameters {
distance: Distances::euclidian(),
algorithm: KNNAlgorithmName::CoverTree,
weight: KNNWeightFunction::Uniform,
k: 3,
t: PhantomData,
}
}
}
@@ -88,19 +136,35 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> PartialEq for KNNRegressor<T, D> {
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>>
SupervisedEstimator<M, M::RowVector, KNNRegressorParameters<T, D>> for KNNRegressor<T, D>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: KNNRegressorParameters<T, D>,
) -> Result<Self, Failed> {
KNNRegressor::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>, D: Distance<Vec<T>, T>> Predictor<M, M::RowVector>
for KNNRegressor<T, D>
{
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
/// Fits KNN regressor to a NxM matrix where N is number of samples and M is number of features.
/// * `x` - training data
/// * `y` - vector with real values
/// * `distance` - a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
/// * `parameters` - additional parameters like search algorithm and k
pub fn fit<M: Matrix<T>>(
x: &M,
y: &M::RowVector,
distance: D,
parameters: KNNRegressorParameters,
parameters: KNNRegressorParameters<T, D>,
) -> Result<KNNRegressor<T, D>, Failed> {
let y_m = M::from_row_vector(y.clone());
@@ -126,7 +190,7 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> KNNRegressor<T, D> {
Ok(KNNRegressor {
y: y.to_vec(),
k: parameters.k,
knn_algorithm: parameters.algorithm.fit(data, distance)?,
knn_algorithm: parameters.algorithm.fit(data, parameters.distance)?,
weight: parameters.weight,
})
}
@@ -176,12 +240,11 @@ mod tests {
let knn = KNNRegressor::fit(
&x,
&y,
Distances::euclidian(),
KNNRegressorParameters {
k: 3,
algorithm: KNNAlgorithmName::LinearSearch,
weight: KNNWeightFunction::Distance,
},
KNNRegressorParameters::default()
.with_k(3)
.with_distance(Distances::euclidian())
.with_algorithm(KNNAlgorithmName::LinearSearch)
.with_weight(KNNWeightFunction::Distance),
)
.unwrap();
let y_hat = knn.predict(&x).unwrap();
@@ -197,7 +260,7 @@ mod tests {
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y: Vec<f64> = vec![1., 2., 3., 4., 5.];
let y_exp = vec![2., 2., 3., 4., 4.];
let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
let y_hat = knn.predict(&x).unwrap();
assert_eq!(5, Vec::len(&y_hat));
for i in 0..y_hat.len() {
@@ -206,12 +269,13 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
let y = vec![1., 2., 3., 4., 5.];
let knn = KNNRegressor::fit(&x, &y, Distances::euclidian(), Default::default()).unwrap();
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap();
let deserialized_knn = bincode::deserialize(&bincode::serialize(&knn).unwrap()).unwrap();
+3 -1
View File
@@ -33,6 +33,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// K Nearest Neighbors Classifier
@@ -48,7 +49,8 @@ pub mod knn_regressor;
pub type KNNAlgorithmName = crate::algorithm::neighbour::KNNAlgorithmName;
/// Weight function that is used to determine estimated value.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub enum KNNWeightFunction {
/// All k nearest points are weighted equally
Uniform,
+1
View File
@@ -1,3 +1,4 @@
#![allow(clippy::suspicious_operation_groupings)]
use std::default::Default;
use std::fmt::Debug;
+1 -1
View File
@@ -41,7 +41,7 @@ impl<T: Float> Default for Backtracking<T> {
}
impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
fn search<'a>(
fn search(
&self,
f: &(dyn Fn(T) -> T),
_: &(dyn Fn(T) -> T),
+329
View File
@@ -0,0 +1,329 @@
//! # One-hot Encoding For [RealNumber](../../math/num/trait.RealNumber.html) Matrices
//! Transform a data [Matrix](../../linalg/trait.BaseMatrix.html) by replacing all categorical variables with their one-hot equivalents
//!
//! Internally OneHotEncoder treats every categorical column as a series and transforms it using [CategoryMapper](../series_encoder/struct.CategoryMapper.html)
//!
//! ### Usage Example
//! ```
//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
//! use smartcore::preprocessing::categorical::{OneHotEncoder, OneHotEncoderParams};
//! let data = DenseMatrix::from_2d_array(&[
//! &[1.5, 1.0, 1.5, 3.0],
//! &[1.5, 2.0, 1.5, 4.0],
//! &[1.5, 1.0, 1.5, 5.0],
//! &[1.5, 2.0, 1.5, 6.0],
//! ]);
//! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
//! // Infer number of categories from data and return a reusable encoder
//! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap();
//! // Transform categorical to one-hot encoded (can transform similar)
//! let oh_data = encoder.transform(&data).unwrap();
//! // Produces the following:
//! // &[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0]
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0]
//! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
//! ```
use std::iter;
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::preprocessing::data_traits::{CategoricalFloat, Categorizable};
use crate::preprocessing::series_encoder::CategoryMapper;
/// OneHotEncoder Parameters
/// `OneHotEncoder` parameters: selects which matrix columns are treated as categorical.
#[derive(Debug, Clone)]
pub struct OneHotEncoderParams {
    /// Column numbers that contain categorical variables
    pub col_idx_categorical: Option<Vec<usize>>,
    /// (Currently not implemented) Try and infer which of the matrix columns are categorical variables
    infer_categorical: bool,
}
impl OneHotEncoderParams {
    /// Build parameters from the column indices of the categorical variables.
    /// Category inference stays disabled; the given indices are copied.
    pub fn from_cat_idx(categorical_params: &[usize]) -> Self {
        Self {
            infer_categorical: false,
            col_idx_categorical: Some(Vec::from(categorical_params)),
        }
    }
}
/// Calculate the new column index of each original column after the
/// categorical columns are expanded to one-hot vectors.
///
/// * `num_params` - number of columns in the original matrix
/// * `cat_sizes` - number of categories for each categorical column
/// * `cat_idxs` - sorted indices of the categorical columns
///
/// A categorical column with `v` categories is replaced by `v` one-hot
/// columns, shifting every subsequent column right by `v - 1`.
fn find_new_idxs(num_params: usize, cat_sizes: &[usize], cat_idxs: &[usize]) -> Vec<usize> {
    // This function uses iterators and returns a vector.
    // In case we get a huge amount of parameters this might be a problem;
    // todo: change this such that it will return an iterator.
    //
    // The sentinel `num_params` closes the final run of columns after the
    // last categorical column.
    let cat_idx = cat_idxs.iter().copied().chain(iter::once(num_params));

    // The offset is constant between two consecutive categorical columns;
    // compute the length of each constant-offset run.
    let repeats = cat_idx.scan(0, |prev, v| {
        let run_len = v + 1 - *prev;
        *prev = v;
        Some(run_len)
    });

    // Cumulative shift introduced by the one-hot expansions seen so far
    // (each expansion of size v adds v - 1 columns); first run is unshifted.
    let offsets = (0..1).chain(cat_sizes.iter().scan(0, |acc, &size| {
        *acc = *acc + size - 1;
        Some(*acc)
    }));

    // Expand each (run length, offset) pair and add the offset to each index.
    (0..num_params)
        .zip(repeats.zip(offsets).flat_map(|(r, o)| iter::repeat(o).take(r)))
        .map(|(idx, ofst)| idx + ofst)
        .collect()
}
/// Returns `true` when every value in `data` can be treated as a category
/// (see `Categorizable::is_valid`); `false` at the first invalid value.
fn validate_col_is_categorical<T: Categorizable>(data: &[T]) -> bool {
    // Iterator form of the manual early-return loop; `all` short-circuits
    // on the first invalid element just like the original.
    data.iter().all(|v| v.is_valid())
}
/// Encode categorical variables of a data matrix to one-hot
#[derive(Debug, Clone)]
pub struct OneHotEncoder {
    /// One fitted category mapper per categorical column, in column order.
    category_mappers: Vec<CategoryMapper<CategoricalFloat>>,
    /// Sorted indices of the categorical columns in the input matrix.
    col_idx_categorical: Vec<usize>,
}
impl OneHotEncoder {
    /// Create an encoder instance with categories inferred from the data matrix.
    ///
    /// * `data` - matrix to scan; each requested column is mapped to its set of categories
    /// * `params` - must carry the categorical column indices
    ///   (`infer_categorical` is not implemented yet)
    pub fn fit<T, M>(data: &M, params: OneHotEncoderParams) -> Result<OneHotEncoder, Failed>
    where
        T: Categorizable,
        M: Matrix<T>,
    {
        match (params.col_idx_categorical, params.infer_categorical) {
            (None, false) => Err(Failed::fit(
                "Must pass categorical series ids or infer flag",
            )),
            (Some(_idxs), true) => Err(Failed::fit(
                "Ambigous parameters, got both infer and categroy ids",
            )),
            (Some(mut idxs), false) => {
                // make sure categories have same order as data columns
                idxs.sort_unstable();

                let (nrows, _) = data.shape();

                // column buffer, reused across columns to avoid allocations
                let mut col_buf: Vec<T> = iter::repeat(T::zero()).take(nrows).collect();

                let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());

                for &idx in &idxs {
                    data.copy_col_as_vec(idx, &mut col_buf);
                    // reject columns holding non-integer values
                    if !validate_col_is_categorical(&col_buf) {
                        let msg = format!(
                            "Column {} of data matrix containts non categorizable (integer) values",
                            idx
                        );
                        return Err(Failed::fit(&msg[..]));
                    }
                    let hashable_col = col_buf.iter().map(|v| v.to_category());
                    res.push(CategoryMapper::fit_to_iter(hashable_col));
                }

                Ok(Self {
                    category_mappers: res,
                    col_idx_categorical: idxs,
                })
            }
            (None, true) => {
                todo!("Auto-Inference for Categorical Variables not yet implemented")
            }
        }
    }

    /// Transform categorical variables to one-hot encoded and return a new matrix
    pub fn transform<T, M>(&self, x: &M) -> Result<M, Failed>
    where
        T: Categorizable,
        M: Matrix<T>,
    {
        let (nrows, p) = x.shape();
        // number of categories per fitted categorical column
        let additional_params: Vec<usize> = self
            .category_mappers
            .iter()
            .map(|enc| enc.num_categories())
            .collect();

        // Each category of size v adds v-1 params
        let expandws_p: usize = p + additional_params.iter().fold(0, |cs, &v| cs + v - 1);

        // where each original column lands in the expanded matrix
        let new_col_idx = find_new_idxs(p, &additional_params[..], &self.col_idx_categorical[..]);

        let mut res = M::zeros(nrows, expandws_p);

        // First pass: write the one-hot expansion of every categorical column.
        for (pidx, &old_cidx) in self.col_idx_categorical.iter().enumerate() {
            let cidx = new_col_idx[old_cidx];
            let col_iter = (0..nrows).map(|r| x.get(r, old_cidx).to_category());
            let sencoder = &self.category_mappers[pidx];
            let oh_series = col_iter.map(|c| sencoder.get_one_hot::<T, Vec<T>>(&c));
            for (row, oh_vec) in oh_series.enumerate() {
                match oh_vec {
                    None => {
                        // A category not seen during fit yields None; the
                        // whole transform is then invalid.
                        let msg = format!("At least one value in column {} doesn't conform to category definition", old_cidx);
                        return Err(Failed::transform(&msg[..]));
                    }
                    Some(v) => {
                        // copy the one-hot vector into its slot of the result matrix
                        for (col_ofst, &val) in v.iter().enumerate() {
                            res.set(row, cidx + col_ofst, val);
                        }
                    }
                }
            }
        }

        // Second pass: copy the remaining data from x to its new location,
        // skipping categorical columns (already handled above).
        let mut skip_idx_iter = self.col_idx_categorical.iter();
        let mut cur_skip = skip_idx_iter.next();

        for (old_p, &new_p) in new_col_idx.iter().enumerate() {
            // if this column was treated as categorical, skip it
            if let Some(&v) = cur_skip {
                if v == old_p {
                    cur_skip = skip_idx_iter.next();
                    continue;
                }
            }
            for r in 0..nrows {
                let val = x.get(r, old_p);
                res.set(r, new_p, val);
            }
        }

        Ok(res)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;
    use crate::preprocessing::series_encoder::CategoryMapper;

    #[test]
    fn adjust_idxs() {
        assert_eq!(find_new_idxs(0, &[], &[]), Vec::<usize>::new());
        // [0,1,2] -> [0, 1, 1, 1, 2]
        assert_eq!(find_new_idxs(3, &[3], &[1]), vec![0, 1, 4]);
    }

    /// Fixture: categorical columns in the first and last positions,
    /// paired with the expected one-hot expansion.
    fn build_cat_first_and_last() -> (DenseMatrix<f64>, DenseMatrix<f64>) {
        let orig = DenseMatrix::from_2d_array(&[
            &[1.0, 1.5, 3.0],
            &[2.0, 1.5, 4.0],
            &[1.0, 1.5, 5.0],
            &[2.0, 1.5, 6.0],
        ]);
        let oh_enc = DenseMatrix::from_2d_array(&[
            &[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
            &[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
            &[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
            &[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
        ]);
        (orig, oh_enc)
    }

    /// Fixture: categorical columns at indices 1 and 3, paired with the
    /// expected one-hot expansion.
    fn build_fake_matrix() -> (DenseMatrix<f64>, DenseMatrix<f64>) {
        let orig = DenseMatrix::from_2d_array(&[
            &[1.5, 1.0, 1.5, 3.0],
            &[1.5, 2.0, 1.5, 4.0],
            &[1.5, 1.0, 1.5, 5.0],
            &[1.5, 2.0, 1.5, 6.0],
        ]);
        let oh_enc = DenseMatrix::from_2d_array(&[
            &[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
            &[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
            &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
            &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
        ]);
        (orig, oh_enc)
    }

    #[test]
    fn hash_encode_f64_series() {
        // One-hot position 2 inverts back to the third distinct category (2.0).
        let series = vec![3.0, 1.0, 2.0, 1.0];
        let hashable_series: Vec<CategoricalFloat> =
            series.iter().map(|v| v.to_category()).collect();
        let enc = CategoryMapper::from_positional_category_vec(hashable_series);
        let inv = enc.invert_one_hot(vec![0.0, 0.0, 1.0]);
        let orig_val: f64 = inv.unwrap().into();
        assert_eq!(orig_val, 2.0);
    }

    #[test]
    fn test_fit() {
        // Columns 1 and 3 hold 2 and 4 distinct categories respectively.
        let (x, _) = build_fake_matrix();
        let params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
        let oh_enc = OneHotEncoder::fit(&x, params).unwrap();
        assert_eq!(oh_enc.category_mappers.len(), 2);

        let num_cat: Vec<usize> = oh_enc
            .category_mappers
            .iter()
            .map(|a| a.num_categories())
            .collect();
        assert_eq!(num_cat, vec![2, 4]);
    }

    #[test]
    fn matrix_transform_test() {
        // Categorical columns in the middle of the matrix.
        let (x, expected_x) = build_fake_matrix();
        let params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
        let oh_enc = OneHotEncoder::fit(&x, params).unwrap();
        let nm = oh_enc.transform(&x).unwrap();
        assert_eq!(nm, expected_x);

        // Categorical columns at the matrix edges.
        let (x, expected_x) = build_cat_first_and_last();
        let params = OneHotEncoderParams::from_cat_idx(&[0, 2]);
        let oh_enc = OneHotEncoder::fit(&x, params).unwrap();
        let nm = oh_enc.transform(&x).unwrap();
        assert_eq!(nm, expected_x);
    }

    #[test]
    fn fail_on_bad_category() {
        // Column 1 holds non-integer values (1.5), so fitting it as a
        // categorical column must fail.
        let m = DenseMatrix::from_2d_array(&[
            &[1.0, 1.5, 3.0],
            &[2.0, 1.5, 4.0],
            &[1.0, 1.5, 5.0],
            &[2.0, 1.5, 6.0],
        ]);
        let params = OneHotEncoderParams::from_cat_idx(&[1]);
        // Assert on the Result directly instead of match + assert!(true/false)
        // (clippy: assertions_on_constants).
        assert!(OneHotEncoder::fit(&m, params).is_err());
    }
}
+43
View File
@@ -0,0 +1,43 @@
//! Traits to indicate that float variables can be viewed as categorical
//! This module assumes
use crate::math::num::RealNumber;
/// Numeric type used to represent a category label.
pub type CategoricalFloat = u16;
// pub struct CategoricalFloat(u16);

/// Maximum distance from a whole number for a float to still count as categorical.
const ERROR_MARGIN: f64 = 0.001;

/// A float type whose values can be interpreted as category labels.
pub trait Categorizable: RealNumber {
    /// Associated category representation (currently always `CategoricalFloat`).
    type A;

    /// Convert the value to its category label.
    fn to_category(self) -> CategoricalFloat;
    /// `true` if the value is close enough to a whole number to be a valid category.
    fn is_valid(self) -> bool;
}
impl Categorizable for f32 {
    type A = CategoricalFloat;

    fn to_category(self) -> CategoricalFloat {
        // NOTE(review): `as` truncates toward zero and saturates at the u16
        // bounds, so negative or huge inputs silently map to 0 / u16::MAX;
        // callers are expected to gate on `is_valid` first.
        self as CategoricalFloat
    }
    fn is_valid(self) -> bool {
        // Valid when the round-trip through u16 stays within ERROR_MARGIN.
        let a = self.to_category();
        (a as f32 - self).abs() < (ERROR_MARGIN as f32)
    }
}
impl Categorizable for f64 {
    type A = CategoricalFloat;

    fn to_category(self) -> CategoricalFloat {
        // NOTE(review): `as` truncates toward zero and saturates at the u16
        // bounds; callers are expected to gate on `is_valid` first.
        self as CategoricalFloat
    }
    fn is_valid(self) -> bool {
        // Valid when the round-trip through u16 stays within ERROR_MARGIN.
        let a = self.to_category();
        (a as f64 - self).abs() < ERROR_MARGIN
    }
}
+5
View File
@@ -0,0 +1,5 @@
/// Transform a data matrix by replaceing all categorical variables with their one-hot vector equivalents
pub mod categorical;
mod data_traits;
/// Encode a series (column, array) of categorical variables as one-hot vectors
pub mod series_encoder;
+278
View File
@@ -0,0 +1,278 @@
#![allow(clippy::ptr_arg)]
//! # Series Encoder
//! Encode a series of categorical features as a one-hot numeric array.
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use std::collections::HashMap;
use std::hash::Hash;
/// ## Bi-directional map category <-> label num.
/// Turn Hashable objects into a one-hot vectors or ordinal values.
/// This struct encodes a single class per example
///
/// You can fit_to_iter a category enumeration by passing an iterator of categories.
/// category numbers will be assigned in the order they are encountered
///
/// Example:
/// ```
/// use std::collections::HashMap;
/// use smartcore::preprocessing::series_encoder::CategoryMapper;
///
/// let fake_categories: Vec<usize> = vec![1, 2, 3, 4, 5, 3, 5, 3, 1, 2, 4];
/// let it = fake_categories.iter().map(|&a| a);
/// let enc = CategoryMapper::<usize>::fit_to_iter(it);
/// let oh_vec: Vec<f64> = enc.get_one_hot(&1).unwrap();
/// // notice that 1 is actually a zero-th positional category
/// assert_eq!(oh_vec, vec![1.0, 0.0, 0.0, 0.0, 0.0]);
/// ```
///
/// You can also pass a predefined category enumeration such as a hashmap `HashMap<C, usize>` or a vector `Vec<C>`
///
///
/// ```
/// use std::collections::HashMap;
/// use smartcore::preprocessing::series_encoder::CategoryMapper;
///
/// let category_map: HashMap<&str, usize> =
/// vec![("cat", 2), ("background",0), ("dog", 1)]
/// .into_iter()
/// .collect();
/// let category_vec = vec!["background", "dog", "cat"];
///
/// let enc_lv = CategoryMapper::<&str>::from_positional_category_vec(category_vec);
/// let enc_lm = CategoryMapper::<&str>::from_category_map(category_map);
///
/// // ["background", "dog", "cat"]
/// println!("{:?}", enc_lv.get_categories());
/// let lv: Vec<f64> = enc_lv.get_one_hot(&"dog").unwrap();
/// let lm: Vec<f64> = enc_lm.get_one_hot(&"dog").unwrap();
/// assert_eq!(lv, lm);
/// ```
#[derive(Debug, Clone)]
pub struct CategoryMapper<C> {
    /// category -> category number
    category_map: HashMap<C, usize>,
    /// category number -> category (index equals the number)
    categories: Vec<C>,
    /// total number of distinct categories
    num_categories: usize,
}
impl<C> CategoryMapper<C>
where
    C: Hash + Eq + Clone,
{
    /// Get the number of categories in the mapper
    pub fn num_categories(&self) -> usize {
        self.num_categories
    }

    /// Fit an encoder to a category iterator; category numbers are assigned
    /// in order of first appearance.
    pub fn fit_to_iter(categories: impl Iterator<Item = C>) -> Self {
        let mut category_map: HashMap<C, usize> = HashMap::new();
        let mut category_num = 0usize;
        let mut unique_categories: Vec<C> = Vec::new();

        for l in categories {
            if !category_map.contains_key(&l) {
                category_map.insert(l.clone(), category_num);
                // move `l` into the positional vector (no second clone needed)
                unique_categories.push(l);
                category_num += 1;
            }
        }

        Self {
            category_map,
            num_categories: category_num,
            categories: unique_categories,
        }
    }

    /// Build an encoder from a predefined (category -> class number) map
    pub fn from_category_map(category_map: HashMap<C, usize>) -> Self {
        // Order categories by their assigned number so that position == number.
        let mut numbered: Vec<(C, usize)> =
            category_map.iter().map(|(k, v)| (k.clone(), *v)).collect();
        numbered.sort_by_key(|p| p.1);
        let categories: Vec<C> = numbered.into_iter().map(|a| a.0).collect();
        Self {
            num_categories: categories.len(),
            categories,
            category_map,
        }
    }

    /// Build an encoder from a predefined positional category-class num vector
    pub fn from_positional_category_vec(categories: Vec<C>) -> Self {
        let category_map: HashMap<C, usize> = categories
            .iter()
            .enumerate()
            .map(|(v, k)| (k.clone(), v))
            .collect();
        Self {
            num_categories: categories.len(),
            category_map,
            categories,
        }
    }

    /// Get the category number of a category; `None` for an unknown category.
    pub fn get_num(&self, category: &C) -> Option<&usize> {
        self.category_map.get(category)
    }

    /// Return the category corresponding to a category number.
    ///
    /// # Panics
    /// Panics if `num >= self.num_categories()`.
    pub fn get_cat(&self, num: usize) -> &C {
        &self.categories[num]
    }

    /// List all categories (position = category number)
    pub fn get_categories(&self) -> &[C] {
        &self.categories[..]
    }

    /// Get one-hot encoding of the category; `None` for an unknown category.
    pub fn get_one_hot<U, V>(&self, category: &C) -> Option<V>
    where
        U: RealNumber,
        V: BaseVector<U>,
    {
        // `map` over the lookup instead of an explicit match (clippy: manual_map).
        self.get_num(category)
            .map(|&idx| make_one_hot::<U, V>(idx, self.num_categories))
    }

    /// Invert one-hot vector, back to the category
    ///
    /// # Errors
    /// Fails unless exactly one entry of `one_hot` equals one.
    pub fn invert_one_hot<U, V>(&self, one_hot: V) -> Result<C, Failed>
    where
        U: RealNumber,
        V: BaseVector<U>,
    {
        let pos = U::one();

        let oh_it = (0..one_hot.len()).map(|idx| one_hot.get(idx));

        // indices of all entries equal to one; exactly one is expected
        let s: Vec<usize> = oh_it
            .enumerate()
            .filter_map(|(idx, v)| if v == pos { Some(idx) } else { None })
            .collect();

        if s.len() == 1 {
            let idx = s[0];
            return Ok(self.get_cat(idx).clone());
        }

        let pos_entries = format!(
            "Expected a single positive entry, {} entires found",
            s.len()
        );
        Err(Failed::transform(&pos_entries[..]))
    }

    /// Get ordinal encoding of the category; `None` for an unknown category
    /// or when the index cannot be represented in `U`.
    pub fn get_ordinal<U>(&self, category: &C) -> Option<U>
    where
        U: RealNumber,
    {
        // `from_usize` already returns Option, so flatten with `and_then`.
        self.get_num(category).and_then(|&idx| U::from_usize(idx))
    }
}
/// Build a one-hot vector of length `num_categories` with a single one at
/// `category_idx` and zeros everywhere else.
///
/// Example:
/// ```
/// use smartcore::preprocessing::series_encoder::make_one_hot;
/// let one_hot: Vec<f64> = make_one_hot(2, 3);
/// assert_eq!(one_hot, vec![0.0, 0.0, 1.0]);
/// ```
pub fn make_one_hot<T, V>(category_idx: usize, num_categories: usize) -> V
where
    T: RealNumber,
    V: BaseVector<T>,
{
    let mut one_hot = V::zeros(num_categories);
    one_hot.set(category_idx, T::one());
    one_hot
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn from_categories() {
        // Categories appear out of order and with repeats; the encoder must
        // still assign one slot per distinct category.
        let fake_categories: Vec<usize> = vec![1, 2, 3, 4, 5, 3, 5, 3, 1, 2, 4];
        // `copied()` is the idiomatic form of `.map(|&a| a)` (clippy::map_clone).
        let enc = CategoryMapper::<usize>::fit_to_iter(fake_categories.iter().copied());
        let oh_vec: Vec<f64> = enc.get_one_hot(&1).expect("Wrong categories");
        let res: Vec<f64> = vec![1f64, 0f64, 0f64, 0f64, 0f64];
        assert_eq!(oh_vec, res);
    }

    // Build a small encoder over three string categories for reuse in tests.
    fn build_fake_str_enc<'a>() -> CategoryMapper<&'a str> {
        CategoryMapper::<&str>::from_positional_category_vec(vec!["background", "dog", "cat"])
    }

    #[test]
    fn ordinal_encoding() {
        let enc = build_fake_str_enc();
        assert_eq!(1f64, enc.get_ordinal::<f64>(&"dog").unwrap())
    }

    #[test]
    fn category_map_and_vec() {
        let category_map: HashMap<&str, usize> = vec![("background", 0), ("dog", 1), ("cat", 2)]
            .into_iter()
            .collect();
        let enc = CategoryMapper::<&str>::from_category_map(category_map);
        let oh_vec: Vec<f64> = enc.get_one_hot(&"dog").expect("Wrong categories");
        assert_eq!(oh_vec, vec![0f64, 1f64, 0f64]);
    }

    #[test]
    fn positional_categories_vec() {
        let enc = build_fake_str_enc();
        let oh_vec: Vec<f64> = enc.get_one_hot(&"dog").expect("Wrong categories");
        assert_eq!(oh_vec, vec![0.0, 1.0, 0.0]);
    }

    #[test]
    fn invert_label_test() {
        let enc = build_fake_str_enc();
        let lab = enc.invert_one_hot(vec![0.0, 1.0, 0.0]).unwrap();
        assert_eq!(lab, "dog");
        // An all-zero vector has no positive entry and must be rejected.
        // NOTE: "entires" reproduces the typo in the message emitted by
        // `invert_one_hot`, so this assertion matches the actual error text.
        if let Err(e) = enc.invert_one_hot(vec![0.0, 0.0, 0.0]) {
            let pos_entries = "Expected a single positive entry, 0 entires found".to_string();
            assert_eq!(e, Failed::transform(&pos_entries[..]));
        };
    }

    #[test]
    fn test_many_categories() {
        let enc = build_fake_str_enc();
        // "fish" was never registered, so its encoding must be `None`.
        let cat_it = ["dog", "cat", "fish", "background"].iter().cloned();
        let res: Vec<Option<Vec<f64>>> = cat_it.map(|v| enc.get_one_hot(&v)).collect();
        let expected = vec![
            Some(vec![0.0, 1.0, 0.0]),
            Some(vec![0.0, 0.0, 1.0]),
            None,
            Some(vec![1.0, 0.0, 0.0]),
        ];
        assert_eq!(res, expected)
    }
}
+9 -1
View File
@@ -26,6 +26,7 @@
pub mod svc;
pub mod svr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
@@ -93,16 +94,21 @@ impl Kernels {
}
/// Linear Kernel
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LinearKernel {}
/// Radial basis function (Gaussian) kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RBFKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
}
/// Polynomial kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct PolynomialKernel<T: RealNumber> {
/// degree of the polynomial
pub degree: T,
@@ -113,6 +119,8 @@ pub struct PolynomialKernel<T: RealNumber> {
}
/// Sigmoid (hyperbolic tangent) kernel
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct SigmoidKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
+85 -43
View File
@@ -28,7 +28,6 @@
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::linear::linear_regression::*;
//! use smartcore::svm::Kernels;
//! use smartcore::svm::svc::{SVC, SVCParameters};
//!
@@ -58,13 +57,7 @@
//! let y = vec![ 0., 0., 0., 0., 0., 0., 0., 0.,
//! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
//!
//! let svr = SVC::fit(&x, &y,
//! Kernels::linear(),
//! SVCParameters {
//! epoch: 2,
//! c: 200.0,
//! tol: 1e-3,
//! }).unwrap();
//! let svr = SVC::fit(&x, &y, SVCParameters::default().with_c(200.0)).unwrap();
//!
//! let y_hat = svr.predict(&x).unwrap();
//! ```
@@ -83,31 +76,41 @@ use std::marker::PhantomData;
use rand::seq::SliceRandom;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::svm::Kernel;
#[derive(Serialize, Deserialize, Debug)]
use crate::svm::{Kernel, Kernels, LinearKernel};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// SVC Parameters
pub struct SVCParameters<T: RealNumber> {
/// Number of epochs
pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
/// Number of epochs.
pub epoch: usize,
/// Regularization parameter.
pub c: T,
/// Tolerance for stopping criterion
/// Tolerance for stopping criterion.
pub tol: T,
/// The kernel function.
pub kernel: K,
/// Unused parameter.
m: PhantomData<M>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
#[cfg_attr(
feature = "serde",
serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))
)]
/// Support Vector Classifier
pub struct SVC<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
classes: Vec<T>,
@@ -117,7 +120,8 @@ pub struct SVC<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
b: T,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct SupportVector<T: RealNumber, V: BaseVector<T>> {
index: usize,
x: V,
@@ -137,7 +141,7 @@ struct Cache<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
struct Optimizer<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
x: &'a M,
y: &'a M::RowVector,
parameters: &'a SVCParameters<T>,
parameters: &'a SVCParameters<T, M, K>,
svmin: usize,
svmax: usize,
gmin: T,
@@ -148,27 +152,71 @@ struct Optimizer<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
recalculate_minmax_grad: bool,
}
impl<T: RealNumber> Default for SVCParameters<T> {
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVCParameters<T, M, K> {
/// Number of epochs.
pub fn with_epoch(mut self, epoch: usize) -> Self {
self.epoch = epoch;
self
}
/// Regularization parameter.
pub fn with_c(mut self, c: T) -> Self {
self.c = c;
self
}
/// Tolerance for stopping criterion.
pub fn with_tol(mut self, tol: T) -> Self {
self.tol = tol;
self
}
/// The kernel function.
pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVCParameters<T, M, KK> {
SVCParameters {
epoch: self.epoch,
c: self.c,
tol: self.tol,
kernel,
m: PhantomData,
}
}
}
impl<T: RealNumber, M: Matrix<T>> Default for SVCParameters<T, M, LinearKernel> {
fn default() -> Self {
SVCParameters {
epoch: 2,
c: T::one(),
tol: T::from_f64(1e-3).unwrap(),
kernel: Kernels::linear(),
m: PhantomData,
}
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>>
SupervisedEstimator<M, M::RowVector, SVCParameters<T, M, K>> for SVC<T, M, K>
{
fn fit(x: &M, y: &M::RowVector, parameters: SVCParameters<T, M, K>) -> Result<Self, Failed> {
SVC::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector>
for SVC<T, M, K>
{
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
/// Fits SVC to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
/// * `y` - class labels
/// * `kernel` - the kernel function
/// * `parameters` - optional parameters, use `Default::default()` to set parameters to default values.
pub fn fit(
x: &M,
y: &M::RowVector,
kernel: K,
parameters: SVCParameters<T>,
parameters: SVCParameters<T, M, K>,
) -> Result<SVC<T, M, K>, Failed> {
let (n, _) = x.shape();
@@ -199,13 +247,13 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
}
}
let optimizer = Optimizer::new(x, &y, &kernel, &parameters);
let optimizer = Optimizer::new(x, &y, &parameters.kernel, &parameters);
let (support_vectors, weight, b) = optimizer.optimize();
Ok(SVC {
classes,
kernel,
kernel: parameters.kernel,
instances: support_vectors,
w: weight,
b,
@@ -322,7 +370,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
x: &'a M,
y: &'a M::RowVector,
kernel: &'a K,
parameters: &'a SVCParameters<T>,
parameters: &'a SVCParameters<T, M, K>,
) -> Optimizer<'a, T, M, K> {
let (n, _) = x.shape();
@@ -678,6 +726,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::accuracy;
#[cfg(feature = "serde")]
use crate::svm::*;
#[test]
@@ -712,18 +761,13 @@ mod tests {
let y_hat = SVC::fit(
&x,
&y,
Kernels::linear(),
SVCParameters {
epoch: 2,
c: 200.0,
tol: 1e-3,
},
SVCParameters::default()
.with_c(200.0)
.with_kernel(Kernels::linear()),
)
.and_then(|lr| lr.predict(&x))
.unwrap();
println!("{:?}", y_hat);
assert!(accuracy(&y_hat, &y) >= 0.9);
}
@@ -760,12 +804,9 @@ mod tests {
let y_hat = SVC::fit(
&x,
&y,
Kernels::rbf(0.7),
SVCParameters {
epoch: 2,
c: 1.0,
tol: 1e-3,
},
SVCParameters::default()
.with_c(1.0)
.with_kernel(Kernels::rbf(0.7)),
)
.and_then(|lr| lr.predict(&x))
.unwrap();
@@ -774,6 +815,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn svc_serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
@@ -802,7 +844,7 @@ mod tests {
-1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let svr = SVC::fit(&x, &y, Kernels::linear(), Default::default()).unwrap();
let svr = SVC::fit(&x, &y, Default::default()).unwrap();
let deserialized_svr: SVC<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
+82 -38
View File
@@ -49,13 +49,7 @@
//! let y: Vec<f64> = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0,
//! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
//!
//! let svr = SVR::fit(&x, &y,
//! LinearKernel {},
//! SVRParameters {
//! eps: 2.0,
//! c: 10.0,
//! tol: 1e-3,
//! }).unwrap();
//! let svr = SVR::fit(&x, &y, SVRParameters::default().with_eps(2.0).with_c(10.0)).unwrap();
//!
//! let y_hat = svr.predict(&x).unwrap();
//! ```
@@ -72,32 +66,43 @@
use std::cell::{Ref, RefCell};
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::svm::Kernel;
#[derive(Serialize, Deserialize, Debug)]
use crate::svm::{Kernel, Kernels, LinearKernel};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// SVR Parameters
pub struct SVRParameters<T: RealNumber> {
/// Epsilon in the epsilon-SVR model
pub struct SVRParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
/// Epsilon in the epsilon-SVR model.
pub eps: T,
/// Regularization parameter.
pub c: T,
/// Tolerance for stopping criterion
/// Tolerance for stopping criterion.
pub tol: T,
/// The kernel function.
pub kernel: K,
/// Unused parameter.
m: PhantomData<M>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
#[cfg_attr(
feature = "serde",
serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))
)]
/// Epsilon-Support Vector Regression
pub struct SVR<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
@@ -107,7 +112,8 @@ pub struct SVR<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
b: T,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct SupportVector<T: RealNumber, V: BaseVector<T>> {
index: usize,
x: V,
@@ -135,16 +141,62 @@ struct Cache<T: Clone> {
data: Vec<RefCell<Option<Vec<T>>>>,
}
impl<T: RealNumber> Default for SVRParameters<T> {
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVRParameters<T, M, K> {
/// Epsilon in the epsilon-SVR model.
pub fn with_eps(mut self, eps: T) -> Self {
self.eps = eps;
self
}
/// Regularization parameter.
pub fn with_c(mut self, c: T) -> Self {
self.c = c;
self
}
/// Tolerance for stopping criterion.
pub fn with_tol(mut self, tol: T) -> Self {
self.tol = tol;
self
}
/// The kernel function.
pub fn with_kernel<KK: Kernel<T, M::RowVector>>(&self, kernel: KK) -> SVRParameters<T, M, KK> {
SVRParameters {
eps: self.eps,
c: self.c,
tol: self.tol,
kernel,
m: PhantomData,
}
}
}
impl<T: RealNumber, M: Matrix<T>> Default for SVRParameters<T, M, LinearKernel> {
fn default() -> Self {
SVRParameters {
eps: T::from_f64(0.1).unwrap(),
c: T::one(),
tol: T::from_f64(1e-3).unwrap(),
kernel: Kernels::linear(),
m: PhantomData,
}
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>>
SupervisedEstimator<M, M::RowVector, SVRParameters<T, M, K>> for SVR<T, M, K>
{
fn fit(x: &M, y: &M::RowVector, parameters: SVRParameters<T, M, K>) -> Result<Self, Failed> {
SVR::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Predictor<M, M::RowVector>
for SVR<T, M, K>
{
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
/// Fits SVR to your data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -154,8 +206,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
pub fn fit(
x: &M,
y: &M::RowVector,
kernel: K,
parameters: SVRParameters<T>,
parameters: SVRParameters<T, M, K>,
) -> Result<SVR<T, M, K>, Failed> {
let (n, _) = x.shape();
@@ -165,12 +216,12 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
));
}
let optimizer = Optimizer::new(x, y, &kernel, &parameters);
let optimizer = Optimizer::new(x, y, &parameters.kernel, &parameters);
let (support_vectors, weight, b) = optimizer.smo();
Ok(SVR {
kernel,
kernel: parameters.kernel,
instances: support_vectors,
w: weight,
b,
@@ -243,7 +294,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
x: &M,
y: &M::RowVector,
kernel: &'a K,
parameters: &SVRParameters<T>,
parameters: &SVRParameters<T, M, K>,
) -> Optimizer<'a, T, M, K> {
let (n, _) = x.shape();
@@ -482,6 +533,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::mean_squared_error;
#[cfg(feature = "serde")]
use crate::svm::*;
#[test]
@@ -510,23 +562,15 @@ mod tests {
114.2, 115.7, 116.9,
];
let y_hat = SVR::fit(
&x,
&y,
LinearKernel {},
SVRParameters {
eps: 2.0,
c: 10.0,
tol: 1e-3,
},
)
.and_then(|lr| lr.predict(&x))
.unwrap();
let y_hat = SVR::fit(&x, &y, SVRParameters::default().with_eps(2.0).with_c(10.0))
.and_then(|lr| lr.predict(&x))
.unwrap();
assert!(mean_squared_error(&y_hat, &y) < 2.5);
}
#[test]
#[cfg(feature = "serde")]
fn svr_serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
@@ -552,7 +596,7 @@ mod tests {
114.2, 115.7, 116.9,
];
let svr = SVR::fit(&x, &y, LinearKernel {}, Default::default()).unwrap();
let svr = SVR::fit(&x, &y, Default::default()).unwrap();
let deserialized_svr: SVR<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
+80 -34
View File
@@ -68,14 +68,17 @@ use std::fmt::Debug;
use std::marker::PhantomData;
use rand::seq::SliceRandom;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Parameters of Decision Tree
pub struct DecisionTreeClassifierParameters {
/// Split criteria to use when building a tree.
@@ -89,7 +92,8 @@ pub struct DecisionTreeClassifierParameters {
}
/// Decision Tree
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct DecisionTreeClassifier<T: RealNumber> {
nodes: Vec<Node<T>>,
parameters: DecisionTreeClassifierParameters,
@@ -99,7 +103,8 @@ pub struct DecisionTreeClassifier<T: RealNumber> {
}
/// The function to measure the quality of a split.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub enum SplitCriterion {
/// [Gini index](../decision_tree_classifier/index.html)
Gini,
@@ -109,7 +114,8 @@ pub enum SplitCriterion {
ClassificationError,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct Node<T: RealNumber> {
index: usize,
output: usize,
@@ -160,6 +166,29 @@ impl<T: RealNumber> PartialEq for Node<T> {
}
}
impl DecisionTreeClassifierParameters {
/// Split criteria to use when building a tree.
pub fn with_criterion(mut self, criterion: SplitCriterion) -> Self {
self.criterion = criterion;
self
}
/// The maximum depth of the tree.
pub fn with_max_depth(mut self, max_depth: u16) -> Self {
self.max_depth = Some(max_depth);
self
}
/// The minimum number of samples required to be at a leaf node.
pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
self.min_samples_leaf = min_samples_leaf;
self
}
/// The minimum number of samples required to split an internal node.
pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
self.min_samples_split = min_samples_split;
self
}
}
impl Default for DecisionTreeClassifierParameters {
fn default() -> Self {
DecisionTreeClassifierParameters {
@@ -187,42 +216,42 @@ impl<T: RealNumber> Node<T> {
struct NodeVisitor<'a, T: RealNumber, M: Matrix<T>> {
x: &'a M,
y: &'a Vec<usize>,
y: &'a [usize],
node: usize,
samples: Vec<usize>,
order: &'a Vec<Vec<usize>>,
order: &'a [Vec<usize>],
true_child_output: usize,
false_child_output: usize,
level: u16,
phantom: PhantomData<&'a T>,
}
fn impurity<T: RealNumber>(criterion: &SplitCriterion, count: &Vec<usize>, n: usize) -> T {
fn impurity<T: RealNumber>(criterion: &SplitCriterion, count: &[usize], n: usize) -> T {
let mut impurity = T::zero();
match criterion {
SplitCriterion::Gini => {
impurity = T::one();
for i in 0..count.len() {
if count[i] > 0 {
let p = T::from(count[i]).unwrap() / T::from(n).unwrap();
for count_i in count.iter() {
if *count_i > 0 {
let p = T::from(*count_i).unwrap() / T::from(n).unwrap();
impurity -= p * p;
}
}
}
SplitCriterion::Entropy => {
for i in 0..count.len() {
if count[i] > 0 {
let p = T::from(count[i]).unwrap() / T::from(n).unwrap();
for count_i in count.iter() {
if *count_i > 0 {
let p = T::from(*count_i).unwrap() / T::from(n).unwrap();
impurity -= p * p.log2();
}
}
}
SplitCriterion::ClassificationError => {
for i in 0..count.len() {
if count[i] > 0 {
impurity = impurity.max(T::from(count[i]).unwrap() / T::from(n).unwrap());
for count_i in count.iter() {
if *count_i > 0 {
impurity = impurity.max(T::from(*count_i).unwrap() / T::from(n).unwrap());
}
}
impurity = (T::one() - impurity).abs();
@@ -236,9 +265,9 @@ impl<'a, T: RealNumber, M: Matrix<T>> NodeVisitor<'a, T, M> {
fn new(
node_id: usize,
samples: Vec<usize>,
order: &'a Vec<Vec<usize>>,
order: &'a [Vec<usize>],
x: &'a M,
y: &'a Vec<usize>,
y: &'a [usize],
level: u16,
) -> Self {
NodeVisitor {
@@ -255,13 +284,13 @@ impl<'a, T: RealNumber, M: Matrix<T>> NodeVisitor<'a, T, M> {
}
}
pub(in crate) fn which_max(x: &Vec<usize>) -> usize {
pub(in crate) fn which_max(x: &[usize]) -> usize {
let mut m = x[0];
let mut which = 0;
for i in 1..x.len() {
if x[i] > m {
m = x[i];
for (i, x_i) in x.iter().enumerate().skip(1) {
if *x_i > m {
m = *x_i;
which = i;
}
}
@@ -269,6 +298,25 @@ pub(in crate) fn which_max(x: &Vec<usize>) -> usize {
which
}
impl<T: RealNumber, M: Matrix<T>>
SupervisedEstimator<M, M::RowVector, DecisionTreeClassifierParameters>
for DecisionTreeClassifier<T>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: DecisionTreeClassifierParameters,
) -> Result<Self, Failed> {
DecisionTreeClassifier::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for DecisionTreeClassifier<T> {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber> DecisionTreeClassifier<T> {
/// Build a decision tree classifier from the training data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -304,9 +352,9 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
let mut yi: Vec<usize> = vec![0; y_ncols];
for i in 0..y_ncols {
for (i, yi_i) in yi.iter_mut().enumerate().take(y_ncols) {
let yc = y_m.get(0, i);
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
*yi_i = classes.iter().position(|c| yc == *c).unwrap();
}
let mut nodes: Vec<Node<T>> = Vec::new();
@@ -431,23 +479,20 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
let parent_impurity = impurity(&self.parameters.criterion, &count, n);
let mut variables = vec![0; n_attr];
for i in 0..n_attr {
variables[i] = i;
}
let mut variables = (0..n_attr).collect::<Vec<_>>();
if mtry < n_attr {
variables.shuffle(&mut rand::thread_rng());
}
for j in 0..mtry {
for variable in variables.iter().take(mtry) {
self.find_best_split(
visitor,
n,
&count,
&mut false_count,
parent_impurity,
variables[j],
*variable,
);
}
@@ -458,7 +503,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
&mut self,
visitor: &mut NodeVisitor<'_, T, M>,
n: usize,
count: &Vec<usize>,
count: &[usize],
false_count: &mut Vec<usize>,
parent_impurity: T,
j: usize,
@@ -527,13 +572,13 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
let mut fc = 0;
let mut true_samples: Vec<usize> = vec![0; n];
for i in 0..n {
for (i, true_sample) in true_samples.iter_mut().enumerate().take(n) {
if visitor.samples[i] > 0 {
if visitor.x.get(i, self.nodes[visitor.node].split_feature)
<= self.nodes[visitor.node].split_value.unwrap_or_else(T::nan)
{
true_samples[i] = visitor.samples[i];
tc += true_samples[i];
*true_sample = visitor.samples[i];
tc += *true_sample;
visitor.samples[i] = 0;
} else {
fc += visitor.samples[i];
@@ -700,6 +745,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[1., 1., 1., 0.],
+57 -17
View File
@@ -63,14 +63,17 @@ use std::default::Default;
use std::fmt::Debug;
use rand::seq::SliceRandom;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Parameters of Regression Tree
pub struct DecisionTreeRegressorParameters {
/// The maximum depth of the tree.
@@ -82,14 +85,16 @@ pub struct DecisionTreeRegressorParameters {
}
/// Regression Tree
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct DecisionTreeRegressor<T: RealNumber> {
nodes: Vec<Node<T>>,
parameters: DecisionTreeRegressorParameters,
depth: u16,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct Node<T: RealNumber> {
index: usize,
output: T,
@@ -100,6 +105,24 @@ struct Node<T: RealNumber> {
false_child: Option<usize>,
}
impl DecisionTreeRegressorParameters {
/// The maximum depth of the tree.
pub fn with_max_depth(mut self, max_depth: u16) -> Self {
self.max_depth = Some(max_depth);
self
}
/// The minimum number of samples required to be at a leaf node.
pub fn with_min_samples_leaf(mut self, min_samples_leaf: usize) -> Self {
self.min_samples_leaf = min_samples_leaf;
self
}
/// The minimum number of samples required to split an internal node.
pub fn with_min_samples_split(mut self, min_samples_split: usize) -> Self {
self.min_samples_split = min_samples_split;
self
}
}
impl Default for DecisionTreeRegressorParameters {
fn default() -> Self {
DecisionTreeRegressorParameters {
@@ -161,7 +184,7 @@ struct NodeVisitor<'a, T: RealNumber, M: Matrix<T>> {
y: &'a M,
node: usize,
samples: Vec<usize>,
order: &'a Vec<Vec<usize>>,
order: &'a [Vec<usize>],
true_child_output: T,
false_child_output: T,
level: u16,
@@ -171,7 +194,7 @@ impl<'a, T: RealNumber, M: Matrix<T>> NodeVisitor<'a, T, M> {
fn new(
node_id: usize,
samples: Vec<usize>,
order: &'a Vec<Vec<usize>>,
order: &'a [Vec<usize>],
x: &'a M,
y: &'a M,
level: u16,
@@ -189,6 +212,25 @@ impl<'a, T: RealNumber, M: Matrix<T>> NodeVisitor<'a, T, M> {
}
}
impl<T: RealNumber, M: Matrix<T>>
SupervisedEstimator<M, M::RowVector, DecisionTreeRegressorParameters>
for DecisionTreeRegressor<T>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: DecisionTreeRegressorParameters,
) -> Result<Self, Failed> {
DecisionTreeRegressor::fit(x, y, parameters)
}
}
impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for DecisionTreeRegressor<T> {
fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.predict(x)
}
}
impl<T: RealNumber> DecisionTreeRegressor<T> {
/// Build a decision tree regressor from the training data.
/// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
@@ -219,9 +261,9 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
let mut n = 0;
let mut sum = T::zero();
for i in 0..y_ncols {
n += samples[i];
sum += T::from(samples[i]).unwrap() * y_m.get(0, i);
for (i, sample_i) in samples.iter().enumerate().take(y_ncols) {
n += *sample_i;
sum += T::from(*sample_i).unwrap() * y_m.get(0, i);
}
let root = Node::new(0, sum / T::from(n).unwrap());
@@ -312,10 +354,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
let sum = self.nodes[visitor.node].output * T::from(n).unwrap();
let mut variables = vec![0; n_attr];
for i in 0..n_attr {
variables[i] = i;
}
let mut variables = (0..n_attr).collect::<Vec<_>>();
if mtry < n_attr {
variables.shuffle(&mut rand::thread_rng());
@@ -324,8 +363,8 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
let parent_gain =
T::from(n).unwrap() * self.nodes[visitor.node].output * self.nodes[visitor.node].output;
for j in 0..mtry {
self.find_best_split(visitor, n, sum, parent_gain, variables[j]);
for variable in variables.iter().take(mtry) {
self.find_best_split(visitor, n, sum, parent_gain, *variable);
}
self.nodes[visitor.node].split_score != Option::None
@@ -399,13 +438,13 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
let mut fc = 0;
let mut true_samples: Vec<usize> = vec![0; n];
for i in 0..n {
for (i, true_sample) in true_samples.iter_mut().enumerate().take(n) {
if visitor.samples[i] > 0 {
if visitor.x.get(i, self.nodes[visitor.node].split_feature)
<= self.nodes[visitor.node].split_value.unwrap_or_else(T::nan)
{
true_samples[i] = visitor.samples[i];
tc += true_samples[i];
*true_sample = visitor.samples[i];
tc += *true_sample;
visitor.samples[i] = 0;
} else {
fc += visitor.samples[i];
@@ -542,6 +581,7 @@ mod tests {
}
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],