feat: extend the Matrix interface to support a broad range of numeric types

This commit is contained in:
Volodymyr Orlov
2020-03-26 15:28:26 -07:00
parent 84ffd331cd
commit 02b85415d9
27 changed files with 1021 additions and 868 deletions
+24 -13
View File
@@ -1,7 +1,11 @@
extern crate rand;
use rand::Rng;
use std::default::Default;
use std::fmt::Debug;
use rand::Rng;
use crate::math::num::FloatExt;
use crate::linalg::Matrix;
use crate::tree::decision_tree_classifier::{DecisionTreeClassifier, DecisionTreeClassifierParameters, SplitCriterion, which_max};
@@ -16,10 +20,10 @@ pub struct RandomForestClassifierParameters {
}
#[derive(Debug)]
pub struct RandomForestClassifier {
pub struct RandomForestClassifier<T: FloatExt> {
parameters: RandomForestClassifierParameters,
trees: Vec<DecisionTreeClassifier>,
classes: Vec<f64>
trees: Vec<DecisionTreeClassifier<T>>,
classes: Vec<T>
}
impl Default for RandomForestClassifierParameters {
@@ -35,9 +39,9 @@ impl Default for RandomForestClassifierParameters {
}
}
impl RandomForestClassifier {
impl<T: FloatExt + Debug> RandomForestClassifier<T> {
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestClassifierParameters) -> RandomForestClassifier {
pub fn fit<M: Matrix<T>>(x: &M, y: &M::RowVector, parameters: RandomForestClassifierParameters) -> RandomForestClassifier<T> {
let (_, num_attributes) = x.shape();
let y_m = M::from_row_vector(y.clone());
let (_, y_ncols) = y_m.shape();
@@ -49,14 +53,14 @@ impl RandomForestClassifier {
yi[i] = classes.iter().position(|c| yc == *c).unwrap();
}
let mtry = parameters.mtry.unwrap_or((num_attributes as f64).sqrt().floor() as usize);
let mtry = parameters.mtry.unwrap_or((T::from(num_attributes).unwrap()).sqrt().floor().to_usize().unwrap());
let classes = y_m.unique();
let k = classes.len();
let mut trees: Vec<DecisionTreeClassifier> = Vec::new();
let mut trees: Vec<DecisionTreeClassifier<T>> = Vec::new();
for _ in 0..parameters.n_trees {
let samples = RandomForestClassifier::sample_with_replacement(&yi, k);
let samples = RandomForestClassifier::<T>::sample_with_replacement(&yi, k);
let params = DecisionTreeClassifierParameters{
criterion: parameters.criterion.clone(),
max_depth: parameters.max_depth,
@@ -74,7 +78,7 @@ impl RandomForestClassifier {
}
}
pub fn predict<M: Matrix>(&self, x: &M) -> M::RowVector {
pub fn predict<M: Matrix<T>>(&self, x: &M) -> M::RowVector {
let mut result = M::zeros(1, x.shape().0);
let (n, _) = x.shape();
@@ -86,7 +90,7 @@ impl RandomForestClassifier {
result.to_row_vector()
}
fn predict_for_row<M: Matrix>(&self, x: &M, row: usize) -> usize {
fn predict_for_row<M: Matrix<T>>(&self, x: &M, row: usize) -> usize {
let mut result = vec![0; self.classes.len()];
for tree in self.trees.iter() {
@@ -154,9 +158,16 @@ mod tests {
&[5.2, 2.7, 3.9, 1.4]]);
let y = vec![0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
RandomForestClassifier::fit(&x, &y, Default::default());
let classifier = RandomForestClassifier::fit(&x, &y, RandomForestClassifierParameters{
criterion: SplitCriterion::Gini,
max_depth: None,
min_samples_leaf: 1,
min_samples_split: 2,
n_trees: 1000,
mtry: Option::None
});
assert_eq!(y, RandomForestClassifier::fit(&x, &y, Default::default()).predict(&x));
assert_eq!(y, classifier.predict(&x));
}
+17 -13
View File
@@ -1,7 +1,11 @@
extern crate rand;
use rand::Rng;
use std::default::Default;
use std::fmt::Debug;
use rand::Rng;
use crate::math::num::FloatExt;
use crate::linalg::Matrix;
use crate::tree::decision_tree_regressor::{DecisionTreeRegressor, DecisionTreeRegressorParameters};
@@ -15,9 +19,9 @@ pub struct RandomForestRegressorParameters {
}
#[derive(Debug)]
pub struct RandomForestRegressor {
pub struct RandomForestRegressor<T: FloatExt> {
parameters: RandomForestRegressorParameters,
trees: Vec<DecisionTreeRegressor>
trees: Vec<DecisionTreeRegressor<T>>
}
impl Default for RandomForestRegressorParameters {
@@ -32,17 +36,17 @@ impl Default for RandomForestRegressorParameters {
}
}
impl RandomForestRegressor {
impl<T: FloatExt + Debug> RandomForestRegressor<T> {
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestRegressorParameters) -> RandomForestRegressor {
pub fn fit<M: Matrix<T>>(x: &M, y: &M::RowVector, parameters: RandomForestRegressorParameters) -> RandomForestRegressor<T> {
let (n_rows, num_attributes) = x.shape();
let mtry = parameters.mtry.unwrap_or((num_attributes as f64).sqrt().floor() as usize);
let mut trees: Vec<DecisionTreeRegressor> = Vec::new();
let mut trees: Vec<DecisionTreeRegressor<T>> = Vec::new();
for _ in 0..parameters.n_trees {
let samples = RandomForestRegressor::sample_with_replacement(n_rows);
let samples = RandomForestRegressor::<T>::sample_with_replacement(n_rows);
let params = DecisionTreeRegressorParameters{
max_depth: parameters.max_depth,
min_samples_leaf: parameters.min_samples_leaf,
@@ -58,7 +62,7 @@ impl RandomForestRegressor {
}
}
pub fn predict<M: Matrix>(&self, x: &M) -> M::RowVector {
pub fn predict<M: Matrix<T>>(&self, x: &M) -> M::RowVector {
let mut result = M::zeros(1, x.shape().0);
let (n, _) = x.shape();
@@ -70,17 +74,17 @@ impl RandomForestRegressor {
result.to_row_vector()
}
fn predict_for_row<M: Matrix>(&self, x: &M, row: usize) -> f64 {
fn predict_for_row<M: Matrix<T>>(&self, x: &M, row: usize) -> T {
let n_trees = self.trees.len();
let mut result = 0f64;
let mut result = T::zero();
for tree in self.trees.iter() {
result += tree.predict_for_row(x, row);
result = result + tree.predict_for_row(x, row);
}
result / n_trees as f64
result / T::from(n_trees).unwrap()
}
@@ -123,7 +127,7 @@ mod tests {
&[ 554.894, 400.7, 282.7, 130.081, 1962., 70.551]]);
let y = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
let expected_y = vec![85., 88., 88., 89., 97., 98., 99., 99., 102., 104., 109., 110., 113., 114., 115., 116.];
let expected_y: Vec<f64> = vec![85., 88., 88., 89., 97., 98., 99., 99., 102., 104., 109., 110., 113., 114., 115., 116.];
let y_hat = RandomForestRegressor::fit(&x, &y,
RandomForestRegressorParameters{max_depth: None,