feat: add RandomForestRegressor

This commit is contained in:
Volodymyr Orlov
2020-03-23 16:56:42 -07:00
parent 18dc6bdb40
commit 17200fe633
3 changed files with 154 additions and 12 deletions
+2 -1
View File
@@ -1 +1,2 @@
-pub mod random_forest;
+pub mod random_forest_classifier;
+pub mod random_forest_regressor;
@@ -6,7 +6,7 @@ use crate::linalg::Matrix;
 use crate::tree::decision_tree_classifier::{DecisionTreeClassifier, DecisionTreeClassifierParameters, SplitCriterion, which_max};
 #[derive(Debug, Clone)]
-pub struct RandomForestParameters {
+pub struct RandomForestClassifierParameters {
 pub criterion: SplitCriterion,
 pub max_depth: Option<u16>,
 pub min_samples_leaf: u16,
@@ -16,15 +16,15 @@ pub struct RandomForestParameters {
 }
 #[derive(Debug)]
-pub struct RandomForest {
+pub struct RandomForestClassifier {
-parameters: RandomForestParameters,
+parameters: RandomForestClassifierParameters,
 trees: Vec<DecisionTreeClassifier>,
 classes: Vec<f64>
 }
-impl Default for RandomForestParameters {
+impl Default for RandomForestClassifierParameters {
 fn default() -> Self {
-RandomForestParameters {
+RandomForestClassifierParameters {
 criterion: SplitCriterion::Gini,
 max_depth: None,
 min_samples_leaf: 1,
@@ -35,9 +35,9 @@ impl Default for RandomForestParameters {
 }
 }
-impl RandomForest {
+impl RandomForestClassifier {
-pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestParameters) -> RandomForest {
+pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestClassifierParameters) -> RandomForestClassifier {
 let (_, num_attributes) = x.shape();
 let y_m = M::from_row_vector(y.clone());
 let (_, y_ncols) = y_m.shape();
@@ -56,7 +56,7 @@ impl RandomForestClassifier {
 let mut trees: Vec<DecisionTreeClassifier> = Vec::new();
 for _ in 0..parameters.n_trees {
-let samples = RandomForest::sample_with_replacement(&yi, k);
+let samples = RandomForestClassifier::sample_with_replacement(&yi, k);
 let params = DecisionTreeClassifierParameters{
 criterion: parameters.criterion.clone(),
 max_depth: parameters.max_depth,
@@ -67,7 +67,7 @@ impl RandomForestClassifier {
 trees.push(tree);
 }
-RandomForest {
+RandomForestClassifier {
 parameters: parameters,
 trees: trees,
 classes
@@ -154,9 +154,9 @@ mod tests {
 &[5.2, 2.7, 3.9, 1.4]]);
 let y = vec![0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
-RandomForest::fit(&x, &y, Default::default());
+RandomForestClassifier::fit(&x, &y, Default::default());
-assert_eq!(y, RandomForest::fit(&x, &y, Default::default()).predict(&x));
+assert_eq!(y, RandomForestClassifier::fit(&x, &y, Default::default()).predict(&x));
 }
+141
View File
@@ -0,0 +1,141 @@
extern crate rand;
use rand::Rng;
use std::default::Default;
use crate::linalg::Matrix;
use crate::tree::decision_tree_regressor::{DecisionTreeRegressor, DecisionTreeRegressorParameters};
/// Hyper-parameters for [`RandomForestRegressor::fit`].
#[derive(Debug, Clone)]
pub struct RandomForestRegressorParameters {
/// Maximum depth of each individual tree; `None` means unbounded.
pub max_depth: Option<u16>,
/// Minimum number of samples required to be at a leaf node.
pub min_samples_leaf: usize,
/// Minimum number of samples required to split an internal node.
pub min_samples_split: usize,
/// Number of trees grown in the ensemble.
pub n_trees: usize,
/// Number of candidate features considered per split; `None` lets `fit`
/// choose floor(sqrt(number of attributes)).
pub mtry: Option<usize>
}
/// A random forest regressor: an ensemble of decision trees, each fit on a
/// bootstrap resample of the training data, whose predictions are averaged.
#[derive(Debug)]
pub struct RandomForestRegressor {
// Hyper-parameters the forest was fit with (kept for inspection/debugging).
parameters: RandomForestRegressorParameters,
// The fitted weak learners; `predict` averages their outputs.
trees: Vec<DecisionTreeRegressor>
}
impl Default for RandomForestRegressorParameters {
fn default() -> Self {
RandomForestRegressorParameters {
max_depth: None,
min_samples_leaf: 1,
min_samples_split: 2,
n_trees: 10,
mtry: Option::None
}
}
}
impl RandomForestRegressor {
    /// Fits `parameters.n_trees` decision trees, each on an independent
    /// bootstrap resample of the rows of `x`/`y`, and returns the ensemble.
    pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestRegressorParameters) -> RandomForestRegressor {
        let (n_rows, num_attributes) = x.shape();
        // Features examined per split; defaults to floor(sqrt(p)) when unset.
        let mtry = parameters
            .mtry
            .unwrap_or((num_attributes as f64).sqrt().floor() as usize);
        let mut forest: Vec<DecisionTreeRegressor> = Vec::with_capacity(parameters.n_trees);
        for _ in 0..parameters.n_trees {
            let bootstrap = Self::sample_with_replacement(n_rows);
            let tree_params = DecisionTreeRegressorParameters {
                max_depth: parameters.max_depth,
                min_samples_leaf: parameters.min_samples_leaf,
                min_samples_split: parameters.min_samples_split,
            };
            forest.push(DecisionTreeRegressor::fit_weak_learner(x, y, bootstrap, mtry, tree_params));
        }
        RandomForestRegressor {
            parameters,
            trees: forest,
        }
    }

    /// Predicts a target value for every row of `x` by averaging the trees.
    pub fn predict<M: Matrix>(&self, x: &M) -> M::RowVector {
        let (n_rows, _) = x.shape();
        let mut predictions = M::zeros(1, n_rows);
        for row in 0..n_rows {
            predictions.set(0, row, self.predict_for_row(x, row));
        }
        predictions.to_row_vector()
    }

    /// Mean of the individual tree predictions for one row of `x`.
    /// NOTE(review): with `n_trees == 0` this divides by zero and yields NaN,
    /// matching the original behavior — confirm whether that case matters.
    fn predict_for_row<M: Matrix>(&self, x: &M, row: usize) -> f64 {
        let total: f64 = self
            .trees
            .iter()
            .map(|tree| tree.predict_for_row(x, row))
            .fold(0f64, |acc, p| acc + p);
        total / self.trees.len() as f64
    }

    /// Draws `nrows` row indices uniformly with replacement and returns, for
    /// each row, how many times it was drawn (a bootstrap weight vector).
    fn sample_with_replacement(nrows: usize) -> Vec<usize> {
        let mut rng = rand::thread_rng();
        let mut counts = vec![0; nrows];
        for _ in 0..nrows {
            counts[rng.gen_range(0, nrows)] += 1;
        }
        counts
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
// Smoke test on the classic Longley macroeconomic dataset: with a large
// ensemble (1000 trees) the in-sample predictions should land close to the
// training targets.
#[test]
fn fit_longley() {
let x = DenseMatrix::from_array(&[
&[ 234.289, 235.6, 159., 107.608, 1947., 60.323],
&[ 259.426, 232.5, 145.6, 108.632, 1948., 61.122],
&[ 258.054, 368.2, 161.6, 109.773, 1949., 60.171],
&[ 284.599, 335.1, 165., 110.929, 1950., 61.187],
&[ 328.975, 209.9, 309.9, 112.075, 1951., 63.221],
&[ 346.999, 193.2, 359.4, 113.27 , 1952., 63.639],
&[ 365.385, 187., 354.7, 115.094, 1953., 64.989],
&[ 363.112, 357.8, 335., 116.219, 1954., 63.761],
&[ 397.469, 290.4, 304.8, 117.388, 1955., 66.019],
&[ 419.18 , 282.2, 285.7, 118.734, 1956., 67.857],
&[ 442.769, 293.6, 279.8, 120.445, 1957., 68.169],
&[ 444.546, 468.1, 263.7, 121.95 , 1958., 66.513],
&[ 482.704, 381.3, 255.2, 123.366, 1959., 68.655],
&[ 502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[ 518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[ 554.894, 400.7, 282.7, 130.081, 1962., 70.551]]);
// True targets and the rounded values the ensemble is expected to approach.
let y = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];
let expected_y = vec![85., 88., 88., 89., 97., 98., 99., 99., 102., 104., 109., 110., 113., 114., 115., 116.];
let y_hat = RandomForestRegressor::fit(&x, &y,
RandomForestRegressorParameters{max_depth: None,
min_samples_leaf: 1,
min_samples_split: 2,
n_trees: 1000,
mtry: Option::None}).predict(&x);
// NOTE(review): the forest is randomized (unseeded thread_rng bootstrap),
// so this 1.0 tolerance makes the test probabilistic — an unlucky run
// could exceed it. Consider a seedable RNG for deterministic tests.
for i in 0..y_hat.len() {
assert!((y_hat[i] - expected_y[i]).abs() < 1.0);
}
}
}