feat: add RandomForestRegressor
This commit is contained in:
+2
-1
@@ -1 +1,2 @@
|
|||||||
pub mod random_forest;
|
pub mod random_forest_classifier;
|
||||||
|
pub mod random_forest_regressor;
|
||||||
@@ -6,7 +6,7 @@ use crate::linalg::Matrix;
|
|||||||
use crate::tree::decision_tree_classifier::{DecisionTreeClassifier, DecisionTreeClassifierParameters, SplitCriterion, which_max};
|
use crate::tree::decision_tree_classifier::{DecisionTreeClassifier, DecisionTreeClassifierParameters, SplitCriterion, which_max};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct RandomForestParameters {
|
pub struct RandomForestClassifierParameters {
|
||||||
pub criterion: SplitCriterion,
|
pub criterion: SplitCriterion,
|
||||||
pub max_depth: Option<u16>,
|
pub max_depth: Option<u16>,
|
||||||
pub min_samples_leaf: u16,
|
pub min_samples_leaf: u16,
|
||||||
@@ -16,15 +16,15 @@ pub struct RandomForestParameters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct RandomForest {
|
pub struct RandomForestClassifier {
|
||||||
parameters: RandomForestParameters,
|
parameters: RandomForestClassifierParameters,
|
||||||
trees: Vec<DecisionTreeClassifier>,
|
trees: Vec<DecisionTreeClassifier>,
|
||||||
classes: Vec<f64>
|
classes: Vec<f64>
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for RandomForestParameters {
|
impl Default for RandomForestClassifierParameters {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
RandomForestParameters {
|
RandomForestClassifierParameters {
|
||||||
criterion: SplitCriterion::Gini,
|
criterion: SplitCriterion::Gini,
|
||||||
max_depth: None,
|
max_depth: None,
|
||||||
min_samples_leaf: 1,
|
min_samples_leaf: 1,
|
||||||
@@ -35,9 +35,9 @@ impl Default for RandomForestParameters {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RandomForest {
|
impl RandomForestClassifier {
|
||||||
|
|
||||||
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestParameters) -> RandomForest {
|
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestClassifierParameters) -> RandomForestClassifier {
|
||||||
let (_, num_attributes) = x.shape();
|
let (_, num_attributes) = x.shape();
|
||||||
let y_m = M::from_row_vector(y.clone());
|
let y_m = M::from_row_vector(y.clone());
|
||||||
let (_, y_ncols) = y_m.shape();
|
let (_, y_ncols) = y_m.shape();
|
||||||
@@ -56,7 +56,7 @@ impl RandomForest {
|
|||||||
let mut trees: Vec<DecisionTreeClassifier> = Vec::new();
|
let mut trees: Vec<DecisionTreeClassifier> = Vec::new();
|
||||||
|
|
||||||
for _ in 0..parameters.n_trees {
|
for _ in 0..parameters.n_trees {
|
||||||
let samples = RandomForest::sample_with_replacement(&yi, k);
|
let samples = RandomForestClassifier::sample_with_replacement(&yi, k);
|
||||||
let params = DecisionTreeClassifierParameters{
|
let params = DecisionTreeClassifierParameters{
|
||||||
criterion: parameters.criterion.clone(),
|
criterion: parameters.criterion.clone(),
|
||||||
max_depth: parameters.max_depth,
|
max_depth: parameters.max_depth,
|
||||||
@@ -67,7 +67,7 @@ impl RandomForest {
|
|||||||
trees.push(tree);
|
trees.push(tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
RandomForest {
|
RandomForestClassifier {
|
||||||
parameters: parameters,
|
parameters: parameters,
|
||||||
trees: trees,
|
trees: trees,
|
||||||
classes
|
classes
|
||||||
@@ -154,9 +154,9 @@ mod tests {
|
|||||||
&[5.2, 2.7, 3.9, 1.4]]);
|
&[5.2, 2.7, 3.9, 1.4]]);
|
||||||
let y = vec![0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
|
let y = vec![0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
|
||||||
|
|
||||||
RandomForest::fit(&x, &y, Default::default());
|
RandomForestClassifier::fit(&x, &y, Default::default());
|
||||||
|
|
||||||
assert_eq!(y, RandomForest::fit(&x, &y, Default::default()).predict(&x));
|
assert_eq!(y, RandomForestClassifier::fit(&x, &y, Default::default()).predict(&x));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,141 @@
|
|||||||
|
extern crate rand;
|
||||||
|
|
||||||
|
use rand::Rng;
|
||||||
|
use std::default::Default;
|
||||||
|
use crate::linalg::Matrix;
|
||||||
|
use crate::tree::decision_tree_regressor::{DecisionTreeRegressor, DecisionTreeRegressorParameters};
|
||||||
|
|
||||||
|
/// Hyperparameters controlling how a `RandomForestRegressor` is grown.
#[derive(Debug, Clone)]
pub struct RandomForestRegressorParameters {
    /// Maximum depth of each tree; `None` means unlimited depth.
    pub max_depth: Option<u16>,
    /// Minimum number of samples required at a leaf node.
    pub min_samples_leaf: usize,
    /// Minimum number of samples required to split an internal node.
    pub min_samples_split: usize,
    /// Number of trees in the ensemble.
    pub n_trees: usize,
    /// Number of attributes considered at each split; `None` defers to
    /// `fit()`, which uses floor(sqrt(number of attributes)).
    pub mtry: Option<usize>
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct RandomForestRegressor {
|
||||||
|
parameters: RandomForestRegressorParameters,
|
||||||
|
trees: Vec<DecisionTreeRegressor>
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for RandomForestRegressorParameters {
|
||||||
|
fn default() -> Self {
|
||||||
|
RandomForestRegressorParameters {
|
||||||
|
max_depth: None,
|
||||||
|
min_samples_leaf: 1,
|
||||||
|
min_samples_split: 2,
|
||||||
|
n_trees: 10,
|
||||||
|
mtry: Option::None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RandomForestRegressor {
|
||||||
|
|
||||||
|
pub fn fit<M: Matrix>(x: &M, y: &M::RowVector, parameters: RandomForestRegressorParameters) -> RandomForestRegressor {
|
||||||
|
let (n_rows, num_attributes) = x.shape();
|
||||||
|
|
||||||
|
let mtry = parameters.mtry.unwrap_or((num_attributes as f64).sqrt().floor() as usize);
|
||||||
|
|
||||||
|
let mut trees: Vec<DecisionTreeRegressor> = Vec::new();
|
||||||
|
|
||||||
|
for _ in 0..parameters.n_trees {
|
||||||
|
let samples = RandomForestRegressor::sample_with_replacement(n_rows);
|
||||||
|
let params = DecisionTreeRegressorParameters{
|
||||||
|
max_depth: parameters.max_depth,
|
||||||
|
min_samples_leaf: parameters.min_samples_leaf,
|
||||||
|
min_samples_split: parameters.min_samples_split
|
||||||
|
};
|
||||||
|
let tree = DecisionTreeRegressor::fit_weak_learner(x, y, samples, mtry, params);
|
||||||
|
trees.push(tree);
|
||||||
|
}
|
||||||
|
|
||||||
|
RandomForestRegressor {
|
||||||
|
parameters: parameters,
|
||||||
|
trees: trees
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn predict<M: Matrix>(&self, x: &M) -> M::RowVector {
|
||||||
|
let mut result = M::zeros(1, x.shape().0);
|
||||||
|
|
||||||
|
let (n, _) = x.shape();
|
||||||
|
|
||||||
|
for i in 0..n {
|
||||||
|
result.set(0, i, self.predict_for_row(x, i));
|
||||||
|
}
|
||||||
|
|
||||||
|
result.to_row_vector()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn predict_for_row<M: Matrix>(&self, x: &M, row: usize) -> f64 {
|
||||||
|
|
||||||
|
let n_trees = self.trees.len();
|
||||||
|
|
||||||
|
let mut result = 0f64;
|
||||||
|
|
||||||
|
for tree in self.trees.iter() {
|
||||||
|
result += tree.predict_for_row(x, row);
|
||||||
|
}
|
||||||
|
|
||||||
|
result / n_trees as f64
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sample_with_replacement(nrows: usize) -> Vec<usize>{
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
let mut samples = vec![0; nrows];
|
||||||
|
for _ in 0..nrows {
|
||||||
|
let xi = rng.gen_range(0, nrows);
|
||||||
|
samples[xi] += 1;
|
||||||
|
}
|
||||||
|
samples
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;

    /// Smoke test on the classic Longley macroeconomic dataset: a large
    /// forest should reproduce the training responses to within 1.0.
    #[test]
    fn fit_longley() {

        let x = DenseMatrix::from_array(&[
            &[ 234.289, 235.6, 159., 107.608, 1947., 60.323],
            &[ 259.426, 232.5, 145.6, 108.632, 1948., 61.122],
            &[ 258.054, 368.2, 161.6, 109.773, 1949., 60.171],
            &[ 284.599, 335.1, 165., 110.929, 1950., 61.187],
            &[ 328.975, 209.9, 309.9, 112.075, 1951., 63.221],
            &[ 346.999, 193.2, 359.4, 113.27 , 1952., 63.639],
            &[ 365.385, 187., 354.7, 115.094, 1953., 64.989],
            &[ 363.112, 357.8, 335., 116.219, 1954., 63.761],
            &[ 397.469, 290.4, 304.8, 117.388, 1955., 66.019],
            &[ 419.18 , 282.2, 285.7, 118.734, 1956., 67.857],
            &[ 442.769, 293.6, 279.8, 120.445, 1957., 68.169],
            &[ 444.546, 468.1, 263.7, 121.95 , 1958., 66.513],
            &[ 482.704, 381.3, 255.2, 123.366, 1959., 68.655],
            &[ 502.601, 393.1, 251.4, 125.368, 1960., 69.564],
            &[ 518.173, 480.6, 257.2, 127.852, 1961., 69.331],
            &[ 554.894, 400.7, 282.7, 130.081, 1962., 70.551]]);
        let y = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9];

        // Rounded targets the averaged forest prediction is expected to hit.
        let expected_y = vec![85., 88., 88., 89., 97., 98., 99., 99., 102., 104., 109., 110., 113., 114., 115., 116.];

        let parameters = RandomForestRegressorParameters {
            max_depth: None,
            min_samples_leaf: 1,
            min_samples_split: 2,
            n_trees: 1000,
            mtry: Option::None
        };
        let y_hat = RandomForestRegressor::fit(&x, &y, parameters).predict(&x);

        for (i, expected) in expected_y.iter().enumerate() {
            assert!((y_hat[i] - expected).abs() < 1.0);
        }

    }

}
|
||||||
Reference in New Issue
Block a user