From 5c400f40d258c989659daefab030efcb24cec823 Mon Sep 17 00:00:00 2001
From: gaxler
Date: Wed, 27 Jan 2021 19:36:38 -0800
Subject: [PATCH] Scaffold for turning floats to hashable and fitting to columns

---
 src/preprocessing/categorical_encoders.rs | 27 +++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 src/preprocessing/categorical_encoders.rs

diff --git a/src/preprocessing/categorical_encoders.rs b/src/preprocessing/categorical_encoders.rs
new file mode 100644
index 0000000..828eeef
--- /dev/null
+++ b/src/preprocessing/categorical_encoders.rs
@@ -0,0 +1,27 @@
+#![allow(clippy::ptr_arg)]
+//! # Encode categorical features as a one-hot numeric array.
+
+use crate::error::Failed;
+use crate::linalg::{BaseVector, Matrix};
+use crate::math::num::RealNumber;
+
+use crate::preprocessing::series_encoder::SeriesOneHotEncoder;
+
+pub type HashableReal = u32;
+
+fn hashable_num(v: &T) -> HashableReal {
+    // gaxler: If first 32 bits are the same, assume numbers are the same for the categorical coercion
+    v.to_f32_bits()
+}
+
+#[derive(Debug, Clone)]
+pub struct OneHotEncoderParams {
+    pub categorical_param_idxs: Option>,
+    pub infer_categorical: bool,
+}
+/// Encode Categorical variables of data matrix to one-hot
+pub struct OneHotEncoder {
+    series_encoders: Vec>,
+    categorical_param_idxs: Vec,
+}
+