From f91b1f99425789b6d11c10941b079b4cd7150f5c Mon Sep 17 00:00:00 2001 From: gaxler Date: Wed, 27 Jan 2021 19:37:54 -0800 Subject: [PATCH] fit SeriesOneHotEncoders to predefined columns --- src/preprocessing/categorical_encoders.rs | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/preprocessing/categorical_encoders.rs b/src/preprocessing/categorical_encoders.rs index 828eeef..012f364 100644 --- a/src/preprocessing/categorical_encoders.rs +++ b/src/preprocessing/categorical_encoders.rs @@ -25,3 +25,45 @@ pub struct OneHotEncoder { categorical_param_idxs: Vec, } +impl> OneHotEncoder { + /// PlaceHolder + + pub fn fit(data: &M, params: OneHotEncoderParams) -> Result { + match (params.categorical_param_idxs, params.infer_categorical) { + (None, false) => Err(Failed::fit( + "Must pass categorical series ids or infer flag", + )), + + (Some(idxs), true) => Err(Failed::fit( + "Ambigous parameters, got both infer and categroy ids", + )), + + (Some(idxs), false) => Ok(Self { + series_encoders: Self::build_series_encoders::(data, &idxs[..]), + categorical_param_idxs: idxs, + }), + + (None, true) => { + todo!("implement categorical auto-inference") + } + } + } + + fn build_series_encoders(data: &M, idxs: &[usize]) -> Vec> { + let (nrows, _) = data.shape(); + // let mut res: Vec> = Vec::with_capacity(idxs.len()); + let mut tmp_col: Vec = Vec::with_capacity(nrows); + + let res: Vec> = idxs + .iter() + .map(|&idx| { + data.copy_col_as_vec(idx, &mut tmp_col); + let hashable_col = tmp_col.iter().map(|v| hashable_num::(v)); + SeriesOneHotEncoder::fit_to_iter(hashable_col) + }) + .collect(); + res + } + + +} \ No newline at end of file