From 237b1160b17308252b6040d4c5ca07880079051c Mon Sep 17 00:00:00 2001 From: gaxler Date: Tue, 2 Feb 2021 18:20:27 -0800 Subject: [PATCH] doc update --- src/preprocessing/series_encoder.rs | 64 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/preprocessing/series_encoder.rs b/src/preprocessing/series_encoder.rs index 4e9ddf9..9d7e259 100644 --- a/src/preprocessing/series_encoder.rs +++ b/src/preprocessing/series_encoder.rs @@ -8,6 +8,7 @@ use crate::math::num::RealNumber; use std::collections::HashMap; use std::hash::Hash; +/// Bi-directional map category <-> label num. #[derive(Debug, Clone)] pub struct CategoryMapper { category_map: HashMap, @@ -16,7 +17,9 @@ pub struct CategoryMapper { } impl<'a, CategoryType: 'a + Hash + Eq + Clone> CategoryMapper { - fn fit_to_iter(categories: impl Iterator) -> Self { + + /// Fit an encoder to a lable iterator + pub fn fit_to_iter(categories: impl Iterator) -> Self { let mut category_map: HashMap = HashMap::new(); let mut category_num = 0usize; let mut unique_lables: Vec = Vec::new(); @@ -34,8 +37,9 @@ impl<'a, CategoryType: 'a + Hash + Eq + Clone> CategoryMapper { categories: unique_lables, } } - - fn from_category_map(category_map: HashMap) -> Self { + + /// Build an encoder from a predefined (category -> class number) map + pub fn from_category_map(category_map: HashMap) -> Self { let mut _unique_cat: Vec<(CategoryType, usize)> = category_map.iter().map(|(k, v)| (k.clone(), *v)).collect(); _unique_cat.sort_by(|a, b| a.1.cmp(&b.1)); @@ -46,8 +50,9 @@ impl<'a, CategoryType: 'a + Hash + Eq + Clone> CategoryMapper { category_map, } } - - fn from_positional_category_vec(categories: Vec) -> Self { + + /// Build an encoder from a predefined positional category-class num vector + pub fn from_positional_category_vec(categories: Vec) -> Self { let category_map: HashMap = categories .iter() .enumerate() @@ -61,16 +66,17 @@ impl<'a, CategoryType: 'a + Hash + Eq + Clone> CategoryMapper { } /// Get label num of a category - fn get_num(&self, category: &CategoryType) -> Option<&usize> { + pub fn get_num(&self, category: &CategoryType) -> Option<&usize> { self.category_map.get(category) } /// Return category corresponding to label num - fn get_cat(&self, num: usize) -> &CategoryType { + pub fn get_cat(&self, num: usize) -> &CategoryType { &self.categories[num] } - fn get_categories(&self) -> &[CategoryType] { + /// List all categories (position = category number) + pub fn get_categories(&self) -> &[CategoryType] { &self.categories[..] } } @@ -80,14 +86,14 @@ pub trait SeriesEncoder: where CategoryType:Hash + Eq + Clone { - /// Fit an encoder to a lable list + /// Fit an encoder to a lable iterator fn fit_to_iter(categories: impl Iterator) -> Self; /// Number of categories for categorical variable fn num_categories(&self) -> usize; /// Build an encoder from a predefined (category -> class number) map - fn from_category_map(category_map: HashMap) -> Self; + fn from_category_map(category_map: HashMap) -> Self; /// Build an encoder from a predefined positional category-class num vector fn from_positional_category_vec(categories: Vec) -> Self; @@ -119,6 +125,7 @@ pub trait SeriesEncoder: self.transform_iter(v) } } + /// Make a one-hot encoded vector from a categorical variable /// /// Example: @@ -182,20 +189,20 @@ pub struct SeriesOneHotEncoder { } impl SeriesEncoder for SeriesOneHotEncoder { - + fn fit_to_iter(categories: impl Iterator) -> Self { Self {mapper:CategoryMapper::fit_to_iter(categories)} - } + } /// Build an encoder from a predefined (category -> class number) map fn from_category_map(category_map: HashMap) -> Self { Self {mapper: CategoryMapper::from_category_map(category_map)} - } + } /// Build an encoder from a predefined positional category-class num vector fn from_positional_category_vec(categories: Vec) -> Self { Self {mapper:CategoryMapper::from_positional_category_vec(categories)} - } + } fn num_categories(&self) -> usize { self.mapper.num_categories @@ -207,25 +214,25 @@ impl SeriesEncoder for SeriesOneH fn invert_one>(&self, one_hot: V) -> Result { - let pos = U::from_f64(1f64).unwrap(); + let pos = U::from_f64(1f64).unwrap(); let oh_it = (0..one_hot.len()).map(|idx| one_hot.get(idx)); - + let s: Vec = oh_it - .enumerate() - .filter_map(|(idx, v)| if v == pos { Some(idx) } else { None }) - .collect(); - - if s.len() == 1 { - let idx = s[0]; + .enumerate() + .filter_map(|(idx, v)| if v == pos { Some(idx) } else { None }) + .collect(); + + if s.len() == 1 { + let idx = s[0]; return Ok(self.mapper.get_cat(idx).clone()); + } + let pos_entries = format!( + "Expected a single positive entry, {} entires found", + s.len() + ); + Err(Failed::transform(&pos_entries[..])) } - let pos_entries = format!( - "Expected a single positive entry, {} entires found", - s.len() - ); - Err(Failed::transform(&pos_entries[..])) - } fn transform_one>(&self, category: &CategoryType) -> Option { match self.mapper.get_num(category) { @@ -233,6 +240,7 @@ impl SeriesEncoder for SeriesOneH Some(&idx) => Some(make_one_hot(idx, self.num_categories())), } } + } #[cfg(test)]