//! # Multinomial Naive Bayes
//!
//! Multinomial Naive Bayes classifier is a variant of [Naive Bayes](../index.html) for multinomially distributed data.
//! It is often used for discrete data where each predictor represents the number of times an event was observed in a particular instance,
//! for example, the frequency of each word in a document.
//!
//! Example:
//!
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::naive_bayes::multinomial::MultinomialNB;
//!
//! // Training data points are:
//! // Chinese Beijing Chinese (class: China)
//! // Chinese Chinese Shanghai (class: China)
//! // Chinese Macao (class: China)
//! // Tokyo Japan Chinese (class: Japan)
//! let x = DenseMatrix::<f64>::from_2d_array(&[
//!     &[1., 2., 0., 0., 0., 0.],
//!     &[0., 2., 0., 0., 1., 0.],
//!     &[0., 1., 0., 1., 0., 0.],
//!     &[0., 1., 1., 0., 0., 1.],
//! ]);
//! let y = vec![0., 0., 0., 1.];
//! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
//!
//! // Testing data point is:
//! // Chinese Chinese Chinese Tokyo Japan
//! let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 3., 1., 0., 0., 1.]]);
//! let y_hat = nb.predict(&x_test).unwrap();
//! ```
//!
//! ## References:
//!
//! * ["Introduction to Information Retrieval", Manning C. D., Raghavan P., Schutze H., 2009, Chapter 13](https://nlp.stanford.edu/IR-book/information-retrieval-book.html)
use crate::base::Predictor;
use crate::error::Failed;
use crate::linalg::row_iter;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
use serde::{Deserialize, Serialize};

/// Naive Bayes classifier for multinomial features
#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct MultinomialNBDistribution<T: RealNumber> {
    /// class labels known to the classifier
    class_labels: Vec<T>,
    /// prior probability of each class
    class_priors: Vec<T>,
    /// smoothed probability of each feature, per class
    feature_prob: Vec<Vec<T>>,
}

impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribution<T> {
    fn prior(&self, class_index: usize) -> T {
        self.class_priors[class_index]
    }

    fn log_likelihood(&self, class_index: usize, j: &M::RowVector) -> T {
        // log P(x | c) = sum_i x_i * ln(theta(c, i)); the multinomial
        // coefficient is constant across classes and can be dropped.
        let mut likelihood = T::zero();
        for feature in 0..j.len() {
            let value = j.get(feature);
            likelihood += value * self.feature_prob[class_index][feature].ln();
        }
        likelihood
    }

    fn classes(&self) -> &Vec<T> {
        &self.class_labels
    }
}

/// `MultinomialNB` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct MultinomialNBParameters<T: RealNumber> {
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub alpha: T,
    /// Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.
    pub priors: Option<Vec<T>>,
}

impl<T: RealNumber> MultinomialNBParameters<T> {
    /// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
    pub fn with_alpha(mut self, alpha: T) -> Self {
        self.alpha = alpha;
        self
    }
    /// Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.
    pub fn with_priors(mut self, priors: Vec<T>) -> Self {
        self.priors = Some(priors);
        self
    }
}

impl<T: RealNumber> Default for MultinomialNBParameters<T> {
    fn default() -> Self {
        Self {
            alpha: T::one(),
            priors: None,
        }
    }
}
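
// The fitting routine below estimates, for each class `c` and feature `i`, the
// Lidstone-smoothed multinomial parameter
//
//     theta(c, i) = (count(c, i) + alpha) / (total_count(c) + alpha * n_features)
//
// Worked on the IR-book data from the module docs (alpha = 1): the "China" class
// has 8 word occurrences over the 6-word vocabulary, and "Chinese" occurs 5 times,
// so theta = (5 + 1) / (8 + 6) = 3/7, the value asserted in
// `run_multinomial_naive_bayes` below.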
impl<T: RealNumber> MultinomialNBDistribution<T> {
    /// Fits the distribution to an NxM matrix where N is the number of samples and M is the number of features.
    /// * `x` - training data.
    /// * `y` - vector with target values (classes) of length N.
    /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
    /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
    /// priors are adjusted according to the data.
    pub fn fit<M: Matrix<T>>(
        x: &M,
        y: &M::RowVector,
        alpha: T,
        priors: Option<Vec<T>>,
    ) -> Result<Self, Failed> {
        let (n_samples, n_features) = x.shape();
        let y_samples = y.len();
        if y_samples != n_samples {
            return Err(Failed::fit(&format!(
                "Size of x should equal size of y; |x|=[{}], |y|=[{}]",
                n_samples, y_samples
            )));
        }

        if n_samples == 0 {
            return Err(Failed::fit(&format!(
                "Size of x and y should be greater than 0; |x|=[{}]",
                n_samples
            )));
        }
        if alpha < T::zero() {
            return Err(Failed::fit(&format!(
                "Alpha should not be negative; |alpha|=[{}]",
                alpha
            )));
        }

        let y = y.to_vec();

        let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
        let mut class_count = vec![T::zero(); class_labels.len()];

        for class_index in indices.iter() {
            class_count[*class_index] += T::one();
        }

        let class_priors = if let Some(class_priors) = priors {
            if class_priors.len() != class_labels.len() {
                return Err(Failed::fit(
                    "Size of priors provided does not match the number of classes of the data.",
                ));
            }
            class_priors
        } else {
            // Empirical priors: the relative frequency of each class in `y`.
            class_count
                .iter()
                .map(|&c| c / T::from(n_samples).unwrap())
                .collect()
        };

        // Count how many times each feature occurs in each class.
        let mut feature_in_class_counter = vec![vec![T::zero(); n_features]; class_labels.len()];

        for (row, class_index) in row_iter(x).zip(indices) {
            for (idx, row_i) in row.iter().enumerate().take(n_features) {
                feature_in_class_counter[class_index][idx] += *row_i;
            }
        }

        // Smoothed feature probabilities: (count + alpha) / (n_c + alpha * n_features).
        let feature_prob = feature_in_class_counter
            .iter()
            .map(|feature_count| {
                let n_c = feature_count.sum();
                feature_count
                    .iter()
                    .map(|&count| (count + alpha) / (n_c + alpha * T::from(n_features).unwrap()))
                    .collect()
            })
            .collect();

        Ok(Self {
            class_labels,
            class_priors,
            feature_prob,
        })
    }
}

/// MultinomialNB implements the multinomial naive Bayes algorithm for multinomially distributed data.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
pub struct MultinomialNB<T: RealNumber, M: Matrix<T>> {
    inner: BaseNaiveBayes<T, M, MultinomialNBDistribution<T>>,
}

impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for MultinomialNB<T, M> {
    fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.predict(x)
    }
}

impl<T: RealNumber, M: Matrix<T>> MultinomialNB<T, M> {
    /// Fits MultinomialNB with given data
    /// * `x` - training data of size NxM where N is the number of samples and M is the number of
    /// features.
    /// * `y` - vector with target values (classes) of length N.
    /// * `parameters` - additional parameters like class priors and alpha for smoothing.
    pub fn fit(
        x: &M,
        y: &M::RowVector,
        parameters: MultinomialNBParameters<T>,
    ) -> Result<Self, Failed> {
        let distribution =
            MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?;
        let inner = BaseNaiveBayes::fit(distribution)?;
        Ok(Self { inner })
    }

    /// Estimates the class labels for the provided data.
    /// * `x` - data of shape NxM where N is the number of data points to estimate and M is the number of features.
    /// Returns a vector of size N with class estimates.
    pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
        self.inner.predict(x)
    }
}
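
// A minimal usage sketch, not part of the original test suite, for the
// builder-style parameters defined above: `with_alpha` changes the Lidstone
// smoothing and `with_priors` pins the class priors instead of estimating
// them from the data. The alpha and prior values here are illustrative only.
#[cfg(test)]
mod parameter_builder_sketch {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;

    #[test]
    fn fit_with_custom_alpha_and_priors() {
        // Same IR-book toy data as the tests below.
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[1., 2., 0., 0., 0., 0.],
            &[0., 2., 0., 0., 1., 0.],
            &[0., 1., 0., 1., 0., 0.],
            &[0., 1., 1., 0., 0., 1.],
        ]);
        let y = vec![0., 0., 0., 1.];
        let parameters = MultinomialNBParameters::default()
            .with_alpha(0.5)
            .with_priors(vec![0.5, 0.5]);
        let nb = MultinomialNB::fit(&x, &y, parameters).unwrap();
        // The fixed priors should be used verbatim rather than re-estimated.
        assert_eq!(nb.inner.distribution.class_priors, &[0.5, 0.5]);
        // One class estimate per training row.
        assert_eq!(nb.predict(&x).unwrap().len(), 4);
    }
}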
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::DenseMatrix;

    #[test]
    fn run_multinomial_naive_bayes() {
        // Tests that MultinomialNB with alpha=1.0 gives the same values as
        // those given for the toy example in Manning, Raghavan, and
        // Schuetze's "Introduction to Information Retrieval" book:
        // https://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html

        // Training data points are:
        // Chinese Beijing Chinese (class: China)
        // Chinese Chinese Shanghai (class: China)
        // Chinese Macao (class: China)
        // Tokyo Japan Chinese (class: Japan)
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[1., 2., 0., 0., 0., 0.],
            &[0., 2., 0., 0., 1., 0.],
            &[0., 1., 0., 1., 0., 0.],
            &[0., 1., 1., 0., 0., 1.],
        ]);
        let y = vec![0., 0., 0., 1.];
        let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();

        assert_eq!(mnb.inner.distribution.class_priors, &[0.75, 0.25]);
        assert_eq!(
            mnb.inner.distribution.feature_prob,
            &[
                &[1. / 7., 3. / 7., 1. / 14., 1. / 7., 1. / 7., 1. / 14.],
                &[1. / 9., 2. / 9., 2. / 9., 1. / 9., 1. / 9., 2. / 9.]
            ]
        );

        // Testing data point is:
        // Chinese Chinese Chinese Tokyo Japan
        let x_test = DenseMatrix::<f64>::from_2d_array(&[&[0., 3., 1., 0., 0., 1.]]);
        let y_hat = mnb.predict(&x_test).unwrap();

        assert_eq!(y_hat, &[0.]);
    }

    #[test]
    fn multinomial_nb_scikit_parity() {
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[2., 4., 0., 0., 2., 1., 2., 4., 2., 0.],
            &[3., 4., 0., 2., 1., 0., 1., 4., 0., 3.],
            &[1., 4., 2., 4., 1., 0., 1., 2., 3., 2.],
            &[0., 3., 3., 4., 1., 0., 3., 1., 1., 1.],
            &[0., 2., 1., 4., 3., 4., 1., 2., 3., 1.],
            &[3., 2., 4., 1., 3., 0., 2., 4., 0., 2.],
            &[3., 1., 3., 0., 2., 0., 4., 4., 3., 4.],
            &[2., 2., 2., 0., 1., 1., 2., 1., 0., 1.],
            &[3., 3., 2., 2., 0., 2., 3., 2., 2., 3.],
            &[4., 3., 4., 4., 4., 2., 2., 0., 1., 4.],
            &[3., 4., 2., 2., 1., 4., 4., 4., 1., 3.],
            &[3., 0., 1., 4., 4., 0., 0., 3., 2., 4.],
            &[2., 0., 3., 3., 1., 2., 0., 2., 4., 1.],
            &[2., 4., 0., 4., 2., 4., 1., 3., 1., 4.],
            &[0., 2., 2., 3., 4., 0., 4., 4., 4., 4.],
        ]);
        let y = vec![2., 2., 0., 0., 0., 2., 1., 1., 0., 1., 0., 0., 2., 0., 2.];
        let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();

        let y_hat = nb.predict(&x).unwrap();

        assert!(nb
            .inner
            .distribution
            .class_priors
            .approximate_eq(&vec!(0.46, 0.2, 0.33), 1e-2));
        assert!(nb.inner.distribution.feature_prob[1].approximate_eq(
            &vec!(0.07, 0.12, 0.07, 0.15, 0.07, 0.09, 0.08, 0.10, 0.08, 0.11),
            1e-1
        ));
        assert!(y_hat.approximate_eq(
            &vec!(2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0),
            1e-5
        ));
    }

    #[test]
    fn serde() {
        let x = DenseMatrix::<f64>::from_2d_array(&[
            &[1., 1., 0., 0., 0., 0.],
            &[0., 1., 0., 0., 1., 0.],
            &[0., 1., 0., 1., 0., 0.],
            &[0., 1., 1., 0., 0., 1.],
        ]);
        let y = vec![0., 0., 0., 1.];

        let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
        let deserialized_mnb: MultinomialNB<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&mnb).unwrap()).unwrap();

        assert_eq!(mnb, deserialized_mnb);
    }
}
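
// A worked-arithmetic sketch, not part of the original test suite, that
// re-derives by hand the IR-book comparison behind `run_multinomial_naive_bayes`:
// with alpha = 1 the fitted parameters are theta(Chinese | China) = 3/7,
// theta(Tokyo | China) = theta(Japan | China) = 1/14, and 2/9 for each of those
// terms in the "Japan" class. The posterior of a document is proportional to
// prior * product over terms of theta^count.
#[cfg(test)]
mod ir_book_arithmetic_sketch {
    #[test]
    fn posterior_comparison_by_hand() {
        // P(China | d) ∝ 3/4 * (3/7)^3 * (1/14) * (1/14) ≈ 0.0003
        let p_china = 0.75 * (3.0f64 / 7.0).powi(3) * (1.0 / 14.0) * (1.0 / 14.0);
        // P(Japan | d) ∝ 1/4 * (2/9)^3 * (2/9) * (2/9) ≈ 0.0001
        let p_japan = 0.25 * (2.0f64 / 9.0).powi(3) * (2.0 / 9.0) * (2.0 / 9.0);
        assert!((p_china - 0.0003).abs() < 1e-4);
        assert!((p_japan - 0.0001).abs() < 1e-4);
        // The test document is therefore assigned to class "China",
        // matching the prediction asserted in the tests above.
        assert!(p_china > p_japan);
    }
}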