feat: + cluster metrics

This commit is contained in:
Volodymyr Orlov
2020-09-22 20:23:51 -07:00
parent 0803532e79
commit 750015b861
15 changed files with 477 additions and 16 deletions
+67
View File
@@ -0,0 +1,67 @@
//! # Optical Recognition of Handwritten Digits Data Set
//!
//! | Number of Instances | Number of Attributes | Missing Values? | Associated Tasks: |
//! |-|-|-|-|
//! | 1797 | 64 | No | Classification, Clustering |
//!
//! [Digits dataset](https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits) contains normalized bitmaps of handwritten digits (0-9) from a preprinted form.
//! This multivariate dataset is frequently used to demonstrate various machine learning algorithms.
//!
//! All input attributes are integers in the range 0..16.
//!
use crate::dataset::deserialize_data;
use crate::dataset::Dataset;
/// Load the Optical Recognition of Handwritten Digits dataset.
///
/// The samples are deserialized from the `digits.xy` file that is embedded into the
/// binary at compile time via `include_bytes!`.
///
/// Feature names are generated as `pixel_<row>_<col>`, one per attribute, and target
/// names are the ten digit labels `"0"` through `"9"`.
///
/// # Panics
///
/// Panics if the embedded `digits.xy` bytes cannot be deserialized.
pub fn load_dataset() -> Dataset<f32, f32> {
    // Width of the digit bitmap. The 64 attributes are assumed to be an 8x8 image
    // stored row by row (the layout used by scikit-learn's `load_digits`).
    const IMAGE_WIDTH: usize = 8;

    let (x, y, num_samples, num_features) =
        match deserialize_data(std::include_bytes!("digits.xy")) {
            Err(why) => panic!("Can't deserialize digits.xy. {}", why),
            Ok(parts) => parts,
        };

    Dataset {
        data: x,
        target: y,
        num_samples,
        num_features,
        // One name per attribute so that `feature_names.len() == num_features`.
        feature_names: (0..num_features)
            .map(|i| format!("pixel_{}_{}", i / IMAGE_WIDTH, i % IMAGE_WIDTH))
            .collect(),
        // The classes are the handwritten digits 0-9.
        target_names: (0..10).map(|digit| digit.to_string()).collect(),
        description: "Digits dataset: https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits".to_string(),
    }
}
#[cfg(test)]
mod tests {
    use super::super::*;
    use super::*;

    /// Maintenance helper: writes the currently loaded dataset back to `digits.xy`.
    /// Ignored by default, so it only runs when requested explicitly.
    #[test]
    #[ignore]
    fn refresh_digits_dataset() {
        let digits = load_dataset();
        let written = serialize_data(&digits, "digits.xy");
        assert!(written.is_ok());
    }

    /// Checks that the embedded dataset has the expected dimensions.
    #[test]
    fn digits_dataset() {
        let digits = load_dataset();
        // 1797 samples with 64 attributes each, stored as one flat vector.
        let expected_cells = 1797 * 64;
        assert_eq!(digits.data.len(), expected_cells);
        assert_eq!(digits.target.len(), 1797);
        assert_eq!(digits.num_features, 64);
        assert_eq!(digits.num_samples, 1797);
    }
}
Binary file not shown.
+1
View File
@@ -4,6 +4,7 @@
pub mod boston;
pub mod breast_cancer;
pub mod diabetes;
pub mod digits;
pub mod iris;
use crate::math::num::RealNumber;