feat: + cluster metrics
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
//! # Optical Recognition of Handwritten Digits Data Set
|
||||
//!
|
||||
//! | Number of Instances | Number of Attributes | Missing Values? | Associated Tasks: |
|
||||
//! |-|-|-|-|
|
||||
//! | 1797 | 64 | No | Classification, Clusteing |
|
||||
//!
|
||||
//! [Digits dataset](https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits) contains normalized bitmaps of handwritten digits (0-9) from a preprinted form.
|
||||
//! This multivariate dataset is frequently used to demonstrate various machine learning algorithms.
|
||||
//!
|
||||
//! All input attributes are integers in the range 0..16.
|
||||
//!
|
||||
use crate::dataset::deserialize_data;
|
||||
use crate::dataset::Dataset;
|
||||
|
||||
/// Get dataset
|
||||
pub fn load_dataset() -> Dataset<f32, f32> {
|
||||
let (x, y, num_samples, num_features) = match deserialize_data(std::include_bytes!("digits.xy"))
|
||||
{
|
||||
Err(why) => panic!("Can't deserialize digits.xy. {}", why),
|
||||
Ok((x, y, num_samples, num_features)) => (x, y, num_samples, num_features),
|
||||
};
|
||||
|
||||
Dataset {
|
||||
data: x,
|
||||
target: y,
|
||||
num_samples: num_samples,
|
||||
num_features: num_features,
|
||||
feature_names: vec![
|
||||
"sepal length (cm)",
|
||||
"sepal width (cm)",
|
||||
"petal length (cm)",
|
||||
"petal width (cm)",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
target_names: vec!["setosa", "versicolor", "virginica"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
description: "Digits dataset: https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::super::*;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn refresh_digits_dataset() {
|
||||
// run this test to generate digits.xy file.
|
||||
let dataset = load_dataset();
|
||||
assert!(serialize_data(&dataset, "digits.xy").is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn digits_dataset() {
|
||||
let dataset = load_dataset();
|
||||
assert_eq!(dataset.data.len(), 1797 * 64);
|
||||
assert_eq!(dataset.target.len(), 1797);
|
||||
assert_eq!(dataset.num_features, 64);
|
||||
assert_eq!(dataset.num_samples, 1797);
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -4,6 +4,7 @@
|
||||
pub mod boston;
|
||||
pub mod breast_cancer;
|
||||
pub mod diabetes;
|
||||
pub mod digits;
|
||||
pub mod iris;
|
||||
|
||||
use crate::math::num::RealNumber;
|
||||
|
||||
Reference in New Issue
Block a user