Files
smartcore/src/dataset/diabetes.rs
morenol 6f22bbd150 chore: update clippy lints (#272)
* chore: fix clippy lints
---------

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
2023-11-20 21:54:09 -04:00

86 lines
2.9 KiB
Rust

//! # Diabetes Data
//!
//! | Number of Instances | Number of Attributes | Missing Values? | Associated Tasks: |
//! |-|-|-|-|
//! | 442 | 10 | No | Regression |
//!
//! [Diabetes Data](https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html) was collected by Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani for the "Least Angle Regression" paper.
//! Predictive variables have been mean centered and scaled to unit variance.
//! The dataset has following attributes:
//!
//! | Predictor | Data Type | Target? |
//! |-|-|-|
//! | Age | Numerical | No |
//! | Sex | Numerical | No |
//! | Body mass index (BMI) | Numerical | No |
//! | Average blood pressure (BP) | Numerical | No |
//! | Six blood serum measurements (SR1 - SR6) | Numerical | No |
//! | A quantitative measure of disease progression one year after baseline | Numerical | Yes |
//!
//! ## References:
//! * ["Least Angle Regression", Efron B., Hastie T., Johnstone I., Tibshirani R., 2004, Annals of Statistics (with discussion), 407-499](http://statweb.stanford.edu/~tibs/ftp/lars.pdf)
use crate::dataset::deserialize_data;
use crate::dataset::Dataset;
/// Get dataset
pub fn load_dataset() -> Dataset<f32, u32> {
let (x, y, num_samples, num_features) =
match deserialize_data(std::include_bytes!("diabetes.xy")) {
Err(why) => panic!("Can't deserialize diabetes.xy. {why}"),
Ok((x, y, num_samples, num_features)) => (
x,
y.into_iter().map(|x| x as u32).collect(),
num_samples,
num_features,
),
};
Dataset {
data: x,
target: y,
num_samples,
num_features,
feature_names: [
"Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6",
]
.iter()
.map(|s| s.to_string())
.collect(),
target_names: vec!["Disease progression".to_string()],
description: "Diabetes Data: https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html"
.to_string(),
}
}
#[cfg(test)]
mod tests {
use super::*;
// TODO: fix serialization
// #[cfg(not(target_arch = "wasm32"))]
// #[test]
// #[ignore]
// fn refresh_diabetes_dataset() {
// // run this test to generate diabetes.xy file.
// let dataset = load_dataset();
// assert!(serialize_data(&dataset, "diabetes.xy").is_ok());
// }
#[cfg_attr(
all(target_arch = "wasm32", not(target_os = "wasi")),
wasm_bindgen_test::wasm_bindgen_test
)]
#[test]
fn boston_dataset() {
let dataset = load_dataset();
assert_eq!(
dataset.data.len(),
dataset.num_features * dataset.num_samples
);
assert_eq!(dataset.target.len(), dataset.num_samples);
assert_eq!(dataset.num_features, 10);
assert_eq!(dataset.num_samples, 442);
}
}