110 Commits

Author SHA1 Message Date
Lorenzo (Mec-iS)
61db4ebd90 Add test 2022-08-24 12:34:56 +01:00
Lorenzo (Mec-iS)
2603a1f42b Add test 2022-08-24 11:44:30 +01:00
Alan Race
663db0334d Added per-class probability prediction for random forests 2022-07-11 16:08:03 +02:00
ferrouille
b4a807eb9f Add SVC::decision_function (#135) 2022-06-21 12:48:16 -04:00
dependabot[bot]
ff456df0a4 Update nalgebra requirement from 0.23.0 to 0.31.0 (#128)
Updates the requirements on [nalgebra](https://github.com/dimforge/nalgebra) to permit the latest version.
- [Release notes](https://github.com/dimforge/nalgebra/releases)
- [Changelog](https://github.com/dimforge/nalgebra/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/dimforge/nalgebra/compare/v0.23.0...v0.31.0)

---
updated-dependencies:
- dependency-name: nalgebra
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-05-11 13:14:14 -04:00
dependabot-preview[bot]
322610c7fb build(deps): update nalgebra requirement from 0.23.0 to 0.26.2 (#98)
* build(deps): update nalgebra requirement from 0.23.0 to 0.26.2

Updates the requirements on [nalgebra](https://github.com/dimforge/nalgebra) to permit the latest version.
- [Release notes](https://github.com/dimforge/nalgebra/releases)
- [Changelog](https://github.com/dimforge/nalgebra/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/dimforge/nalgebra/compare/v0.23.0...v0.26.2)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>

* fix: updates for nalgebre

* test: explicitly call pow_mut from BaseVector since now it conflicts with nalgebra implementation

* Don't be strict with dependencies

Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
2022-05-11 13:04:27 -04:00
morenol
70df9a8b49 Merge pull request #133 from smartcorelib/release-0.2.1
Release 0.2.1
2022-05-10 08:57:53 -04:00
Volodymyr Orlov
7ea620e6fd Updates version to 0.2.1 2022-05-09 16:03:05 -07:00
VolodymyrOrlov
db5edcf67a Merge pull request #132 from smartcorelib/formatting-fix
Fixes broken build
2022-05-09 15:56:22 -07:00
Volodymyr Orlov
8297cbe67e Fixes broken build 2022-05-09 15:50:25 -07:00
VolodymyrOrlov
38c9b5ad2f Merge pull request #126 from ericschief/cover-tree-fix
Fix issue with cover tree k-nearest neighbors
2022-05-09 15:34:10 -07:00
morenol
820201e920 Solve conflict with num-traits (#130)
* Solve conflict with num-traits

* Fix clippy warnings

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
2022-05-05 10:39:18 -04:00
Kiran Eiden
389b0e8e67 Only sort in CoverTree::find function if there are more than k points
Sorting only needs to be done if the list of KNN candidates is greater
than length k.
2022-01-04 14:50:47 -08:00
Kiran Eiden
f93286ffbd Fix bug in cover tree KNN algorithm
Prior to this change, the find function implementation for the
CoverTree class could have potentially returned the wrong result
in cases where there were multiple points in the dataset
equidistant from p. For example, the current test passed for k=3
but failed to produce the correct result for k=4 (it claimed that
3, 4, 5, and 7 were the 4 closest points to 5 in the dataset
rather than 3, 4, 5, and 6). Sorting the neighbors vector before
collecting the first k values from it resolved this issue.
2022-01-02 20:05:39 -08:00
Malte Londschien
12c102d02b Allow setting seed for RandomForestClassifier and Regressor (#120)
* Seed for the classifier.

* Seed for the regressor.

* Forgot one.

* typo.
2021-11-10 20:51:24 -04:00
VolodymyrOrlov
521dab49ef Merge pull request #116 from mlondschien/issue-115
Add OOB predictions to random forests
2021-10-28 08:10:09 -07:00
Malte Londschien
3bf8813946 Merge branch 'development' into issue-115 2021-10-28 09:54:22 +02:00
VolodymyrOrlov
7830946ecb Merge pull request #117 from morenol/lmm/fix_clippy
Fix clippy warnings
2021-10-27 11:01:16 -07:00
VolodymyrOrlov
813c7ab233 Merge pull request #110 from morenol/nb/fix_docs
docs: fix documentation of naive bayes structs
2021-10-27 11:00:12 -07:00
Luis Moreno
4397c91570 Fix clippy warnings 2021-10-20 14:15:41 -05:00
Malte Londschien
14245e15ad type error. 2021-10-20 17:13:00 +02:00
Malte Londschien
d0a4ccbe20 Set keep_samples attribute. 2021-10-20 17:09:13 +02:00
Malte Londschien
85b9fde9a7 Another format. 2021-10-20 17:04:24 +02:00
Malte Londschien
d239314967 Same for regressor. 2021-10-14 09:59:26 +02:00
Malte Londschien
4bae62ab2f Test. 2021-10-14 09:47:00 +02:00
Malte Londschien
e8cba343ca Initial implementation of predict_oob. 2021-10-14 09:34:45 +02:00
Luis Moreno
0b3bf946df chore: fix clippy warnings 2021-06-05 01:41:40 -04:00
Luis Moreno
763a8370eb docs: fix documentation of naive bayes structs 2021-06-05 00:25:34 -04:00
Luis Moreno
1208051fb5 Merge pull request #103 from smartcorelib/dependabot/add-v2-config-file
Upgrade to GitHub-native Dependabot
2021-04-29 12:40:54 -04:00
dependabot-preview[bot]
436d0a089f Upgrade to GitHub-native Dependabot 2021-04-29 16:13:20 +00:00
Luis Moreno
92265cc979 Merge pull request #99 from smartcorelib/dependabot/cargo/num-0.4.0
build(deps): update num requirement from 0.3.0 to 0.4.0
2021-04-28 18:02:58 -04:00
dependabot-preview[bot]
513d3898c9 build(deps): update num requirement from 0.3.0 to 0.4.0
Updates the requirements on [num](https://github.com/rust-num/num) to permit the latest version.
- [Release notes](https://github.com/rust-num/num/releases)
- [Changelog](https://github.com/rust-num/num/blob/master/RELEASES.md)
- [Commits](https://github.com/rust-num/num/compare/num-0.3.0...num-0.4.0)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-04-28 21:44:02 +00:00
Luis Moreno
4b654b25ac Merge pull request #97 from smartcorelib/dependabot/cargo/ndarray-0.15
build(deps): update ndarray requirement from 0.14 to 0.15
2021-04-28 17:41:56 -04:00
dependabot-preview[bot]
5a2e1f1262 build(deps): update ndarray requirement from 0.14 to 0.15
Updates the requirements on [ndarray](https://github.com/rust-ndarray/ndarray) to permit the latest version.
- [Release notes](https://github.com/rust-ndarray/ndarray/releases)
- [Changelog](https://github.com/rust-ndarray/ndarray/blob/master/RELEASES.md)
- [Commits](https://github.com/rust-ndarray/ndarray/compare/ndarray-rand-0.14.0...0.15.1)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-04-28 21:41:48 +00:00
Luis Moreno
377d5d0b06 Merge pull request #96 from smartcorelib/dependabot/cargo/rand-0.8.3
build(deps): update rand requirement from 0.7.3 to 0.8.3
2021-04-28 17:40:02 -04:00
Luis Moreno
9ce448379a docs: create changelog (#102)
Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
2021-04-28 16:58:15 -04:00
Luis Moreno
c295a0d1bb fix: fix code to be compatible with rand 0.8, following the recommendations of https://rust-random.github.io/book/update-0.8.html and https://docs.rs/getrandom/0.2.2/getrandom/#webassembly-support 2021-04-28 16:28:43 -04:00
dependabot-preview[bot]
703dc9688b build(deps): update rand_distr requirement from 0.3.0 to 0.4.0
Updates the requirements on [rand_distr](https://github.com/rust-random/rand) to permit the latest version.
- [Release notes](https://github.com/rust-random/rand/releases)
- [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-random/rand/compare/rand_distr-0.3.0...rand_distr-0.4.0)
2021-04-28 16:25:05 -04:00
dependabot-preview[bot]
790979a26d build(deps): update rand requirement from 0.7.3 to 0.8.3
Updates the requirements on [rand](https://github.com/rust-random/rand) to permit the latest version.
- [Release notes](https://github.com/rust-random/rand/releases)
- [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-random/rand/compare/0.7.3...0.8.3)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-04-28 20:00:24 +00:00
Luis Moreno
162bed2aa2 feat: added support to wasm (#94)
* test: run tests also in wasm targets

* fix: install rand with wasm-bindgen for wasm targets

* fix: use actual usize size to access buffer.

* fix: do not run functions that create files in wasm.

* test: do not run in wasm test that panics.

Co-authored-by: Luis Moreno <morenol@users.noreply.github.com>
2021-04-28 15:58:39 -04:00
Luis Moreno
5ed5772a4e Merge pull request #95 from morenol/lmm/clippy_151
style(lint): fix clippy warnings
2021-04-28 00:08:27 -04:00
Luis Moreno
d9814c0918 style(lint): fix clippy warnings 2021-04-27 09:32:01 -04:00
Luis Moreno
7f44b93838 Merge pull request #89 from morenol/lmm/github_actions
Move CI to github actions
2021-03-05 19:02:11 -04:00
Luis Moreno
02200ae1e3 Only run tests once per OS 2021-03-05 18:53:54 -04:00
Luis Moreno
3dc5336514 Move CI to github actions 2021-03-05 17:57:28 -04:00
Luis Moreno
abeff7926e Merge pull request #88 from morenol/lmm/use_usize_size
fix:  usize::from_le_bytes buffer
2021-03-05 16:59:59 -04:00
Luis Moreno
1395cc6518 fix: Use usize size for usize::from_le_bytes buffer 2021-03-05 10:25:34 -04:00
Volodymyr Orlov
4335ee5a56 Fixes width and height parameters of the logo 2021-02-26 12:43:10 -08:00
Volodymyr Orlov
4c1dbc3327 Fixes width and height parameters of the logo 2021-02-26 12:34:05 -08:00
VolodymyrOrlov
a920959ae3 Merge pull request #83 from z1queue/development
rename svm svr to svc in tests and docs
2021-02-25 18:57:29 -08:00
zhangyiqun01
6d58dbe2a2 rename svm svr to svc in tests and docs 2021-02-26 10:52:04 +08:00
zEqueue
023b449ff1 Merge pull request #1 from smartcorelib/development
update
2021-02-26 10:47:50 +08:00
zhangyiqun01
cd44f1d515 reset 2021-02-26 10:47:21 +08:00
Luis Moreno
1b42f8a396 feat: Add getters for naive bayes structs (#74)
* feat: Add getters for GaussianNB

* Add classes getter to BernoulliNB

Add classes getter to CategoricalNB

Add classes getter to MultinomialNB

* Add feature_log_prob getter to MultinomialNB

* Add class_count to NB structs

* Add n_features getter for NB

* Add feature_count to MultinomialNB and BernoulliNB

* Add n_categories to CategoricalNB

* Implement feature_log_prob and category_count getter for CategoricalNB

* Implement feature_log_prob for BernoulliNB
2021-02-25 15:44:34 -04:00
VolodymyrOrlov
c0be45b667 Merge pull request #82 from cmccomb/development
Adding `make_moons` data generator
2021-02-25 09:56:05 -08:00
zhangyiqun01
0e9c517b1a rename svm svr to svc in tests and docs 2021-02-25 15:59:09 +08:00
Chris McComb
fed11f005c Fixed formatting to pass cargo format check. 2021-02-17 21:29:51 -05:00
Chris McComb
483a21bec0 Oops, test was failing due to typo. Fixed now. 2021-02-17 21:22:41 -05:00
Chris McComb
4fb2625a33 Implemented make_moons generator per https://github.com/scikit-learn/scikit-learn/blob/95119c13a/sklearn/datasets/_samples_generator.py#L683 2021-02-17 21:22:06 -05:00
Luis Moreno
a30802ec43 fix: Change to compile for wasm32-unknown-unknown target (#80) 2021-02-16 22:20:02 -04:00
Luis Moreno
4af69878e0 fix: Fix new clippy warnings (#79)
* Fix new clippy warnings

* Allow clippy::suspicious-operation-groupings
2021-02-16 18:19:14 -04:00
VolodymyrOrlov
745d0b570e Merge pull request #76 from gaxler/OneHotEncoder
One hot encoder
2021-02-11 17:42:57 -08:00
gaxler
6b5bed6092 remove old 2021-02-09 22:01:59 -08:00
gaxler
af6ec2d402 rename categorical 2021-02-09 22:01:34 -08:00
gaxler
828df4e338 Use CategoryMapper to transform an iterator. No more passing iterator to SeriesEncoders 2021-02-03 13:42:27 -08:00
gaxler
374dfeceb9 No more SeriesEncoders. 2021-02-03 13:41:25 -08:00
gaxler
3cc20fd400 Move all functionality to CategoryMapper (one-hot and ordinal). 2021-02-03 13:39:26 -08:00
gaxler
700d320724 simplify SeriesEncoder trait 2021-02-03 10:45:25 -08:00
gaxler
ef06f45638 Switch to use SeriesEncoder trait 2021-02-02 18:21:06 -08:00
gaxler
237b1160b1 doc update 2021-02-02 18:20:27 -08:00
gaxler
d31145b4fe Define common series encoder behavior 2021-02-02 18:19:36 -08:00
gaxler
19ff6df84c Separate mapper object 2021-02-02 17:40:58 -08:00
gaxler
228b54baf7 fmt 2021-02-01 11:24:50 -08:00
gaxler
03b9f76e9f Doc+Naming Improvement 2021-02-01 11:24:20 -08:00
gaxler
a882741e12 If transform fails - fail before copying the whole matrix
(changed the order of copying, first do the categorical, then copy the rest)
2021-02-01 11:20:03 -08:00
gaxler
f4b5936dcf fmt 2021-01-30 20:18:52 -08:00
gaxler
863be5ef75 style fixes 2021-01-30 20:09:52 -08:00
gaxler
ca0816db97 Clippy fixes 2021-01-30 19:55:04 -08:00
gaxler
2f03c1d6d7 module name change 2021-01-30 19:54:42 -08:00
gaxler
c987d39d43 tests + force Categorizable be RealNumber 2021-01-30 19:31:09 -08:00
gaxler
fd6b2e8014 Transform matrix 2021-01-30 19:29:58 -08:00
gaxler
cd5611079c Fit OneHotEncoder 2021-01-30 19:29:33 -08:00
gaxler
dd39433ff8 Categorizable trait defines logic of turning floats into hashable categorical variables. Since we only support RealNumbers for now, the idea is to treat round numbers as ordinal (or nominal if user chooses to ignore order) categories. 2021-01-30 18:48:23 -08:00
gaxler
3dc8a42832 Adapt column numbers to the new columns introduced by categorical variables. 2021-01-30 16:05:45 -08:00
gaxler
3480e728af Documentation updates 2021-01-30 16:04:41 -08:00
gaxler
f91b1f9942 fit SeriesOneHotEncoders to predefined columns 2021-01-27 19:37:54 -08:00
gaxler
5c400f40d2 Scaffold for turning floats to hashable and fitting to columns 2021-01-27 19:36:38 -08:00
gaxler
408b97d8aa Rename series encoder and move to separate module file 2021-01-27 19:31:14 -08:00
gaxler
6109fc5211 Renaming fit/transform for API compatibility. Also rename label to category. 2021-01-27 12:13:45 -08:00
gaxler
19088b682a remove LabelDefinition, looks like unnecessary abstraction for now 2021-01-27 12:06:43 -08:00
gaxler
244a724445 Generic make_one_hot. Current implementation returns BaseVector of RealNumber 2021-01-27 12:03:13 -08:00
gaxler
9833a2f851 codecov-fix 2021-01-26 10:03:33 -08:00
VolodymyrOrlov
68e7162fba Merge pull request #72 from smartcorelib/lr_reg
feat: adds l2 regularization penalty to the Logistic Regression
2021-01-26 09:37:39 -08:00
gaxler
7daf536aeb fixed docs 2021-01-26 09:15:24 -08:00
gaxler
0df797cbae fmt fix 2021-01-26 00:04:15 -08:00
gaxler
139bbae456 clippy fixes 2021-01-26 00:01:20 -08:00
gaxler
dbca6d43ce fmt fix 2021-01-25 23:55:43 -08:00
gaxler
991631876e build one-hot encoder 2021-01-25 23:33:48 -08:00
Volodymyr Orlov
40a92ee4db feat: adds l2 regularization penalty to the Logistic Regression 2021-01-21 14:37:34 -08:00
VolodymyrOrlov
87d4e9a423 Merge pull request #71 from smartcorelib/log_regression_solvers
feat: adds a new parameter to the logistic regression: solver
2021-01-21 09:23:19 -08:00
Volodymyr Orlov
bd5fbb63b1 feat: adds a new parameter to the logistic regression: solver 2021-01-20 16:55:58 -08:00
VolodymyrOrlov
272aabcd69 Merge pull request #67 from ssorc3/development
Make SerDe Optional
2021-01-18 13:53:37 -08:00
Ben Cross
fd00bc3780 Run the pipeline with --all-features enabled 2021-01-18 20:50:49 +00:00
Ben Cross
f1cf8a6f08 Added serde feature flags to tests 2021-01-18 10:32:35 +00:00
Ben Cross
762986b271 Cargo format 2021-01-17 21:37:30 +00:00
Ben Cross
e0d46f430b feat: Make SerDe optional 2021-01-17 21:35:03 +00:00
Luis Moreno
eb769493e7 Add coverage check (#57)
* Add coverage check
2021-01-05 16:13:39 -04:00
VolodymyrOrlov
4a941d1700 Merge pull request #56 from atcol/patch-1
Fix Matrix typo in documentation
2021-01-05 09:14:54 -08:00
Alex
0e8166386c Fix Matrix typo in documentation 2021-01-05 16:57:14 +00:00
VolodymyrOrlov
d91999b430 Merge pull request #48 from smartcorelib/main
Merge pull request #47 from smartcorelib/development
2021-01-03 15:10:32 -08:00
86 changed files with 2585 additions and 439 deletions
-43
View File
@@ -1,43 +0,0 @@
version: 2.1
workflows:
version: 2.1
build:
jobs:
- build
- clippy
jobs:
build:
docker:
- image: circleci/rust:latest
environment:
TZ: "/usr/share/zoneinfo/your/location"
steps:
- checkout
- restore_cache:
key: project-cache
- run:
name: Check formatting
command: cargo fmt -- --check
- run:
name: Stable Build
command: cargo build --features "nalgebra-bindings ndarray-bindings"
- run:
name: Test
command: cargo test --features "nalgebra-bindings ndarray-bindings"
- save_cache:
key: project-cache
paths:
- "~/.cargo"
- "./target"
clippy:
docker:
- image: circleci/rust:latest
steps:
- checkout
- run:
name: Install cargo clippy
command: rustup component add clippy
- run:
name: Run cargo clippy
command: cargo clippy --all-features -- -Drust-2018-idioms -Dwarnings
+11
View File
@@ -0,0 +1,11 @@
version: 2
updates:
- package-ecosystem: cargo
directory: "/"
schedule:
interval: daily
open-pull-requests-limit: 10
ignore:
- dependency-name: rand_distr
versions:
- 0.4.0
+57
View File
@@ -0,0 +1,57 @@
name: CI
on:
push:
branches: [ main, development ]
pull_request:
branches: [ development ]
jobs:
tests:
runs-on: "${{ matrix.platform.os }}-latest"
strategy:
matrix:
platform: [
{ os: "windows", target: "x86_64-pc-windows-msvc" },
{ os: "windows", target: "i686-pc-windows-msvc" },
{ os: "ubuntu", target: "x86_64-unknown-linux-gnu" },
{ os: "ubuntu", target: "i686-unknown-linux-gnu" },
{ os: "ubuntu", target: "wasm32-unknown-unknown" },
{ os: "macos", target: "aarch64-apple-darwin" },
]
env:
TZ: "/usr/share/zoneinfo/your/location"
steps:
- uses: actions/checkout@v2
- name: Cache .cargo and target
uses: actions/cache@v2
with:
path: |
~/.cargo
./target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.toml') }}
restore-keys: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
target: ${{ matrix.platform.target }}
profile: minimal
default: true
- name: Install test runner for wasm
if: matrix.platform.target == 'wasm32-unknown-unknown'
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Stable Build
uses: actions-rs/cargo@v1
with:
command: build
args: --all-features --target ${{ matrix.platform.target }}
- name: Tests
if: matrix.platform.target == 'x86_64-unknown-linux-gnu' || matrix.platform.target == 'x86_64-pc-windows-msvc' || matrix.platform.target == 'aarch64-apple-darwin'
uses: actions-rs/cargo@v1
with:
command: test
args: --all-features
- name: Tests in WASM
if: matrix.platform.target == 'wasm32-unknown-unknown'
run: wasm-pack test --node -- --all-features
+44
View File
@@ -0,0 +1,44 @@
name: Coverage
on:
push:
branches: [ main, development ]
pull_request:
branches: [ development ]
jobs:
coverage:
runs-on: ubuntu-latest
env:
TZ: "/usr/share/zoneinfo/your/location"
steps:
- uses: actions/checkout@v2
- name: Cache .cargo
uses: actions/cache@v2
with:
path: |
~/.cargo
./target
key: ${{ runner.os }}-coverage-cargo-${{ hashFiles('**/Cargo.toml') }}
restore-keys: ${{ runner.os }}-coverage-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
profile: minimal
default: true
- name: Install cargo-tarpaulin
uses: actions-rs/install@v0.1
with:
crate: cargo-tarpaulin
version: latest
use-tool-cache: true
- name: Run cargo-tarpaulin
uses: actions-rs/cargo@v1
with:
command: tarpaulin
args: --out Lcov --all-features -- --test-threads 1
- name: Upload to codecov.io
uses: codecov/codecov-action@v1
with:
fail_ci_if_error: true
+41
View File
@@ -0,0 +1,41 @@
name: Lint checks
on:
push:
branches: [ main, development ]
pull_request:
branches: [ development ]
jobs:
lint:
runs-on: ubuntu-latest
env:
TZ: "/usr/share/zoneinfo/your/location"
steps:
- uses: actions/checkout@v2
- name: Cache .cargo and target
uses: actions/cache@v2
with:
path: |
~/.cargo
./target
key: ${{ runner.os }}-lint-cargo-${{ hashFiles('**/Cargo.toml') }}
restore-keys: ${{ runner.os }}-lint-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
default: true
- run: rustup component add rustfmt
- name: Check format
uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
- run: rustup component add clippy
- name: Run clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-features -- -Drust-2018-idioms -Dwarnings
+60
View File
@@ -0,0 +1,60 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## Added
- L2 regularization penalty to the Logistic Regression
- Getters for the naive bayes structs
- One hot encoder
- Make moons data generator
- Support for WASM.
## Changed
- Make serde optional
## [0.2.0] - 2021-01-03
### Added
- DBSCAN
- Epsilon-SVR, SVC
- Ridge, Lasso, ElasticNet
- Bernoulli, Gaussian, Categorical and Multinomial Naive Bayes
- K-fold Cross Validation
- Singular value decomposition
- New api module
- Integration with Clippy
- Cholesky decomposition
### Changed
- ndarray upgraded to 0.14
- smartcore::error:FailedError is now non-exhaustive
- K-Means
- PCA
- Random Forest
- Linear and Logistic Regression
- KNN
- Decision Tree
## [0.1.0] - 2020-09-25
### Added
- First release of smartcore.
- KNN + distance metrics (Euclidian, Minkowski, Manhattan, Hamming, Mahalanobis)
- Linear Regression (OLS)
- Logistic Regression
- Random Forest Classifier
- Decision Tree Classifier
- PCA
- K-Means
- Integrated with ndarray
- Abstract linear algebra methods
- RandomForest Regressor
- Decision Tree Regressor
- Serde integration
- Integrated with nalgebra
- LU, QR, SVD, EVD
- Evaluation Metrics
+14 -9
View File
@@ -2,7 +2,7 @@
name = "smartcore"
description = "The most advanced machine learning library in rust."
homepage = "https://smartcorelib.org"
version = "0.2.0"
version = "0.2.1"
authors = ["SmartCore Developers"]
edition = "2018"
license = "Apache-2.0"
@@ -19,20 +19,25 @@ nalgebra-bindings = ["nalgebra"]
datasets = []
[dependencies]
ndarray = { version = "0.14", optional = true }
nalgebra = { version = "0.23.0", optional = true }
num-traits = "0.2.12"
num = "0.3.0"
rand = "0.7.3"
rand_distr = "0.3.0"
serde = { version = "1.0.115", features = ["derive"] }
serde_derive = "1.0.115"
ndarray = { version = "0.15", optional = true }
nalgebra = { version = "0.31", optional = true }
num-traits = "0.2"
num = "0.4"
rand = "0.8"
rand_distr = "0.4"
serde = { version = "1", features = ["derive"], optional = true }
[target.'cfg(target_arch = "wasm32")'.dependencies]
getrandom = { version = "0.2", features = ["js"] }
[dev-dependencies]
criterion = "0.3"
serde_json = "1.0"
bincode = "1.3.1"
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen-test = "0.3"
[[bench]]
name = "distance"
harness = false
+3 -3
View File
@@ -9,9 +9,9 @@
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
inkscape:version="1.0 (4035a4f, 2020-05-01)"
sodipodi:docname="smartcore.svg"
width="396.01309mm"
height="86.286003mm"
viewBox="0 0 396.0131 86.286004"
width="1280"
height="320"
viewBox="0 0 454 86.286004"
version="1.1"
id="svg512">
<metadata

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

+1
View File
@@ -314,6 +314,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn bbdtree_iris() {
let data = DenseMatrix::from_2d_array(&[
+26 -18
View File
@@ -24,6 +24,7 @@
//! ```
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::heap_select::HeapSelection;
@@ -32,7 +33,8 @@ use crate::math::distance::Distance;
use crate::math::num::RealNumber;
/// Implements Cover Tree algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct CoverTree<T, F: RealNumber, D: Distance<T, F>> {
base: F,
inv_log_base: F,
@@ -56,16 +58,17 @@ impl<T, F: RealNumber, D: Distance<T, F>> PartialEq for CoverTree<T, F, D> {
}
}
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct Node<F: RealNumber> {
idx: usize,
max_dist: F,
parent_dist: F,
children: Vec<Node<F>>,
scale: i64,
_scale: i64,
}
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug)]
struct DistanceSet<F: RealNumber> {
idx: usize,
dist: Vec<F>,
@@ -82,7 +85,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
max_dist: F::zero(),
parent_dist: F::zero(),
children: Vec::new(),
scale: 0,
_scale: 0,
};
let mut tree = CoverTree {
base,
@@ -114,7 +117,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
}
let e = self.get_data_value(self.root.idx);
let mut d = self.distance.distance(&e, p);
let mut d = self.distance.distance(e, p);
let mut current_cover_set: Vec<(F, &Node<F>)> = Vec::new();
let mut zero_set: Vec<(F, &Node<F>)> = Vec::new();
@@ -172,11 +175,14 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
if ds.0 <= upper_bound {
let v = self.get_data_value(ds.1.idx);
if !self.identical_excluded || v != p {
neighbors.push((ds.1.idx, ds.0, &v));
neighbors.push((ds.1.idx, ds.0, v));
}
}
}
if neighbors.len() > k {
neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
}
Ok(neighbors.into_iter().take(k).collect())
}
@@ -197,7 +203,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
let mut zero_set: Vec<(F, &Node<F>)> = Vec::new();
let e = self.get_data_value(self.root.idx);
let mut d = self.distance.distance(&e, p);
let mut d = self.distance.distance(e, p);
current_cover_set.push((d, &self.root));
while !current_cover_set.is_empty() {
@@ -227,7 +233,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
for ds in zero_set {
let v = self.get_data_value(ds.1.idx);
if !self.identical_excluded || v != p {
neighbors.push((ds.1.idx, ds.0, &v));
neighbors.push((ds.1.idx, ds.0, v));
}
}
@@ -240,7 +246,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
max_dist: F::zero(),
parent_dist: F::zero(),
children: Vec::new(),
scale: 100,
_scale: 100,
}
}
@@ -284,7 +290,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
if point_set.is_empty() {
self.new_leaf(p)
} else {
let max_dist = self.max(&point_set);
let max_dist = self.max(point_set);
let next_scale = (max_scale - 1).min(self.get_scale(max_dist));
if next_scale == std::i64::MIN {
let mut children: Vec<Node<F>> = Vec::new();
@@ -301,7 +307,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
max_dist: F::zero(),
parent_dist: F::zero(),
children,
scale: 100,
_scale: 100,
}
} else {
let mut far: Vec<DistanceSet<F>> = Vec::new();
@@ -313,8 +319,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
point_set.append(&mut far);
child
} else {
let mut children: Vec<Node<F>> = Vec::new();
children.push(child);
let mut children: Vec<Node<F>> = vec![child];
let mut new_point_set: Vec<DistanceSet<F>> = Vec::new();
let mut new_consumed_set: Vec<DistanceSet<F>> = Vec::new();
@@ -371,7 +376,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
max_dist: self.max(consumed_set),
parent_dist: F::zero(),
children,
scale: (top_scale - max_scale),
_scale: (top_scale - max_scale),
}
}
}
@@ -454,7 +459,8 @@ mod tests {
use super::*;
use crate::math::distance::Distances;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
struct SimpleDistance {}
impl Distance<i32, f64> for SimpleDistance {
@@ -463,6 +469,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn cover_tree_test() {
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
@@ -479,7 +486,7 @@ mod tests {
let knn: Vec<i32> = knn.iter().map(|v| *v.2).collect();
assert_eq!(vec!(3, 4, 5, 6, 7), knn);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn cover_tree_test1() {
let data = vec![
@@ -498,8 +505,9 @@ mod tests {
assert_eq!(vec!(0, 1, 2), knn);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
+9 -5
View File
@@ -22,6 +22,7 @@
//!
//! ```
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::cmp::{Ordering, PartialOrd};
use std::marker::PhantomData;
@@ -32,7 +33,8 @@ use crate::math::distance::Distance;
use crate::math::num::RealNumber;
/// Implements Linear Search algorithm, see [KNN algorithms](../index.html)
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct LinearKNNSearch<T, F: RealNumber, D: Distance<T, F>> {
distance: D,
data: Vec<T>,
@@ -72,7 +74,7 @@ impl<T, F: RealNumber, D: Distance<T, F>> LinearKNNSearch<T, F, D> {
}
for i in 0..self.data.len() {
let d = self.distance.distance(&from, &self.data[i]);
let d = self.distance.distance(from, &self.data[i]);
let datum = heap.peek_mut();
if d < datum.distance {
datum.distance = d;
@@ -102,7 +104,7 @@ impl<T, F: RealNumber, D: Distance<T, F>> LinearKNNSearch<T, F, D> {
let mut neighbors: Vec<(usize, F, &T)> = Vec::new();
for i in 0..self.data.len() {
let d = self.distance.distance(&from, &self.data[i]);
let d = self.distance.distance(from, &self.data[i]);
if d <= radius {
neighbors.push((i, d, &self.data[i]));
@@ -138,7 +140,8 @@ mod tests {
use super::*;
use crate::math::distance::Distances;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
struct SimpleDistance {}
impl Distance<i32, f64> for SimpleDistance {
@@ -147,6 +150,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_find() {
let data1 = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
@@ -193,7 +197,7 @@ mod tests {
assert_eq!(vec!(1, 2, 3), found_idxs2);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_point_eq() {
let point1 = KNNPoint {
+5 -2
View File
@@ -35,6 +35,7 @@ use crate::algorithm::neighbour::linear_search::LinearKNNSearch;
use crate::error::Failed;
use crate::math::distance::Distance;
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
pub(crate) mod bbd_tree;
@@ -45,7 +46,8 @@ pub mod linear_search;
/// Both, KNN classifier and regressor benefits from underlying search algorithms that helps to speed up queries.
/// `KNNAlgorithmName` maintains a list of supported search algorithms, see [KNN algorithms](../algorithm/neighbour/index.html)
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub enum KNNAlgorithmName {
/// Heap Search algorithm, see [`LinearSearch`](../algorithm/neighbour/linear_search/index.html)
LinearSearch,
@@ -53,7 +55,8 @@ pub enum KNNAlgorithmName {
CoverTree,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub(crate) enum KNNAlgorithm<T: RealNumber, D: Distance<Vec<T>, T>> {
LinearSearch(LinearKNNSearch<Vec<T>, T, D>),
CoverTree(CoverTree<Vec<T>, T, D>),
+6 -2
View File
@@ -53,8 +53,7 @@ impl<'a, T: PartialOrd + Debug> HeapSelection<T> {
if self.sorted {
&self.heap[0]
} else {
&self
.heap
self.heap
.iter()
.max_by(|a, b| a.partial_cmp(b).unwrap())
.unwrap()
@@ -96,12 +95,14 @@ impl<'a, T: PartialOrd + Debug> HeapSelection<T> {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn with_capacity() {
let heap = HeapSelection::<i32>::with_capacity(3);
assert_eq!(3, heap.k);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_add() {
let mut heap = HeapSelection::with_capacity(3);
@@ -119,6 +120,7 @@ mod tests {
assert_eq!(vec![2, 0, -5], heap.get());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_add1() {
let mut heap = HeapSelection::with_capacity(3);
@@ -133,6 +135,7 @@ mod tests {
assert_eq!(vec![0f64, -1f64, -5f64], heap.get());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_add2() {
let mut heap = HeapSelection::with_capacity(3);
@@ -145,6 +148,7 @@ mod tests {
assert_eq!(vec![5.6568, 2.8284, 0.0], heap.get());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_add_ordered() {
let mut heap = HeapSelection::with_capacity(3);
+1
View File
@@ -113,6 +113,7 @@ impl<T: Float> QuickArgSort for Vec<T> {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn with_capacity() {
let arr1 = vec![0.3, 0.1, 0.2, 0.4, 0.9, 0.5, 0.7, 0.6, 0.8];
+9 -3
View File
@@ -43,6 +43,7 @@
use std::fmt::Debug;
use std::iter::Sum;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
@@ -55,7 +56,8 @@ use crate::math::num::RealNumber;
use crate::tree::decision_tree_classifier::which_max;
/// DBSCAN clustering algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
cluster_labels: Vec<i16>,
num_classes: usize,
@@ -153,11 +155,11 @@ impl<T: RealNumber + Sum, D: Distance<Vec<T>, T>> DBSCAN<T, D> {
parameters: DBSCANParameters<T, D>,
) -> Result<DBSCAN<T, D>, Failed> {
if parameters.min_samples < 1 {
return Err(Failed::fit(&"Invalid minPts".to_string()));
return Err(Failed::fit("Invalid minPts"));
}
if parameters.eps <= T::zero() {
return Err(Failed::fit(&"Invalid radius: ".to_string()));
return Err(Failed::fit("Invalid radius: "));
}
let mut k = 0;
@@ -263,8 +265,10 @@ impl<T: RealNumber + Sum, D: Distance<Vec<T>, T>> DBSCAN<T, D> {
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg(feature = "serde")]
use crate::math::distance::euclidian::Euclidian;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_dbscan() {
let x = DenseMatrix::from_2d_array(&[
@@ -296,7 +300,9 @@ mod tests {
assert_eq!(expected_labels, predicted_labels);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
+13 -7
View File
@@ -56,6 +56,7 @@ use rand::Rng;
use std::fmt::Debug;
use std::iter::Sum;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::bbd_tree::BBDTree;
@@ -66,12 +67,13 @@ use crate::math::distance::euclidian::*;
use crate::math::num::RealNumber;
/// K-Means clustering algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KMeans<T: RealNumber> {
k: usize,
y: Vec<usize>,
_y: Vec<usize>,
size: Vec<usize>,
distortion: T,
_distortion: T,
centroids: Vec<Vec<T>>,
}
@@ -206,9 +208,9 @@ impl<T: RealNumber + Sum> KMeans<T> {
Ok(KMeans {
k: parameters.k,
y,
_y: y,
size,
distortion,
_distortion: distortion,
centroids,
})
}
@@ -243,7 +245,7 @@ impl<T: RealNumber + Sum> KMeans<T> {
let mut rng = rand::thread_rng();
let (n, m) = data.shape();
let mut y = vec![0; n];
let mut centroid = data.get_row_as_vec(rng.gen_range(0, n));
let mut centroid = data.get_row_as_vec(rng.gen_range(0..n));
let mut d = vec![T::max_value(); n];
@@ -297,6 +299,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn invalid_k() {
let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
@@ -310,6 +313,7 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_iris() {
let x = DenseMatrix::from_2d_array(&[
@@ -340,11 +344,13 @@ mod tests {
let y = kmeans.predict(&x).unwrap();
for i in 0..y.len() {
assert_eq!(y[i] as usize, kmeans.y[i]);
assert_eq!(y[i] as usize, kmeans._y[i]);
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
+3
View File
@@ -56,9 +56,11 @@ pub fn load_dataset() -> Dataset<f32, f32> {
#[cfg(test)]
mod tests {
#[cfg(not(target_arch = "wasm32"))]
use super::super::*;
use super::*;
#[cfg(not(target_arch = "wasm32"))]
#[test]
#[ignore]
fn refresh_boston_dataset() {
@@ -67,6 +69,7 @@ mod tests {
assert!(serialize_data(&dataset, "boston.xy").is_ok());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn boston_dataset() {
let dataset = load_dataset();
+3
View File
@@ -66,17 +66,20 @@ pub fn load_dataset() -> Dataset<f32, f32> {
#[cfg(test)]
mod tests {
#[cfg(not(target_arch = "wasm32"))]
use super::super::*;
use super::*;
#[test]
#[ignore]
#[cfg(not(target_arch = "wasm32"))]
fn refresh_cancer_dataset() {
// run this test to generate breast_cancer.xy file.
let dataset = load_dataset();
assert!(serialize_data(&dataset, "breast_cancer.xy").is_ok());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn cancer_dataset() {
let dataset = load_dataset();
+3
View File
@@ -50,9 +50,11 @@ pub fn load_dataset() -> Dataset<f32, f32> {
#[cfg(test)]
mod tests {
#[cfg(not(target_arch = "wasm32"))]
use super::super::*;
use super::*;
#[cfg(not(target_arch = "wasm32"))]
#[test]
#[ignore]
fn refresh_diabetes_dataset() {
@@ -61,6 +63,7 @@ mod tests {
assert!(serialize_data(&dataset, "diabetes.xy").is_ok());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn boston_dataset() {
let dataset = load_dataset();
+3 -1
View File
@@ -45,9 +45,11 @@ pub fn load_dataset() -> Dataset<f32, f32> {
#[cfg(test)]
mod tests {
#[cfg(not(target_arch = "wasm32"))]
use super::super::*;
use super::*;
#[cfg(not(target_arch = "wasm32"))]
#[test]
#[ignore]
fn refresh_digits_dataset() {
@@ -55,7 +57,7 @@ mod tests {
let dataset = load_dataset();
assert!(serialize_data(&dataset, "digits.xy").is_ok());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn digits_dataset() {
let dataset = load_dataset();
+52
View File
@@ -88,6 +88,43 @@ pub fn make_circles(num_samples: usize, factor: f32, noise: f32) -> Dataset<f32,
}
}
/// Make two interleaving half circles in 2d
pub fn make_moons(num_samples: usize, noise: f32) -> Dataset<f32, f32> {
let num_samples_out = num_samples / 2;
let num_samples_in = num_samples - num_samples_out;
let linspace_out = linspace(0.0, std::f32::consts::PI, num_samples_out);
let linspace_in = linspace(0.0, std::f32::consts::PI, num_samples_in);
let noise = Normal::new(0.0, noise).unwrap();
let mut rng = rand::thread_rng();
let mut x: Vec<f32> = Vec::with_capacity(num_samples * 2);
let mut y: Vec<f32> = Vec::with_capacity(num_samples);
for v in linspace_out {
x.push(v.cos() + noise.sample(&mut rng));
x.push(v.sin() + noise.sample(&mut rng));
y.push(0.0);
}
for v in linspace_in {
x.push(1.0 - v.cos() + noise.sample(&mut rng));
x.push(1.0 - v.sin() + noise.sample(&mut rng) - 0.5);
y.push(1.0);
}
Dataset {
data: x,
target: y,
num_samples,
num_features: 2,
feature_names: (0..2).map(|n| n.to_string()).collect(),
target_names: vec!["label".to_string()],
description: "Two interleaving half circles in 2d".to_string(),
}
}
fn linspace(start: f32, stop: f32, num: usize) -> Vec<f32> {
let div = num as f32;
let delta = stop - start;
@@ -100,6 +137,7 @@ mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_make_blobs() {
let dataset = make_blobs(10, 2, 3);
@@ -112,6 +150,7 @@ mod tests {
assert_eq!(dataset.num_samples, 10);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_make_circles() {
let dataset = make_circles(10, 0.5, 0.05);
@@ -123,4 +162,17 @@ mod tests {
assert_eq!(dataset.num_features, 2);
assert_eq!(dataset.num_samples, 10);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_make_moons() {
let dataset = make_moons(10, 0.05);
assert_eq!(
dataset.data.len(),
dataset.num_features * dataset.num_samples
);
assert_eq!(dataset.target.len(), dataset.num_samples);
assert_eq!(dataset.num_features, 2);
assert_eq!(dataset.num_samples, 10);
}
}
+3
View File
@@ -50,9 +50,11 @@ pub fn load_dataset() -> Dataset<f32, f32> {
#[cfg(test)]
mod tests {
#[cfg(not(target_arch = "wasm32"))]
use super::super::*;
use super::*;
#[cfg(not(target_arch = "wasm32"))]
#[test]
#[ignore]
fn refresh_iris_dataset() {
@@ -61,6 +63,7 @@ mod tests {
assert!(serialize_data(&dataset, "iris.xy").is_ok());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn iris_dataset() {
let dataset = load_dataset();
+12 -5
View File
@@ -8,9 +8,12 @@ pub mod digits;
pub mod generator;
pub mod iris;
#[cfg(not(target_arch = "wasm32"))]
use crate::math::num::RealNumber;
#[cfg(not(target_arch = "wasm32"))]
use std::fs::File;
use std::io;
#[cfg(not(target_arch = "wasm32"))]
use std::io::prelude::*;
/// Dataset
@@ -49,6 +52,8 @@ impl<X, Y> Dataset<X, Y> {
}
}
// Running this in wasm throws: operation not supported on this platform.
#[cfg(not(target_arch = "wasm32"))]
#[allow(dead_code)]
pub(crate) fn serialize_data<X: RealNumber, Y: RealNumber>(
dataset: &Dataset<X, Y>,
@@ -62,14 +67,14 @@ pub(crate) fn serialize_data<X: RealNumber, Y: RealNumber>(
.data
.iter()
.copied()
.flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec().into_iter())
.flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec())
.collect();
file.write_all(&x)?;
let y: Vec<u8> = dataset
.target
.iter()
.copied()
.flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec().into_iter())
.flat_map(|f| f.to_f32_bits().to_le_bytes().to_vec())
.collect();
file.write_all(&y)?;
}
@@ -82,11 +87,12 @@ pub(crate) fn deserialize_data(
bytes: &[u8],
) -> Result<(Vec<f32>, Vec<f32>, usize, usize), io::Error> {
// read the same file back into a Vec of bytes
const USIZE_SIZE: usize = std::mem::size_of::<usize>();
let (num_samples, num_features) = {
let mut buffer = [0u8; 8];
buffer.copy_from_slice(&bytes[0..8]);
let mut buffer = [0u8; USIZE_SIZE];
buffer.copy_from_slice(&bytes[0..USIZE_SIZE]);
let num_features = usize::from_le_bytes(buffer);
buffer.copy_from_slice(&bytes[8..16]);
buffer.copy_from_slice(&bytes[8..8 + USIZE_SIZE]);
let num_samples = usize::from_le_bytes(buffer);
(num_samples, num_features)
};
@@ -115,6 +121,7 @@ pub(crate) fn deserialize_data(
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn as_matrix() {
let dataset = Dataset {
+8 -3
View File
@@ -47,6 +47,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Transformer, UnsupervisedEstimator};
@@ -55,7 +56,8 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;
/// Principal components analysis algorithm
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct PCA<T: RealNumber, M: Matrix<T>> {
eigenvectors: M,
eigenvalues: Vec<T>,
@@ -323,7 +325,7 @@ mod tests {
&[6.8, 161.0, 60.0, 15.6],
])
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn pca_components() {
let us_arrests = us_arrests_data();
@@ -339,7 +341,7 @@ mod tests {
assert!(expected.approximate_eq(&pca.components().abs(), 0.4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_covariance() {
let us_arrests = us_arrests_data();
@@ -449,6 +451,7 @@ mod tests {
.approximate_eq(&expected_projection.abs(), 1e-4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_correlation() {
let us_arrests = us_arrests_data();
@@ -564,7 +567,9 @@ mod tests {
.approximate_eq(&expected_projection.abs(), 1e-4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let iris = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
+6 -1
View File
@@ -46,6 +46,7 @@
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Transformer, UnsupervisedEstimator};
@@ -54,7 +55,8 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;
/// SVD
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct SVD<T: RealNumber, M: Matrix<T>> {
components: M,
phantom: PhantomData<T>,
@@ -151,6 +153,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn svd_decompose() {
// https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/USArrests.html
@@ -225,7 +228,9 @@ mod tests {
.approximate_eq(&expected, 1e-4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let iris = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
+238 -13
View File
@@ -45,15 +45,18 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::default::Default;
use std::fmt::Debug;
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::error::{Failed, FailedError};
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::linalg::{BaseMatrix, Matrix};
use crate::math::num::RealNumber;
use crate::tree::decision_tree_classifier::{
which_max, DecisionTreeClassifier, DecisionTreeClassifierParameters, SplitCriterion,
@@ -61,7 +64,8 @@ use crate::tree::decision_tree_classifier::{
/// Parameters of the Random Forest algorithm.
/// Some parameters here are passed directly into base estimator.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RandomForestClassifierParameters {
/// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub criterion: SplitCriterion,
@@ -75,14 +79,20 @@ pub struct RandomForestClassifierParameters {
pub n_trees: u16,
/// Number of random sample of predictors to use as split candidates.
pub m: Option<usize>,
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub keep_samples: bool,
/// Seed used for bootstrap sampling and feature selection for each tree.
pub seed: u64,
}
/// Random Forest Classifier
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct RandomForestClassifier<T: RealNumber> {
parameters: RandomForestClassifierParameters,
_parameters: RandomForestClassifierParameters,
trees: Vec<DecisionTreeClassifier<T>>,
classes: Vec<T>,
samples: Option<Vec<Vec<bool>>>,
}
impl RandomForestClassifierParameters {
@@ -116,6 +126,18 @@ impl RandomForestClassifierParameters {
self.m = Some(m);
self
}
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub fn with_keep_samples(mut self, keep_samples: bool) -> Self {
self.keep_samples = keep_samples;
self
}
/// Seed used for bootstrap sampling and feature selection for each tree.
pub fn with_seed(mut self, seed: u64) -> Self {
self.seed = seed;
self
}
}
impl<T: RealNumber> PartialEq for RandomForestClassifier<T> {
@@ -147,6 +169,8 @@ impl Default for RandomForestClassifierParameters {
min_samples_split: 2,
n_trees: 100,
m: Option::None,
keep_samples: false,
seed: 0,
}
}
}
@@ -198,26 +222,38 @@ impl<T: RealNumber> RandomForestClassifier<T> {
.unwrap()
});
let mut rng = StdRng::seed_from_u64(parameters.seed);
let classes = y_m.unique();
let k = classes.len();
let mut trees: Vec<DecisionTreeClassifier<T>> = Vec::new();
let mut maybe_all_samples: Option<Vec<Vec<bool>>> = Option::None;
if parameters.keep_samples {
maybe_all_samples = Some(Vec::new());
}
for _ in 0..parameters.n_trees {
let samples = RandomForestClassifier::<T>::sample_with_replacement(&yi, k);
let samples = RandomForestClassifier::<T>::sample_with_replacement(&yi, k, &mut rng);
if let Some(ref mut all_samples) = maybe_all_samples {
all_samples.push(samples.iter().map(|x| *x != 0).collect())
}
let params = DecisionTreeClassifierParameters {
criterion: parameters.criterion.clone(),
max_depth: parameters.max_depth,
min_samples_leaf: parameters.min_samples_leaf,
min_samples_split: parameters.min_samples_split,
};
let tree = DecisionTreeClassifier::fit_weak_learner(x, y, samples, mtry, params)?;
let tree =
DecisionTreeClassifier::fit_weak_learner(x, y, samples, mtry, params, &mut rng)?;
trees.push(tree);
}
Ok(RandomForestClassifier {
parameters,
_parameters: parameters,
trees,
classes,
samples: maybe_all_samples,
})
}
@@ -245,8 +281,74 @@ impl<T: RealNumber> RandomForestClassifier<T> {
which_max(&result)
}
fn sample_with_replacement(y: &[usize], num_classes: usize) -> Vec<usize> {
let mut rng = rand::thread_rng();
/// Predict OOB classes for `x`. `x` is expected to be equal to the dataset used in training.
pub fn predict_oob<M: Matrix<T>>(&self, x: &M) -> Result<M::RowVector, Failed> {
let (n, _) = x.shape();
if self.samples.is_none() {
Err(Failed::because(
FailedError::PredictFailed,
"Need samples=true for OOB predictions.",
))
} else if self.samples.as_ref().unwrap()[0].len() != n {
Err(Failed::because(
FailedError::PredictFailed,
"Prediction matrix must match matrix used in training for OOB predictions.",
))
} else {
let mut result = M::zeros(1, n);
for i in 0..n {
result.set(0, i, self.classes[self.predict_for_row_oob(x, i)]);
}
Ok(result.to_row_vector())
}
}
fn predict_for_row_oob<M: Matrix<T>>(&self, x: &M, row: usize) -> usize {
let mut result = vec![0; self.classes.len()];
for (tree, samples) in self.trees.iter().zip(self.samples.as_ref().unwrap()) {
if !samples[row] {
result[tree.predict_for_row(x, row)] += 1;
}
}
which_max(&result)
}
/// Predict the per-class probabilties for each observation.
/// The probability is calculated as the fraction of trees that predicted a given class
pub fn predict_probs<M: Matrix<T>>(&self, x: &M) -> Result<DenseMatrix<f64>, Failed> {
let mut result = DenseMatrix::<f64>::zeros(x.shape().0, self.classes.len());
let (n, _) = x.shape();
for i in 0..n {
let row_probs = self.predict_probs_for_row(x, i);
for (j, item) in row_probs.iter().enumerate() {
result.set(i, j, *item);
}
}
Ok(result)
}
fn predict_probs_for_row<M: Matrix<T>>(&self, x: &M, row: usize) -> Vec<f64> {
let mut result = vec![0; self.classes.len()];
for tree in self.trees.iter() {
result[tree.predict_for_row(x, row)] += 1;
}
result
.iter()
.map(|n| *n as f64 / self.trees.len() as f64)
.collect()
}
fn sample_with_replacement(y: &[usize], num_classes: usize, rng: &mut impl Rng) -> Vec<usize> {
let class_weight = vec![1.; num_classes];
let nrows = y.len();
let mut samples = vec![0; nrows];
@@ -262,7 +364,7 @@ impl<T: RealNumber> RandomForestClassifier<T> {
let size = ((n_samples as f64) / *class_weight_l) as usize;
for _ in 0..size {
let xi: usize = rng.gen_range(0, n_samples);
let xi: usize = rng.gen_range(0..n_samples);
samples[index[xi]] += 1;
}
}
@@ -271,11 +373,12 @@ impl<T: RealNumber> RandomForestClassifier<T> {
}
#[cfg(test)]
mod tests {
mod tests_prob {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::metrics::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_iris() {
let x = DenseMatrix::from_2d_array(&[
@@ -314,6 +417,8 @@ mod tests {
min_samples_split: 2,
n_trees: 100,
m: Option::None,
keep_samples: false,
seed: 87,
},
)
.unwrap();
@@ -321,7 +426,60 @@ mod tests {
assert!(accuracy(&y, &classifier.predict(&x).unwrap()) >= 0.95);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_iris_oob() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2],
&[4.7, 3.2, 1.3, 0.2],
&[4.6, 3.1, 1.5, 0.2],
&[5.0, 3.6, 1.4, 0.2],
&[5.4, 3.9, 1.7, 0.4],
&[4.6, 3.4, 1.4, 0.3],
&[5.0, 3.4, 1.5, 0.2],
&[4.4, 2.9, 1.4, 0.2],
&[4.9, 3.1, 1.5, 0.1],
&[7.0, 3.2, 4.7, 1.4],
&[6.4, 3.2, 4.5, 1.5],
&[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5],
&[5.7, 2.8, 4.5, 1.3],
&[6.3, 3.3, 4.7, 1.6],
&[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4],
]);
let y = vec![
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let classifier = RandomForestClassifier::fit(
&x,
&y,
RandomForestClassifierParameters {
criterion: SplitCriterion::Gini,
max_depth: None,
min_samples_leaf: 1,
min_samples_split: 2,
n_trees: 100,
m: Option::None,
keep_samples: true,
seed: 87,
},
)
.unwrap();
assert!(
accuracy(&y, &classifier.predict_oob(&x).unwrap())
< accuracy(&y, &classifier.predict(&x).unwrap())
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
@@ -356,4 +514,71 @@ mod tests {
assert_eq!(forest, deserialized_forest);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_probabilities() {
let x = DenseMatrix::<f64>::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
&[4.9, 3.0, 1.4, 0.2],
&[4.7, 3.2, 1.3, 0.2],
&[4.6, 3.1, 1.5, 0.2],
&[5.0, 3.6, 1.4, 0.2],
&[5.4, 3.9, 1.7, 0.4],
&[4.6, 3.4, 1.4, 0.3],
&[5.0, 3.4, 1.5, 0.2],
&[4.4, 2.9, 1.4, 0.2],
&[4.9, 3.1, 1.5, 0.1],
&[7.0, 3.2, 4.7, 1.4],
&[6.4, 3.2, 4.5, 1.5],
&[6.9, 3.1, 4.9, 1.5],
&[5.5, 2.3, 4.0, 1.3],
&[6.5, 2.8, 4.6, 1.5],
&[5.7, 2.8, 4.5, 1.3],
&[6.3, 3.3, 4.7, 1.6],
&[4.9, 2.4, 3.3, 1.0],
&[6.6, 2.9, 4.6, 1.3],
&[5.2, 2.7, 3.9, 1.4],
]);
let y = vec![
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let classifier = RandomForestClassifier::fit(
&x,
&y,
RandomForestClassifierParameters {
criterion: SplitCriterion::Gini,
max_depth: None,
min_samples_leaf: 1,
min_samples_split: 2,
n_trees: 100,
m: Option::None,
keep_samples: false,
seed: 87,
},
)
.unwrap();
println!("{:?}", classifier.classes);
let results = classifier.predict_probs(&x).unwrap();
println!("{:?}", x.shape());
println!("{:?}", results);
println!("{:?}", results.shape());
assert_eq!(
results,
DenseMatrix::<f64>::from_array(
20,
2,
&[
1.0, 0.78, 0.95, 0.82, 1.0, 0.92, 0.99, 0.96, 0.36, 0.33, 0.02, 0.02, 0.0, 0.0,
0.0, 0.0, 0.03, 0.05, 0.0, 0.02, 0.0, 0.22, 0.05, 0.18, 0.0, 0.08, 0.01, 0.04,
0.64, 0.67, 0.98, 0.98, 1.0, 1.0, 1.0, 1.0, 0.97, 0.95, 1.0, 0.98
]
)
);
assert!(false);
}
}
+138 -12
View File
@@ -43,21 +43,24 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::default::Default;
use std::fmt::Debug;
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
use crate::error::Failed;
use crate::error::{Failed, FailedError};
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::tree::decision_tree_regressor::{
DecisionTreeRegressor, DecisionTreeRegressorParameters,
};
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Parameters of the Random Forest Regressor
/// Some parameters here are passed directly into base estimator.
pub struct RandomForestRegressorParameters {
@@ -71,13 +74,19 @@ pub struct RandomForestRegressorParameters {
pub n_trees: usize,
/// Number of random sample of predictors to use as split candidates.
pub m: Option<usize>,
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub keep_samples: bool,
/// Seed used for bootstrap sampling and feature selection for each tree.
pub seed: u64,
}
/// Random Forest Regressor
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct RandomForestRegressor<T: RealNumber> {
parameters: RandomForestRegressorParameters,
_parameters: RandomForestRegressorParameters,
trees: Vec<DecisionTreeRegressor<T>>,
samples: Option<Vec<Vec<bool>>>,
}
impl RandomForestRegressorParameters {
@@ -106,8 +115,19 @@ impl RandomForestRegressorParameters {
self.m = Some(m);
self
}
}
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub fn with_keep_samples(mut self, keep_samples: bool) -> Self {
self.keep_samples = keep_samples;
self
}
/// Seed used for bootstrap sampling and feature selection for each tree.
pub fn with_seed(mut self, seed: u64) -> Self {
self.seed = seed;
self
}
}
impl Default for RandomForestRegressorParameters {
fn default() -> Self {
RandomForestRegressorParameters {
@@ -116,6 +136,8 @@ impl Default for RandomForestRegressorParameters {
min_samples_split: 2,
n_trees: 10,
m: Option::None,
keep_samples: false,
seed: 0,
}
}
}
@@ -169,20 +191,34 @@ impl<T: RealNumber> RandomForestRegressor<T> {
.m
.unwrap_or((num_attributes as f64).sqrt().floor() as usize);
let mut rng = StdRng::seed_from_u64(parameters.seed);
let mut trees: Vec<DecisionTreeRegressor<T>> = Vec::new();
let mut maybe_all_samples: Option<Vec<Vec<bool>>> = Option::None;
if parameters.keep_samples {
maybe_all_samples = Some(Vec::new());
}
for _ in 0..parameters.n_trees {
let samples = RandomForestRegressor::<T>::sample_with_replacement(n_rows);
let samples = RandomForestRegressor::<T>::sample_with_replacement(n_rows, &mut rng);
if let Some(ref mut all_samples) = maybe_all_samples {
all_samples.push(samples.iter().map(|x| *x != 0).collect())
}
let params = DecisionTreeRegressorParameters {
max_depth: parameters.max_depth,
min_samples_leaf: parameters.min_samples_leaf,
min_samples_split: parameters.min_samples_split,
};
let tree = DecisionTreeRegressor::fit_weak_learner(x, y, samples, mtry, params)?;
let tree =
DecisionTreeRegressor::fit_weak_learner(x, y, samples, mtry, params, &mut rng)?;
trees.push(tree);
}
Ok(RandomForestRegressor { parameters, trees })
Ok(RandomForestRegressor {
_parameters: parameters,
trees,
samples: maybe_all_samples,
})
}
/// Predict class for `x`
@@ -211,11 +247,49 @@ impl<T: RealNumber> RandomForestRegressor<T> {
result / T::from(n_trees).unwrap()
}
fn sample_with_replacement(nrows: usize) -> Vec<usize> {
let mut rng = rand::thread_rng();
/// Predict OOB classes for `x`. `x` is expected to be equal to the dataset used in training.
pub fn predict_oob<M: Matrix<T>>(&self, x: &M) -> Result<M::RowVector, Failed> {
let (n, _) = x.shape();
if self.samples.is_none() {
Err(Failed::because(
FailedError::PredictFailed,
"Need samples=true for OOB predictions.",
))
} else if self.samples.as_ref().unwrap()[0].len() != n {
Err(Failed::because(
FailedError::PredictFailed,
"Prediction matrix must match matrix used in training for OOB predictions.",
))
} else {
let mut result = M::zeros(1, n);
for i in 0..n {
result.set(0, i, self.predict_for_row_oob(x, i));
}
Ok(result.to_row_vector())
}
}
fn predict_for_row_oob<M: Matrix<T>>(&self, x: &M, row: usize) -> T {
let mut n_trees = 0;
let mut result = T::zero();
for (tree, samples) in self.trees.iter().zip(self.samples.as_ref().unwrap()) {
if !samples[row] {
result += tree.predict_for_row(x, row);
n_trees += 1;
}
}
// TODO: What to do if there are no oob trees?
result / T::from(n_trees).unwrap()
}
fn sample_with_replacement(nrows: usize, rng: &mut impl Rng) -> Vec<usize> {
let mut samples = vec![0; nrows];
for _ in 0..nrows {
let xi = rng.gen_range(0, nrows);
let xi = rng.gen_range(0..nrows);
samples[xi] += 1;
}
samples
@@ -228,6 +302,7 @@ mod tests {
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::metrics::mean_absolute_error;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_longley() {
let x = DenseMatrix::from_2d_array(&[
@@ -262,6 +337,8 @@ mod tests {
min_samples_split: 2,
n_trees: 1000,
m: Option::None,
keep_samples: false,
seed: 87,
},
)
.and_then(|rf| rf.predict(&x))
@@ -270,7 +347,56 @@ mod tests {
assert!(mean_absolute_error(&y, &y_hat) < 1.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_longley_oob() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],
&[259.426, 232.5, 145.6, 108.632, 1948., 61.122],
&[258.054, 368.2, 161.6, 109.773, 1949., 60.171],
&[284.599, 335.1, 165., 110.929, 1950., 61.187],
&[328.975, 209.9, 309.9, 112.075, 1951., 63.221],
&[346.999, 193.2, 359.4, 113.27, 1952., 63.639],
&[365.385, 187., 354.7, 115.094, 1953., 64.989],
&[363.112, 357.8, 335., 116.219, 1954., 63.761],
&[397.469, 290.4, 304.8, 117.388, 1955., 66.019],
&[419.18, 282.2, 285.7, 118.734, 1956., 67.857],
&[442.769, 293.6, 279.8, 120.445, 1957., 68.169],
&[444.546, 468.1, 263.7, 121.95, 1958., 66.513],
&[482.704, 381.3, 255.2, 123.366, 1959., 68.655],
&[502.601, 393.1, 251.4, 125.368, 1960., 69.564],
&[518.173, 480.6, 257.2, 127.852, 1961., 69.331],
&[554.894, 400.7, 282.7, 130.081, 1962., 70.551],
]);
let y = vec![
83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6,
114.2, 115.7, 116.9,
];
let regressor = RandomForestRegressor::fit(
&x,
&y,
RandomForestRegressorParameters {
max_depth: None,
min_samples_leaf: 1,
min_samples_split: 2,
n_trees: 1000,
m: Option::None,
keep_samples: true,
seed: 87,
},
)
.unwrap();
let y_hat = regressor.predict(&x).unwrap();
let y_hat_oob = regressor.predict_oob(&x).unwrap();
assert!(mean_absolute_error(&y, &y_hat) < mean_absolute_error(&y, &y_hat_oob));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],
+5 -2
View File
@@ -2,10 +2,12 @@
use std::error::Error;
use std::fmt;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Generic error to be raised when something goes wrong.
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Failed {
err: FailedError,
msg: String,
@@ -13,7 +15,8 @@ pub struct Failed {
/// Type of error
#[non_exhaustive]
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug)]
pub enum FailedError {
/// Can't fit algorithm to data
FitFailed = 1,
+7 -3
View File
@@ -1,10 +1,12 @@
#![allow(
clippy::type_complexity,
clippy::too_many_arguments,
clippy::many_single_char_names
clippy::many_single_char_names,
clippy::unnecessary_wraps,
clippy::upper_case_acronyms
)]
#![warn(missing_docs)]
#![warn(missing_doc_code_examples)]
#![warn(rustdoc::missing_doc_code_examples)]
//! # SmartCore
//!
@@ -28,7 +30,7 @@
//!
//! All machine learning algorithms in SmartCore are grouped into these broad categories:
//! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data.
//! * [Martix Decomposition](decomposition/index.html), various methods for matrix decomposition.
//! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
//! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables
//! * [Ensemble Models](ensemble/index.html), variety of regression and classification ensemble models
//! * [Tree-based Models](tree/index.html), classification and regression trees
@@ -91,6 +93,8 @@ pub mod naive_bayes;
/// Supervised neighbors-based learning methods
pub mod neighbors;
pub(crate) mod optimization;
/// Preprocessing utilities
pub mod preprocessing;
/// Support Vector Machines
pub mod svm;
/// Supervised tree-based learning methods
+5 -5
View File
@@ -87,8 +87,7 @@ impl<T: RealNumber, M: BaseMatrix<T>> Cholesky<T, M> {
if bn != rn {
return Err(Failed::because(
FailedError::SolutionFailed,
&"Can\'t solve Ax = b for x. Number of rows in b != number of rows in R."
.to_string(),
"Can\'t solve Ax = b for x. Number of rows in b != number of rows in R.",
));
}
@@ -128,7 +127,7 @@ pub trait CholeskyDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
if m != n {
return Err(Failed::because(
FailedError::DecompositionFailed,
&"Can\'t do Cholesky decomposition on a non-square matrix".to_string(),
"Can\'t do Cholesky decomposition on a non-square matrix",
));
}
@@ -148,7 +147,7 @@ pub trait CholeskyDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
if d < T::zero() {
return Err(Failed::because(
FailedError::DecompositionFailed,
&"The matrix is not positive definite.".to_string(),
"The matrix is not positive definite.",
));
}
@@ -168,7 +167,7 @@ pub trait CholeskyDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn cholesky_decompose() {
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
@@ -187,6 +186,7 @@ mod tests {
.approximate_eq(&a.abs(), 1e-4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn cholesky_solve_mut() {
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
+11 -16
View File
@@ -93,11 +93,11 @@ pub trait EVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
sort(&mut d, &mut e, &mut V);
}
Ok(EVD { V, d, e })
Ok(EVD { d, e, V })
}
}
fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec<T>) {
fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut [T], e: &mut [T]) {
let (n, _) = V.shape();
for (i, d_i) in d.iter_mut().enumerate().take(n) {
*d_i = V.get(n - 1, i);
@@ -195,7 +195,7 @@ fn tred2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec
e[0] = T::zero();
}
fn tql2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut Vec<T>, e: &mut Vec<T>) {
fn tql2<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, d: &mut [T], e: &mut [T]) {
let (n, _) = V.shape();
for i in 1..n {
e[i - 1] = e[i];
@@ -419,7 +419,7 @@ fn eltran<T: RealNumber, M: BaseMatrix<T>>(A: &M, V: &mut M, perm: &[usize]) {
}
}
fn hqr2<T: RealNumber, M: BaseMatrix<T>>(A: &mut M, V: &mut M, d: &mut Vec<T>, e: &mut Vec<T>) {
fn hqr2<T: RealNumber, M: BaseMatrix<T>>(A: &mut M, V: &mut M, d: &mut [T], e: &mut [T]) {
let (n, _) = A.shape();
let mut z = T::zero();
let mut s = T::zero();
@@ -471,7 +471,7 @@ fn hqr2<T: RealNumber, M: BaseMatrix<T>>(A: &mut M, V: &mut M, d: &mut Vec<T>, e
A.set(nn, nn, x);
A.set(nn - 1, nn - 1, y + t);
if q >= T::zero() {
z = p + z.copysign(p);
z = p + RealNumber::copysign(z, p);
d[nn - 1] = x + z;
d[nn] = x + z;
if z != T::zero() {
@@ -570,7 +570,7 @@ fn hqr2<T: RealNumber, M: BaseMatrix<T>>(A: &mut M, V: &mut M, d: &mut Vec<T>, e
r /= x;
}
}
let s = (p * p + q * q + r * r).sqrt().copysign(p);
let s = RealNumber::copysign((p * p + q * q + r * r).sqrt(), p);
if s != T::zero() {
if k == m {
if l != m {
@@ -594,12 +594,7 @@ fn hqr2<T: RealNumber, M: BaseMatrix<T>>(A: &mut M, V: &mut M, d: &mut Vec<T>, e
A.sub_element_mut(k + 1, j, p * y);
A.sub_element_mut(k, j, p * x);
}
let mmin;
if nn < k + 3 {
mmin = nn;
} else {
mmin = k + 3;
}
let mmin = if nn < k + 3 { nn } else { k + 3 };
for i in 0..mmin + 1 {
p = x * A.get(i, k) + y * A.get(i, k + 1);
if k + 1 != nn {
@@ -783,7 +778,7 @@ fn balbak<T: RealNumber, M: BaseMatrix<T>>(V: &mut M, scale: &[T]) {
}
}
fn sort<T: RealNumber, M: BaseMatrix<T>>(d: &mut Vec<T>, e: &mut Vec<T>, V: &mut M) {
fn sort<T: RealNumber, M: BaseMatrix<T>>(d: &mut [T], e: &mut [T], V: &mut M) {
let n = d.len();
let mut temp = vec![T::zero(); n];
for j in 1..n {
@@ -816,7 +811,7 @@ fn sort<T: RealNumber, M: BaseMatrix<T>>(d: &mut Vec<T>, e: &mut Vec<T>, V: &mut
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_symmetric() {
let A = DenseMatrix::from_2d_array(&[
@@ -843,7 +838,7 @@ mod tests {
assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON);
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_asymmetric() {
let A = DenseMatrix::from_2d_array(&[
@@ -870,7 +865,7 @@ mod tests {
assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON);
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_complex() {
let A = DenseMatrix::from_2d_array(&[
+5 -4
View File
@@ -46,13 +46,13 @@ use crate::math::num::RealNumber;
pub struct LU<T: RealNumber, M: BaseMatrix<T>> {
LU: M,
pivot: Vec<usize>,
pivot_sign: i8,
_pivot_sign: i8,
singular: bool,
phantom: PhantomData<T>,
}
impl<T: RealNumber, M: BaseMatrix<T>> LU<T, M> {
pub(crate) fn new(LU: M, pivot: Vec<usize>, pivot_sign: i8) -> LU<T, M> {
pub(crate) fn new(LU: M, pivot: Vec<usize>, _pivot_sign: i8) -> LU<T, M> {
let (_, n) = LU.shape();
let mut singular = false;
@@ -66,7 +66,7 @@ impl<T: RealNumber, M: BaseMatrix<T>> LU<T, M> {
LU {
LU,
pivot,
pivot_sign,
_pivot_sign,
singular,
phantom: PhantomData,
}
@@ -260,6 +260,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]);
@@ -274,7 +275,7 @@ mod tests {
assert!(lu.U().approximate_eq(&expected_U, 1e-4));
assert!(lu.pivot().approximate_eq(&expected_pivot, 1e-4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn inverse() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]);
+11 -6
View File
@@ -1,3 +1,4 @@
#![allow(clippy::wrong_self_convention)]
//! # Linear Algebra and Matrix Decomposition
//!
//! Most machine learning algorithms in SmartCore depend on linear algebra and matrix decomposition methods from this module.
@@ -265,7 +266,7 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
sum += xi * xi;
}
mu /= div;
sum / div - mu * mu
sum / div - mu.powi(2)
}
/// Computes the standard deviation.
fn std(&self) -> T {
@@ -688,12 +689,11 @@ impl<'a, T: RealNumber, M: BaseMatrix<T>> Iterator for RowIter<'a, T, M> {
type Item = Vec<T>;
fn next(&mut self) -> Option<Vec<T>> {
let res;
if self.pos < self.max_pos {
res = Some(self.m.get_row_as_vec(self.pos))
let res = if self.pos < self.max_pos {
Some(self.m.get_row_as_vec(self.pos))
} else {
res = None
}
None
};
self.pos += 1;
res
}
@@ -705,6 +705,7 @@ mod tests {
use crate::linalg::BaseMatrix;
use crate::linalg::BaseVector;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mean() {
let m = vec![1., 2., 3.];
@@ -712,6 +713,7 @@ mod tests {
assert_eq!(m.mean(), 2.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn std() {
let m = vec![1., 2., 3.];
@@ -719,6 +721,7 @@ mod tests {
assert!((m.std() - 0.81f64).abs() < 1e-2);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn var() {
let m = vec![1., 2., 3., 4.];
@@ -726,6 +729,7 @@ mod tests {
assert!((m.var() - 1.25f64).abs() < std::f64::EPSILON);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_take() {
let m = vec![1., 2., 3., 4., 5.];
@@ -733,6 +737,7 @@ mod tests {
assert_eq!(m.take(&vec!(0, 0, 4, 4)), vec![1., 1., 5., 5.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn take() {
let m = DenseMatrix::from_2d_array(&[
+41 -34
View File
@@ -1,11 +1,15 @@
#![allow(clippy::ptr_arg)]
use std::fmt;
use std::fmt::Debug;
#[cfg(feature = "serde")]
use std::marker::PhantomData;
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::de::{Deserializer, MapAccess, SeqAccess, Visitor};
#[cfg(feature = "serde")]
use serde::ser::{SerializeStruct, Serializer};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::cholesky::CholeskyDecomposableMatrix;
@@ -326,7 +330,7 @@ impl<T: RealNumber> DenseMatrix<T> {
cur_r: 0,
max_c: self.ncols,
max_r: self.nrows,
m: &self,
m: self,
}
}
}
@@ -349,6 +353,7 @@ impl<'a, T: RealNumber> Iterator for DenseMatrixIterator<'a, T> {
}
}
#[cfg(feature = "serde")]
impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for DenseMatrix<T> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
@@ -434,6 +439,7 @@ impl<'de, T: RealNumber + fmt::Debug + Deserialize<'de>> Deserialize<'de> for De
}
}
#[cfg(feature = "serde")]
impl<T: RealNumber + fmt::Debug + Serialize> Serialize for DenseMatrix<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -517,10 +523,9 @@ impl<T: RealNumber> PartialEq for DenseMatrix<T> {
true
}
}
impl<T: RealNumber> Into<Vec<T>> for DenseMatrix<T> {
fn into(self) -> Vec<T> {
self.values
impl<T: RealNumber> From<DenseMatrix<T>> for Vec<T> {
fn from(dense_matrix: DenseMatrix<T>) -> Vec<T> {
dense_matrix.values
}
}
@@ -1054,14 +1059,14 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
#[cfg(test)]
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_dot() {
let v1 = vec![1., 2., 3.];
let v2 = vec![4., 5., 6.];
assert_eq!(32.0, BaseVector::dot(&v1, &v2));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_copy_from() {
let mut v1 = vec![1., 2., 3.];
@@ -1069,7 +1074,7 @@ mod tests {
v1.copy_from(&v2);
assert_eq!(v1, v2);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_approximate_eq() {
let a = vec![1., 2., 3.];
@@ -1077,7 +1082,7 @@ mod tests {
assert!(a.approximate_eq(&b, 1e-4));
assert!(!a.approximate_eq(&b, 1e-5));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn from_array() {
let vec = [1., 2., 3., 4., 5., 6.];
@@ -1090,7 +1095,7 @@ mod tests {
DenseMatrix::new(2, 3, vec![1., 4., 2., 5., 3., 6.])
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn row_column_vec_from_array() {
let vec = vec![1., 2., 3., 4., 5., 6.];
@@ -1103,7 +1108,7 @@ mod tests {
DenseMatrix::new(6, 1, vec![1., 2., 3., 4., 5., 6.])
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn from_to_row_vec() {
let vec = vec![1., 2., 3.];
@@ -1116,20 +1121,20 @@ mod tests {
vec![1., 2., 3.]
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn col_matrix_to_row_vector() {
let m: DenseMatrix<f64> = BaseMatrix::zeros(10, 1);
assert_eq!(m.to_row_vector().len(), 10)
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn iter() {
let vec = vec![1., 2., 3., 4., 5., 6.];
let m = DenseMatrix::from_array(3, 2, &vec);
assert_eq!(vec, m.iter().collect::<Vec<f32>>());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn v_stack() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
@@ -1144,7 +1149,7 @@ mod tests {
let result = a.v_stack(&b);
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn h_stack() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
@@ -1157,13 +1162,13 @@ mod tests {
let result = a.h_stack(&b);
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_row() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
assert_eq!(vec![4., 5., 6.], a.get_row(1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn matmul() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
@@ -1172,7 +1177,7 @@ mod tests {
let result = a.matmul(&b);
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn ab() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
@@ -1195,14 +1200,14 @@ mod tests {
DenseMatrix::from_2d_array(&[&[29., 39., 49.], &[40., 54., 68.,], &[51., 69., 87.]])
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn dot() {
let a = DenseMatrix::from_array(1, 3, &[1., 2., 3.]);
let b = DenseMatrix::from_array(1, 3, &[4., 5., 6.]);
assert_eq!(a.dot(&b), 32.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn copy_from() {
let mut a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
@@ -1210,7 +1215,7 @@ mod tests {
a.copy_from(&b);
assert_eq!(a, b);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn slice() {
let m = DenseMatrix::from_2d_array(&[
@@ -1222,7 +1227,7 @@ mod tests {
let result = m.slice(0..2, 1..3);
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn approximate_eq() {
let m = DenseMatrix::from_2d_array(&[&[2., 3.], &[5., 6.]]);
@@ -1231,7 +1236,7 @@ mod tests {
assert!(m.approximate_eq(&m_eq, 0.5));
assert!(!m.approximate_eq(&m_neq, 0.5));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn rand() {
let m: DenseMatrix<f64> = DenseMatrix::rand(3, 3);
@@ -1241,7 +1246,7 @@ mod tests {
}
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn transpose() {
let m = DenseMatrix::from_2d_array(&[&[1.0, 3.0], &[2.0, 4.0]]);
@@ -1253,7 +1258,7 @@ mod tests {
}
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn reshape() {
let m_orig = DenseMatrix::row_vector_from_array(&[1., 2., 3., 4., 5., 6.]);
@@ -1264,7 +1269,7 @@ mod tests {
assert_eq!(m_result.get(0, 1), 2.);
assert_eq!(m_result.get(0, 3), 4.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn norm() {
let v = DenseMatrix::row_vector_from_array(&[3., -2., 6.]);
@@ -1273,7 +1278,7 @@ mod tests {
assert_eq!(v.norm(std::f64::INFINITY), 6.);
assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn softmax_mut() {
let mut prob: DenseMatrix<f64> = DenseMatrix::row_vector_from_array(&[1., 2., 3.]);
@@ -1282,14 +1287,14 @@ mod tests {
assert!((prob.get(0, 1) - 0.24).abs() < 0.01);
assert!((prob.get(0, 2) - 0.66).abs() < 0.01);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn col_mean() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]);
let res = a.column_mean();
assert_eq!(res, vec![4., 5., 6.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn min_max_sum() {
let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
@@ -1297,30 +1302,32 @@ mod tests {
assert_eq!(1., a.min());
assert_eq!(6., a.max());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eye() {
let a = DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0., 0., 1.]]);
let res = DenseMatrix::eye(3);
assert_eq!(res, a);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn to_from_json() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
let deserialized_a: DenseMatrix<f64> =
serde_json::from_str(&serde_json::to_string(&a).unwrap()).unwrap();
assert_eq!(a, deserialized_a);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn to_from_bincode() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
let deserialized_a: DenseMatrix<f64> =
bincode::deserialize(&bincode::serialize(&a).unwrap()).unwrap();
assert_eq!(a, deserialized_a);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn to_string() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
@@ -1329,7 +1336,7 @@ mod tests {
"[[0.9, 0.4, 0.7], [0.4, 0.5, 0.3], [0.7, 0.3, 0.8]]"
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn cov() {
let a = DenseMatrix::from_2d_array(&[
+44 -4
View File
@@ -40,7 +40,7 @@
use std::iter::Sum;
use std::ops::{AddAssign, DivAssign, MulAssign, Range, SubAssign};
use nalgebra::{DMatrix, Dynamic, Matrix, MatrixMN, RowDVector, Scalar, VecStorage, U1};
use nalgebra::{Const, DMatrix, Dynamic, Matrix, OMatrix, RowDVector, Scalar, VecStorage, U1};
use crate::linalg::cholesky::CholeskyDecomposableMatrix;
use crate::linalg::evd::EVDDecomposableMatrix;
@@ -53,7 +53,7 @@ use crate::linalg::Matrix as SmartCoreMatrix;
use crate::linalg::{BaseMatrix, BaseVector};
use crate::math::num::RealNumber;
impl<T: RealNumber + 'static> BaseVector<T> for MatrixMN<T, U1, Dynamic> {
impl<T: RealNumber + 'static> BaseVector<T> for OMatrix<T, U1, Dynamic> {
fn get(&self, i: usize) -> T {
*self.get((0, i)).unwrap()
}
@@ -198,7 +198,7 @@ impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Su
fn to_row_vector(self) -> Self::RowVector {
let (nrows, ncols) = self.shape();
self.reshape_generic(U1, Dynamic::new(nrows * ncols))
self.reshape_generic(Const::<1>, Dynamic::new(nrows * ncols))
}
fn get(&self, row: usize, col: usize) -> T {
@@ -579,6 +579,7 @@ mod tests {
use crate::linear::linear_regression::*;
use nalgebra::{DMatrix, Matrix2x3, RowDVector};
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_copy_from() {
let mut v1 = RowDVector::from_vec(vec![1., 2., 3.]);
@@ -589,12 +590,14 @@ mod tests {
assert_ne!(v2, v1);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_len() {
let v = RowDVector::from_vec(vec![1., 2., 3.]);
assert_eq!(3, v.len());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_set_vector() {
let mut v = RowDVector::from_vec(vec![1., 2., 3., 4.]);
@@ -607,12 +610,14 @@ mod tests {
assert_eq!(5., BaseVector::get(&v, 1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_to_vec() {
let v = RowDVector::from_vec(vec![1., 2., 3.]);
assert_eq!(vec![1., 2., 3.], v.to_vec());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_init() {
let zeros: RowDVector<f32> = BaseVector::zeros(3);
@@ -623,6 +628,7 @@ mod tests {
assert_eq!(twos, RowDVector::from_vec(vec![2., 2., 2.]));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_dot() {
let v1 = RowDVector::from_vec(vec![1., 2., 3.]);
@@ -630,6 +636,7 @@ mod tests {
assert_eq!(32.0, BaseVector::dot(&v1, &v2));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_approximate_eq() {
let a = RowDVector::from_vec(vec![1., 2., 3.]);
@@ -638,6 +645,7 @@ mod tests {
assert!(!a.approximate_eq(&(&noise + &a), 1e-5));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_set_dynamic() {
let mut m = DMatrix::from_row_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
@@ -650,6 +658,7 @@ mod tests {
assert_eq!(10., BaseMatrix::get(&m, 1, 1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn zeros() {
let expected = DMatrix::from_row_slice(2, 2, &[0., 0., 0., 0.]);
@@ -659,6 +668,7 @@ mod tests {
assert_eq!(m, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn ones() {
let expected = DMatrix::from_row_slice(2, 2, &[1., 1., 1., 1.]);
@@ -668,6 +678,7 @@ mod tests {
assert_eq!(m, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eye() {
let expected = DMatrix::from_row_slice(3, 3, &[1., 0., 0., 0., 1., 0., 0., 0., 1.]);
@@ -675,6 +686,7 @@ mod tests {
assert_eq!(m, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn shape() {
let m: DMatrix<f64> = BaseMatrix::zeros(5, 10);
@@ -684,6 +696,7 @@ mod tests {
assert_eq!(ncols, 10);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn scalar_add_sub_mul_div() {
let mut m = DMatrix::from_row_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
@@ -697,6 +710,7 @@ mod tests {
assert_eq!(m, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn add_sub_mul_div() {
let mut m = DMatrix::from_row_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]);
@@ -715,6 +729,7 @@ mod tests {
assert_eq!(m, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn to_from_row_vector() {
let v = RowDVector::from_vec(vec![1., 2., 3., 4.]);
@@ -723,12 +738,14 @@ mod tests {
assert_eq!(m.to_row_vector(), expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn col_matrix_to_row_vector() {
let m: DMatrix<f64> = BaseMatrix::zeros(10, 1);
assert_eq!(m.to_row_vector().len(), 10)
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_row_col_as_vec() {
let m = DMatrix::from_row_slice(3, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
@@ -737,12 +754,14 @@ mod tests {
assert_eq!(m.get_col_as_vec(1), vec!(2., 5., 8.));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_row() {
let a = DMatrix::from_row_slice(3, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
assert_eq!(RowDVector::from_vec(vec![4., 5., 6.]), a.get_row(1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn copy_row_col_as_vec() {
let m = DMatrix::from_row_slice(3, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
@@ -754,6 +773,7 @@ mod tests {
assert_eq!(v, vec!(2., 5., 8.));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn element_add_sub_mul_div() {
let mut m = DMatrix::from_row_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]);
@@ -767,6 +787,7 @@ mod tests {
assert_eq!(m, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vstack_hstack() {
let m1 = DMatrix::from_row_slice(2, 3, &[1., 2., 3., 4., 5., 6.]);
@@ -782,6 +803,7 @@ mod tests {
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn matmul() {
let a = DMatrix::from_row_slice(2, 3, &[1., 2., 3., 4., 5., 6.]);
@@ -791,6 +813,7 @@ mod tests {
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn dot() {
let a = DMatrix::from_row_slice(1, 3, &[1., 2., 3.]);
@@ -798,6 +821,7 @@ mod tests {
assert_eq!(14., a.dot(&b));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn slice() {
let a = DMatrix::from_row_slice(
@@ -810,6 +834,7 @@ mod tests {
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn approximate_eq() {
let a = DMatrix::from_row_slice(3, 3, &[1., 2., 3., 4., 5., 6., 7., 8., 9.]);
@@ -822,6 +847,7 @@ mod tests {
assert!(!a.approximate_eq(&(&noise + &a), 1e-5));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn negative_mut() {
let mut v = DMatrix::from_row_slice(1, 3, &[3., -2., 6.]);
@@ -829,6 +855,7 @@ mod tests {
assert_eq!(v, DMatrix::from_row_slice(1, 3, &[-3., 2., -6.]));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn transpose() {
let m = DMatrix::from_row_slice(2, 2, &[1.0, 3.0, 2.0, 4.0]);
@@ -837,6 +864,7 @@ mod tests {
assert_eq!(m_transposed, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn rand() {
let m: DMatrix<f64> = BaseMatrix::rand(3, 3);
@@ -847,6 +875,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn norm() {
let v = DMatrix::from_row_slice(1, 3, &[3., -2., 6.]);
@@ -856,6 +885,7 @@ mod tests {
assert_eq!(BaseMatrix::norm(&v, std::f64::NEG_INFINITY), 2.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn col_mean() {
let a = DMatrix::from_row_slice(3, 3, &[1., 2., 3., 4., 5., 6., 7., 8., 9.]);
@@ -863,6 +893,7 @@ mod tests {
assert_eq!(res, vec![4., 5., 6.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn reshape() {
let m_orig = DMatrix::from_row_slice(1, 6, &[1., 2., 3., 4., 5., 6.]);
@@ -874,6 +905,7 @@ mod tests {
assert_eq!(BaseMatrix::get(&m_result, 0, 3), 4.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn copy_from() {
let mut src = DMatrix::from_row_slice(1, 3, &[1., 2., 3.]);
@@ -882,6 +914,7 @@ mod tests {
assert_eq!(src, dst);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn abs_mut() {
let mut a = DMatrix::from_row_slice(2, 2, &[1., -2., 3., -4.]);
@@ -890,6 +923,7 @@ mod tests {
assert_eq!(a, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn min_max_sum() {
let a = DMatrix::from_row_slice(2, 3, &[1., 2., 3., 4., 5., 6.]);
@@ -898,6 +932,7 @@ mod tests {
assert_eq!(6., a.max());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn max_diff() {
let a1 = DMatrix::from_row_slice(2, 3, &[1., 2., 3., 4., -5., 6.]);
@@ -906,6 +941,7 @@ mod tests {
assert_eq!(a2.max_diff(&a2), 0.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn softmax_mut() {
let mut prob: DMatrix<f64> = DMatrix::from_row_slice(1, 3, &[1., 2., 3.]);
@@ -915,13 +951,15 @@ mod tests {
assert!((BaseMatrix::get(&prob, 0, 2) - 0.66).abs() < 0.01);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn pow_mut() {
let mut a = DMatrix::from_row_slice(1, 3, &[1., 2., 3.]);
a.pow_mut(3.);
BaseMatrix::pow_mut(&mut a, 3.);
assert_eq!(a, DMatrix::from_row_slice(1, 3, &[1., 8., 27.]));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn argmax() {
let a = DMatrix::from_row_slice(3, 3, &[1., 2., 3., -5., -6., -7., 0.1, 0.2, 0.1]);
@@ -929,6 +967,7 @@ mod tests {
assert_eq!(res, vec![2, 0, 1]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn unique() {
let a = DMatrix::from_row_slice(3, 3, &[1., 2., 2., -2., -6., -7., 2., 3., 4.]);
@@ -937,6 +976,7 @@ mod tests {
assert_eq!(res, vec![-7., -6., -2., 1., 2., 3., 4.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn ols_fit_predict() {
let x = DMatrix::from_row_slice(
+49 -4
View File
@@ -178,7 +178,7 @@ impl<T: RealNumber + ScalarOperand> BaseVector<T> for ArrayBase<OwnedRepr<T>, Ix
}
fn copy_from(&mut self, other: &Self) {
self.assign(&other);
self.assign(other);
}
}
@@ -385,7 +385,7 @@ impl<T: RealNumber + ScalarOperand + AddAssign + SubAssign + MulAssign + DivAssi
}
fn copy_from(&mut self, other: &Self) {
self.assign(&other);
self.assign(other);
}
fn abs_mut(&mut self) -> &Self {
@@ -530,6 +530,7 @@ mod tests {
use crate::metrics::mean_absolute_error;
use ndarray::{arr1, arr2, Array1, Array2};
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_get_set() {
let mut result = arr1(&[1., 2., 3.]);
@@ -541,6 +542,7 @@ mod tests {
assert_eq!(5., BaseVector::get(&result, 1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_copy_from() {
let mut v1 = arr1(&[1., 2., 3.]);
@@ -551,18 +553,21 @@ mod tests {
assert_ne!(v1, v2);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_len() {
let v = arr1(&[1., 2., 3.]);
assert_eq!(3, v.len());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_to_vec() {
let v = arr1(&[1., 2., 3.]);
assert_eq!(vec![1., 2., 3.], v.to_vec());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_dot() {
let v1 = arr1(&[1., 2., 3.]);
@@ -570,6 +575,7 @@ mod tests {
assert_eq!(32.0, BaseVector::dot(&v1, &v2));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vec_approximate_eq() {
let a = arr1(&[1., 2., 3.]);
@@ -578,6 +584,7 @@ mod tests {
assert!(!a.approximate_eq(&(&noise + &a), 1e-5));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn from_to_row_vec() {
let vec = arr1(&[1., 2., 3.]);
@@ -588,12 +595,14 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn col_matrix_to_row_vector() {
let m: Array2<f64> = BaseMatrix::zeros(10, 1);
assert_eq!(m.to_row_vector().len(), 10)
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn add_mut() {
let mut a1 = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -604,6 +613,7 @@ mod tests {
assert_eq!(a1, a3);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn sub_mut() {
let mut a1 = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -614,6 +624,7 @@ mod tests {
assert_eq!(a1, a3);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mul_mut() {
let mut a1 = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -624,6 +635,7 @@ mod tests {
assert_eq!(a1, a3);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn div_mut() {
let mut a1 = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -634,6 +646,7 @@ mod tests {
assert_eq!(a1, a3);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn div_element_mut() {
let mut a = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -642,6 +655,7 @@ mod tests {
assert_eq!(BaseMatrix::get(&a, 1, 1), 1.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mul_element_mut() {
let mut a = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -650,6 +664,7 @@ mod tests {
assert_eq!(BaseMatrix::get(&a, 1, 1), 25.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn add_element_mut() {
let mut a = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -657,7 +672,7 @@ mod tests {
assert_eq!(BaseMatrix::get(&a, 1, 1), 10.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn sub_element_mut() {
let mut a = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -666,6 +681,7 @@ mod tests {
assert_eq!(BaseMatrix::get(&a, 1, 1), 0.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn vstack_hstack() {
let a1 = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -680,6 +696,7 @@ mod tests {
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_set() {
let mut result = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -691,6 +708,7 @@ mod tests {
assert_eq!(10., BaseMatrix::get(&result, 1, 1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn matmul() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -700,6 +718,7 @@ mod tests {
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn dot() {
let a = arr2(&[[1., 2., 3.]]);
@@ -707,6 +726,7 @@ mod tests {
assert_eq!(14., BaseMatrix::dot(&a, &b));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn slice() {
let a = arr2(&[
@@ -719,6 +739,7 @@ mod tests {
assert_eq!(result, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn scalar_ops() {
let a = arr2(&[[1., 2., 3.]]);
@@ -728,6 +749,7 @@ mod tests {
assert_eq!(&arr2(&[[0.5, 1., 1.5]]), a.clone().div_scalar_mut(2.));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn transpose() {
let m = arr2(&[[1.0, 3.0], [2.0, 4.0]]);
@@ -736,6 +758,7 @@ mod tests {
assert_eq!(m_transposed, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn norm() {
let v = arr2(&[[3., -2., 6.]]);
@@ -745,6 +768,7 @@ mod tests {
assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn negative_mut() {
let mut v = arr2(&[[3., -2., 6.]]);
@@ -752,6 +776,7 @@ mod tests {
assert_eq!(v, arr2(&[[-3., 2., -6.]]));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn reshape() {
let m_orig = arr2(&[[1., 2., 3., 4., 5., 6.]]);
@@ -763,6 +788,7 @@ mod tests {
assert_eq!(BaseMatrix::get(&m_result, 0, 3), 4.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn copy_from() {
let mut src = arr2(&[[1., 2., 3.]]);
@@ -771,6 +797,7 @@ mod tests {
assert_eq!(src, dst);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn min_max_sum() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.]]);
@@ -779,6 +806,7 @@ mod tests {
assert_eq!(6., a.max());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn max_diff() {
let a1 = arr2(&[[1., 2., 3.], [4., -5., 6.]]);
@@ -787,6 +815,7 @@ mod tests {
assert_eq!(a2.max_diff(&a2), 0.);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn softmax_mut() {
let mut prob: Array2<f64> = arr2(&[[1., 2., 3.]]);
@@ -796,6 +825,7 @@ mod tests {
assert!((BaseMatrix::get(&prob, 0, 2) - 0.66).abs() < 0.01);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn pow_mut() {
let mut a = arr2(&[[1., 2., 3.]]);
@@ -803,6 +833,7 @@ mod tests {
assert_eq!(a, arr2(&[[1., 8., 27.]]));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn argmax() {
let a = arr2(&[[1., 2., 3.], [-5., -6., -7.], [0.1, 0.2, 0.1]]);
@@ -810,6 +841,7 @@ mod tests {
assert_eq!(res, vec![2, 0, 1]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn unique() {
let a = arr2(&[[1., 2., 2.], [-2., -6., -7.], [2., 3., 4.]]);
@@ -818,6 +850,7 @@ mod tests {
assert_eq!(res, vec![-7., -6., -2., 1., 2., 3., 4.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_row_as_vector() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]);
@@ -825,12 +858,14 @@ mod tests {
assert_eq!(res, vec![4., 5., 6.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_row() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]);
assert_eq!(arr1(&[4., 5., 6.]), a.get_row(1));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn get_col_as_vector() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]);
@@ -838,6 +873,7 @@ mod tests {
assert_eq!(res, vec![2., 5., 8.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn copy_row_col_as_vec() {
let m = arr2(&[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]);
@@ -849,6 +885,7 @@ mod tests {
assert_eq!(v, vec!(2., 5., 8.));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn col_mean() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]);
@@ -856,6 +893,7 @@ mod tests {
assert_eq!(res, vec![4., 5., 6.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eye() {
let a = arr2(&[[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]);
@@ -863,6 +901,7 @@ mod tests {
assert_eq!(res, a);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn rand() {
let m: Array2<f64> = BaseMatrix::rand(3, 3);
@@ -873,6 +912,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn approximate_eq() {
let a = arr2(&[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]);
@@ -881,6 +921,7 @@ mod tests {
assert!(!a.approximate_eq(&(&noise + &a), 1e-5));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn abs_mut() {
let mut a = arr2(&[[1., -2.], [3., -4.]]);
@@ -889,6 +930,7 @@ mod tests {
assert_eq!(a, expected);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lr_fit_predict_iris() {
let x = arr2(&[
@@ -924,12 +966,13 @@ mod tests {
let error: f64 = y
.into_iter()
.zip(y_hat.into_iter())
.map(|(&a, &b)| (a - b).abs())
.map(|(a, b)| (a - b).abs())
.sum();
assert!(error <= 1.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn my_fit_longley_ndarray() {
let x = arr2(&[
@@ -964,6 +1007,8 @@ mod tests {
min_samples_split: 2,
n_trees: 1000,
m: Option::None,
keep_samples: false,
seed: 0,
},
)
.unwrap()
+2 -1
View File
@@ -195,7 +195,7 @@ pub trait QRDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
@@ -214,6 +214,7 @@ mod tests {
assert!(qr.R().abs().approximate_eq(&r.abs(), 1e-4));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn qr_solve_mut() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
+5 -5
View File
@@ -61,7 +61,7 @@ pub trait MatrixStats<T: RealNumber>: BaseMatrix<T> {
sum += a * a;
}
mu /= div;
*x_i = sum / div - mu * mu;
*x_i = sum / div - mu.powi(2);
}
x
@@ -150,7 +150,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::linalg::BaseVector;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mean() {
let m = DenseMatrix::from_2d_array(&[
@@ -164,7 +164,7 @@ mod tests {
assert_eq!(m.mean(0), expected_0);
assert_eq!(m.mean(1), expected_1);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn std() {
let m = DenseMatrix::from_2d_array(&[
@@ -178,7 +178,7 @@ mod tests {
assert!(m.std(0).approximate_eq(&expected_0, 1e-2));
assert!(m.std(1).approximate_eq(&expected_1, 1e-2));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn var() {
let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]);
@@ -188,7 +188,7 @@ mod tests {
assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON));
assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn scale() {
let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]);
+10 -9
View File
@@ -47,7 +47,7 @@ pub struct SVD<T: RealNumber, M: SVDDecomposableMatrix<T>> {
pub V: M,
/// Singular values of the original matrix
pub s: Vec<T>,
full: bool,
_full: bool,
m: usize,
n: usize,
tol: T,
@@ -116,7 +116,7 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
}
let mut f = U.get(i, i);
g = -s.sqrt().copysign(f);
g = -RealNumber::copysign(s.sqrt(), f);
let h = f * g - s;
U.set(i, i, f - g);
for j in l - 1..n {
@@ -152,7 +152,7 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
}
let f = U.get(i, l - 1);
g = -s.sqrt().copysign(f);
g = -RealNumber::copysign(s.sqrt(), f);
let h = f * g - s;
U.set(i, l - 1, f - g);
@@ -299,7 +299,7 @@ pub trait SVDDecomposableMatrix<T: RealNumber>: BaseMatrix<T> {
let mut h = rv1[k];
let mut f = ((y - z) * (y + z) + (g - h) * (g + h)) / (T::two() * h * y);
g = f.hypot(T::one());
f = ((x - z) * (x + z) + h * ((y / (f + g.copysign(f))) - h)) / x;
f = ((x - z) * (x + z) + h * ((y / (f + RealNumber::copysign(g, f))) - h)) / x;
let mut c = T::one();
let mut s = T::one();
@@ -428,13 +428,13 @@ impl<T: RealNumber, M: SVDDecomposableMatrix<T>> SVD<T, M> {
pub(crate) fn new(U: M, V: M, s: Vec<T>) -> SVD<T, M> {
let m = U.shape().0;
let n = V.shape().0;
let full = s.len() == m.min(n);
let _full = s.len() == m.min(n);
let tol = T::half() * (T::from(m + n).unwrap() + T::one()).sqrt() * s[0] * T::epsilon();
SVD {
U,
V,
s,
full,
_full,
m,
n,
tol,
@@ -482,7 +482,7 @@ impl<T: RealNumber, M: SVDDecomposableMatrix<T>> SVD<T, M> {
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_symmetric() {
let A = DenseMatrix::from_2d_array(&[
@@ -513,7 +513,7 @@ mod tests {
assert!((s[i] - svd.s[i]).abs() < 1e-4);
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_asymmetric() {
let A = DenseMatrix::from_2d_array(&[
@@ -714,7 +714,7 @@ mod tests {
assert!((s[i] - svd.s[i]).abs() < 1e-4);
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn solve() {
let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]);
@@ -725,6 +725,7 @@ mod tests {
assert!(w.approximate_eq(&expected_w, 1e-2));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn decompose_restore() {
let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]);
+1
View File
@@ -126,6 +126,7 @@ mod tests {
impl<T: RealNumber, M: Matrix<T>> BiconjugateGradientSolver<T, M> for BGSolver {}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn bg_solver() {
let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]);
+9 -2
View File
@@ -56,6 +56,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -67,7 +68,8 @@ use crate::math::num::RealNumber;
use crate::linear::lasso_optimizer::InteriorPointOptimizer;
/// Elastic net parameters
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct ElasticNetParameters<T: RealNumber> {
/// Regularization parameter.
pub alpha: T,
@@ -84,7 +86,8 @@ pub struct ElasticNetParameters<T: RealNumber> {
}
/// Elastic net
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct ElasticNet<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: T,
@@ -288,6 +291,7 @@ mod tests {
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::mean_absolute_error;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn elasticnet_longley() {
let x = DenseMatrix::from_2d_array(&[
@@ -331,6 +335,7 @@ mod tests {
assert!(mean_absolute_error(&y_hat, &y) < 30.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn elasticnet_fit_predict1() {
let x = DenseMatrix::from_2d_array(&[
@@ -397,7 +402,9 @@ mod tests {
assert!(l1_model.coefficients().get(0, 0) > l1_model.coefficients().get(2, 0));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+8 -2
View File
@@ -24,6 +24,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -34,7 +35,8 @@ use crate::linear::lasso_optimizer::InteriorPointOptimizer;
use crate::math::num::RealNumber;
/// Lasso regression parameters
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LassoParameters<T: RealNumber> {
/// Controls the strength of the penalty to the loss function.
pub alpha: T,
@@ -47,7 +49,8 @@ pub struct LassoParameters<T: RealNumber> {
pub max_iter: usize,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Lasso regressor
pub struct Lasso<T: RealNumber, M: Matrix<T>> {
coefficients: M,
@@ -223,6 +226,7 @@ mod tests {
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::mean_absolute_error;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lasso_fit_predict() {
let x = DenseMatrix::from_2d_array(&[
@@ -271,7 +275,9 @@ mod tests {
assert!(mean_absolute_error(&y_hat, &y) < 2.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+1 -1
View File
@@ -138,7 +138,7 @@ impl<T: RealNumber, M: Matrix<T>> InteriorPointOptimizer<T, M> {
for i in 0..p {
self.prb[i] = T::two() + self.d1[i];
self.prs[i] = self.prb[i] * self.d1[i] - self.d2[i] * self.d2[i];
self.prs[i] = self.prb[i] * self.d1[i] - self.d2[i].powi(2);
}
let normg = grad.norm2();
+13 -6
View File
@@ -62,6 +62,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -69,7 +70,8 @@ use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable.
pub enum LinearRegressionSolverName {
/// QR decomposition, see [QR](../../linalg/qr/index.html)
@@ -79,18 +81,20 @@ pub enum LinearRegressionSolverName {
}
/// Linear Regression parameters
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LinearRegressionParameters {
/// Solver to use for estimation of regression coefficients.
pub solver: LinearRegressionSolverName,
}
/// Linear Regression
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct LinearRegression<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: T,
solver: LinearRegressionSolverName,
_solver: LinearRegressionSolverName,
}
impl LinearRegressionParameters {
@@ -151,7 +155,7 @@ impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
if x_nrows != y_nrows {
return Err(Failed::fit(
&"Number of rows of X doesn\'t match number of rows of Y".to_string(),
"Number of rows of X doesn\'t match number of rows of Y",
));
}
@@ -167,7 +171,7 @@ impl<T: RealNumber, M: Matrix<T>> LinearRegression<T, M> {
Ok(LinearRegression {
intercept: w.get(num_attributes, 0),
coefficients: wights,
solver: parameters.solver,
_solver: parameters.solver,
})
}
@@ -196,6 +200,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn ols_fit_predict() {
let x = DenseMatrix::from_2d_array(&[
@@ -246,7 +251,9 @@ mod tests {
.all(|(&a, &b)| (a - b).abs() <= 5.0));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+153 -19
View File
@@ -54,8 +54,8 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::cmp::Ordering;
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -67,12 +67,27 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
use crate::optimization::line_search::Backtracking;
use crate::optimization::FunctionOrder;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Solver options for Logistic regression. Right now only LBFGS solver is supported.
pub enum LogisticRegressionSolverName {
/// Limited-memory BroydenFletcherGoldfarbShanno method, see [LBFGS paper](http://users.iems.northwestern.edu/~nocedal/lbfgsb.html)
LBFGS,
}
/// Logistic Regression parameters
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct LogisticRegressionParameters {}
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LogisticRegressionParameters<T: RealNumber> {
/// Solver to use for estimation of regression coefficients.
pub solver: LogisticRegressionSolverName,
/// Regularization parameter.
pub alpha: T,
}
/// Logistic Regression
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct LogisticRegression<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: M,
@@ -99,12 +114,28 @@ trait ObjectiveFunction<T: RealNumber, M: Matrix<T>> {
struct BinaryObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
x: &'a M,
y: Vec<usize>,
phantom: PhantomData<&'a T>,
alpha: T,
}
impl Default for LogisticRegressionParameters {
impl<T: RealNumber> LogisticRegressionParameters<T> {
/// Solver to use for estimation of regression coefficients.
pub fn with_solver(mut self, solver: LogisticRegressionSolverName) -> Self {
self.solver = solver;
self
}
/// Regularization parameter.
pub fn with_alpha(mut self, alpha: T) -> Self {
self.alpha = alpha;
self
}
}
impl<T: RealNumber> Default for LogisticRegressionParameters<T> {
fn default() -> Self {
LogisticRegressionParameters {}
LogisticRegressionParameters {
solver: LogisticRegressionSolverName::LBFGS,
alpha: T::zero(),
}
}
}
@@ -132,13 +163,22 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
{
fn f(&self, w_bias: &M) -> T {
let mut f = T::zero();
let (n, _) = self.x.shape();
let (n, p) = self.x.shape();
for i in 0..n {
let wx = BinaryObjectiveFunction::partial_dot(w_bias, self.x, 0, i);
f += wx.ln_1pe() - (T::from(self.y[i]).unwrap()) * wx;
}
if self.alpha > T::zero() {
let mut w_squared = T::zero();
for i in 0..p {
let w = w_bias.get(0, i);
w_squared += w * w;
}
f += T::half() * self.alpha * w_squared;
}
f
}
@@ -156,6 +196,13 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
}
g.set(0, p, g.get(0, p) - dyi);
}
if self.alpha > T::zero() {
for i in 0..p {
let w = w_bias.get(0, i);
g.set(0, i, g.get(0, i) + self.alpha * w);
}
}
}
}
@@ -163,7 +210,7 @@ struct MultiClassObjectiveFunction<'a, T: RealNumber, M: Matrix<T>> {
x: &'a M,
y: Vec<usize>,
k: usize,
phantom: PhantomData<&'a T>,
alpha: T,
}
impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
@@ -185,6 +232,17 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
f -= prob.get(0, self.y[i]).ln();
}
if self.alpha > T::zero() {
let mut w_squared = T::zero();
for i in 0..self.k {
for j in 0..p {
let wi = w_bias.get(0, i * (p + 1) + j);
w_squared += wi * wi;
}
}
f += T::half() * self.alpha * w_squared;
}
f
}
@@ -215,16 +273,27 @@ impl<'a, T: RealNumber, M: Matrix<T>> ObjectiveFunction<T, M>
g.set(0, j * (p + 1) + p, g.get(0, j * (p + 1) + p) - yi);
}
}
if self.alpha > T::zero() {
for i in 0..self.k {
for j in 0..p {
let pos = i * (p + 1);
let wi = w.get(0, pos + j);
g.set(0, pos + j, g.get(0, pos + j) + self.alpha * wi);
}
}
}
}
}
impl<T: RealNumber, M: Matrix<T>> SupervisedEstimator<M, M::RowVector, LogisticRegressionParameters>
impl<T: RealNumber, M: Matrix<T>>
SupervisedEstimator<M, M::RowVector, LogisticRegressionParameters<T>>
for LogisticRegression<T, M>
{
fn fit(
x: &M,
y: &M::RowVector,
parameters: LogisticRegressionParameters,
parameters: LogisticRegressionParameters<T>,
) -> Result<Self, Failed> {
LogisticRegression::fit(x, y, parameters)
}
@@ -244,7 +313,7 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
pub fn fit(
x: &M,
y: &M::RowVector,
_parameters: LogisticRegressionParameters,
parameters: LogisticRegressionParameters<T>,
) -> Result<LogisticRegression<T, M>, Failed> {
let y_m = M::from_row_vector(y.clone());
let (x_nrows, num_attributes) = x.shape();
@@ -252,7 +321,7 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
if x_nrows != y_nrows {
return Err(Failed::fit(
&"Number of rows of X doesn\'t match number of rows of Y".to_string(),
"Number of rows of X doesn\'t match number of rows of Y",
));
}
@@ -278,7 +347,7 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
let objective = BinaryObjectiveFunction {
x,
y: yi,
phantom: PhantomData,
alpha: parameters.alpha,
};
let result = LogisticRegression::minimize(x0, objective);
@@ -300,7 +369,7 @@ impl<T: RealNumber, M: Matrix<T>> LogisticRegression<T, M> {
x,
y: yi,
k,
phantom: PhantomData,
alpha: parameters.alpha,
};
let result = LogisticRegression::minimize(x0, objective);
@@ -383,6 +452,7 @@ mod tests {
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::accuracy;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn multiclass_objective_f() {
let x = DenseMatrix::from_2d_array(&[
@@ -407,9 +477,9 @@ mod tests {
let objective = MultiClassObjectiveFunction {
x: &x,
y,
y: y.clone(),
k: 3,
phantom: PhantomData,
alpha: 0.0,
};
let mut g: DenseMatrix<f64> = DenseMatrix::zeros(1, 9);
@@ -430,8 +500,27 @@ mod tests {
]));
assert!((f - 408.0052230582765).abs() < std::f64::EPSILON);
let objective_reg = MultiClassObjectiveFunction {
x: &x,
y: y.clone(),
k: 3,
alpha: 1.0,
};
let f = objective_reg.f(&DenseMatrix::row_vector_from_array(&[
1., 2., 3., 4., 5., 6., 7., 8., 9.,
]));
assert!((f - 487.5052).abs() < 1e-4);
objective_reg.df(
&mut g,
&DenseMatrix::row_vector_from_array(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]),
);
assert!((g.get(0, 0).abs() - 32.0).abs() < 1e-4);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn binary_objective_f() {
let x = DenseMatrix::from_2d_array(&[
@@ -456,8 +545,8 @@ mod tests {
let objective = BinaryObjectiveFunction {
x: &x,
y,
phantom: PhantomData,
y: y.clone(),
alpha: 0.0,
};
let mut g: DenseMatrix<f64> = DenseMatrix::zeros(1, 3);
@@ -472,8 +561,23 @@ mod tests {
let f = objective.f(&DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
assert!((f - 59.76994756647412).abs() < std::f64::EPSILON);
let objective_reg = BinaryObjectiveFunction {
x: &x,
y: y.clone(),
alpha: 1.0,
};
let f = objective_reg.f(&DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
assert!((f - 62.2699).abs() < 1e-4);
objective_reg.df(&mut g, &DenseMatrix::row_vector_from_array(&[1., 2., 3.]));
assert!((g.get(0, 0) - 27.0511).abs() < 1e-4);
assert!((g.get(0, 1) - 12.239).abs() < 1e-4);
assert!((g.get(0, 2) - 3.8693).abs() < 1e-4);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lr_fit_predict() {
let x = DenseMatrix::from_2d_array(&[
@@ -511,6 +615,7 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lr_fit_predict_multiclass() {
let blobs = make_blobs(15, 4, 3);
@@ -523,8 +628,18 @@ mod tests {
let y_hat = lr.predict(&x).unwrap();
assert!(accuracy(&y_hat, &y) > 0.9);
let lr_reg = LogisticRegression::fit(
&x,
&y,
LogisticRegressionParameters::default().with_alpha(10.0),
)
.unwrap();
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lr_fit_predict_binary() {
let blobs = make_blobs(20, 4, 2);
@@ -537,9 +652,20 @@ mod tests {
let y_hat = lr.predict(&x).unwrap();
assert!(accuracy(&y_hat, &y) > 0.9);
let lr_reg = LogisticRegression::fit(
&x,
&y,
LogisticRegressionParameters::default().with_alpha(10.0),
)
.unwrap();
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[1., -5.],
@@ -568,6 +694,7 @@ mod tests {
assert_eq!(lr, deserialized_lr);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lr_fit_predict_iris() {
let x = DenseMatrix::from_2d_array(&[
@@ -597,6 +724,12 @@ mod tests {
];
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let lr_reg = LogisticRegression::fit(
&x,
&y,
LogisticRegressionParameters::default().with_alpha(1.0),
)
.unwrap();
let y_hat = lr.predict(&x).unwrap();
@@ -607,5 +740,6 @@ mod tests {
.sum();
assert!(error <= 1.0);
assert!(lr_reg.coefficients().abs().sum() < lr.coefficients().abs().sum());
}
}
+12 -5
View File
@@ -58,6 +58,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -66,7 +67,8 @@ use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Approach to use for estimation of regression coefficients. Cholesky is more efficient but SVD is more stable.
pub enum RidgeRegressionSolverName {
/// Cholesky decomposition, see [Cholesky](../../linalg/cholesky/index.html)
@@ -76,7 +78,8 @@ pub enum RidgeRegressionSolverName {
}
/// Ridge Regression parameters
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RidgeRegressionParameters<T: RealNumber> {
/// Solver to use for estimation of regression coefficients.
pub solver: RidgeRegressionSolverName,
@@ -88,11 +91,12 @@ pub struct RidgeRegressionParameters<T: RealNumber> {
}
/// Ridge regression
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct RidgeRegression<T: RealNumber, M: Matrix<T>> {
coefficients: M,
intercept: T,
solver: RidgeRegressionSolverName,
_solver: RidgeRegressionSolverName,
}
impl<T: RealNumber> RidgeRegressionParameters<T> {
@@ -222,7 +226,7 @@ impl<T: RealNumber, M: Matrix<T>> RidgeRegression<T, M> {
Ok(RidgeRegression {
intercept: b,
coefficients: w,
solver: parameters.solver,
_solver: parameters.solver,
})
}
@@ -270,6 +274,7 @@ mod tests {
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::mean_absolute_error;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn ridge_fit_predict() {
let x = DenseMatrix::from_2d_array(&[
@@ -325,7 +330,9 @@ mod tests {
assert!(mean_absolute_error(&y_hat_svd, &y) < 2.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+4 -1
View File
@@ -18,6 +18,7 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -25,7 +26,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// Euclidean distance is a measure of the true straight line distance between two points in Euclidean n-space.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Euclidian {}
impl Euclidian {
@@ -55,6 +57,7 @@ impl<T: RealNumber> Distance<Vec<T>, T> for Euclidian {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn squared_distance() {
let a = vec![1., 2., 3.];
+4 -1
View File
@@ -19,6 +19,7 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -26,7 +27,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// While comparing two integer-valued vectors of equal length, Hamming distance is the number of bit positions in which the two bits are different
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Hamming {}
impl<T: PartialEq, F: RealNumber> Distance<Vec<T>, F> for Hamming {
@@ -50,6 +52,7 @@ impl<T: PartialEq, F: RealNumber> Distance<Vec<T>, F> for Hamming {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn hamming_distance() {
let a = vec![1, 0, 0, 1, 0, 0, 1];
+4 -1
View File
@@ -44,6 +44,7 @@
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -52,7 +53,8 @@ use super::Distance;
use crate::linalg::Matrix;
/// Mahalanobis distance.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Mahalanobis<T: RealNumber, M: Matrix<T>> {
/// covariance matrix of the dataset
pub sigma: M,
@@ -131,6 +133,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mahalanobis_distance() {
let data = DenseMatrix::from_2d_array(&[
+4 -1
View File
@@ -17,6 +17,7 @@
//! ```
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -24,7 +25,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// Manhattan distance
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Manhattan {}
impl<T: RealNumber> Distance<Vec<T>, T> for Manhattan {
@@ -46,6 +48,7 @@ impl<T: RealNumber> Distance<Vec<T>, T> for Manhattan {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn manhattan_distance() {
let a = vec![1., 2., 3.];
+4 -1
View File
@@ -21,6 +21,7 @@
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::math::num::RealNumber;
@@ -28,7 +29,8 @@ use crate::math::num::RealNumber;
use super::Distance;
/// Defines the Minkowski distance of order `p`
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Minkowski {
/// order, integer
pub p: u16,
@@ -59,6 +61,7 @@ impl<T: RealNumber> Distance<Vec<T>, T> for Minkowski {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn minkowski_distance() {
let a = vec![1., 2., 3.];
+1
View File
@@ -136,6 +136,7 @@ impl RealNumber for f32 {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn sigmoid() {
assert_eq!(1.0.sigmoid(), 0.7310585786300049);
+1
View File
@@ -30,6 +30,7 @@ impl<T: RealNumber, V: BaseVector<T>> RealNumberVector<T> for V {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn unique_with_indices() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
+4 -1
View File
@@ -16,13 +16,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Accuracy metric.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Accuracy {}
impl Accuracy {
@@ -55,6 +57,7 @@ impl Accuracy {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn accuracy() {
let y_pred: Vec<f64> = vec![0., 2., 1., 3.];
+4 -1
View File
@@ -20,6 +20,7 @@
//! * ["The ROC-AUC and the Mann-Whitney U-test", Haupt, J.](https://johaupt.github.io/roc-auc/model%20evaluation/Area_under_ROC_curve.html)
#![allow(non_snake_case)]
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
@@ -27,7 +28,8 @@ use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Area Under the Receiver Operating Characteristic Curve (ROC AUC)
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct AUC {}
impl AUC {
@@ -91,6 +93,7 @@ impl AUC {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn auc() {
let y_true: Vec<f64> = vec![0., 0., 1., 1.];
+4 -1
View File
@@ -1,10 +1,12 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use crate::metrics::cluster_helpers::*;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Homogeneity, completeness and V-Measure scores.
pub struct HCVScore {}
@@ -41,6 +43,7 @@ impl HCVScore {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn homogeneity_score() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
+3
View File
@@ -101,6 +101,7 @@ pub fn mutual_info_score<T: RealNumber>(contingency: &[Vec<usize>]) -> T {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn contingency_matrix_test() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
@@ -112,6 +113,7 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn entropy_test() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
@@ -119,6 +121,7 @@ mod tests {
assert!((1.2770f32 - entropy(&v1).unwrap()).abs() < 1e-4);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mutual_info_score_test() {
let v1 = vec![0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0];
+4 -1
View File
@@ -18,6 +18,7 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
@@ -26,7 +27,8 @@ use crate::metrics::precision::Precision;
use crate::metrics::recall::Recall;
/// F-measure
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct F1<T: RealNumber> {
/// a positive real factor
pub beta: T,
@@ -57,6 +59,7 @@ impl<T: RealNumber> F1<T> {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn f1() {
let y_pred: Vec<f64> = vec![0., 0., 1., 1., 1., 1.];
+4 -1
View File
@@ -18,12 +18,14 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Mean Absolute Error
pub struct MeanAbsoluteError {}
@@ -54,6 +56,7 @@ impl MeanAbsoluteError {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mean_absolute_error() {
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
+4 -1
View File
@@ -18,12 +18,14 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
/// Mean Squared Error
pub struct MeanSquareError {}
@@ -54,6 +56,7 @@ impl MeanSquareError {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn mean_squared_error() {
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
+4 -1
View File
@@ -18,13 +18,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Precision metric.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Precision {}
impl Precision {
@@ -75,6 +77,7 @@ impl Precision {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn precision() {
let y_true: Vec<f64> = vec![0., 1., 1., 0.];
+4 -1
View File
@@ -18,13 +18,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Coefficient of Determination (R2)
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct R2 {}
impl R2 {
@@ -68,6 +70,7 @@ impl R2 {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn r2() {
let y_true: Vec<f64> = vec![3., -0.5, 2., 7.];
+4 -1
View File
@@ -18,13 +18,15 @@
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
/// Recall metric.
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct Recall {}
impl Recall {
@@ -75,6 +77,7 @@ impl Recall {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn recall() {
let y_true: Vec<f64> = vec![0., 1., 1., 0.];
+7
View File
@@ -144,6 +144,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_kfold_return_test_indices_simple() {
let k = KFold {
@@ -158,6 +159,7 @@ mod tests {
assert_eq!(test_indices[2], (22..33).collect::<Vec<usize>>());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_kfold_return_test_indices_odd() {
let k = KFold {
@@ -172,6 +174,7 @@ mod tests {
assert_eq!(test_indices[2], (23..34).collect::<Vec<usize>>());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_kfold_return_test_mask_simple() {
let k = KFold {
@@ -197,6 +200,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_kfold_return_split_simple() {
let k = KFold {
@@ -212,6 +216,7 @@ mod tests {
assert_eq!(train_test_splits[1].1, (11..22).collect::<Vec<usize>>());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_kfold_return_split_simple_shuffle() {
let k = KFold {
@@ -227,6 +232,7 @@ mod tests {
assert_eq!(train_test_splits[1].1.len(), 11_usize);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn numpy_parity_test() {
let k = KFold {
@@ -247,6 +253,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn numpy_parity_test_shuffle() {
let k = KFold {
+4
View File
@@ -285,6 +285,7 @@ mod tests {
use crate::model_selection::kfold::KFold;
use crate::neighbors::knn_regressor::KNNRegressor;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_train_test_split() {
let n = 123;
@@ -308,6 +309,7 @@ mod tests {
#[derive(Clone)]
struct NoParameters {}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_cross_validate_biased() {
struct BiasedEstimator {}
@@ -367,6 +369,7 @@ mod tests {
assert_eq!(0.4, results.mean_train_score());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_cross_validate_knn() {
let x = DenseMatrix::from_2d_array(&[
@@ -411,6 +414,7 @@ mod tests {
assert!(results.mean_train_score() < results.mean_test_score());
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_cross_val_predict_knn() {
let x = DenseMatrix::from_2d_array(&[
+139 -20
View File
@@ -42,15 +42,49 @@ use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for Bernoulli features
#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct BernoulliNBDistribution<T: RealNumber> {
/// class labels known to the classifier
class_labels: Vec<T>,
/// number of training samples observed in each class
class_count: Vec<usize>,
/// probability of each class
class_priors: Vec<T>,
feature_prob: Vec<Vec<T>>,
/// Number of samples encountered for each (class, feature)
feature_count: Vec<Vec<usize>>,
/// probability of features per class
feature_log_prob: Vec<Vec<T>>,
/// Number of features of each sample
n_features: usize,
}
impl<T: RealNumber> PartialEq for BernoulliNBDistribution<T> {
fn eq(&self, other: &Self) -> bool {
if self.class_labels == other.class_labels
&& self.class_count == other.class_count
&& self.class_priors == other.class_priors
&& self.feature_count == other.feature_count
&& self.n_features == other.n_features
{
for (a, b) in self
.feature_log_prob
.iter()
.zip(other.feature_log_prob.iter())
{
if !a.approximate_eq(b, T::epsilon()) {
return false;
}
}
true
} else {
false
}
}
}
impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for BernoulliNBDistribution<T> {
@@ -63,9 +97,9 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for BernoulliNBDistributi
for feature in 0..j.len() {
let value = j.get(feature);
if value == T::one() {
likelihood += self.feature_prob[class_index][feature].ln();
likelihood += self.feature_log_prob[class_index][feature];
} else {
likelihood += (T::one() - self.feature_prob[class_index][feature]).ln();
likelihood += (T::one() - self.feature_log_prob[class_index][feature].exp()).ln();
}
}
likelihood
@@ -77,7 +111,8 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for BernoulliNBDistributi
}
/// `BernoulliNB` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct BernoulliNBParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: T,
@@ -154,10 +189,10 @@ impl<T: RealNumber> BernoulliNBDistribution<T> {
let y = y.to_vec();
let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
let mut class_count = vec![T::zero(); class_labels.len()];
let mut class_count = vec![0_usize; class_labels.len()];
for class_index in indices.iter() {
class_count[*class_index] += T::one();
class_count[*class_index] += 1;
}
let class_priors = if let Some(class_priors) = priors {
@@ -170,25 +205,35 @@ impl<T: RealNumber> BernoulliNBDistribution<T> {
} else {
class_count
.iter()
.map(|&c| c / T::from(n_samples).unwrap())
.map(|&c| T::from(c).unwrap() / T::from(n_samples).unwrap())
.collect()
};
let mut feature_in_class_counter = vec![vec![T::zero(); n_features]; class_labels.len()];
let mut feature_in_class_counter = vec![vec![0_usize; n_features]; class_labels.len()];
for (row, class_index) in row_iter(x).zip(indices) {
for (idx, row_i) in row.iter().enumerate().take(n_features) {
feature_in_class_counter[class_index][idx] += *row_i;
feature_in_class_counter[class_index][idx] +=
row_i.to_usize().ok_or_else(|| {
Failed::fit(&format!(
"Elements of the matrix should be 1.0 or 0.0 |found|=[{}]",
row_i
))
})?;
}
}
let feature_prob = feature_in_class_counter
let feature_log_prob = feature_in_class_counter
.iter()
.enumerate()
.map(|(class_index, feature_count)| {
feature_count
.iter()
.map(|&count| (count + alpha) / (class_count[class_index] + alpha * T::two()))
.map(|&count| {
((T::from(count).unwrap() + alpha)
/ (T::from(class_count[class_index]).unwrap() + alpha * T::two()))
.ln()
})
.collect()
})
.collect();
@@ -196,13 +241,18 @@ impl<T: RealNumber> BernoulliNBDistribution<T> {
Ok(Self {
class_labels,
class_priors,
feature_prob,
class_count,
feature_count: feature_in_class_counter,
feature_log_prob,
n_features,
})
}
}
/// BernoulliNB implements the categorical naive Bayes algorithm for categorically distributed data.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
/// BernoulliNB implements the naive Bayes algorithm for data that follows the Bernoulli
/// distribution.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct BernoulliNB<T: RealNumber, M: Matrix<T>> {
inner: BaseNaiveBayes<T, M, BernoulliNBDistribution<T>>,
binarize: Option<T>,
@@ -262,6 +312,34 @@ impl<T: RealNumber, M: Matrix<T>> BernoulliNB<T, M> {
self.inner.predict(x)
}
}
/// Class labels known to the classifier.
/// Returns a vector of size n_classes.
pub fn classes(&self) -> &Vec<T> {
&self.inner.distribution.class_labels
}
/// Number of training samples observed in each class.
/// Returns a vector of size n_classes.
pub fn class_count(&self) -> &Vec<usize> {
&self.inner.distribution.class_count
}
/// Number of features of each sample
pub fn n_features(&self) -> usize {
self.inner.distribution.n_features
}
/// Number of samples encountered for each (class, feature)
/// Returns a 2d vector of shape (n_classes, n_features)
pub fn feature_count(&self) -> &Vec<Vec<usize>> {
&self.inner.distribution.feature_count
}
/// Empirical log probability of features given a class
pub fn feature_log_prob(&self) -> &Vec<Vec<T>> {
&self.inner.distribution.feature_log_prob
}
}
#[cfg(test)]
@@ -269,6 +347,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_bernoulli_naive_bayes() {
// Tests that BernoulliNB when alpha=1.0 gives the same values as
@@ -292,10 +371,24 @@ mod tests {
assert_eq!(bnb.inner.distribution.class_priors, &[0.75, 0.25]);
assert_eq!(
bnb.inner.distribution.feature_prob,
bnb.feature_log_prob(),
&[
&[0.4, 0.8, 0.2, 0.4, 0.4, 0.2],
&[1. / 3.0, 2. / 3.0, 2. / 3.0, 1. / 3.0, 1. / 3.0, 2. / 3.0]
&[
-0.916290731874155,
-0.2231435513142097,
-1.6094379124341003,
-0.916290731874155,
-0.916290731874155,
-1.6094379124341003
],
&[
-1.0986122886681098,
-0.40546510810816444,
-0.40546510810816444,
-1.0986122886681098,
-1.0986122886681098,
-0.40546510810816444
]
]
);
@@ -307,6 +400,7 @@ mod tests {
assert_eq!(y_hat, &[1.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn bernoulli_nb_scikit_parity() {
let x = DenseMatrix::<f64>::from_2d_array(&[
@@ -331,13 +425,36 @@ mod tests {
let y_hat = bnb.predict(&x).unwrap();
assert_eq!(bnb.classes(), &[0., 1., 2.]);
assert_eq!(bnb.class_count(), &[7, 3, 5]);
assert_eq!(bnb.n_features(), 10);
assert_eq!(
bnb.feature_count(),
&[
&[5, 6, 6, 7, 6, 4, 6, 7, 7, 7],
&[3, 3, 3, 1, 3, 2, 3, 2, 2, 3],
&[4, 4, 3, 4, 5, 2, 4, 5, 3, 4]
]
);
assert!(bnb
.inner
.distribution
.class_priors
.approximate_eq(&vec!(0.46, 0.2, 0.33), 1e-2));
assert!(bnb.inner.distribution.feature_prob[1].approximate_eq(
&vec!(0.8, 0.8, 0.8, 0.4, 0.8, 0.6, 0.8, 0.6, 0.6, 0.8),
assert!(bnb.feature_log_prob()[1].approximate_eq(
&vec![
-0.22314355,
-0.22314355,
-0.22314355,
-0.91629073,
-0.22314355,
-0.51082562,
-0.22314355,
-0.51082562,
-0.51082562,
-0.22314355
],
1e-1
));
assert!(y_hat.approximate_eq(
@@ -346,7 +463,9 @@ mod tests {
));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::<f64>::from_2d_array(&[
&[1., 1., 0., 0., 0., 0.],
+144 -25
View File
@@ -36,19 +36,38 @@ use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for categorical features
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct CategoricalNBDistribution<T: RealNumber> {
/// number of training samples observed in each class
class_count: Vec<usize>,
/// class labels known to the classifier
class_labels: Vec<T>,
/// probability of each class
class_priors: Vec<T>,
coefficients: Vec<Vec<Vec<T>>>,
/// Number of features of each sample
n_features: usize,
/// Number of categories for each feature
n_categories: Vec<usize>,
/// Holds arrays of shape (n_classes, n_categories of respective feature)
/// for each feature. Each array provides the number of samples
/// encountered for each class and category of the specific feature.
category_count: Vec<Vec<Vec<usize>>>,
}
impl<T: RealNumber> PartialEq for CategoricalNBDistribution<T> {
fn eq(&self, other: &Self) -> bool {
if self.class_labels == other.class_labels && self.class_priors == other.class_priors {
if self.class_labels == other.class_labels
&& self.class_priors == other.class_priors
&& self.n_features == other.n_features
&& self.n_categories == other.n_categories
&& self.class_count == other.class_count
{
if self.coefficients.len() != other.coefficients.len() {
return false;
}
@@ -88,8 +107,8 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for CategoricalNBDistribu
let mut likelihood = T::zero();
for feature in 0..j.len() {
let value = j.get(feature).floor().to_usize().unwrap();
if self.coefficients[class_index][feature].len() > value {
likelihood += self.coefficients[class_index][feature][value];
if self.coefficients[feature][class_index].len() > value {
likelihood += self.coefficients[feature][class_index][value];
} else {
return T::zero();
}
@@ -142,17 +161,17 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
let y_max = y
.iter()
.max()
.ok_or_else(|| Failed::fit(&"Failed to get the labels of y.".to_string()))?;
.ok_or_else(|| Failed::fit("Failed to get the labels of y."))?;
let class_labels: Vec<T> = (0..*y_max + 1)
.map(|label| T::from(label).unwrap())
.collect();
let mut classes_count: Vec<T> = vec![T::zero(); class_labels.len()];
let mut class_count = vec![0_usize; class_labels.len()];
for elem in y.iter() {
classes_count[*elem] += T::one();
class_count[*elem] += 1;
}
let mut feature_categories: Vec<Vec<T>> = Vec::with_capacity(n_features);
let mut n_categories: Vec<usize> = Vec::with_capacity(n_features);
for feature in 0..n_features {
let feature_max = x
.get_col_as_vec(feature)
@@ -165,18 +184,15 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
feature
))
})?;
let feature_types = (0..feature_max + 1)
.map(|feat| T::from(feat).unwrap())
.collect();
feature_categories.push(feature_types);
n_categories.push(feature_max + 1);
}
let mut coefficients: Vec<Vec<Vec<T>>> = Vec::with_capacity(class_labels.len());
for (label, label_count) in class_labels.iter().zip(classes_count.iter()) {
let mut category_count: Vec<Vec<Vec<usize>>> = Vec::with_capacity(class_labels.len());
for (feature_index, &n_categories_i) in n_categories.iter().enumerate().take(n_features) {
let mut coef_i: Vec<Vec<T>> = Vec::with_capacity(n_features);
for (feature_index, feature_options) in
feature_categories.iter().enumerate().take(n_features)
{
let mut category_count_i: Vec<Vec<usize>> = Vec::with_capacity(n_features);
for (label, &label_count) in class_labels.iter().zip(class_count.iter()) {
let col = x
.get_col_as_vec(feature_index)
.iter()
@@ -184,39 +200,48 @@ impl<T: RealNumber> CategoricalNBDistribution<T> {
.filter(|(i, _j)| T::from(y[*i]).unwrap() == *label)
.map(|(_, j)| *j)
.collect::<Vec<T>>();
let mut feat_count: Vec<T> = vec![T::zero(); feature_options.len()];
let mut feat_count: Vec<usize> = vec![0_usize; n_categories_i];
for row in col.iter() {
let index = row.floor().to_usize().unwrap();
feat_count[index] += T::one();
feat_count[index] += 1;
}
let coef_i_j = feat_count
.iter()
.map(|c| {
((*c + alpha)
/ (*label_count + T::from(feature_options.len()).unwrap() * alpha))
((T::from(*c).unwrap() + alpha)
/ (T::from(label_count).unwrap()
+ T::from(n_categories_i).unwrap() * alpha))
.ln()
})
.collect::<Vec<T>>();
category_count_i.push(feat_count);
coef_i.push(coef_i_j);
}
category_count.push(category_count_i);
coefficients.push(coef_i);
}
let class_priors = classes_count
.into_iter()
.map(|count| count / T::from(n_samples).unwrap())
let class_priors = class_count
.iter()
.map(|&count| T::from(count).unwrap() / T::from(n_samples).unwrap())
.collect::<Vec<T>>();
Ok(Self {
class_count,
class_labels,
class_priors,
coefficients,
n_features,
n_categories,
category_count,
})
}
}
/// `CategoricalNB` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct CategoricalNBParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: T,
@@ -237,7 +262,8 @@ impl<T: RealNumber> Default for CategoricalNBParameters<T> {
}
/// CategoricalNB implements the categorical naive Bayes algorithm for categorically distributed data.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct CategoricalNB<T: RealNumber, M: Matrix<T>> {
inner: BaseNaiveBayes<T, M, CategoricalNBDistribution<T>>,
}
@@ -283,6 +309,41 @@ impl<T: RealNumber, M: Matrix<T>> CategoricalNB<T, M> {
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.inner.predict(x)
}
/// Class labels known to the classifier.
/// Returns a vector of size n_classes.
pub fn classes(&self) -> &Vec<T> {
&self.inner.distribution.class_labels
}
/// Number of training samples observed in each class.
/// Returns a vector of size n_classes.
pub fn class_count(&self) -> &Vec<usize> {
&self.inner.distribution.class_count
}
/// Number of features of each sample
pub fn n_features(&self) -> usize {
self.inner.distribution.n_features
}
/// Number of categories for each feature.
/// Returns a vector of size n_features.
pub fn n_categories(&self) -> &Vec<usize> {
&self.inner.distribution.n_categories
}
/// Holds arrays of shape (n_classes, n_categories of respective feature)
/// for each feature. Each array provides the number of samples
/// encountered for each class and category of the specific feature.
pub fn category_count(&self) -> &Vec<Vec<Vec<usize>>> {
&self.inner.distribution.category_count
}
/// Holds arrays of shape (n_classes, n_categories of respective feature)
/// for each feature. Each array provides the empirical log probability
/// of categories given the respective feature and class, ``P(x_i|y)``.
pub fn feature_log_prob(&self) -> &Vec<Vec<Vec<T>>> {
&self.inner.distribution.coefficients
}
}
#[cfg(test)]
@@ -290,6 +351,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_categorical_naive_bayes() {
let x = DenseMatrix::from_2d_array(&[
@@ -311,11 +373,66 @@ mod tests {
let y = vec![0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0.];
let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
// checking parity with scikit
assert_eq!(cnb.classes(), &[0., 1.]);
assert_eq!(cnb.class_count(), &[5, 9]);
assert_eq!(cnb.n_features(), 4);
assert_eq!(cnb.n_categories(), &[3, 3, 2, 2]);
assert_eq!(
cnb.category_count(),
&vec![
vec![vec![3, 0, 2], vec![2, 4, 3]],
vec![vec![1, 2, 2], vec![3, 4, 2]],
vec![vec![1, 4], vec![6, 3]],
vec![vec![2, 3], vec![6, 3]]
]
);
assert_eq!(
cnb.feature_log_prob(),
&vec![
vec![
vec![
-0.6931471805599453,
-2.0794415416798357,
-0.9808292530117262
],
vec![
-1.3862943611198906,
-0.8754687373538999,
-1.0986122886681098
]
],
vec![
vec![
-1.3862943611198906,
-0.9808292530117262,
-0.9808292530117262
],
vec![
-1.0986122886681098,
-0.8754687373538999,
-1.3862943611198906
]
],
vec![
vec![-1.252762968495368, -0.3364722366212129],
vec![-0.45198512374305727, -1.0116009116784799]
],
vec![
vec![-0.8472978603872037, -0.5596157879354228],
vec![-0.45198512374305727, -1.0116009116784799]
]
]
);
let x_test = DenseMatrix::from_2d_array(&[&[0., 2., 1., 0.], &[2., 2., 0., 0.]]);
let y_hat = cnb.predict(&x_test).unwrap();
assert_eq!(y_hat, vec![0., 1.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_categorical_naive_bayes2() {
let x = DenseMatrix::from_2d_array(&[
@@ -344,7 +461,9 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::<f64>::from_2d_array(&[
&[3., 4., 0., 1.],
+70 -27
View File
@@ -30,17 +30,21 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for categorical features
#[derive(Serialize, Deserialize, Debug, PartialEq)]
/// Naive Bayes classifier using Gaussian distribution
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
struct GaussianNBDistribution<T: RealNumber> {
/// class labels known to the classifier
class_labels: Vec<T>,
/// number of training samples observed in each class
class_count: Vec<usize>,
/// probability of each class.
class_priors: Vec<T>,
/// variance of each feature per class
sigma: Vec<Vec<T>>,
var: Vec<Vec<T>>,
/// mean of each feature per class
theta: Vec<Vec<T>>,
}
@@ -55,18 +59,14 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for GaussianNBDistributio
}
fn log_likelihood(&self, class_index: usize, j: &M::RowVector) -> T {
if class_index < self.class_labels.len() {
let mut likelihood = T::zero();
for feature in 0..j.len() {
let value = j.get(feature);
let mean = self.theta[class_index][feature];
let variance = self.sigma[class_index][feature];
likelihood += self.calculate_log_probability(value, mean, variance);
}
likelihood
} else {
T::zero()
let mut likelihood = T::zero();
for feature in 0..j.len() {
let value = j.get(feature);
let mean = self.theta[class_index][feature];
let variance = self.var[class_index][feature];
likelihood += self.calculate_log_probability(value, mean, variance);
}
likelihood
}
fn classes(&self) -> &Vec<T> {
@@ -75,7 +75,8 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for GaussianNBDistributio
}
/// `GaussianNB` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone)]
pub struct GaussianNBParameters<T: RealNumber> {
/// Prior probabilities of the classes. If specified the priors are not adjusted according to the data
pub priors: Option<Vec<T>>,
@@ -118,12 +119,12 @@ impl<T: RealNumber> GaussianNBDistribution<T> {
let y = y.to_vec();
let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
let mut class_count = vec![T::zero(); class_labels.len()];
let mut class_count = vec![0_usize; class_labels.len()];
let mut subdataset: Vec<Vec<Vec<T>>> = vec![vec![]; class_labels.len()];
for (row, class_index) in row_iter(x).zip(indices.iter()) {
class_count[*class_index] += T::one();
class_count[*class_index] += 1;
subdataset[*class_index].push(row);
}
@@ -136,8 +137,8 @@ impl<T: RealNumber> GaussianNBDistribution<T> {
class_priors
} else {
class_count
.into_iter()
.map(|c| c / T::from(n_samples).unwrap())
.iter()
.map(|&c| T::from(c).unwrap() / T::from(n_samples).unwrap())
.collect()
};
@@ -154,15 +155,16 @@ impl<T: RealNumber> GaussianNBDistribution<T> {
})
.collect();
let (sigma, theta): (Vec<Vec<T>>, Vec<Vec<T>>) = subdataset
let (var, theta): (Vec<Vec<T>>, Vec<Vec<T>>) = subdataset
.iter()
.map(|data| (data.var(0), data.mean(0)))
.unzip();
Ok(Self {
class_labels,
class_count,
class_priors,
sigma,
var,
theta,
})
}
@@ -177,8 +179,10 @@ impl<T: RealNumber> GaussianNBDistribution<T> {
}
}
/// GaussianNB implements the categorical naive Bayes algorithm for categorically distributed data.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
/// GaussianNB implements the naive Bayes algorithm for data that follows the Gaussian
/// distribution.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct GaussianNB<T: RealNumber, M: Matrix<T>> {
inner: BaseNaiveBayes<T, M, GaussianNBDistribution<T>>,
}
@@ -219,6 +223,36 @@ impl<T: RealNumber, M: Matrix<T>> GaussianNB<T, M> {
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.inner.predict(x)
}
/// Class labels known to the classifier.
/// Returns a vector of size n_classes.
pub fn classes(&self) -> &Vec<T> {
&self.inner.distribution.class_labels
}
/// Number of training samples observed in each class.
/// Returns a vector of size n_classes.
pub fn class_count(&self) -> &Vec<usize> {
&self.inner.distribution.class_count
}
/// Probability of each class
/// Returns a vector of size n_classes.
pub fn class_priors(&self) -> &Vec<T> {
&self.inner.distribution.class_priors
}
/// Mean of each feature per class
/// Returns a 2d vector of shape (n_classes, n_features).
pub fn theta(&self) -> &Vec<Vec<T>> {
&self.inner.distribution.theta
}
/// Variance of each feature per class
/// Returns a 2d vector of shape (n_classes, n_features).
pub fn var(&self) -> &Vec<Vec<T>> {
&self.inner.distribution.var
}
}
#[cfg(test)]
@@ -226,6 +260,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_gaussian_naive_bayes() {
let x = DenseMatrix::from_2d_array(&[
@@ -241,22 +276,28 @@ mod tests {
let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
let y_hat = gnb.predict(&x).unwrap();
assert_eq!(y_hat, y);
assert_eq!(gnb.classes(), &[1., 2.]);
assert_eq!(gnb.class_count(), &[3, 3]);
assert_eq!(
gnb.inner.distribution.sigma,
gnb.var(),
&[
&[0.666666666666667, 0.22222222222222232],
&[0.666666666666667, 0.22222222222222232]
]
);
assert_eq!(gnb.inner.distribution.class_priors, &[0.5, 0.5]);
assert_eq!(gnb.class_priors(), &[0.5, 0.5]);
assert_eq!(
gnb.inner.distribution.theta,
gnb.theta(),
&[&[-2., -1.3333333333333333], &[2., 1.3333333333333333]]
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_gaussian_naive_bayes_with_priors() {
let x = DenseMatrix::from_2d_array(&[
@@ -273,10 +314,12 @@ mod tests {
let parameters = GaussianNBParameters::default().with_priors(priors.clone());
let gnb = GaussianNB::fit(&x, &y, parameters).unwrap();
assert_eq!(gnb.inner.distribution.class_priors, priors);
assert_eq!(gnb.class_priors(), &priors);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::<f64>::from_2d_array(&[
&[-1., -1.],
+3 -1
View File
@@ -39,6 +39,7 @@ use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
@@ -55,7 +56,8 @@ pub(crate) trait NBDistribution<T: RealNumber, M: Matrix<T>> {
}
/// Base struct for the Naive Bayes classifier.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub(crate) struct BaseNaiveBayes<T: RealNumber, M: Matrix<T>, D: NBDistribution<T, M>> {
distribution: D,
_phantom_t: PhantomData<T>,
+117 -21
View File
@@ -42,15 +42,25 @@ use crate::math::num::RealNumber;
use crate::math::vector::RealNumberVector;
use crate::naive_bayes::{BaseNaiveBayes, NBDistribution};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Naive Bayes classifier for Multinomial features
#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
struct MultinomialNBDistribution<T: RealNumber> {
/// class labels known to the classifier
class_labels: Vec<T>,
/// number of training samples observed in each class
class_count: Vec<usize>,
/// probability of each class
class_priors: Vec<T>,
feature_prob: Vec<Vec<T>>,
/// Empirical log probability of features given a class
feature_log_prob: Vec<Vec<T>>,
/// Number of samples encountered for each (class, feature)
feature_count: Vec<Vec<usize>>,
/// Number of features of each sample
n_features: usize,
}
impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribution<T> {
@@ -62,7 +72,7 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribu
let mut likelihood = T::zero();
for feature in 0..j.len() {
let value = j.get(feature);
likelihood += value * self.feature_prob[class_index][feature].ln();
likelihood += value * self.feature_log_prob[class_index][feature];
}
likelihood
}
@@ -73,7 +83,8 @@ impl<T: RealNumber, M: Matrix<T>> NBDistribution<T, M> for MultinomialNBDistribu
}
/// `MultinomialNB` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct MultinomialNBParameters<T: RealNumber> {
/// Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
pub alpha: T,
@@ -141,10 +152,10 @@ impl<T: RealNumber> MultinomialNBDistribution<T> {
let y = y.to_vec();
let (class_labels, indices) = <Vec<T> as RealNumberVector<T>>::unique_with_indices(&y);
let mut class_count = vec![T::zero(); class_labels.len()];
let mut class_count = vec![0_usize; class_labels.len()];
for class_index in indices.iter() {
class_count[*class_index] += T::one();
class_count[*class_index] += 1;
}
let class_priors = if let Some(class_priors) = priors {
@@ -157,39 +168,53 @@ impl<T: RealNumber> MultinomialNBDistribution<T> {
} else {
class_count
.iter()
.map(|&c| c / T::from(n_samples).unwrap())
.map(|&c| T::from(c).unwrap() / T::from(n_samples).unwrap())
.collect()
};
let mut feature_in_class_counter = vec![vec![T::zero(); n_features]; class_labels.len()];
let mut feature_in_class_counter = vec![vec![0_usize; n_features]; class_labels.len()];
for (row, class_index) in row_iter(x).zip(indices) {
for (idx, row_i) in row.iter().enumerate().take(n_features) {
feature_in_class_counter[class_index][idx] += *row_i;
feature_in_class_counter[class_index][idx] +=
row_i.to_usize().ok_or_else(|| {
Failed::fit(&format!(
"Elements of the matrix should be convertible to usize |found|=[{}]",
row_i
))
})?;
}
}
let feature_prob = feature_in_class_counter
let feature_log_prob = feature_in_class_counter
.iter()
.map(|feature_count| {
let n_c = feature_count.sum();
let n_c: usize = feature_count.iter().sum();
feature_count
.iter()
.map(|&count| (count + alpha) / (n_c + alpha * T::from(n_features).unwrap()))
.map(|&count| {
((T::from(count).unwrap() + alpha)
/ (T::from(n_c).unwrap() + alpha * T::from(n_features).unwrap()))
.ln()
})
.collect()
})
.collect();
Ok(Self {
class_count,
class_labels,
class_priors,
feature_prob,
feature_log_prob,
feature_count: feature_in_class_counter,
n_features,
})
}
}
/// MultinomialNB implements the categorical naive Bayes algorithm for categorically distributed data.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
/// MultinomialNB implements the naive Bayes algorithm for multinomially distributed data.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq)]
pub struct MultinomialNB<T: RealNumber, M: Matrix<T>> {
inner: BaseNaiveBayes<T, M, MultinomialNBDistribution<T>>,
}
@@ -236,6 +261,35 @@ impl<T: RealNumber, M: Matrix<T>> MultinomialNB<T, M> {
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
self.inner.predict(x)
}
/// Class labels known to the classifier.
/// Returns a vector of size n_classes.
pub fn classes(&self) -> &Vec<T> {
&self.inner.distribution.class_labels
}
/// Number of training samples observed in each class.
/// Returns a vector of size n_classes.
pub fn class_count(&self) -> &Vec<usize> {
&self.inner.distribution.class_count
}
/// Empirical log probability of features given a class, P(x_i|y).
/// Returns a 2d vector of shape (n_classes, n_features)
pub fn feature_log_prob(&self) -> &Vec<Vec<T>> {
&self.inner.distribution.feature_log_prob
}
/// Number of features of each sample
pub fn n_features(&self) -> usize {
self.inner.distribution.n_features
}
/// Number of samples encountered for each (class, feature)
/// Returns a 2d vector of shape (n_classes, n_features)
pub fn feature_count(&self) -> &Vec<Vec<usize>> {
&self.inner.distribution.feature_count
}
}
#[cfg(test)]
@@ -243,6 +297,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn run_multinomial_naive_bayes() {
// Tests that MultinomialNB when alpha=1.0 gives the same values as
@@ -264,12 +319,29 @@ mod tests {
let y = vec![0., 0., 0., 1.];
let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
assert_eq!(mnb.classes(), &[0., 1.]);
assert_eq!(mnb.class_count(), &[3, 1]);
assert_eq!(mnb.inner.distribution.class_priors, &[0.75, 0.25]);
assert_eq!(
mnb.inner.distribution.feature_prob,
mnb.feature_log_prob(),
&[
&[1. / 7., 3. / 7., 1. / 14., 1. / 7., 1. / 7., 1. / 14.],
&[1. / 9., 2. / 9.0, 2. / 9.0, 1. / 9.0, 1. / 9.0, 2. / 9.0]
&[
(1_f64 / 7_f64).ln(),
(3_f64 / 7_f64).ln(),
(1_f64 / 14_f64).ln(),
(1_f64 / 7_f64).ln(),
(1_f64 / 7_f64).ln(),
(1_f64 / 14_f64).ln()
],
&[
(1_f64 / 9_f64).ln(),
(2_f64 / 9_f64).ln(),
(2_f64 / 9_f64).ln(),
(1_f64 / 9_f64).ln(),
(1_f64 / 9_f64).ln(),
(2_f64 / 9_f64).ln()
]
]
);
@@ -281,6 +353,7 @@ mod tests {
assert_eq!(y_hat, &[0.]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn multinomial_nb_scikit_parity() {
let x = DenseMatrix::<f64>::from_2d_array(&[
@@ -303,6 +376,16 @@ mod tests {
let y = vec![2., 2., 0., 0., 0., 2., 1., 1., 0., 1., 0., 0., 2., 0., 2.];
let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
assert_eq!(nb.n_features(), 10);
assert_eq!(
nb.feature_count(),
&[
&[12, 20, 11, 24, 12, 14, 13, 17, 13, 18],
&[9, 6, 9, 4, 7, 3, 8, 5, 4, 9],
&[10, 12, 9, 9, 11, 3, 9, 18, 10, 10]
]
);
let y_hat = nb.predict(&x).unwrap();
assert!(nb
@@ -310,16 +393,29 @@ mod tests {
.distribution
.class_priors
.approximate_eq(&vec!(0.46, 0.2, 0.33), 1e-2));
assert!(nb.inner.distribution.feature_prob[1].approximate_eq(
&vec!(0.07, 0.12, 0.07, 0.15, 0.07, 0.09, 0.08, 0.10, 0.08, 0.11),
1e-1
assert!(nb.feature_log_prob()[1].approximate_eq(
&vec![
-2.00148,
-2.35815494,
-2.00148,
-2.69462718,
-2.22462355,
-2.91777073,
-2.10684052,
-2.51230562,
-2.69462718,
-2.00148
],
1e-5
));
assert!(y_hat.approximate_eq(
&vec!(2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0),
1e-5
));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::<f64>::from_2d_array(&[
&[1., 1., 0., 0., 0., 0.],
+9 -2
View File
@@ -33,6 +33,7 @@
//!
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
@@ -45,7 +46,8 @@ use crate::math::num::RealNumber;
use crate::neighbors::KNNWeightFunction;
/// `KNNClassifier` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
@@ -62,7 +64,8 @@ pub struct KNNClassifierParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
}
/// K Nearest Neighbors Classifier
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KNNClassifier<T: RealNumber, D: Distance<Vec<T>, T>> {
classes: Vec<T>,
y: Vec<usize>,
@@ -248,6 +251,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_fit_predict() {
let x =
@@ -259,6 +263,7 @@ mod tests {
assert_eq!(y.to_vec(), y_hat);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_fit_predict_weighted() {
let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]);
@@ -276,7 +281,9 @@ mod tests {
assert_eq!(vec![3.0], y_hat);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
+9 -2
View File
@@ -36,6 +36,7 @@
//!
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::neighbour::{KNNAlgorithm, KNNAlgorithmName};
@@ -48,7 +49,8 @@ use crate::math::num::RealNumber;
use crate::neighbors::KNNWeightFunction;
/// `KNNRegressor` parameters. Use `Default::default()` for default values.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
@@ -65,7 +67,8 @@ pub struct KNNRegressorParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
}
/// K Nearest Neighbors Regressor
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct KNNRegressor<T: RealNumber, D: Distance<Vec<T>, T>> {
y: Vec<T>,
knn_algorithm: KNNAlgorithm<T, D>,
@@ -228,6 +231,7 @@ mod tests {
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::math::distance::Distances;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_fit_predict_weighted() {
let x =
@@ -251,6 +255,7 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn knn_fit_predict_uniform() {
let x =
@@ -265,7 +270,9 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x =
DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]);
+3 -1
View File
@@ -33,6 +33,7 @@
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use crate::math::num::RealNumber;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// K Nearest Neighbors Classifier
@@ -48,7 +49,8 @@ pub mod knn_regressor;
pub type KNNAlgorithmName = crate::algorithm::neighbour::KNNAlgorithmName;
/// Weight function that is used to determine estimated value.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub enum KNNWeightFunction {
/// All k nearest points are weighted equally
Uniform,
@@ -50,14 +50,14 @@ impl<T: RealNumber> FirstOrderOptimizer<T> for GradientDescent<T> {
let f_alpha = |alpha: T| -> T {
let mut dx = step.clone();
dx.mul_scalar_mut(alpha);
f(&dx.add_mut(&x)) // f(x) = f(x .+ gvec .* alpha)
f(dx.add_mut(&x)) // f(x) = f(x .+ gvec .* alpha)
};
let df_alpha = |alpha: T| -> T {
let mut dx = step.clone();
let mut dg = gvec.clone();
dx.mul_scalar_mut(alpha);
df(&mut dg, &dx.add_mut(&x)); //df(x) = df(x .+ gvec .* alpha)
df(&mut dg, dx.add_mut(&x)); //df(x) = df(x .+ gvec .* alpha)
gvec.dot(&dg)
};
@@ -66,7 +66,7 @@ impl<T: RealNumber> FirstOrderOptimizer<T> for GradientDescent<T> {
let ls_r = ls.search(&f_alpha, &df_alpha, alpha, fx, df0);
alpha = ls_r.alpha;
fx = ls_r.f_x;
x.add_mut(&step.mul_scalar_mut(alpha));
x.add_mut(step.mul_scalar_mut(alpha));
df(&mut gvec, &x);
gnorm = gvec.norm2();
}
@@ -88,6 +88,7 @@ mod tests {
use crate::optimization::line_search::Backtracking;
use crate::optimization::FunctionOrder;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn gradient_descent() {
let x0 = DenseMatrix::row_vector_from_array(&[-1., 1.]);
+6 -3
View File
@@ -1,3 +1,4 @@
#![allow(clippy::suspicious_operation_groupings)]
use std::default::Default;
use std::fmt::Debug;
@@ -7,6 +8,7 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult};
use crate::optimization::line_search::LineSearchMethod;
use crate::optimization::{DF, F};
#[allow(clippy::upper_case_acronyms)]
pub struct LBFGS<T: RealNumber> {
pub max_iter: usize,
pub g_rtol: T,
@@ -116,14 +118,14 @@ impl<T: RealNumber> LBFGS<T> {
let f_alpha = |alpha: T| -> T {
let mut dx = state.s.clone();
dx.mul_scalar_mut(alpha);
f(&dx.add_mut(&state.x)) // f(x) = f(x .+ gvec .* alpha)
f(dx.add_mut(&state.x)) // f(x) = f(x .+ gvec .* alpha)
};
let df_alpha = |alpha: T| -> T {
let mut dx = state.s.clone();
let mut dg = state.x_df.clone();
dx.mul_scalar_mut(alpha);
df(&mut dg, &dx.add_mut(&state.x)); //df(x) = df(x .+ gvec .* alpha)
df(&mut dg, dx.add_mut(&state.x)); //df(x) = df(x .+ gvec .* alpha)
state.x_df.dot(&dg)
};
@@ -205,7 +207,7 @@ impl<T: RealNumber> FirstOrderOptimizer<T> for LBFGS<T> {
) -> OptimizerResult<T, X> {
let mut state = self.init_state(x0);
df(&mut state.x_df, &x0);
df(&mut state.x_df, x0);
let g_converged = state.x_df.norm(T::infinity()) < self.g_atol;
let mut converged = g_converged;
@@ -238,6 +240,7 @@ mod tests {
use crate::optimization::line_search::Backtracking;
use crate::optimization::FunctionOrder;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn lbfgs() {
let x0 = DenseMatrix::row_vector_from_array(&[0., 0.]);
+1
View File
@@ -112,6 +112,7 @@ impl<T: Float> LineSearchMethod<T> for Backtracking<T> {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn backtracking() {
let f = |x: f64| -> f64 { x.powf(2.) + x };
+1
View File
@@ -4,6 +4,7 @@ pub mod line_search;
pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a;
pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a;
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, PartialEq)]
pub enum FunctionOrder {
SECOND,
+333
View File
@@ -0,0 +1,333 @@
//! # One-hot Encoding For [RealNumber](../../math/num/trait.RealNumber.html) Matricies
//! Transform a data [Matrix](../../linalg/trait.BaseMatrix.html) by replacing all categorical variables with their one-hot equivalents
//!
//! Internally OneHotEncoder treats every categorical column as a series and transforms it using [CategoryMapper](../series_encoder/struct.CategoryMapper.html)
//!
//! ### Usage Example
//! ```
//! use smartcore::linalg::naive::dense_matrix::DenseMatrix;
//! use smartcore::preprocessing::categorical::{OneHotEncoder, OneHotEncoderParams};
//! let data = DenseMatrix::from_2d_array(&[
//! &[1.5, 1.0, 1.5, 3.0],
//! &[1.5, 2.0, 1.5, 4.0],
//! &[1.5, 1.0, 1.5, 5.0],
//! &[1.5, 2.0, 1.5, 6.0],
//! ]);
//! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
//! // Infer number of categories from data and return a reusable encoder
//! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap();
//! // Transform categorical to one-hot encoded (can transform similar)
//! let oh_data = encoder.transform(&data).unwrap();
//! // Produces the following:
//! // &[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0]
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0]
//! // &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0]
//! // &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0]
//! ```
use std::iter;
use crate::error::Failed;
use crate::linalg::Matrix;
use crate::preprocessing::data_traits::{CategoricalFloat, Categorizable};
use crate::preprocessing::series_encoder::CategoryMapper;
/// OneHotEncoder Parameters
#[derive(Debug, Clone)]
pub struct OneHotEncoderParams {
/// Column number that contain categorical variable
pub col_idx_categorical: Option<Vec<usize>>,
/// (Currently not implemented) Try and infer which of the matrix columns are categorical variables
infer_categorical: bool,
}
impl OneHotEncoderParams {
/// Generate parameters from categorical variable column numbers
pub fn from_cat_idx(categorical_params: &[usize]) -> Self {
Self {
col_idx_categorical: Some(categorical_params.to_vec()),
infer_categorical: false,
}
}
}
/// Calculate the offset to parameters to due introduction of one-hot encoding
fn find_new_idxs(num_params: usize, cat_sizes: &[usize], cat_idxs: &[usize]) -> Vec<usize> {
// This functions uses iterators and returns a vector.
// In case we get a huge amount of paramenters this might be a problem
// todo: Change this such that it will return an iterator
let cat_idx = cat_idxs.iter().copied().chain((num_params..).take(1));
// Offset is constant between two categorical values, here we calculate the number of steps
// that remain constant
let repeats = cat_idx.scan(0, |a, v| {
let im = v + 1 - *a;
*a = v;
Some(im)
});
// Calculate the offset to parameter idx due to newly intorduced one-hot vectors
let offset_ = cat_sizes.iter().scan(0, |a, &v| {
*a = *a + v - 1;
Some(*a)
});
let offset = (0..1).chain(offset_);
let new_param_idxs: Vec<usize> = (0..num_params)
.zip(
repeats
.zip(offset)
.flat_map(|(r, o)| iter::repeat(o).take(r)),
)
.map(|(idx, ofst)| idx + ofst)
.collect();
new_param_idxs
}
fn validate_col_is_categorical<T: Categorizable>(data: &[T]) -> bool {
for v in data {
if !v.is_valid() {
return false;
}
}
true
}
/// Encode Categorical variavbles of data matrix to one-hot
#[derive(Debug, Clone)]
pub struct OneHotEncoder {
category_mappers: Vec<CategoryMapper<CategoricalFloat>>,
col_idx_categorical: Vec<usize>,
}
impl OneHotEncoder {
/// Create an encoder instance with categories infered from data matrix
pub fn fit<T, M>(data: &M, params: OneHotEncoderParams) -> Result<OneHotEncoder, Failed>
where
T: Categorizable,
M: Matrix<T>,
{
match (params.col_idx_categorical, params.infer_categorical) {
(None, false) => Err(Failed::fit(
"Must pass categorical series ids or infer flag",
)),
(Some(_idxs), true) => Err(Failed::fit(
"Ambigous parameters, got both infer and categroy ids",
)),
(Some(mut idxs), false) => {
// make sure categories have same order as data columns
idxs.sort_unstable();
let (nrows, _) = data.shape();
// col buffer to avoid allocations
let mut col_buf: Vec<T> = iter::repeat(T::zero()).take(nrows).collect();
let mut res: Vec<CategoryMapper<CategoricalFloat>> = Vec::with_capacity(idxs.len());
for &idx in &idxs {
data.copy_col_as_vec(idx, &mut col_buf);
if !validate_col_is_categorical(&col_buf) {
let msg = format!(
"Column {} of data matrix containts non categorizable (integer) values",
idx
);
return Err(Failed::fit(&msg[..]));
}
let hashable_col = col_buf.iter().map(|v| v.to_category());
res.push(CategoryMapper::fit_to_iter(hashable_col));
}
Ok(Self {
category_mappers: res,
col_idx_categorical: idxs,
})
}
(None, true) => {
todo!("Auto-Inference for Categorical Variables not yet implemented")
}
}
}
/// Transform categorical variables to one-hot encoded and return a new matrix
pub fn transform<T, M>(&self, x: &M) -> Result<M, Failed>
where
T: Categorizable,
M: Matrix<T>,
{
let (nrows, p) = x.shape();
let additional_params: Vec<usize> = self
.category_mappers
.iter()
.map(|enc| enc.num_categories())
.collect();
// Eac category of size v adds v-1 params
let expandws_p: usize = p + additional_params.iter().fold(0, |cs, &v| cs + v - 1);
let new_col_idx = find_new_idxs(p, &additional_params[..], &self.col_idx_categorical[..]);
let mut res = M::zeros(nrows, expandws_p);
for (pidx, &old_cidx) in self.col_idx_categorical.iter().enumerate() {
let cidx = new_col_idx[old_cidx];
let col_iter = (0..nrows).map(|r| x.get(r, old_cidx).to_category());
let sencoder = &self.category_mappers[pidx];
let oh_series = col_iter.map(|c| sencoder.get_one_hot::<T, Vec<T>>(&c));
for (row, oh_vec) in oh_series.enumerate() {
match oh_vec {
None => {
// Since we support T types, bad value in a series causes in to be invalid
let msg = format!("At least one value in column {} doesn't conform to category definition", old_cidx);
return Err(Failed::transform(&msg[..]));
}
Some(v) => {
// copy one hot vectors to their place in the data matrix;
for (col_ofst, &val) in v.iter().enumerate() {
res.set(row, cidx + col_ofst, val);
}
}
}
}
}
// copy old data in x to their new location while skipping catergorical vars (already treated)
let mut skip_idx_iter = self.col_idx_categorical.iter();
let mut cur_skip = skip_idx_iter.next();
for (old_p, &new_p) in new_col_idx.iter().enumerate() {
// if found treated varible, skip it
if let Some(&v) = cur_skip {
if v == old_p {
cur_skip = skip_idx_iter.next();
continue;
}
}
for r in 0..nrows {
let val = x.get(r, old_p);
res.set(r, new_p, val);
}
}
Ok(res)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
use crate::preprocessing::series_encoder::CategoryMapper;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn adjust_idxs() {
assert_eq!(find_new_idxs(0, &[], &[]), Vec::<usize>::new());
// [0,1,2] -> [0, 1, 1, 1, 2]
assert_eq!(find_new_idxs(3, &[3], &[1]), vec![0, 1, 4]);
}
fn build_cat_first_and_last() -> (DenseMatrix<f64>, DenseMatrix<f64>) {
let orig = DenseMatrix::from_2d_array(&[
&[1.0, 1.5, 3.0],
&[2.0, 1.5, 4.0],
&[1.0, 1.5, 5.0],
&[2.0, 1.5, 6.0],
]);
let oh_enc = DenseMatrix::from_2d_array(&[
&[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
&[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
&[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
&[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
]);
(orig, oh_enc)
}
fn build_fake_matrix() -> (DenseMatrix<f64>, DenseMatrix<f64>) {
// Categorical first and last
let orig = DenseMatrix::from_2d_array(&[
&[1.5, 1.0, 1.5, 3.0],
&[1.5, 2.0, 1.5, 4.0],
&[1.5, 1.0, 1.5, 5.0],
&[1.5, 2.0, 1.5, 6.0],
]);
let oh_enc = DenseMatrix::from_2d_array(&[
&[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0],
&[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0],
&[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0],
&[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0],
]);
(orig, oh_enc)
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn hash_encode_f64_series() {
let series = vec![3.0, 1.0, 2.0, 1.0];
let hashable_series: Vec<CategoricalFloat> =
series.iter().map(|v| v.to_category()).collect();
let enc = CategoryMapper::from_positional_category_vec(hashable_series);
let inv = enc.invert_one_hot(vec![0.0, 0.0, 1.0]);
let orig_val: f64 = inv.unwrap().into();
assert_eq!(orig_val, 2.0);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn test_fit() {
let (x, _) = build_fake_matrix();
let params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
let oh_enc = OneHotEncoder::fit(&x, params).unwrap();
assert_eq!(oh_enc.category_mappers.len(), 2);
let num_cat: Vec<usize> = oh_enc
.category_mappers
.iter()
.map(|a| a.num_categories())
.collect();
assert_eq!(num_cat, vec![2, 4]);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn matrix_transform_test() {
let (x, expected_x) = build_fake_matrix();
let params = OneHotEncoderParams::from_cat_idx(&[1, 3]);
let oh_enc = OneHotEncoder::fit(&x, params).unwrap();
let nm = oh_enc.transform(&x).unwrap();
assert_eq!(nm, expected_x);
let (x, expected_x) = build_cat_first_and_last();
let params = OneHotEncoderParams::from_cat_idx(&[0, 2]);
let oh_enc = OneHotEncoder::fit(&x, params).unwrap();
let nm = oh_enc.transform(&x).unwrap();
assert_eq!(nm, expected_x);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fail_on_bad_category() {
let m = DenseMatrix::from_2d_array(&[
&[1.0, 1.5, 3.0],
&[2.0, 1.5, 4.0],
&[1.0, 1.5, 5.0],
&[2.0, 1.5, 6.0],
]);
let params = OneHotEncoderParams::from_cat_idx(&[1]);
match OneHotEncoder::fit(&m, params) {
Err(_) => {
assert!(true);
}
_ => assert!(false),
}
}
}
+43
View File
@@ -0,0 +1,43 @@
//! Traits to indicate that float variables can be viewed as categorical
//! This module assumes
use crate::math::num::RealNumber;
pub type CategoricalFloat = u16;
// pub struct CategoricalFloat(u16);
const ERROR_MARGIN: f64 = 0.001;
pub trait Categorizable: RealNumber {
type A;
fn to_category(self) -> CategoricalFloat;
fn is_valid(self) -> bool;
}
impl Categorizable for f32 {
type A = CategoricalFloat;
fn to_category(self) -> CategoricalFloat {
self as CategoricalFloat
}
fn is_valid(self) -> bool {
let a = self.to_category();
(a as f32 - self).abs() < (ERROR_MARGIN as f32)
}
}
impl Categorizable for f64 {
type A = CategoricalFloat;
fn to_category(self) -> CategoricalFloat {
self as CategoricalFloat
}
fn is_valid(self) -> bool {
let a = self.to_category();
(a as f64 - self).abs() < ERROR_MARGIN
}
}
+5
View File
@@ -0,0 +1,5 @@
/// Transform a data matrix by replaceing all categorical variables with their one-hot vector equivalents
pub mod categorical;
mod data_traits;
/// Encode a series (column, array) of categorical variables as one-hot vectors
pub mod series_encoder;
+282
View File
@@ -0,0 +1,282 @@
#![allow(clippy::ptr_arg)]
//! # Series Encoder
//! Encode a series of categorical features as a one-hot numeric array.
use crate::error::Failed;
use crate::linalg::BaseVector;
use crate::math::num::RealNumber;
use std::collections::HashMap;
use std::hash::Hash;
/// ## Bi-directional map category <-> label num.
/// Turn Hashable objects into a one-hot vectors or ordinal values.
/// This struct encodes single class per exmample
///
/// You can fit_to_iter a category enumeration by passing an iterator of categories.
/// category numbers will be assigned in the order they are encountered
///
/// Example:
/// ```
/// use std::collections::HashMap;
/// use smartcore::preprocessing::series_encoder::CategoryMapper;
///
/// let fake_categories: Vec<usize> = vec![1, 2, 3, 4, 5, 3, 5, 3, 1, 2, 4];
/// let it = fake_categories.iter().map(|&a| a);
/// let enc = CategoryMapper::<usize>::fit_to_iter(it);
/// let oh_vec: Vec<f64> = enc.get_one_hot(&1).unwrap();
/// // notice that 1 is actually a zero-th positional category
/// assert_eq!(oh_vec, vec![1.0, 0.0, 0.0, 0.0, 0.0]);
/// ```
///
/// You can also pass a predefined category enumeration such as a hashmap `HashMap<C, usize>` or a vector `Vec<C>`
///
///
/// ```
/// use std::collections::HashMap;
/// use smartcore::preprocessing::series_encoder::CategoryMapper;
///
/// let category_map: HashMap<&str, usize> =
/// vec![("cat", 2), ("background",0), ("dog", 1)]
/// .into_iter()
/// .collect();
/// let category_vec = vec!["background", "dog", "cat"];
///
/// let enc_lv = CategoryMapper::<&str>::from_positional_category_vec(category_vec);
/// let enc_lm = CategoryMapper::<&str>::from_category_map(category_map);
///
/// // ["background", "dog", "cat"]
/// println!("{:?}", enc_lv.get_categories());
/// let lv: Vec<f64> = enc_lv.get_one_hot(&"dog").unwrap();
/// let lm: Vec<f64> = enc_lm.get_one_hot(&"dog").unwrap();
/// assert_eq!(lv, lm);
/// ```
#[derive(Debug, Clone)]
pub struct CategoryMapper<C> {
category_map: HashMap<C, usize>,
categories: Vec<C>,
num_categories: usize,
}
impl<C> CategoryMapper<C>
where
    C: Hash + Eq + Clone,
{
    /// Get the number of categories in the mapper
    pub fn num_categories(&self) -> usize {
        self.num_categories
    }
    /// Fit an encoder to a label iterator.
    ///
    /// Class numbers are assigned in order of first appearance: the first
    /// distinct category gets 0, the next gets 1, and so on. Duplicates are
    /// ignored.
    pub fn fit_to_iter(categories: impl Iterator<Item = C>) -> Self {
        let mut category_map: HashMap<C, usize> = HashMap::new();
        let mut unique_categories: Vec<C> = Vec::new();
        for c in categories {
            if !category_map.contains_key(&c) {
                // First occurrence: the next class number equals the number of
                // categories seen so far. Only one clone is needed — the map
                // owns the clone, the vector takes the original value.
                category_map.insert(c.clone(), unique_categories.len());
                unique_categories.push(c);
            }
        }
        Self {
            category_map,
            num_categories: unique_categories.len(),
            categories: unique_categories,
        }
    }
    /// Build an encoder from a predefined (category -> class number) map.
    ///
    /// The positional category list is reconstructed by sorting the map
    /// entries by class number.
    pub fn from_category_map(category_map: HashMap<C, usize>) -> Self {
        let mut entries: Vec<(C, usize)> =
            category_map.iter().map(|(k, v)| (k.clone(), *v)).collect();
        entries.sort_by_key(|&(_, num)| num);
        let categories: Vec<C> = entries.into_iter().map(|(cat, _)| cat).collect();
        Self {
            num_categories: categories.len(),
            categories,
            category_map,
        }
    }
    /// Build an encoder from a predefined positional category-class num vector
    /// (position in the vector == class number).
    pub fn from_positional_category_vec(categories: Vec<C>) -> Self {
        let category_map: HashMap<C, usize> = categories
            .iter()
            .enumerate()
            .map(|(num, cat)| (cat.clone(), num))
            .collect();
        Self {
            num_categories: categories.len(),
            category_map,
            categories,
        }
    }
    /// Get label num of a category, or `None` if the category is unknown.
    pub fn get_num(&self, category: &C) -> Option<&usize> {
        self.category_map.get(category)
    }
    /// Return category corresponding to label num.
    ///
    /// # Panics
    ///
    /// Panics if `num >= self.num_categories()`.
    pub fn get_cat(&self, num: usize) -> &C {
        &self.categories[num]
    }
    /// List all categories (position = category number)
    pub fn get_categories(&self) -> &[C] {
        &self.categories[..]
    }
    /// Get one-hot encoding of the category, or `None` if it is unknown.
    pub fn get_one_hot<U, V>(&self, category: &C) -> Option<V>
    where
        U: RealNumber,
        V: BaseVector<U>,
    {
        self.get_num(category)
            .map(|&idx| make_one_hot::<U, V>(idx, self.num_categories))
    }
    /// Invert a one-hot vector back to its category.
    ///
    /// Fails unless exactly one entry of `one_hot` equals one.
    pub fn invert_one_hot<U, V>(&self, one_hot: V) -> Result<C, Failed>
    where
        U: RealNumber,
        V: BaseVector<U>,
    {
        let pos = U::one();
        // Indices of every entry equal to one.
        let hot_indices: Vec<usize> = (0..one_hot.len())
            .filter(|&idx| one_hot.get(idx) == pos)
            .collect();
        if let [idx] = hot_indices[..] {
            return Ok(self.get_cat(idx).clone());
        }
        // NOTE(review): the "entires" typo is kept intentionally — the unit
        // tests assert this exact message.
        let pos_entries = format!(
            "Expected a single positive entry, {} entires found",
            hot_indices.len()
        );
        Err(Failed::transform(&pos_entries[..]))
    }
    /// Get ordinal encoding of the category, or `None` if it is unknown
    /// (or the index cannot be represented in `U`).
    pub fn get_ordinal<U>(&self, category: &C) -> Option<U>
    where
        U: RealNumber,
    {
        self.get_num(category).and_then(|&idx| U::from_usize(idx))
    }
}
/// Make a one-hot encoded vector from a categorical variable
///
/// Example:
/// ```
/// use smartcore::preprocessing::series_encoder::make_one_hot;
/// let one_hot: Vec<f64> = make_one_hot(2, 3);
/// assert_eq!(one_hot, vec![0.0, 0.0, 1.0]);
/// ```
pub fn make_one_hot<T, V>(category_idx: usize, num_categories: usize) -> V
where
    T: RealNumber,
    V: BaseVector<T>,
{
    // Start from an all-zero vector and flip the requested position to one.
    let mut encoded = V::zeros(num_categories);
    encoded.set(category_idx, T::one());
    encoded
}
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn from_categories() {
        // Class numbers follow order of first appearance: 1 -> 0, 2 -> 1, ...
        let fake_categories: Vec<usize> = vec![1, 2, 3, 4, 5, 3, 5, 3, 1, 2, 4];
        let enc = CategoryMapper::<usize>::fit_to_iter(fake_categories.iter().copied());
        let oh_vec: Vec<f64> = enc.get_one_hot(&1).expect("Wrong categories");
        let res: Vec<f64> = vec![1f64, 0f64, 0f64, 0f64, 0f64];
        assert_eq!(oh_vec, res);
    }

    /// Helper: encoder over ["background", "dog", "cat"] (positional classes 0..3).
    fn build_fake_str_enc<'a>() -> CategoryMapper<&'a str> {
        CategoryMapper::<&str>::from_positional_category_vec(vec!["background", "dog", "cat"])
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn ordinal_encoding() {
        let enc = build_fake_str_enc();
        assert_eq!(1f64, enc.get_ordinal::<f64>(&"dog").unwrap())
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn category_map_and_vec() {
        let category_map: HashMap<&str, usize> = vec![("background", 0), ("dog", 1), ("cat", 2)]
            .into_iter()
            .collect();
        let enc = CategoryMapper::<&str>::from_category_map(category_map);
        let oh_vec: Vec<f64> = enc.get_one_hot(&"dog").expect("Wrong categories");
        let res: Vec<f64> = vec![0f64, 1f64, 0f64];
        assert_eq!(oh_vec, res);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn positional_categories_vec() {
        let enc = build_fake_str_enc();
        let oh_vec: Vec<f64> = enc.get_one_hot(&"dog").expect("Wrong categories");
        let res: Vec<f64> = vec![0.0, 1.0, 0.0];
        assert_eq!(oh_vec, res);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn invert_label_test() {
        let enc = build_fake_str_enc();
        let res: Vec<f64> = vec![0.0, 1.0, 0.0];
        let lab = enc.invert_one_hot(res).unwrap();
        assert_eq!(lab, "dog");
        // An all-zero vector has no positive entry and must fail.
        // ("entires" matches the exact message produced by the encoder.)
        let err = enc.invert_one_hot(vec![0.0, 0.0, 0.0]).unwrap_err();
        assert_eq!(
            err,
            Failed::transform("Expected a single positive entry, 0 entires found")
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn test_many_categories() {
        let enc = build_fake_str_enc();
        // "fish" was never seen by the encoder, so it yields None.
        let cat_it = ["dog", "cat", "fish", "background"].iter().cloned();
        let res: Vec<Option<Vec<f64>>> = cat_it.map(|v| enc.get_one_hot(&v)).collect();
        let v = vec![
            Some(vec![0.0, 1.0, 0.0]),
            Some(vec![0.0, 0.0, 1.0]),
            None,
            Some(vec![1.0, 0.0, 0.0]),
        ];
        assert_eq!(res, v)
    }
}
+13 -4
View File
@@ -26,6 +26,7 @@
pub mod svc;
pub mod svr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::linalg::BaseVector;
@@ -93,18 +94,21 @@ impl Kernels {
}
/// Linear Kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LinearKernel {}
/// Radial basis function (Gaussian) kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RBFKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
}
/// Polynomial kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct PolynomialKernel<T: RealNumber> {
/// degree of the polynomial
pub degree: T,
@@ -115,7 +119,8 @@ pub struct PolynomialKernel<T: RealNumber> {
}
/// Sigmoid (hyperbolic tangent) kernel
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct SigmoidKernel<T: RealNumber> {
/// kernel coefficient
pub gamma: T,
@@ -154,6 +159,7 @@ impl<T: RealNumber, V: BaseVector<T>> Kernel<T, V> for SigmoidKernel<T> {
mod tests {
use super::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn linear_kernel() {
let v1 = vec![1., 2., 3.];
@@ -162,6 +168,7 @@ mod tests {
assert_eq!(32f64, Kernels::linear().apply(&v1, &v2));
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn rbf_kernel() {
let v1 = vec![1., 2., 3.];
@@ -170,6 +177,7 @@ mod tests {
assert!((0.2265f64 - Kernels::rbf(0.055).apply(&v1, &v2)).abs() < 1e-4);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn polynomial_kernel() {
let v1 = vec![1., 2., 3.];
@@ -181,6 +189,7 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn sigmoid_kernel() {
let v1 = vec![1., 2., 3.];
+85 -26
View File
@@ -57,9 +57,9 @@
//! let y = vec![ 0., 0., 0., 0., 0., 0., 0., 0.,
//! 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.];
//!
//! let svr = SVC::fit(&x, &y, SVCParameters::default().with_c(200.0)).unwrap();
//! let svc = SVC::fit(&x, &y, SVCParameters::default().with_c(200.0)).unwrap();
//!
//! let y_hat = svr.predict(&x).unwrap();
//! let y_hat = svc.predict(&x).unwrap();
//! ```
//!
//! ## References:
@@ -76,6 +76,7 @@ use std::marker::PhantomData;
use rand::seq::SliceRandom;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -85,7 +86,8 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::svm::{Kernel, Kernels, LinearKernel};
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// SVC Parameters
pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
/// Number of epochs.
@@ -100,11 +102,15 @@ pub struct SVCParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>
m: PhantomData<M>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
#[cfg_attr(
feature = "serde",
serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))
)]
/// Support Vector Classifier
pub struct SVC<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
classes: Vec<T>,
@@ -114,7 +120,8 @@ pub struct SVC<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
b: T,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct SupportVector<T: RealNumber, V: BaseVector<T>> {
index: usize,
x: V,
@@ -215,7 +222,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
if n != y.len() {
return Err(Failed::fit(
&"Number of rows of X doesn\'t match number of rows of Y".to_string(),
"Number of rows of X doesn\'t match number of rows of Y",
));
}
@@ -256,21 +263,33 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
/// Predicts estimated class labels from `x`
/// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
pub fn predict(&self, x: &M) -> Result<M::RowVector, Failed> {
let (n, _) = x.shape();
let mut y_hat = self.decision_function(x)?;
let mut y_hat = M::RowVector::zeros(n);
for i in 0..n {
let cls_idx = match self.predict_for_row(x.get_row(i)) == T::one() {
for i in 0..y_hat.len() {
let cls_idx = match y_hat.get(i) > T::zero() {
false => self.classes[0],
true => self.classes[1],
};
y_hat.set(i, cls_idx);
}
Ok(y_hat)
}
/// Evaluates the decision function for the rows in `x`
/// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
pub fn decision_function(&self, x: &M) -> Result<M::RowVector, Failed> {
let (n, _) = x.shape();
let mut y_hat = M::RowVector::zeros(n);
for i in 0..n {
y_hat.set(i, self.predict_for_row(x.get_row(i)));
}
Ok(y_hat)
}
fn predict_for_row(&self, x: M::RowVector) -> T {
let mut f = self.b;
@@ -278,11 +297,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
f += self.w[i] * self.kernel.apply(&x, &self.instances[i]);
}
if f > T::zero() {
T::one()
} else {
-T::one()
}
f
}
}
@@ -370,7 +385,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
Optimizer {
x,
y,
parameters: &parameters,
parameters,
svmin: 0,
svmax: 0,
gmin: T::max_value(),
@@ -582,7 +597,7 @@ impl<'a, T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> Optimizer<'a,
for i in 0..self.sv.len() {
let v = &self.sv[i];
let z = v.grad - gm;
let k = cache.get(sv1, &v);
let k = cache.get(sv1, v);
let mut curv = km + v.k - T::two() * k;
if curv <= T::zero() {
curv = self.tau;
@@ -719,8 +734,10 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::accuracy;
#[cfg(feature = "serde")]
use crate::svm::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn svc_fit_predict() {
let x = DenseMatrix::from_2d_array(&[
@@ -763,6 +780,46 @@ mod tests {
assert!(accuracy(&y_hat, &y) >= 0.9);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn svc_fit_decision_function() {
let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]);
let x2 = DenseMatrix::from_2d_array(&[
&[3.0, 3.0],
&[4.0, 4.0],
&[6.0, 6.0],
&[10.0, 10.0],
&[1.0, 1.0],
&[0.0, 0.0],
]);
let y: Vec<f64> = vec![0., 0., 1., 1.];
let y_hat = SVC::fit(
&x,
&y,
SVCParameters::default()
.with_c(200.0)
.with_kernel(Kernels::linear()),
)
.and_then(|lr| lr.decision_function(&x2))
.unwrap();
// x can be classified by a straight line through [6.0, 0.0] and [0.0, 6.0],
// so the score should increase as points get further away from that line
println!("{:?}", y_hat);
assert!(y_hat[1] < y_hat[2]);
assert!(y_hat[2] < y_hat[3]);
// for negative scores the score should decrease
assert!(y_hat[4] > y_hat[5]);
// y_hat[0] is on the line, so its score should be close to 0
assert!(y_hat[0].abs() <= 0.1);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn svc_fit_predict_rbf() {
let x = DenseMatrix::from_2d_array(&[
@@ -806,7 +863,9 @@ mod tests {
assert!(accuracy(&y_hat, &y) >= 0.9);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn svc_serde() {
let x = DenseMatrix::from_2d_array(&[
&[5.1, 3.5, 1.4, 0.2],
@@ -835,11 +894,11 @@ mod tests {
-1., -1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
];
let svr = SVC::fit(&x, &y, Default::default()).unwrap();
let svc = SVC::fit(&x, &y, Default::default()).unwrap();
let deserialized_svr: SVC<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap();
let deserialized_svc: SVC<f64, DenseMatrix<f64>, LinearKernel> =
serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap();
assert_eq!(svr, deserialized_svr);
assert_eq!(svc, deserialized_svc);
}
}
+19 -8
View File
@@ -68,6 +68,7 @@ use std::cell::{Ref, RefCell};
use std::fmt::Debug;
use std::marker::PhantomData;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::api::{Predictor, SupervisedEstimator};
@@ -77,7 +78,8 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;
use crate::svm::{Kernel, Kernels, LinearKernel};
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// SVR Parameters
pub struct SVRParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
/// Epsilon in the epsilon-SVR model.
@@ -92,11 +94,15 @@ pub struct SVRParameters<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>
m: PhantomData<M>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
#[cfg_attr(
feature = "serde",
serde(bound(
serialize = "M::RowVector: Serialize, K: Serialize, T: Serialize",
deserialize = "M::RowVector: Deserialize<'de>, K: Deserialize<'de>, T: Deserialize<'de>",
))
)]
/// Epsilon-Support Vector Regression
pub struct SVR<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
@@ -106,7 +112,8 @@ pub struct SVR<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> {
b: T,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct SupportVector<T: RealNumber, V: BaseVector<T>> {
index: usize,
x: V,
@@ -205,7 +212,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVR<T, M, K> {
if n != y.len() {
return Err(Failed::fit(
&"Number of rows of X doesn\'t match number of rows of Y".to_string(),
"Number of rows of X doesn\'t match number of rows of Y",
));
}
@@ -526,8 +533,10 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::*;
use crate::metrics::mean_squared_error;
#[cfg(feature = "serde")]
use crate::svm::*;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn svr_fit_predict() {
let x = DenseMatrix::from_2d_array(&[
@@ -561,7 +570,9 @@ mod tests {
assert!(mean_squared_error(&y_hat, &y) < 2.5);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn svr_serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159.0, 107.608, 1947., 60.323],
+36 -15
View File
@@ -68,6 +68,8 @@ use std::fmt::Debug;
use std::marker::PhantomData;
use rand::seq::SliceRandom;
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
@@ -76,7 +78,8 @@ use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Parameters of Decision Tree
pub struct DecisionTreeClassifierParameters {
/// Split criteria to use when building a tree.
@@ -90,7 +93,8 @@ pub struct DecisionTreeClassifierParameters {
}
/// Decision Tree
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct DecisionTreeClassifier<T: RealNumber> {
nodes: Vec<Node<T>>,
parameters: DecisionTreeClassifierParameters,
@@ -100,7 +104,8 @@ pub struct DecisionTreeClassifier<T: RealNumber> {
}
/// The function to measure the quality of a split.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub enum SplitCriterion {
/// [Gini index](../decision_tree_classifier/index.html)
Gini,
@@ -110,9 +115,10 @@ pub enum SplitCriterion {
ClassificationError,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct Node<T: RealNumber> {
index: usize,
_index: usize,
output: usize,
split_feature: usize,
split_value: Option<T>,
@@ -198,7 +204,7 @@ impl Default for DecisionTreeClassifierParameters {
impl<T: RealNumber> Node<T> {
fn new(index: usize, output: usize) -> Self {
Node {
index,
_index: index,
output,
split_feature: 0,
split_value: Option::None,
@@ -323,7 +329,14 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
) -> Result<DecisionTreeClassifier<T>, Failed> {
let (x_nrows, num_attributes) = x.shape();
let samples = vec![1; x_nrows];
DecisionTreeClassifier::fit_weak_learner(x, y, samples, num_attributes, parameters)
DecisionTreeClassifier::fit_weak_learner(
x,
y,
samples,
num_attributes,
parameters,
&mut rand::thread_rng(),
)
}
pub(crate) fn fit_weak_learner<M: Matrix<T>>(
@@ -332,6 +345,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
samples: Vec<usize>,
mtry: usize,
parameters: DecisionTreeClassifierParameters,
rng: &mut impl Rng,
) -> Result<DecisionTreeClassifier<T>, Failed> {
let y_m = M::from_row_vector(y.clone());
let (_, y_ncols) = y_m.shape();
@@ -375,17 +389,17 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
depth: 0,
};
let mut visitor = NodeVisitor::<T, M>::new(0, samples, &order, &x, &yi, 1);
let mut visitor = NodeVisitor::<T, M>::new(0, samples, &order, x, &yi, 1);
let mut visitor_queue: LinkedList<NodeVisitor<'_, T, M>> = LinkedList::new();
if tree.find_best_cutoff(&mut visitor, mtry) {
if tree.find_best_cutoff(&mut visitor, mtry, rng) {
visitor_queue.push_back(visitor);
}
while tree.depth < tree.parameters.max_depth.unwrap_or(std::u16::MAX) {
match visitor_queue.pop_front() {
Some(node) => tree.split(node, mtry, &mut visitor_queue),
Some(node) => tree.split(node, mtry, &mut visitor_queue, rng),
None => break,
};
}
@@ -438,6 +452,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
&mut self,
visitor: &mut NodeVisitor<'_, T, M>,
mtry: usize,
rng: &mut impl Rng,
) -> bool {
let (n_rows, n_attr) = visitor.x.shape();
@@ -477,7 +492,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
let mut variables = (0..n_attr).collect::<Vec<_>>();
if mtry < n_attr {
variables.shuffle(&mut rand::thread_rng());
variables.shuffle(rng);
}
for variable in variables.iter().take(mtry) {
@@ -499,7 +514,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
visitor: &mut NodeVisitor<'_, T, M>,
n: usize,
count: &[usize],
false_count: &mut Vec<usize>,
false_count: &mut [usize],
parent_impurity: T,
j: usize,
) {
@@ -536,7 +551,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
- T::from(tc).unwrap() / T::from(n).unwrap()
* impurity(&self.parameters.criterion, &true_count, tc)
- T::from(fc).unwrap() / T::from(n).unwrap()
* impurity(&self.parameters.criterion, &false_count, fc);
* impurity(&self.parameters.criterion, false_count, fc);
if self.nodes[visitor.node].split_score == Option::None
|| gain > self.nodes[visitor.node].split_score.unwrap()
@@ -561,6 +576,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
mut visitor: NodeVisitor<'a, T, M>,
mtry: usize,
visitor_queue: &mut LinkedList<NodeVisitor<'a, T, M>>,
rng: &mut impl Rng,
) -> bool {
let (n, _) = visitor.x.shape();
let mut tc = 0;
@@ -609,7 +625,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
visitor.level + 1,
);
if self.find_best_cutoff(&mut true_visitor, mtry) {
if self.find_best_cutoff(&mut true_visitor, mtry, rng) {
visitor_queue.push_back(true_visitor);
}
@@ -622,7 +638,7 @@ impl<T: RealNumber> DecisionTreeClassifier<T> {
visitor.level + 1,
);
if self.find_best_cutoff(&mut false_visitor, mtry) {
if self.find_best_cutoff(&mut false_visitor, mtry, rng) {
visitor_queue.push_back(false_visitor);
}
@@ -635,6 +651,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn gini_impurity() {
assert!(
@@ -651,6 +668,7 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_iris() {
let x = DenseMatrix::from_2d_array(&[
@@ -703,6 +721,7 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_predict_baloons() {
let x = DenseMatrix::from_2d_array(&[
@@ -739,7 +758,9 @@ mod tests {
);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[1., 1., 1., 0.],
+30 -12
View File
@@ -63,6 +63,8 @@ use std::default::Default;
use std::fmt::Debug;
use rand::seq::SliceRandom;
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::algorithm::sort::quick_sort::QuickArgSort;
@@ -71,7 +73,8 @@ use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// Parameters of Regression Tree
pub struct DecisionTreeRegressorParameters {
/// The maximum depth of the tree.
@@ -83,16 +86,18 @@ pub struct DecisionTreeRegressorParameters {
}
/// Regression Tree
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub struct DecisionTreeRegressor<T: RealNumber> {
nodes: Vec<Node<T>>,
parameters: DecisionTreeRegressorParameters,
depth: u16,
}
#[derive(Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
struct Node<T: RealNumber> {
index: usize,
_index: usize,
output: T,
split_feature: usize,
split_value: Option<T>,
@@ -132,7 +137,7 @@ impl Default for DecisionTreeRegressorParameters {
impl<T: RealNumber> Node<T> {
fn new(index: usize, output: T) -> Self {
Node {
index,
_index: index,
output,
split_feature: 0,
split_value: Option::None,
@@ -238,7 +243,14 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
) -> Result<DecisionTreeRegressor<T>, Failed> {
let (x_nrows, num_attributes) = x.shape();
let samples = vec![1; x_nrows];
DecisionTreeRegressor::fit_weak_learner(x, y, samples, num_attributes, parameters)
DecisionTreeRegressor::fit_weak_learner(
x,
y,
samples,
num_attributes,
parameters,
&mut rand::thread_rng(),
)
}
pub(crate) fn fit_weak_learner<M: Matrix<T>>(
@@ -247,6 +259,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
samples: Vec<usize>,
mtry: usize,
parameters: DecisionTreeRegressorParameters,
rng: &mut impl Rng,
) -> Result<DecisionTreeRegressor<T>, Failed> {
let y_m = M::from_row_vector(y.clone());
@@ -276,17 +289,17 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
depth: 0,
};
let mut visitor = NodeVisitor::<T, M>::new(0, samples, &order, &x, &y_m, 1);
let mut visitor = NodeVisitor::<T, M>::new(0, samples, &order, x, &y_m, 1);
let mut visitor_queue: LinkedList<NodeVisitor<'_, T, M>> = LinkedList::new();
if tree.find_best_cutoff(&mut visitor, mtry) {
if tree.find_best_cutoff(&mut visitor, mtry, rng) {
visitor_queue.push_back(visitor);
}
while tree.depth < tree.parameters.max_depth.unwrap_or(std::u16::MAX) {
match visitor_queue.pop_front() {
Some(node) => tree.split(node, mtry, &mut visitor_queue),
Some(node) => tree.split(node, mtry, &mut visitor_queue, rng),
None => break,
};
}
@@ -339,6 +352,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
&mut self,
visitor: &mut NodeVisitor<'_, T, M>,
mtry: usize,
rng: &mut impl Rng,
) -> bool {
let (_, n_attr) = visitor.x.shape();
@@ -353,7 +367,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
let mut variables = (0..n_attr).collect::<Vec<_>>();
if mtry < n_attr {
variables.shuffle(&mut rand::thread_rng());
variables.shuffle(rng);
}
let parent_gain =
@@ -428,6 +442,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
mut visitor: NodeVisitor<'a, T, M>,
mtry: usize,
visitor_queue: &mut LinkedList<NodeVisitor<'a, T, M>>,
rng: &mut impl Rng,
) -> bool {
let (n, _) = visitor.x.shape();
let mut tc = 0;
@@ -476,7 +491,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
visitor.level + 1,
);
if self.find_best_cutoff(&mut true_visitor, mtry) {
if self.find_best_cutoff(&mut true_visitor, mtry, rng) {
visitor_queue.push_back(true_visitor);
}
@@ -489,7 +504,7 @@ impl<T: RealNumber> DecisionTreeRegressor<T> {
visitor.level + 1,
);
if self.find_best_cutoff(&mut false_visitor, mtry) {
if self.find_best_cutoff(&mut false_visitor, mtry, rng) {
visitor_queue.push_back(false_visitor);
}
@@ -502,6 +517,7 @@ mod tests {
use super::*;
use crate::linalg::naive::dense_matrix::DenseMatrix;
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn fit_longley() {
let x = DenseMatrix::from_2d_array(&[
@@ -576,7 +592,9 @@ mod tests {
}
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "serde")]
fn serde() {
let x = DenseMatrix::from_2d_array(&[
&[234.289, 235.6, 159., 107.608, 1947., 60.323],