From f93286ffbd8179a8b848c31b5cd25335b28ff0c2 Mon Sep 17 00:00:00 2001 From: Kiran Eiden Date: Sun, 2 Jan 2022 19:46:21 -0800 Subject: [PATCH 1/2] Fix bug in cover tree KNN algorithm Prior to this change, the find function implementation for the CoverTree class could return the wrong result when there were multiple points in the dataset equidistant from p. For example, the current test passed for k=3 but failed to produce the correct result for k=4 (it claimed that 3, 4, 5, and 7 were the 4 closest points to 5 in the dataset rather than 3, 4, 5, and 6). Sorting the neighbors vector before collecting the first k values from it resolved this issue. --- src/algorithm/neighbour/cover_tree.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index 8fb8b7d..355a448 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -179,7 +179,8 @@ impl> CoverTree } } } - + + neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); Ok(neighbors.into_iter().take(k).collect()) } From 389b0e8e6725d82940e7eca54798798e87ac429e Mon Sep 17 00:00:00 2001 From: Kiran Eiden Date: Tue, 4 Jan 2022 14:50:47 -0800 Subject: [PATCH 2/2] Only sort in CoverTree::find function if there are more than k points Sorting is only needed when the list of KNN candidates contains more than k entries; with k or fewer candidates, every candidate is returned regardless of order. --- src/algorithm/neighbour/cover_tree.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index 355a448..ad2b071 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -180,7 +180,9 @@ impl> CoverTree } } - neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + if neighbors.len() > k { + neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + } Ok(neighbors.into_iter().take(k).collect()) }