diff --git a/DESCRIPTION b/DESCRIPTION index 22ea7ca..a49ea3d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,6 +36,6 @@ VignetteBuilder: knitr Collate: 'RcppExports.R' 'clevr.R' + 'measures_clusterings.R' 'transformations.R' 'measures_pairs.R' - 'measures_clusterings.R' diff --git a/tests/testthat/test-clustering_point_estimates.R.bak b/tests/testthat/test-clustering_point_estimates.R.bak new file mode 100644 index 0000000..21ce530 --- /dev/null +++ b/tests/testthat/test-clustering_point_estimates.R.bak @@ -0,0 +1,25 @@ + +context("Most probable clusters") + +test_that("output of most probable clusters is correct on a simple example", { + samples <- rbind(c(1, 1, 2, 2), c(1, 1, 2, 2), c(1, 1, 2, 3)) + result <- most_probable_clusters(samples) + expect_equal(result$clusters, list(c(1, 2), c(1, 2), c(3, 4), c(3, 4))) + expect_equal(result$probabilities, c(1, 1, 2/3, 2/3)) +}) + +test_that("output of most probable clusters is correct when record ids are provided", { + samples <- rbind(c(1, 1, 2, 2), c(1, 1, 2, 2), c(1, 1, 2, 3)) + result <- most_probable_clusters(samples, rec_ids = c("B", "D", "C", "A")) + expect_equal(result$clusters, list("B" = c("B", "D"), "D" = c("B", "D"), "C" = c("C", "A"), "A" = c("C", "A"))) + expect_equal(result$probabilities, c("B" = 1, "D" = 1, "C" = 2/3, "A" = 2/3)) +}) + +context("Shared most probable clusters") + +test_that("output of shared most probable clusters is correct on a simple example", { + samples <- rbind(c(1, 1, 2, 2), c(1, 1, 2, 2), c(1, 1, 2, 3)) + result <- shared_most_probable_clusters(samples) + expect_equal(shared_most_probable_clusters(samples), + list(c(3, 4), c(1, 2))) +}) diff --git a/tests/testthat/test-measures_pairs.R b/tests/testthat/test-measures_pairs.R new file mode 100644 index 0000000..0e6c787 --- /dev/null +++ b/tests/testthat/test-measures_pairs.R @@ -0,0 +1,200 @@ + +test_that("pairwise contingency table is correct for a simple example", { + pred_pairs <- rbind(c(1, 2), c(1, 3), c(4, 5)) + true_pairs <- rbind(c(1, 2), c(1, 5)) + result <- contingency_table_pairs(pred_pairs, true_pairs, num_pairs = 25) + true_result <- rbind("TRUE" = c("TRUE" = 1,"FALSE" = 1), "FALSE" = c("TRUE" = 2, "FALSE" = 21)) + true_result <- as.table(true_result) + names(dimnames(true_result)) <- c("Prediction", "Truth") + expect_equal(result, true_result) +}) + + +# Examples to test +make_pairs_identical <- function() { + true <- rbind(c(1, 2), c(1, 3), c(2, 3), c(4, 5)) + pred <- rbind(c(1, 2), c(1, 3), c(2, 3), c(4, 5)) + num_pairs <- 10 + measures <- list( + "precision_pairs" = 1.0, + "recall_pairs" = 1.0, + "specificity_pairs" = 1.0, + "sensitivity_pairs" = 1.0, + "f_measure_pairs" = 1.0, + "accuracy_pairs" = 1.0, + "balanced_accuracy_pairs" = 1.0, + "fowlkes_mallows_pairs" = 1.0 + ) + list("true" = true, "pred" = pred, "num_pairs" = num_pairs, "true_measures" = measures, + "description" = "pairs in complete agreement") +} + +make_pairs_distinct <- function() { + true <- rbind(c(1, 2), c(1, 3), c(2, 3)) + pred <- rbind(c(1, 4), c(2, 4), c(3, 4)) + num_pairs <- 6 + measures <- list( + "precision_pairs" = 0.0, + "recall_pairs" = 0.0, + "specificity_pairs" = 0.0, + "sensitivity_pairs" = 0.0, + "f_measure_pairs" = 0.0, + "accuracy_pairs" = 0.0, + "balanced_accuracy_pairs" = 0.0, + "fowlkes_mallows_pairs" = 0.0 + ) + list("true" = true, "pred" = pred, "num_pairs" = num_pairs, "true_measures" = measures, + "description" = "pairs in complete disagreement") +} + +make_pairs_no_pred <- function() { + true <- rbind(c(1, 2), c(1, 3), c(2, 3)) + pred <- matrix(0L, nrow = 0, ncol = 2) + num_pairs <- 3 + measures <- list( + "precision_pairs" = NaN, + "recall_pairs" = 0.0, + "specificity_pairs" = NaN, + "sensitivity_pairs" = 0.0, + "f_measure_pairs" = NaN, + "accuracy_pairs" = 0.0, + "balanced_accuracy_pairs" = NaN, + "fowlkes_mallows_pairs" = NaN + ) + list("true" = true, "pred" = pred, "num_pairs" = num_pairs, "true_measures" = measures, + "description" = "pairs with zero recall") +} + +make_pairs_one_fp <- function() { + true <- rbind(c(1, 2), c(1, 3), c(2, 3), c(4, 5)) + pred <- rbind(c(1, 2), c(1, 3), c(2, 3), c(4, 5), c(1, 4)) + num_pairs <- 10 + measures <- list( + "precision_pairs" = 4/5, + "recall_pairs" = 1.0, + "specificity_pairs" = 5/6, + "sensitivity_pairs" = 1.0, + "f_measure_pairs" = 8/9, + "accuracy_pairs" = 9/10, + "balanced_accuracy_pairs" = 11/12, + "fowlkes_mallows_pairs" = 2/sqrt(5) + ) + list("true" = true, "pred" = pred, "num_pairs" = num_pairs, "true_measures" = measures, + "description" = "pairs with one false positive error") +} + +make_pairs_no_true <- function() { + true <- matrix(0L, nrow = 0, ncol = 2) + pred <- rbind(c(1, 2), c(1, 3), c(2, 3)) + num_pairs <- 3 + measures <- list( + "precision_pairs" = 0.0, + "recall_pairs" = NaN, + "specificity_pairs" = 0.0, + "sensitivity_pairs" = NaN, + "f_measure_pairs" = NaN, + "accuracy_pairs" = 0.0, + "balanced_accuracy_pairs" = NaN, + "fowlkes_mallows_pairs" = NaN + ) + list("true" = true, "pred" = pred, "num_pairs" = num_pairs, "true_measures" = measures, + "description" = "pairs with zero precision") +} + +examples_to_test <- list(make_pairs_identical, + make_pairs_distinct, + make_pairs_no_pred, + make_pairs_no_true, + make_pairs_one_fp) + + +context("Precision of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + expect_equal(precision_pairs(true, pred), + example$true_measures[["precision_pairs"]]) + }) +} + +context("Recall of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + expect_equal(recall_pairs(true, pred), + example$true_measures[["recall_pairs"]]) + }) +} + +context("Specificity of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + num_pairs <- example$num_pairs + expect_equal(specificity_pairs(true, pred, num_pairs), + example$true_measures[["specificity_pairs"]]) + }) +} + +context("Sensitivity of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + expect_equal(sensitivity_pairs(true, pred), + example$true_measures[["sensitivity_pairs"]]) + }) +} + +context("F-Measure of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + expect_equal(f_measure_pairs(true, pred), + example$true_measures[["f_measure_pairs"]]) + }) +} + +context("Accuracy of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + num_pairs <- example$num_pairs + expect_equal(accuracy_pairs(true, pred, num_pairs), + example$true_measures[["accuracy_pairs"]]) + }) +} + +context("Balanced Accuracy of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + num_pairs <- example$num_pairs + expect_equal(balanced_accuracy_pairs(true, pred, num_pairs), + example$true_measures[["balanced_accuracy_pairs"]]) + }) +} + +context("Fowlkes-Mallows Index of Linked Pairs") +for (example in examples_to_test) { + example <- example() + test_that(paste("measure is correct for", example$description), { + true <- example$true + pred <- example$pred + expect_equal(fowlkes_mallows_pairs(true, pred), + example$true_measures[["fowlkes_mallows_pairs"]]) + }) +} diff --git a/tests/testthat/test-pairwise_measures.R b/tests/testthat/test-pairwise_measures.R deleted file mode 100644 index 161fb40..0000000 --- a/tests/testthat/test-pairwise_measures.R +++ /dev/null @@ -1,10 +0,0 @@ - -test_that("pairwise contingency table is correct for a simple example", { - pred_pairs <- rbind(c(1, 2), c(1, 3), c(4, 5)) - true_pairs <- rbind(c(1, 2), c(1, 5)) - result <- contingency_table_pairs(pred_pairs, true_pairs, num_pairs = 25) - true_result <- rbind("TRUE" = c("TRUE" = 1,"FALSE" = 1), "FALSE" = c("TRUE" = 2, "FALSE" = 21)) - true_result <- as.table(true_result) - names(dimnames(true_result)) <- c("Prediction", "Truth") - expect_equal(result, true_result) -}) diff --git a/tests/testthat/testthat-problems.rds b/tests/testthat/testthat-problems.rds deleted file mode 100644 index 365136d..0000000 Binary files a/tests/testthat/testthat-problems.rds and /dev/null differ