Skip to content

Commit

Permalink
remove only empty rank cols (microbiome#572)
Browse files Browse the repository at this point in the history
Signed-off-by: Daena Rys <[email protected]>
Co-authored-by: Tuomas Borman <[email protected]>
Co-authored-by: TuomasBorman <[email protected]>
  • Loading branch information
3 people authored Jun 12, 2024
1 parent 9546b55 commit 1b9ed9f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 26 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mia
Type: Package
Version: 1.13.21
Version: 1.13.22
Authors@R:
c(person(given = "Felix G.M.", family = "Ernst", role = c("aut"),
email = "[email protected]",
Expand Down
24 changes: 14 additions & 10 deletions R/agglomerate.R
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,8 @@ setMethod(
x
}

# This function removes empty columns from rowdata. (Those that include only
# NA values)
# This function removes empty rank columns from rowdata. (Those that include
# only NA values)
.remove_NA_cols_from_rowdata <- function(x, remove_empty_ranks = FALSE, ...){
# Check remove_empty_ranks
if( !.is_a_bool(remove_empty_ranks) ){
Expand All @@ -400,14 +400,18 @@ setMethod(
}
# If user wants to remove those columns
if( remove_empty_ranks ){
# Get rowData
rd <- rowData(x)
# Does teh column include data?
columns_including_data <- apply(rd, 2, function(x){!all(is.na(x))})
# Subset data so that it includes only columns that include data
rd <- rd[, columns_including_data]
# Assign it back to SE
rowData(x) <- rd
# Get columns that include taxonomy information
rank_cols <- taxonomyRanks(x)
# Get rowData with only taxonomy
rd <- rowData(x)[ , rank_cols, drop = FALSE]
# Remove taxonomy from rowData
rowData(x) <- rowData(x)[
, !colnames(rowData(x)) %in% rank_cols, drop = FALSE]
# Subset data so that it includes only rank columns that include data
non_empty_ranks <- apply(rd, 2, function(x) !all(is.na(x)))
rd <- rd[ , non_empty_ranks, drop = FALSE]
# Adding taxonomy back to SE
rowData(x) <- cbind(rowData(x), rd)
}
return(x)
}
Expand Down
45 changes: 30 additions & 15 deletions tests/testthat/test-3agglomerate.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,36 @@ test_that("agglomerate", {
expect_equal(nrow(test0), 945)
expect_equal(nrow(test1), 2307)

# Test that remove_empty_ranks work
expect_error(
agglomerateByRank(tse, rank = "Class", remove_empty_ranks = NULL))
expect_error(
agglomerateByRank(tse, rank = "Class", remove_empty_ranks = "NULL"))
expect_error(
agglomerateByRank(tse, rank = "Class", remove_empty_ranks = 1))
expect_error(
agglomerateByRank(
tse, rank = "Class", remove_empty_ranks = c(TRUE, TRUE)))

# Add a column to rowData(se) to test that only NA rank columns are removed
# when remove_empty_ranks = TRUE
rank <- "Class"
rowData(tse)[["test"]] <- rep(NA, nrow(rowData(tse)))
x <- agglomerateByRank(tse, rank = rank)
rd1 <- rowData(x)
x <- agglomerateByRank(tse, rank = rank, remove_empty_ranks = TRUE)
rd2 <- rowData(x)
cols <- taxonomyRanks(tse)[ seq_len(which(taxonomyRanks(tse) == "Class")) ]
cols <- c(cols, "test")
expect_equal(rd1[, cols], rd2[, cols])
expect_true( ncol(rd1) > ncol(rd2) )
# Test that make_unique work
uniq <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE)
not_uniq <- agglomerateByRank(
tse, rank = "Species", make_unique = FALSE, na.rm = FALSE)
expect_true( !any( duplicated(rownames(uniq)) ) )
expect_true( any( duplicated(rownames(not_uniq)) ) )

# Load data from miaTime package
skip_if_not(require("miaTime", quietly = TRUE))
data(SilvermanAGutData)
Expand Down Expand Up @@ -165,19 +195,4 @@ test_that("agglomerate", {
"NYTDRRKDVHNKNDRVGRNDRSBRRAWTBYNHRKKKWRSSRKKRAAWKSSKWR",
"RWDWTNDBRVRRAMHHCMRDKKSSRARGSSVSYYHNYBRRVHNDNNHYKRMVV",
"YKVRDNNNSRAARSBDKGGKK"))
# Test that remove_empty_ranks work
expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = NULL))
expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = "NULL"))
expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = 1))
expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = c(TRUE, TRUE)))
x <- agglomerateByRank(se, rank = "Class")
rd1 <- rowData(x)[, 1:3]
x <- agglomerateByRank(se, rank = "Class", remove_empty_ranks = TRUE)
rd2 <- rowData(x)
expect_equal(rd1, rd2)
# Test that make_unique work
uniq <- agglomerateByRank(se, rank = "Species")
not_uniq <- agglomerateByRank(se, rank = "Species", make_unique = FALSE)
expect_true( !any( duplicated(rownames(uniq)) ) )
expect_true( any( duplicated(rownames(not_uniq)) ) )
})

0 comments on commit 1b9ed9f

Please sign in to comment.