Skip to content

Commit

Permalink
Merge branch 'issue-24' into develop
Browse files Browse the repository at this point in the history
* issue-24:
  Increment version number to 0.0.0.9029
  Disabled FASTA tests on greedyMix() (#24)
  Added synthetic FASTA file (#24)
  Ensure diffInCounts returns as.matrix (#24)
  Adapted tests to FASTA on greedyMix() (#24)
  Removed baps file pointing to fasta
  Improved conversion from FASTA to BAPS (#24)
  Improved handling of FASTA data (#24)
  `process_BAPS_data()` can handle file being an R object
  Added function to convert from FASTA to BAPS (#24)
  Unwrapped `greedyMix() example (#24)
  • Loading branch information
wleoncio committed Aug 26, 2024
2 parents 5cc111f + d925545 commit 010f18d
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 27 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rBAPS
Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9028
Version: 0.0.0.9029
Date: 2020-11-09
Authors@R:
c(
Expand Down Expand Up @@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software
License: GPL-3
BugReports: https://github.com/ocbe-uio/rBAPS/issues
Encoding: UTF-8
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Suggests:
testthat (>= 2.1.0)
Imports:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(convert_FASTA_to_BAPS)
export(greedyMix)
export(handleData)
export(importFile)
Expand Down
2 changes: 1 addition & 1 deletion R/computeDiffInCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
diffInCounts[element] <- diffInCounts[element] + 1
}
}
return(diffInCounts)
return(as.matrix(diffInCounts))
}
16 changes: 16 additions & 0 deletions R/convert_FASTA_to_BAPS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#' @title Convert from FASTA to BAPS
#' @description Converts a file (not an R object) from FASTA to BAPS format
#' @param file filename of FASTA file
#' @return `data` in BAPS format
#' @author Waldir Leoncio
#' @export
#' @examples
#' file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
#' convert_FASTA_to_BAPS(file)
convert_FASTA_to_BAPS <- function(file) {
data <- load_fasta(file) # Processing data
data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential)
data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9
colnames(data) <- paste("V", seq_len(ncol(data)), sep = "")
return(data)
}
9 changes: 4 additions & 5 deletions R/greedyMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,17 @@
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
#' @examples
#' \dontrun{ # TEMP: unwrap once #24 is resolved
#' data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS")
#' data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS")
#' greedyMix(data, "baps")
#' } # TEMP: unwrap once #24 is resolved
greedyMix <- function(
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L,
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
# Importing and handling data ================================================
if (tolower(format) %in% "fasta") {
stop("FASTA format not yet supported on greedyMix")
data <- convert_FASTA_to_BAPS(data)
format <- "baps"
}
if (tolower(format) %in% "baps") {
data <- process_BAPS_data(data, NULL)
Expand Down Expand Up @@ -69,7 +68,7 @@ greedyMix <- function(
# Generating partition summary ===============================================
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]]
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
Expand Down
12 changes: 9 additions & 3 deletions R/handleData.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ handleData <- function(raw_data, format = "Genepop") {
"bam" = stop("BAM format not supported for processing yet")
)
data <- as.matrix(raw_data)

dataApu <- data[, seq_len(nloci)]
nollat <- matlab2r::find(dataApu == 0)
if (!isempty(nollat)) {
Expand All @@ -54,9 +55,14 @@ handleData <- function(raw_data, format = "Genepop") {
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
}

for (loc in seq_len(nloci)) {
for (all in seq_len(noalle[loc])) {
data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
# This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T}
codes <- unique(as.vector(data[, -ncol(data)]))
skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4
if (!skip_conversion) {
for (loc in seq_len(nloci)) {
for (all in seq_len(noalle[loc])) {
data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
}
}
}

Expand Down
13 changes: 11 additions & 2 deletions R/process_BAPS_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,23 @@ process_BAPS_data <- function(file, partitionCompare) {
if (!is.null(partitionCompare)) {
cat('Data:', file, '\n')
}
data <- read.table(file)
ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes?

# Importing data
if (is.character(file)) {
data <- read.table(file)
} else {
data <- file
}

ninds <- testaaOnkoKunnollinenBapsData(data) # Checks if last column is ID
if (ninds == 0) {
warning('Incorrect Data-file.')
return(NULL)
}

popnames <- NULL # Dropped specification of population names (from BAPS 6)

# Processing data
result <- handleData(data, format = "BAPS")
data <- result$newData
rowsFromInd <- result$rowsFromInd
Expand Down
39 changes: 39 additions & 0 deletions inst/extdata/FASTA_clustering_haploid_ext.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
>1
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
TAGTAACATGGTCAGCTGCTCATCCGTATT

>2
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
AATGCACCACAAACATACCCCACAGGCTCG

>3
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
GTCTGAGTATGGATGGTGGAGGCGGAGCCG

>4
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
CAAGGATAAAGTTCACCCATCACGTTGACC

>5
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
GTAACATAGCTACCAGGTCCCATTCCAGGA

>6
TCCCCCCAGTGGACACGGCTCGGGTAATGCAGCTTACCTCAACGCTAACGCATTTGACAGTAGTGAATCA
CGGGCAACGCTGGGTGATTGCAAGTTTTGT

>7
GCAACCACTGGTCGCCTGGAGCATTGATCAGGAACATGTCTGCAAGGGGGGCCGTTGCGGGTTTCAGTCA
TCGTATTGCGCTGCAAATCCTCGGAGCCTC

>8
CACCCGTAAAGCACGAGTAGGTTTCACCGCGACTTATATATTCCACCATACGGTTAACAAGGCAACACTT
ATTCGTCGTCCAATGATCGTCCCTCTCCAG

>9
CGAATCCATTCGGGATAAAGTTAATACGTAAGTCGAACGGGGTTTAGGAAGAGCTCTGCTGTTAAGCGCG
CTTATCATCTTATATGTGTCAGTTGTGTAC

>10
CGTTCGCATTTATAGGATATCCCCTAAACTAATTGGTAGTGATGGTATACCAGCGGTGCATTGTCCTCGC
CTGTAGTTTAAGTCAACCTCTGCCTTAATC
24 changes: 24 additions & 0 deletions man/convert_FASTA_to_BAPS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions man/greedyMix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 1 addition & 11 deletions tests/testthat/test-greedyMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ raw_bam <- importFile(
data = file.path(path_inst, "bam_example.bam"),
format = "BAM",
)
raw_baps <- importFile(
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
format = "FASTA"
)

test_that("Files are imported correctly", {
expect_equal(dim(raw_fasta), c(5, 99))
Expand All @@ -61,13 +57,6 @@ test_that("Files are imported correctly", {
)
)
expect_equal(length(raw_bam[[1]]), 13)
expect_error(
greedyMix(
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
format = "FASTA"
),
"FASTA format not yet supported on greedyMix"
)
})

test_that("greedyMix() fails successfully", {
Expand All @@ -77,6 +66,7 @@ test_that("greedyMix() fails successfully", {

test_that("greedyMix() works when it should", {
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
greedy_baps <- greedyMix(baps_file, "BAPS")
expect_type(greedy_baps, "list")
expect_length(greedy_baps, 10L)
Expand Down

0 comments on commit 010f18d

Please sign in to comment.