diff --git a/DESCRIPTION b/DESCRIPTION index a2e2c03..695be07 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9028 +Version: 0.0.0.9029 Date: 2020-11-09 Authors@R: c( @@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software License: GPL-3 BugReports: https://github.com/ocbe-uio/rBAPS/issues Encoding: UTF-8 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Suggests: testthat (>= 2.1.0) Imports: diff --git a/NAMESPACE b/NAMESPACE index 09db974..3094ed5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(convert_FASTA_to_BAPS) export(greedyMix) export(handleData) export(importFile) diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R index 10a3beb..210ecdb 100644 --- a/R/computeDiffInCounts.R +++ b/R/computeDiffInCounts.R @@ -13,5 +13,5 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) { diffInCounts[element] <- diffInCounts[element] + 1 } } - return(diffInCounts) + return(as.matrix(diffInCounts)) } diff --git a/R/convert_FASTA_to_BAPS.R b/R/convert_FASTA_to_BAPS.R new file mode 100644 index 0000000..c152635 --- /dev/null +++ b/R/convert_FASTA_to_BAPS.R @@ -0,0 +1,16 @@ +#' @title Convert from FASTA to BAPS +#' @description Converts a file (not an R object) from FASTA to BAPS format +#' @param file filename of FASTA file +#' @return `data` in BAPS format +#' @author Waldir Leoncio +#' @export +#' @examples +#' file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS") +#' convert_FASTA_to_BAPS(file) +convert_FASTA_to_BAPS <- function(file) { + data <- load_fasta(file) # Processing data + data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential) + data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9 + colnames(data) <- paste("V", seq_len(ncol(data)), sep = "") + return(data) +} diff --git a/R/greedyMix.R b/R/greedyMix.R index 3bff02b..dd0d117 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -19,10 +19,8 @@ #' with high-throughput sequencing data. #' @export #' @examples -#' \dontrun{ # TEMP: unwrap once #24 is resolved -#' data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS") +#' data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS") #' greedyMix(data, "baps") -#' } # TEMP: unwrap once #24 is resolved greedyMix <- function( data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L, counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL, @@ -30,7 +28,8 @@ greedyMix <- function( ) { # Importing and handling data ================================================ if (tolower(format) %in% "fasta") { - stop("FASTA format not yet supported on greedyMix") + data <- convert_FASTA_to_BAPS(data) + format <- "baps" } if (tolower(format) %in% "baps") { data <- process_BAPS_data(data, NULL) @@ -69,7 +68,7 @@ greedyMix <- function( # Generating partition summary =============================================== ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]]) c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L) - logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) + logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data logml <- logml_npops_partitionSummary[["logml"]] npops <- logml_npops_partitionSummary[["npops"]] partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]] diff --git a/R/handleData.R b/R/handleData.R index 8868fc2..fbdd9c9 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -30,6 +30,7 @@ handleData <- function(raw_data, format = "Genepop") { "bam" = stop("BAM format not supported for processing yet") ) data <- as.matrix(raw_data) + dataApu <- data[, seq_len(nloci)] nollat <- matlab2r::find(dataApu == 0) if (!isempty(nollat)) { @@ -54,9 +55,14 @@ handleData <- function(raw_data, format = "Genepop") { alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1))) } - for (loc in seq_len(nloci)) { - for (all in seq_len(noalle[loc])) { - data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all + # This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T} + codes <- unique(as.vector(data[, -ncol(data)])) + skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4 + if (!skip_conversion) { + for (loc in seq_len(nloci)) { + for (all in seq_len(noalle[loc])) { + data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all + } } } diff --git a/R/process_BAPS_data.R b/R/process_BAPS_data.R index 7a76bf6..1bc61cf 100644 --- a/R/process_BAPS_data.R +++ b/R/process_BAPS_data.R @@ -2,14 +2,23 @@ process_BAPS_data <- function(file, partitionCompare) { if (!is.null(partitionCompare)) { cat('Data:', file, '\n') } - data <- read.table(file) - ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes? + + # Importing data + if (is.character(file)) { + data <- read.table(file) + } else { + data <- file + } + + ninds <- testaaOnkoKunnollinenBapsData(data) # Checks if last column is ID if (ninds == 0) { warning('Incorrect Data-file.') return(NULL) } + popnames <- NULL # Dropped specification of population names (from BAPS 6) + # Processing data result <- handleData(data, format = "BAPS") data <- result$newData rowsFromInd <- result$rowsFromInd diff --git a/inst/extdata/FASTA_clustering_haploid_ext.fasta b/inst/extdata/FASTA_clustering_haploid_ext.fasta new file mode 100644 index 0000000..c7ba753 --- /dev/null +++ b/inst/extdata/FASTA_clustering_haploid_ext.fasta @@ -0,0 +1,39 @@ +>1 +AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT +TAGTAACATGGTCAGCTGCTCATCCGTATT + +>2 +ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG +AATGCACCACAAACATACCCCACAGGCTCG + +>3 +GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT +GTCTGAGTATGGATGGTGGAGGCGGAGCCG + +>4 +ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG +CAAGGATAAAGTTCACCCATCACGTTGACC + +>5 +ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG +GTAACATAGCTACCAGGTCCCATTCCAGGA + +>6 +TCCCCCCAGTGGACACGGCTCGGGTAATGCAGCTTACCTCAACGCTAACGCATTTGACAGTAGTGAATCA +CGGGCAACGCTGGGTGATTGCAAGTTTTGT + +>7 +GCAACCACTGGTCGCCTGGAGCATTGATCAGGAACATGTCTGCAAGGGGGGCCGTTGCGGGTTTCAGTCA +TCGTATTGCGCTGCAAATCCTCGGAGCCTC + +>8 +CACCCGTAAAGCACGAGTAGGTTTCACCGCGACTTATATATTCCACCATACGGTTAACAAGGCAACACTT +ATTCGTCGTCCAATGATCGTCCCTCTCCAG + +>9 +CGAATCCATTCGGGATAAAGTTAATACGTAAGTCGAACGGGGTTTAGGAAGAGCTCTGCTGTTAAGCGCG +CTTATCATCTTATATGTGTCAGTTGTGTAC + +>10 +CGTTCGCATTTATAGGATATCCCCTAAACTAATTGGTAGTGATGGTATACCAGCGGTGCATTGTCCTCGC +CTGTAGTTTAAGTCAACCTCTGCCTTAATC diff --git a/man/convert_FASTA_to_BAPS.Rd b/man/convert_FASTA_to_BAPS.Rd new file mode 100644 index 0000000..e43d7eb --- /dev/null +++ b/man/convert_FASTA_to_BAPS.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convert_FASTA_to_BAPS.R +\name{convert_FASTA_to_BAPS} +\alias{convert_FASTA_to_BAPS} +\title{Convert from FASTA to BAPS} +\usage{ +convert_FASTA_to_BAPS(file) +} +\arguments{ +\item{file}{filename of FASTA file} +} +\value{ +`data` in BAPS format +} +\description{ +Converts a file (not an R object) from FASTA to BAPS format +} +\examples{ +file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS") +convert_FASTA_to_BAPS(file) +} +\author{ +Waldir Leoncio +} diff --git a/man/greedyMix.Rd b/man/greedyMix.Rd index 2d1a0e4..5974165 100644 --- a/man/greedyMix.Rd +++ b/man/greedyMix.Rd @@ -48,10 +48,8 @@ greedyMix( Clustering of individuals } \examples{ -\dontrun{ # TEMP: unwrap once #24 is resolved -data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS") +data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS") greedyMix(data, "baps") -} # TEMP: unwrap once #24 is resolved } \references{ Samtools: a suite of programs for interacting diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 6029c63..9353b0a 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -46,10 +46,6 @@ raw_bam <- importFile( data = file.path(path_inst, "bam_example.bam"), format = "BAM", ) -raw_baps <- importFile( - data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), - format = "FASTA" -) test_that("Files are imported correctly", { expect_equal(dim(raw_fasta), c(5, 99)) @@ -61,13 +57,6 @@ test_that("Files are imported correctly", { ) ) expect_equal(length(raw_bam[[1]]), 13) - expect_error( - greedyMix( - data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), - format = "FASTA" - ), - "FASTA format not yet supported on greedyMix" - ) }) test_that("greedyMix() fails successfully", { @@ -77,6 +66,7 @@ test_that("greedyMix() fails successfully", { test_that("greedyMix() works when it should", { baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt") + fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta") greedy_baps <- greedyMix(baps_file, "BAPS") expect_type(greedy_baps, "list") expect_length(greedy_baps, 10L)