Skip to content

Commit

Permalink
Merge branch 'issue-24' into develop
Browse files Browse the repository at this point in the history
* issue-24:
  Updated docs
  Temp-wrapping disfunctional code
  Aligned processing of Genepop with MATLAB code
  Dropped support for GenePop on `importFile()`
  Added `process_GenePop_data()`
  Fixed `addAlleles()`
  handleData defaults to BAPS format
  Added unit tests for `greedyMix()` on GenePop (#24)
  Added GenePop data, txt and gen (#24)
  • Loading branch information
wleoncio committed Sep 13, 2024
2 parents fa7e750 + aa675d1 commit 1d87be9
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 19 deletions.
5 changes: 2 additions & 3 deletions R/addAlleles.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ addAlleles <- function(data, ind, line, divider) {
# line. Jos data on 3 digit formaatissa on divider=1000.
# Jos data on 2 digit formaatissa on divider=100.

nloci <- size(data, 2) # added 1 from original code
nloci <- size(data, 2) - 1L
if (size(data, 1) < (2 * ind)) {
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
}
Expand All @@ -22,8 +22,7 @@ addAlleles <- function(data, ind, line, divider) {
k <- k + 1
merkki <- substring(line, k, k)
}
line <- substring(line, k + 1)
# clear k; clear merkki;
line <- trimws(substring(line, k + 1))

if (grepl(" ", line)) {
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
Expand Down
14 changes: 13 additions & 1 deletion R/greedyMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ greedyMix <- function(
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
# Importing and handling data ================================================
# TODO: use format as class and make handling data a generic
if (tolower(format) %in% "fasta") {
data <- convert_FASTA_to_BAPS(data)
format <- "baps"
Expand All @@ -42,6 +43,17 @@ greedyMix <- function(
Z = data[["Z"]],
dist = data[["dist"]]
)
} else if (tolower(format) %in% "genepop") {
data <- process_GenePop_data(data)
c <- list(
noalle = data[["noalle"]],
data = data[["data"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]],
Z = data[["Z"]],
dist = data[["dist"]]
)
} else {
data <- importFile(data, format, verbose)
data <- handleData(data, tolower(format))
Expand All @@ -68,7 +80,7 @@ greedyMix <- function(
# Generating partition summary ===============================================
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop
logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]]
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
Expand Down
2 changes: 1 addition & 1 deletion R/handleData.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' the function changes the allele codes so that one locus j
#' codes get values between? 1, ..., noalle(j).
#' @export
handleData <- function(raw_data, format = "Genepop") {
handleData <- function(raw_data, format = "baps") {
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
# rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
Expand Down
12 changes: 1 addition & 11 deletions R/importFile.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#' @title Import data file
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
#' Genepop).
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM).
#' @param data raw dataset
#' @param format data format (guesses from extension if not provided)
#' @param verbose if \code{TRUE}, prints extra output information
Expand Down Expand Up @@ -33,15 +32,6 @@ importFile <- function(data, format, verbose) {
)
} else if (format == "bam") {
out <- Rsamtools::scanBam(data)
} else if (format == "genepop") {
if (toupper(adegenet::.readExt(data)) == "TXT") {
message("Creating a copy of the file with the .gen extension")
dataGen <- gsub("txt", "gen", data)
file.copy(data, dataGen)
out <- adegenet::read.genepop(dataGen)
} else {
out <- adegenet::read.genepop(data)
}
} else {
stop("Format not supported.")
}
Expand Down
24 changes: 24 additions & 0 deletions R/process_GenePop_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
process_GenePop_data <- function(filename) {
# Pre-processing GenePop data
kunnossa <- testaaGenePopData(filename)
data_popnames <- lueGenePopData(filename)
data <- data_popnames$data
popnames <- data_popnames$popnames

# Processing GenePop data
c <- handleData(data, "genepop")
Z_dist <- newGetDistances(c$newData, c$rowsFromInd)

# Forming and returning pre-processed data
list(
"data" = c$newData,
"rowsFromInd" = c$rowsFromInd,
"alleleCodes" = c$alleleCodes,
"noalle" = c$noalle,
"adjprior" = c$adjprior,
"priorTerm" = c$priorTerm,
"dist" = Z_dist$dist,
"popnames" = popnames,
"Z" = Z_dist$Z
)
}
12 changes: 12 additions & 0 deletions inst/extdata/GenePop.gen
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Example GenePop Data
Loc1
Loc2
Loc3
Pop
Ind1, 0101 0202 0303
Ind2, 0102 0201 0303
Ind3, 0101 0202 0303
Pop
Ind4, 0101 0202 0303
Ind5, 0102 0201 0303
Ind6, 0101 0202 0303
12 changes: 12 additions & 0 deletions inst/extdata/GenePop.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Example GenePop Data
Loc1
Loc2
Loc3
Pop
Ind1, 0101 0202 0303
Ind2, 0102 0201 0303
Ind3, 0101 0202 0303
Pop
Ind4, 0101 0202 0303
Ind5, 0102 0201 0303
Ind6, 0101 0202 0303
2 changes: 1 addition & 1 deletion man/handleData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/importFile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions tests/testthat/test-greedyMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ test_that("greedyMix() fails successfully", {

test_that("greedyMix() works when it should", {
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
genepop_file <- file.path(path_inst, "GenePop.txt")
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
greedy_baps <- greedyMix(baps_file, "BAPS")
expect_error(greedy_genepop <- greedyMix(genepop_file, "GenePop")) # TEMP: fails
expect_type(greedy_baps, "list")
expect_length(greedy_baps, 10L)
})
Expand Down

0 comments on commit 1d87be9

Please sign in to comment.