Skip to content

Commit

Permalink
Merge branch 'baps-format' into develop
Browse files Browse the repository at this point in the history
* baps-format:
  Increment version number to 0.0.0.9025
  Skips `greedyMix()` example until #24 is fixed
  Updated documentation (#24)
  Fails for FAST input to `greedyMix()` (#24)
  Fixed calculation og `ninds` (#24)
  Bugfixes to `indMix()` and subfunctions (#24)
  Improved handling of BAPS format (#24)
  Fixed calculation of `ninds` `ekat` (#24)
  Added processing of BAPS format
  • Loading branch information
wleoncio committed Apr 8, 2024
2 parents d2ec589 + 6d875df commit 9b8da3a
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 57 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rBAPS
Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9024
Version: 0.0.0.9025
Date: 2020-11-09
Authors@R:
c(
Expand Down Expand Up @@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software
License: GPL-3
BugReports: https://github.com/ocbe-uio/rBAPS/issues
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
testthat (>= 2.1.0)
Imports:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,6 @@ importFrom(methods,is)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(utils,read.delim)
importFrom(utils,read.table)
importFrom(vcfR,read.vcfR)
importFrom(zeallot,"%<-%")
51 changes: 35 additions & 16 deletions R/greedyMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#' @param data data file
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
#' @param partitionCompare a list of partitions to compare
#' @param ninds number of individuals
#' @param npops number of populations
#' @param counts counts
#' @param sumcounts sumcounts
Expand All @@ -20,36 +19,56 @@
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
#' @examples
#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
#' greedyMix(data, "fasta")
#' \dontrun{ # TEMP: unwrap once #24 is resolved
#' data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS")
#' greedyMix(data, "baps")
#' } # TEMP: unwrap once #24 is resolved
greedyMix <- function(
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, ninds = 1L, npops = 1L,
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 1L,
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
# Importing and handling data ================================================
data <- importFile(data, format, verbose)
data <- handleData(data, tolower(format))
c <- list(
noalle = data[["noalle"]],
data = data[["newData"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]]
)
if (tolower(format) %in% "fasta") {
stop("FASTA format not yet supported on greedyMix")
}
if (tolower(format) %in% "baps") {
data <- process_BAPS_data(data, NULL)
c <- list(
noalle = data[["noalle"]],
data = data[["data"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]],
Z = data[["Z"]],
dist = data[["dist"]]
)
} else {
data <- importFile(data, format, verbose)
data <- handleData(data, tolower(format))
c <- list(
noalle = data[["noalle"]],
data = data[["newData"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]],
Z = data[["Z"]],
dist = data[["dist"]]
)
}

# Comparing partitions =======================================================
ninds <- length(unique(c[["data"]][, ncol(c[["data"]])]))
if (!is.null(partitionCompare)) {
logmls <- comparePartitions(
c[["data"]], nrow(c[["data"]]), partitionCompare[["partitions"]], ninds,
c[["rowsFromInd"]], c[["noalle"]], c[["adjprior"]]
)
}


# Generating partition summary ===============================================
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]]
Expand Down
10 changes: 5 additions & 5 deletions R/handleData.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,25 @@ handleData <- function(raw_data, format = "Genepop") {
"bam" = stop("BAM format not supported for processing yet")
)
data <- as.matrix(raw_data)
dataApu <- data[, 1:nloci]
dataApu <- data[, seq_len(nloci)]
nollat <- matlab2r::find(dataApu == 0)
if (!isempty(nollat)) {
isoinAlleeli <- base::max(base::max(dataApu))
dataApu[nollat] <- isoinAlleeli + 1
data[, 1:nloci] <- dataApu
data[, seq_len(nloci)] <- dataApu
}

noalle <- zeros(1, nloci)
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
for (i in 1:nloci) {
for (i in seq_len(nloci)) {
alleelitLokuksessaI <- unique(data[, i])
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
matlab2r::find(alleelitLokuksessaI >= 0)
])
noalle[i] <- length(alleelitLokuksessa[[i]])
}
alleleCodes <- zeros(base::max(noalle), nloci)
for (i in 1:nloci) {
for (i in seq_len(nloci)) {
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
Expand Down Expand Up @@ -83,7 +83,7 @@ handleData <- function(raw_data, format = "Genepop") {

adjprior <- zeros(base::max(noalle), nloci)
priorTerm <- 0
for (j in 1:nloci) {
for (j in seq_len(nloci)) {
adjprior[, j] <- as.matrix(c(
repmat(1 / noalle[j], c(noalle[j], 1)),
ones(base::max(noalle) - noalle[j], 1)
Expand Down
28 changes: 13 additions & 15 deletions R/indMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
Z <- NULL
}


rm(c)
nargin <- length(as.list(match.call())) - 1
if (nargin < 2) {
dispText <- 1
if (missing(npops)) {
# Recreate npopsTaulu from objects other than npops
dispText <- TRUE
npopstext <- matrix()
ready <- FALSE
teksti <- "Input upper bound to the number of populations (possibly multiple values)"
Expand Down Expand Up @@ -75,7 +73,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
partitionSummary[, 1] <- zeros(30, 1)
worstLogml <- -1e50
worstIndex <- 1
for (run in seq_along(nruns)) {
for (run in seq_len(nruns)) {
npops <- npopsTaulu[[run]]
if (dispText) {
dispLine()
Expand Down Expand Up @@ -110,7 +108,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
# PARHAAN MIXTURE-PARTITION ETSIMINEN
nRoundTypes <- 7
kokeiltu <- zeros(nRoundTypes, 1)
roundTypes <- c(1, 1) # Ykk�svaiheen sykli kahteen kertaan.
roundTypes <- t(c(1, 1)) # Ykk�svaiheen sykli kahteen kertaan.
ready <- 0
vaihe <- 1

Expand Down Expand Up @@ -143,9 +141,9 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j�lkeen
} else if (round == 0 | round == 1) { # Yksil�n siirt�minen toiseen populaatioon.
inds <- seq_len(ninds)
aputaulu <- cbind(t(inds), rand(ninds, 1))
aputaulu <- cbind(inds, rand(ninds, 1))
aputaulu <- matrix(sortrows(aputaulu, 2), nrow = nrow(aputaulu))
inds <- t(aputaulu[, 1])
inds <- aputaulu[, 1]
muutosNyt <- 0

for (ind in inds) {
Expand Down Expand Up @@ -324,7 +322,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
if (round == 5) {
aputaulu <- c(inds, rand(length(inds), 1))
aputaulu <- sortrows(aputaulu, 2)
inds <- t(aputaulu[, 1])
inds <- aputaulu[, 1]
} else if (round == 6) {
inds <- returnInOrder(
inds, pop, rows, data, adjprior, priorTerm
Expand Down Expand Up @@ -525,15 +523,15 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d

if (ready == 0) {
if (vaihe == 1) {
roundTypes <- 1
roundTypes <- t(1)
} else if (vaihe == 2) {
roundTypes <- c(2, 1)
roundTypes <- t(c(2, 1))
} else if (vaihe == 3) {
roundTypes <- c(5, 5, 7)
roundTypes <- t(c(5, 5, 7))
} else if (vaihe == 4) {
roundTypes <- c(4, 3, 1)
roundTypes <- t(c(4, 3, 1))
} else if (vaihe == 5) {
roundTypes <- c(6, 7, 2, 3, 4, 1)
roundTypes <- t(c(6, 7, 2, 3, 4, 1))
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions R/initialCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ initialCounts <- function(partition, data, npops, rows, noalle, adjprior) {

counts <- zeros(base::max(noalle), nloci, npops)
sumcounts <- zeros(npops, nloci)
for (i in 1:npops) {
for (j in 1:nloci) {
for (i in seq_len(npops)) {
for (j in seq_len(nloci)) {
havainnotLokuksessa <- matlab2r::find(partition == i & data[, j] >= 0)
sumcounts[i, j] <- length(havainnotLokuksessa)
for (k in 1:noalle[j]) {
Expand Down
11 changes: 7 additions & 4 deletions R/linkage.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ linkage <- function(Y, method = "co") {
monotonic <- 1
Z <- zeros(m - 1, 3) # allocate the output matrix.
N <- zeros(1, 2 * m - 1)
N[1:m] <- 1
N[seq_len(m)] <- 1
n <- m # since m is changing, we need to save m in n.
R <- 1:n
R <- seq_len(n)
for (s in 1:(n - 1)) {
X <- as.matrix(as.vector(Y), ncol = 1)
X <- as.matrix(as.vector(Y), nrow = 1)
v <- matlab2r::min(X)$mins
k <- matlab2r::min(X)$idx

Expand Down Expand Up @@ -83,11 +83,14 @@ linkage <- function(Y, method = "co") {
)
J <- c(J, i * (m - (i + 1) / 2) - m + j)
Y <- Y[-J] # no need for the cluster information about j

# update m, N, R
m <- m - 1
N[n + s] <- N[R[i]] + N[R[j]]
R[i] <- n + s
R[j:(n - 1)] <- R[(j + 1):n]
if (j < n) {
R[j:(n - 1)] <- R[(j + 1):n]
}
}
return(Z)
}
1 change: 0 additions & 1 deletion R/logml2String.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' @title Logml to string
#' @description Returns a string representation of a logml
#' @param logml input Logml
#' @param
#' @param leading_zeros_replacement string to replace leading zeros with
#' @return String version of logml
logml2String <- function(logml, leading_zeros_replacement = " ") {
Expand Down
2 changes: 1 addition & 1 deletion R/newGetDistances.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ newGetDistances <- function(data, rowsFromInd) {
y <- zeros(size(toka))
y <- apply(y, 2, as.integer)

for (j in 1:nloci) {
for (j in seq_len(nloci)) {
for (k in 1:rowsFromInd) {
x[, k] <- data[eka[, k], j]
y[, k] <- data[toka[, k], j]
Expand Down
37 changes: 37 additions & 0 deletions R/process_BAPS_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
process_BAPS_data <- function(file, partitionCompare) {
if (!is.null(partitionCompare)) {
cat('Data:', file, '\n')
}
data <- read.table(file)
ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes?
if (ninds == 0) {
warning('Incorrect Data-file.')
return(NULL)
}
popnames <- NULL # Dropped specification of population names (from BAPS 6)

result <- handleData(data, format = "BAPS")
data <- result$newData
rowsFromInd <- result$rowsFromInd
alleleCodes <- result$alleleCodes
noalle <- result$noalle
adjprior <- result$adjprior
priorTerm <- result$priorTerm
result <- newGetDistances(data, rowsFromInd)
Z <- result$Z
dist <- result$dist

# Forming and saving pre-processed data
processed_data <- list(
data = data,
rowsFromInd = rowsFromInd,
alleleCodes = alleleCodes,
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm,
dist = dist,
popnames = popnames,
Z = Z
)
return(processed_data)
}
4 changes: 2 additions & 2 deletions R/rBAPS-package.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#' @title Bayesian Analysis of Population Structure
#' @description This is a partial implementation of the BAPS software
#' @docType package
#' @name rBAPS
#' @note Found a bug? Want to suggest a feature? Contribute to the scientific
#' and open source communities by opening an issue on our home page.
Expand All @@ -11,4 +10,5 @@
#' @importFrom stats runif
#' @importFrom zeallot %<-%
#' @importFrom matlab2r nargin log2
NULL
#' @importFrom utils read.table
"_PACKAGE"
9 changes: 4 additions & 5 deletions man/greedyMix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/rBAPS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9b8da3a

Please sign in to comment.