Skip to content

Commit

Permalink
Merge branch 'issue-25' into develop
Browse files Browse the repository at this point in the history
* issue-25: (38 commits)
  Adjusted unit tests for #25
  Fixed sink() usage
  Fixed docs
  Exporting importFile()
  Improved handling of supported formats for greedyMix() (#25)
  Fixed basic parsing of FASTA files (#25)
  Increment version number to 0.0.0.9022
  Fixed syntax (#25)
  Improved printing (#25)
  Partial reversion of b034158 (#25)
  Fixed to indMix (#25)
  Incorporated handleData() on greedyMix() (#25)
  Improved handleData() to handle FASTA (#25)
  Added numeric output option to load_fasta() (#25)
  Fixed test text (#25)
  Added missing documentation for arguments (#25)
  Syntax fix (#25)
  Delayed resolution of FIXMEs (#25)
  Workaround for usage of MATLAB any() (#25)
  Fixed argument passing (#25)
  ...
  • Loading branch information
wleoncio committed Sep 11, 2023
2 parents 449982a + ce954bf commit 59fbb0a
Show file tree
Hide file tree
Showing 95 changed files with 45,168 additions and 45,030 deletions.
3 changes: 1 addition & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ PITFALLS.md
CHANGELOG.md
CITATION.cff
.travis.yml
inst/ext/ExamplesDataFormatting
inst/testdata
.github
aux

2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rBAPS
Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9020
Version: 0.0.0.9022
Date: 2020-11-09
Authors@R:
c(
Expand Down
39 changes: 1 addition & 38 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,45 +1,8 @@
# Generated by roxygen2: do not edit by hand

export(addAlleles)
export(admix1)
export(calculatePopLogml)
export(computeAllFreqs2)
export(computeIndLogml)
export(computePersonalAllFreqs)
export(computeRows)
export(etsiParas)
export(fgetl)
export(fopen)
export(greedyMix)
export(greedyPopMix)
export(handleData)
export(handlePopData)
export(initPopNames)
export(learn_partition_modified)
export(learn_simple_partition)
export(linkage)
export(load_fasta)
export(logml2String)
export(lueGenePopData)
export(lueGenePopDataPop)
export(lueNimi)
export(noIndex)
export(ownNum2Str)
export(poistaLiianPienet)
export(proportion2str)
export(randdir)
export(rivinSisaltamienMjonojenLkm)
export(selvitaDigitFormat)
export(simulateAllFreqs)
export(simulateIndividuals)
export(simuloiAlleeli)
export(suoritaMuutos)
export(takeLine)
export(testaaKoordinaatit)
export(testaaOnkoKunnollinenBapsData)
export(testaaPop)
export(writeMixtureInfo)
export(writeMixtureInfoPop)
export(importFile)
importFrom(R6,R6Class)
importFrom(Rsamtools,scanBam)
importFrom(adegenet,.readExt)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# rBAPS (development version)

# rBAPS 0.0.0.9021

* Added a `NEWS.md` file to track changes to the package.
* Exported `greedyMix()` and `load_fasta()` functions.
1 change: 0 additions & 1 deletion R/addAlleles.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#' @param line line
#' @param divider divider
#' @return data (after alleles were added)
#' @export
addAlleles <- function(data, ind, line, divider) {
# Lisaa BAPS-formaatissa olevaan datataulukkoon
# yksil�� ind vastaavat rivit. Yksil�n alleelit
Expand Down
1 change: 0 additions & 1 deletion R/admix1.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
#' @param tietue tietue
#' @importFrom methods is
#' @export
admix1 <- function(tietue) {
if (!is.list(tietue)) {
message("Load mixture result file. These are the files in this directory:")
Expand Down
1 change: 0 additions & 1 deletion R/calculatePopLogml.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#' for the mean parameter.
#' @param points points
#' @param fii fii
#' @export
calculatePopLogml <- function(points, fii) {
n <- length(points)
fuzzy_ones <- sum(points)
Expand Down
23 changes: 23 additions & 0 deletions R/comparePartitions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
comparePartitions <- function(data, c.rows, partitionCompare.partitions, ninds, rowsFromInd, noalle, adjprior) {
stop("Comparing partitions not yet implemented") # TODO: implement
# nsamplingunits = size(c.rows,1);
# partitions = partitionCompare.partitions;
# npartitions = size(partitions,2);
# partitionLogml = zeros(1,npartitions);
# for i = 1:npartitions
# % number of unique partition lables
# npops = length(unique(partitions(:,i)));

# partitionInd = zeros(ninds*rowsFromInd,1);
# partitionSample = partitions(:,i);
# for j = 1:nsamplingunits
# partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j);
# end
# partitionLogml(i) = initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior);

# end
# % return the logml result
# partitionCompare.logmls = partitionLogml;
# set(h1, 'userdata', partitionCompare);
# return
}
1 change: 0 additions & 1 deletion R/computeAllFreqs2.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen
#' j 1/noalle(j) verran.
#' @param noalle noalle
#' @export
computeAllFreqs2 <- function(noalle) {
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
Expand Down
1 change: 0 additions & 1 deletion R/computeIndLogml.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#' määritellyiksi kuten osuusTaulu:ssa.
#' @param omaFreqs own Freqs?
#' @param osuusTaulu Percentage table?
#' @export
computeIndLogml <- function(omaFreqs, osuusTaulu) {
omaFreqs <- as.matrix(omaFreqs)
osuusTaulu <- as.matrix(osuusTaulu)
Expand Down
2 changes: 0 additions & 2 deletions R/computePersonalAllFreqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#' @param data data
#' @param allFreqs allFreqs
#' @param rowsFromInd rowsFromInd
#' @export

computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
if (isGlobalEmpty(COUNTS)) {
nloci <- npops <- 1
Expand Down
39 changes: 21 additions & 18 deletions R/computePopulationLogml.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
# ======================================================== #
# Limiting COUNTS size #
# ======================================================== #
COUNTS <- COUNTS[
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE
]
if (!is.null(adjprior)) {
nr <- seq_len(nrow(adjprior))
nc <- seq_len(ncol(adjprior))
COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
} else {
COUNTS <- NA
}

x <- size(COUNTS, 1)
y <- size(COUNTS, 2)
Expand All @@ -15,25 +19,24 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
# ======================================================== #
# Computation #
# ======================================================== #
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
term1 <- squeeze(
sum(
term1 <- NULL
if (!is.null(adjprior)) {
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
term1 <- squeeze(
sum(
reshape(
lgamma(
repmat(adjprior, c(1, 1, length(pops))) +
COUNTS[
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops,
drop = !isarray
]
sum(
reshape(
lgamma(
repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
),
c(x, y, z)
),
c(x, y, z)
1
),
1
),
2
2
)
)
)
}
if (is.null(priorTerm)) priorTerm <- 0
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
return(popLogml)
Expand Down
1 change: 0 additions & 1 deletion R/computeRows.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#' @param rowsFromInd rowsFromInd
#' @param inds matrix
#' @param ninds ninds
#' @export
computeRows <- function(rowsFromInd, inds, ninds) {
if (!is(inds, "matrix")) inds <- as.matrix(inds)
if (identical(dim(inds), c(nrow(inds), 1L))) {
Expand Down
1 change: 0 additions & 1 deletion R/etsiParas.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#' @export
#' @title Etsi Paras
#' @description Search for the best?
#' @param osuus Percentages?
Expand Down
2 changes: 0 additions & 2 deletions R/fgetl-fopen.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#' fgetl returns tline as a numeric value -1.
#' @author Waldir Leoncio
#' @seealso fopen
#' @export
fgetl <- function(file) {
# ==========================================================================
# Validation
Expand All @@ -27,5 +26,4 @@ fgetl <- function(file) {
#' @return The same as `readLines(filename)`
#' @author Waldir Leoncio
#' @seealso fgetl
#' @export
fopen <- function(filename) readLines(filename)
92 changes: 60 additions & 32 deletions R/greedyMix.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
#' @title Clustering of individuals
#' @param data data file
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
#' @param partitionCompare a list of partitions to compare
#' @param ninds number of individuals
#' @param npops number of populations
#' @param counts counts
#' @param sumcounts sumcounts
#' @param max_iter maximum number of iterations
#' @param alleleCodes allele codes
#' @param inp input file
#' @param popnames population names
#' @param fixedK if \code{TRUE}, the number of populations is fixed
#' @param verbose if \code{TRUE}, prints extra output information
#' @importFrom utils read.delim
#' @importFrom vcfR read.vcfR
Expand All @@ -9,41 +19,59 @@
#' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
greedyMix <- function(data, format, verbose = TRUE) {
# Parsing data format ------------------------------------------------------
#' @examples
#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
#' greedyMix(data, "fasta")
greedyMix <- function(
data, format, partitionCompare = NULL, ninds = 1L, npops = 1L,
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
# Importing and handling data ================================================
data <- importFile(data, format, verbose)
data <- handleData(data, tolower(format))
c <- list(
noalle = data[["noalle"]],
data = data[["newData"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]]
)

if (missing(format)) {
format <- gsub(".*\\.(.+)$", "\\1", data)
message("Format not provided. Guessing from file extension: ", format)
# Comparing partitions =======================================================
if (!is.null(partitionCompare)) {
logmls <- comparePartitions(
c[["data"]], nrow(c[["data"]]), partitionCompare[["partitions"]], ninds,
c[["rowsFromInd"]], c[["noalle"]], c[["adjprior"]]
)
}
format <- tolower(format)

# Dispatching to proper loading function -----------------------------------

if (format == "fasta") {
out <- load_fasta(data)
} else if (format == "vcf") {
out <- vcfR::read.vcfR(data, verbose = verbose)
} else if (format == "sam") {
stop(
"SAM files not directly supported. ",
"Install the samtools software and execute\n\n",
"samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
"and try running this function again with 'format=BAM'"
)
} else if (format == "bam") {
out <- Rsamtools::scanBam(data)
} else if (format == "genepop") {
if (toupper(adegenet::.readExt(data)) == "TXT") {
message("Creating a copy of the file with the .gen extension")
dataGen <- gsub("txt", "gen", data)
file.copy(data, dataGen)
out <- adegenet::read.genepop(dataGen)
} else {
out <- adegenet::read.genepop(data)
}
} else {
stop("Format not supported.")
# Generating partition summary ===============================================
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]]
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]

# Generating output object ===================================================
out <- list(
"alleleCodes" = alleleCodes, "adjprior" = c[["adjprior"]],
"popnames" = popnames, "rowsFromInd" = c[["rowsFromInd"]],
"data" = c[["data"]], "npops" = npops, "noalle" = c[["noalle"]],
"mixtureType" = "mix", "logml" = logml
)
if (logml == 1) {
return(out)
}
return(out)

# Writing mixture info =======================================================
changesInLogml <- writeMixtureInfo(
logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]],
NULL, inp, partitionSummary, popnames, fixedK
)

# Updateing results ==========================================================
return(c(out, "changesInLogml" = changesInLogml))
}
1 change: 0 additions & 1 deletion R/greedyPopMix.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#' @importFrom matlab2r uiputfile
#' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
) {
# Replacing original file reading code with greedyMix()
Expand Down
Loading

0 comments on commit 59fbb0a

Please sign in to comment.