From 17b7ee5b1afd5ee246bf22bca97b0f295eb569b8 Mon Sep 17 00:00:00 2001 From: Josh Sumner <51797700+joshqsumner@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:10:30 -0600 Subject: [PATCH 1/5] messy update --- NAMESPACE | 6 +++ R/demultiplex.R | 121 ++++++++++++++++++++++++++++++------------------ 2 files changed, 82 insertions(+), 45 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9ffa5c4..a9c507d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,10 +33,15 @@ import(patchwork) import(stats) import(uwot) import(viridis) +importClassesFrom(ShortRead,ShortReadQ) importFrom(ComplexUpset,upset) importFrom(FactoMineR,PCA) importFrom(Hmisc,rcorr) importFrom(MASS,glm.nb) +importFrom(ShortRead,countFastq) +importFrom(ShortRead,readFastq) +importFrom(ShortRead,sread) +importFrom(ShortRead,writeFastq) importFrom(compositions,clr) importFrom(dbscan,dbscan) importFrom(igraph,E) @@ -72,3 +77,4 @@ importFrom(utils,write.csv) importFrom(uwot,umap) importFrom(vegan,vegdist) importFrom(viridis,scale_fill_viridis) +importMethodsFrom(ShortRead,append) diff --git a/R/demultiplex.R b/R/demultiplex.R index ddc6480..8c7c969 100644 --- a/R/demultiplex.R +++ b/R/demultiplex.R @@ -1,6 +1,8 @@ #' Function to demultiplex sequences either to single files or to multiple files for use with dada2. #' -#' @param reads A ShortReadQ object or a file path to a fastq file that will be read in with +#' @param fwd_reads A ShortReadQ object or a file path to a fastq file that will be read in with +#' \code{ShortRead::readFastq}. +#' @param rev_reads A ShortReadQ object or a file path to a fastq file that will be read in with #' \code{ShortRead::readFastq}. #' @param barcodes A dataframe of barcodes and metadata for naming or a file path to such a flat file #' readable by \code{utils::read.delim}. @@ -27,6 +29,9 @@ #' #' @importFrom utils write.csv #' @importFrom methods is +#' @importClassesFrom ShortRead ShortReadQ +#' @importMethodsFrom ShortRead append +#' @importFrom ShortRead readFastq writeFastq sread countFastq #' @import parallel #' #' @return Writes out fq files and summary statistics depending on writeOut and stat arguments. @@ -70,14 +75,17 @@ #' @export -demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Name", "Sample_name"), +demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV", name = c("Name", "Sample_name"), mode = c("dada", "general"), cores = getOption("mc.cores", 1), writeOut = TRUE, stat = FALSE) { - if (is.character(reads)) { - reads <- ShortRead::readFastq(reads) + if (is.character(fwd_reads)) { + fwd_reads <- ShortRead::readFastq(fwd_reads) + } + if (is.character(rev_reads)) { + rev_reads <- ShortRead::readFastq(rev_reads) } - if (!methods::is(reads, "ShortReadQ")) { - stop("reads must be a ShortReadQ object or a file path to a fastq file.") + if (!methods::is(fwd_reads, "ShortReadQ") || !methods::is(rev_reads, "ShortReadQ")) { + stop("fwd and rev reads must be a ShortReadQ object or a file path to a fastq file.") } if (is.character(barcodes)) { barcodes <- utils::read.delim(barcodes) @@ -90,9 +98,9 @@ demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Nam } mode <- mode[1] if (mode == "dada") { - out <- .demultiplex_for_dada(reads, barcodes, fwd, rev, name, writeOut, stat, cores) + out <- .demultiplex_for_dada(fwd_reads, rev_reads, barcodes, fwd, rev, name, writeOut, stat, cores) } else if (mode == "general") { - out <- .demultiplex_general(reads, barcodes, fwd, rev, name, writeOut, stat, cores) + out <- .demultiplex_general(fwd_reads, rev_reads, barcodes, fwd, rev, name, writeOut, stat, cores) } else { stop("mode must be one of 'dada' or 'general'") } @@ -113,44 +121,61 @@ demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Nam #' @keywords internal #' @noRd -.demultiplex_for_dada <- function(reads, barcodes, fwd = "FWD", rev = "REV", +.demultiplex_for_dada <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV", name = c("Name", "Sample_name"), writeOut = TRUE, stat = FALSE, cores = 1) { # format barcodes to uppercase barcodes[[fwd]] <- toupper(barcodes[[fwd]]) barcodes[[rev]] <- toupper(barcodes[[rev]]) - # per barcode write out a fq and optionally a summary table + # get indices of reverse reads that start with forward or reverse barcodes + # x <- parallel::mclapply(seq_len(nrow(barcodes)), function(i) { + # rev_bar <- barcodes[i, rev] + # fwd_bar <- barcodes[i, fwd] + # rev_bar_rc <- Biostrings::reverseComplement(Biostrings::DNAString(rev_bar)) + # fwd_bar_rc <- Biostrings::reverseComplement(Biostrings::DNAString(fwd_bar)) + # + # rev_index <- which(grepl(paste0("^", rev_bar), ShortRead::sread(rev_reads))) + # fwd_index <- which(grepl(paste0("^", fwd_bar), ShortRead::sread(rev_reads))) + # rev_rc_index <- which(grepl(paste0("^", rev_bar_rc), ShortRead::sread(rev_reads))) + # fwd_rc_index <- which(grepl(paste0("^", fwd_bar_rc), ShortRead::sread(rev_reads))) + # return( + # list("fwd" = fwd_index, "rev" = rev_index, "fwd_rc" = fwd_rc_index, "rev_rc" = rev_rc_index) + # ) + # }, mc.cores = cores) + # rev_reads_fwd_barcodes <- sort(unique(unlist(lapply(x, function(i) { + # i$fwd + # })))) + # rev_reads_rev_barcodes <- sort(unique(unlist(lapply(x, function(i) { + # i$rev + # })))) + # rev_reads_rev_rc_barcodes <- sort(unique(unlist(lapply(x, function(i) { + # i$rev_rc + # })))) + # rev_reads_fwd_rc_barcodes <- sort(unique(unlist(lapply(x, function(i) { + # i$fwd_rc + # })))) + # per barcode write out a fastq.gz and optionally a summary table + fwd_reads_char <- as.character(ShortRead::sread(fwd_reads)) # could gsub out "N"s + rev_reads_char <- as.character(ShortRead::sread(rev_reads)) + test <- parallel::mclapply(seq_len(nrow(barcodes)), function(i) { # get a name for the sub sample - name <- paste(barcodes[i, c(name)], collapse = ".") + file_name <- paste(barcodes[i, c(name)], collapse = "_") # get barcodes fwd_bar <- barcodes[i, fwd] rev_bar <- barcodes[i, rev] - # filter for barcodes at beginning of sequence - rowReads <- reads[grepl(paste0("^", fwd_bar), ShortRead::sread(reads))] - columnReads <- reads[grepl(paste0("^", rev_bar), ShortRead::sread(reads))] - # make reverse complements of fwd and reverse barcodes - fwd_bar_reverse_complement <- as.character( - Biostrings::reverseComplement( - Biostrings::DNAString(fwd_bar) - ) - ) - rev_bar_reverse_complement <- as.character( - Biostrings::reverseComplement( - Biostrings::DNAString(rev_bar) - ) - ) - # search for reverse complements of opposite barcode at end of sequences - rowReads <- rowReads[grepl( - paste0(rev_bar_reverse_complement, "$"), - ShortRead::sread(rowReads) - )] - columnReads <- columnReads[grepl( - paste0(fwd_bar_reverse_complement, "$"), - ShortRead::sread(columnReads) - )] - # append row and column reads - clone <- append(rowReads, columnReads) + + fwd_reads_w_fwd_bars <- which(grepl(paste0("^", fwd_bar), fwd_reads_char)) + rev_reads_w_rev_bars <- which(grepl(paste0("^", rev_bar), rev_reads_char)) + fwd_index_both <- intersect(fwd_reads_w_fwd_bars, rev_reads_w_rev_bars) # logical AND + + fwd_reads_w_rev_bars <- which(grepl(paste0("^", rev_bar), fwd_reads_char)) + rev_reads_w_fwd_bars <- which(grepl(paste0("^", fwd_bar), rev_reads_char)) + rev_index_both <- intersect(fwd_reads_w_rev_bars, rev_reads_w_fwd_bars) # logical AND + # union of both groups of reads + total_index <- union(fwd_index_both, rev_index_both) # logical OR + fwd_clone <- fwd_reads[total_index] + rev_clone <- rev_reads[total_index] # write fastq if (!is.logical(writeOut)) { write <- writeOut @@ -163,18 +188,23 @@ demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Nam } if (writeOut) { ShortRead::writeFastq( - object = clone, file = paste0(write, name, ".fq"), + object = fwd_clone, file = paste0(write, file_name, "_f.fastq.gz"), + mode = "w", compress = TRUE + ) + ShortRead::writeFastq( + object = rev_clone, file = paste0(write, file_name, "_r.fastq.gz"), mode = "w", compress = TRUE ) } if (stat & writeOut) { stats <- data.frame( - name = name, - totalReads = length(reads), - groupedReads = ShortRead::countFastq(paste0(name, ".fq"))[1, 1] + name = file_name, + totalReads = length(fwd_reads), + groupedReads_fwd = ShortRead::countFastq(paste0(write, file_name, "_f.fastq.gz"))[1, 1], + groupedReads_rev = ShortRead::countFastq(paste0(write, file_name, "_r.fastq.gz"))[1, 1] ) - stats$ratio <- signif(stats$groupedReads / stats$totalReads, digits = 4) - utils::write.csv(stats, file = paste0(name, ".csv"), row.names = FALSE) + stats$ratio <- signif(stats$groupedReads_fwd / stats$totalReads, digits = 4) + utils::write.csv(stats, file = paste0(write, file_name, ".csv"), row.names = FALSE) return(stats) } }, mc.cores = cores) @@ -204,7 +234,7 @@ demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Nam # per barcode write out a fq and optionally a summary table lapply(seq_len(nrow(barcodes)), function(i) { # get a name for the sub sample - name <- paste(barcodes[i, c(name)], collapse = ".") + name <- paste(barcodes[i, c(name)], collapse = "_") # get barcodes fwd_bar <- barcodes[i, fwd] rev_bar <- barcodes[i, rev] @@ -247,7 +277,7 @@ demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Nam md <- "a" } ShortRead::writeFastq( - object = clone, file = paste0(write, name, ".fq"), + object = clone, file = paste0(write, name, ".fastq.gz"), mode = md, compress = TRUE ) } @@ -256,10 +286,11 @@ demultiplex <- function(reads, barcodes, fwd = "FWD", rev = "REV", name = c("Nam stats <- data.frame( name = name, totalReads = length(reads), - groupedReads = ShortRead::countFastq(paste0(name, ".fq"))[1, 1] + groupedReads = ShortRead::countFastq(paste0(name, ".fastq.gz"))[1, 1] ) stats$ratio <- signif(stats$groupedReads / stats$totalReads, digits = 4) utils::write.csv(stats, file = paste0(name, ".csv"), row.names = FALSE) return(stats) } } + From c94a4d4e4c235cb6bbe9a6231c37c33fb0be5f05 Mon Sep 17 00:00:00 2001 From: Josh Sumner <51797700+joshqsumner@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:16:16 -0600 Subject: [PATCH 2/5] stop gap --- R/demultiplex.R | 87 ++++++++----------------------------------------- 1 file changed, 13 insertions(+), 74 deletions(-) diff --git a/R/demultiplex.R b/R/demultiplex.R index 8c7c969..4f2ed62 100644 --- a/R/demultiplex.R +++ b/R/demultiplex.R @@ -37,47 +37,11 @@ #' @return Writes out fq files and summary statistics depending on writeOut and stat arguments. #' If both are TRUE then summary stats are returned as a dataframe. #' -#' @examples -#' # these examples are not run because they require specific fastq files. -#' \dontrun{ -#' library(ShortRead) -#' library(Biostrings) -#' library(parallel) -#' setwd("~/scripts/SINC/sincUtils/syncomBuilder/nastya96WellEx") -#' -#' barcodes <- read.delim("barcode_tab.tsv") -#' name <- c("Name", "Well") -#' reads <- ShortRead::readFastq("Plate11_R1_001.fastq.gz") -#' -#' demultiplex(reads, barcodes[1:10, ], -#' fwd = "FWD", rev = "REV", -#' name = name, mode = "dada", cores = 10, writeOut = "tests", stat = FALSE -#' ) -#' -#' path <- paste0( -#' "/run/user/1000/gvfs/smb-share:server=nas01,share=research/bart_lab/", -#' "Previous_people/Mingsheng_Qi/research_dr/SINC/limiting_dilution/20210314/p2p11_rawseq" -#' ) -#' barcodes <- read.csv(paste0(path, "/barcodetable1.csv")) -#' barcodes$exp <- "exp" -#' reads <- ShortRead::readFastq(paste0(path, "/210314S05P10_merge.fq")) -#' -#' x <- demultiplex(reads, barcodes, -#' fwd = "FWD_primer", rev = "REV_primer", name = c("exp"), mode = "general", -#' writeOut = TRUE, stat = TRUE, cores = 10 -#' ) -#' x -#' # benchmarking against: -#' # "plate","totalReads","groupedReads","ratio" -#' # "P10",234509,154507,0.6589 -#' } -#' #' @export - -demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV", name = c("Name", "Sample_name"), - mode = c("dada", "general"), cores = getOption("mc.cores", 1), writeOut = TRUE, - stat = FALSE) { +demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV", + name = c("Name", "Sample_name"), mode = c("dada", "general"), + cores = getOption("mc.cores", 1), writeOut = TRUE, stat = FALSE) { if (is.character(fwd_reads)) { fwd_reads <- ShortRead::readFastq(fwd_reads) } @@ -127,35 +91,7 @@ demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV" # format barcodes to uppercase barcodes[[fwd]] <- toupper(barcodes[[fwd]]) barcodes[[rev]] <- toupper(barcodes[[rev]]) - # get indices of reverse reads that start with forward or reverse barcodes - # x <- parallel::mclapply(seq_len(nrow(barcodes)), function(i) { - # rev_bar <- barcodes[i, rev] - # fwd_bar <- barcodes[i, fwd] - # rev_bar_rc <- Biostrings::reverseComplement(Biostrings::DNAString(rev_bar)) - # fwd_bar_rc <- Biostrings::reverseComplement(Biostrings::DNAString(fwd_bar)) - # - # rev_index <- which(grepl(paste0("^", rev_bar), ShortRead::sread(rev_reads))) - # fwd_index <- which(grepl(paste0("^", fwd_bar), ShortRead::sread(rev_reads))) - # rev_rc_index <- which(grepl(paste0("^", rev_bar_rc), ShortRead::sread(rev_reads))) - # fwd_rc_index <- which(grepl(paste0("^", fwd_bar_rc), ShortRead::sread(rev_reads))) - # return( - # list("fwd" = fwd_index, "rev" = rev_index, "fwd_rc" = fwd_rc_index, "rev_rc" = rev_rc_index) - # ) - # }, mc.cores = cores) - # rev_reads_fwd_barcodes <- sort(unique(unlist(lapply(x, function(i) { - # i$fwd - # })))) - # rev_reads_rev_barcodes <- sort(unique(unlist(lapply(x, function(i) { - # i$rev - # })))) - # rev_reads_rev_rc_barcodes <- sort(unique(unlist(lapply(x, function(i) { - # i$rev_rc - # })))) - # rev_reads_fwd_rc_barcodes <- sort(unique(unlist(lapply(x, function(i) { - # i$fwd_rc - # })))) - # per barcode write out a fastq.gz and optionally a summary table - fwd_reads_char <- as.character(ShortRead::sread(fwd_reads)) # could gsub out "N"s + fwd_reads_char <- as.character(ShortRead::sread(fwd_reads)) rev_reads_char <- as.character(ShortRead::sread(rev_reads)) test <- parallel::mclapply(seq_len(nrow(barcodes)), function(i) { @@ -164,11 +100,11 @@ demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV" # get barcodes fwd_bar <- barcodes[i, fwd] rev_bar <- barcodes[i, rev] - + fwd_reads_w_fwd_bars <- which(grepl(paste0("^", fwd_bar), fwd_reads_char)) rev_reads_w_rev_bars <- which(grepl(paste0("^", rev_bar), rev_reads_char)) fwd_index_both <- intersect(fwd_reads_w_fwd_bars, rev_reads_w_rev_bars) # logical AND - + fwd_reads_w_rev_bars <- which(grepl(paste0("^", rev_bar), fwd_reads_char)) rev_reads_w_fwd_bars <- which(grepl(paste0("^", fwd_bar), rev_reads_char)) rev_index_both <- intersect(fwd_reads_w_rev_bars, rev_reads_w_fwd_bars) # logical AND @@ -225,9 +161,13 @@ demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV" #' @keywords internal #' @noRd -.demultiplex_general <- function(reads, barcodes, fwd = "FWD", rev = "REV", - name = c("Name", "Sample_name"), - writeOut = TRUE, stat = FALSE, cores = 1) { +.demultiplex_general <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV", + name = c("Name", "Sample_name"), writeOut = TRUE, + stat = FALSE, cores = 1) { + # placeholder until I change how general works + return(.demultiplex_for_dada(fwd_reads, rev_reads, barcodes, fwd, rev, + name, writeOut, + stat, cores)) # format barcodes to uppercase barcodes[[fwd]] <- toupper(barcodes[[fwd]]) barcodes[[rev]] <- toupper(barcodes[[rev]]) @@ -293,4 +233,3 @@ demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV" return(stats) } } - From e839de49e44ab99bc8bdc11da7fa31cae84eb9f0 Mon Sep 17 00:00:00 2001 From: Josh Sumner <51797700+joshqsumner@users.noreply.github.com> Date: Tue, 26 Nov 2024 08:15:13 -0600 Subject: [PATCH 3/5] only using dada mode --- R/demultiplex.R | 8 ++----- man/demultiplex.Rd | 52 +++++++--------------------------------------- 2 files changed, 10 insertions(+), 50 deletions(-) diff --git a/R/demultiplex.R b/R/demultiplex.R index 4f2ed62..ff94bdd 100644 --- a/R/demultiplex.R +++ b/R/demultiplex.R @@ -11,12 +11,8 @@ #' @param rev The column name of the barcodes dataframe representing reverse barcodes. #' Defaults to "REV". #' @param name One or more column names from the barcodes dataframe to be used in naming samples. -#' @param mode One of "dada" or "general". This controls how files are written out. -#' If mode is "dada" then a fastq file will be written out for -#' each row of the barcodes dataframe and the files will be named using the name argument. -#' If mode is "general" then each input fastq file will yield one -#' output fastq file with sample names in that file corresponding to the name argument. -#' Defaults to "dada". +#' @param mode Currently only "dada" is supported. This exists for tentative plans to have several +#' output modes. #' @param cores Optionally the number of cores to run in parallel. This defaults to 1 if the "mc.cores" #' option is not set. #' @param writeOut Logical, should fastq/stats files be written out. Defaults to TRUE. diff --git a/man/demultiplex.Rd b/man/demultiplex.Rd index 46e467e..e665b8e 100644 --- a/man/demultiplex.Rd +++ b/man/demultiplex.Rd @@ -5,7 +5,8 @@ \title{Function to demultiplex sequences either to single files or to multiple files for use with dada2.} \usage{ demultiplex( - reads, + fwd_reads, + rev_reads, barcodes, fwd = "FWD", rev = "REV", @@ -17,7 +18,10 @@ demultiplex( ) } \arguments{ -\item{reads}{A ShortReadQ object or a file path to a fastq file that will be read in with +\item{fwd_reads}{A ShortReadQ object or a file path to a fastq file that will be read in with +\code{ShortRead::readFastq}.} + +\item{rev_reads}{A ShortReadQ object or a file path to a fastq file that will be read in with \code{ShortRead::readFastq}.} \item{barcodes}{A dataframe of barcodes and metadata for naming or a file path to such a flat file @@ -31,12 +35,8 @@ Defaults to "REV".} \item{name}{One or more column names from the barcodes dataframe to be used in naming samples.} -\item{mode}{One of "dada" or "general". This controls how files are written out. -If mode is "dada" then a fastq file will be written out for -each row of the barcodes dataframe and the files will be named using the name argument. -If mode is "general" then each input fastq file will yield one -output fastq file with sample names in that file corresponding to the name argument. -Defaults to "dada".} +\item{mode}{Currently only "dada" is supported. This exists for tentative plans to have several +output modes.} \item{cores}{Optionally the number of cores to run in parallel. This defaults to 1 if the "mc.cores" option is not set.} @@ -53,42 +53,6 @@ If both are TRUE then summary stats are returned as a dataframe. } \description{ Function to demultiplex sequences either to single files or to multiple files for use with dada2. -} -\examples{ -# these examples are not run because they require specific fastq files. -\dontrun{ -library(ShortRead) -library(Biostrings) -library(parallel) -setwd("~/scripts/SINC/sincUtils/syncomBuilder/nastya96WellEx") - -barcodes <- read.delim("barcode_tab.tsv") -name <- c("Name", "Well") -reads <- ShortRead::readFastq("Plate11_R1_001.fastq.gz") - -demultiplex(reads, barcodes[1:10, ], - fwd = "FWD", rev = "REV", - name = name, mode = "dada", cores = 10, writeOut = "tests", stat = FALSE -) - -path <- paste0( - "/run/user/1000/gvfs/smb-share:server=nas01,share=research/bart_lab/", - "Previous_people/Mingsheng_Qi/research_dr/SINC/limiting_dilution/20210314/p2p11_rawseq" -) -barcodes <- read.csv(paste0(path, "/barcodetable1.csv")) -barcodes$exp <- "exp" -reads <- ShortRead::readFastq(paste0(path, "/210314S05P10_merge.fq")) - -x <- demultiplex(reads, barcodes, - fwd = "FWD_primer", rev = "REV_primer", name = c("exp"), mode = "general", - writeOut = TRUE, stat = TRUE, cores = 10 -) -x -# benchmarking against: -# "plate","totalReads","groupedReads","ratio" -# "P10",234509,154507,0.6589 -} - } \keyword{dada2} \keyword{demultiplex,} From 9ff0a63a2a82dfb9bf123a9667724c65c6d3279a Mon Sep 17 00:00:00 2001 From: Josh Sumner <51797700+joshqsumner@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:31:32 -0600 Subject: [PATCH 4/5] moving shortread --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f135a6f..173bee1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,12 +33,12 @@ Imports: ComplexUpset, plotly, Hmisc, - compositions + compositions, + ShortRead Suggests: knitr, brms, NetCoMi, - ShortRead, Biostrings, seqinr, ape, From 06df97f70c1318d4878a52565925016e3b201fcf Mon Sep 17 00:00:00 2001 From: Josh Sumner <51797700+joshqsumner@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:31:39 -0600 Subject: [PATCH 5/5] linting --- R/demultiplex.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/demultiplex.R b/R/demultiplex.R index ff94bdd..ae8ec91 100644 --- a/R/demultiplex.R +++ b/R/demultiplex.R @@ -175,8 +175,8 @@ demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV" fwd_bar <- barcodes[i, fwd] rev_bar <- barcodes[i, rev] # filter for barcodes at beginning of sequence - rowReads <- reads[grepl(paste0("^", fwd_bar), ShortRead::sread(reads))] - columnReads <- reads[grepl(paste0("^", rev_bar), ShortRead::sread(reads))] + rowReads <- fwd_reads[grepl(paste0("^", fwd_bar), ShortRead::sread(fwd_reads))] + columnReads <- fwd_reads[grepl(paste0("^", rev_bar), ShortRead::sread(fwd_reads))] # make reverse complements of fwd and reverse barcodes fwd_bar_reverse_complement <- as.character( Biostrings::reverseComplement( @@ -221,7 +221,7 @@ demultiplex <- function(fwd_reads, rev_reads, barcodes, fwd = "FWD", rev = "REV" if (stat && writeOut) { stats <- data.frame( name = name, - totalReads = length(reads), + totalReads = length(fwd_reads), groupedReads = ShortRead::countFastq(paste0(name, ".fastq.gz"))[1, 1] ) stats$ratio <- signif(stats$groupedReads / stats$totalReads, digits = 4)