From 8aa2677986a1570178f2e39aa22f43695a820210 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Tue, 25 Dec 2018 07:35:10 +0800 Subject: [PATCH 1/6] fix read_tsv skip skip 1 more line --- R/qc_read.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/qc_read.R b/R/qc_read.R index 7557067..a5a9fef 100644 --- a/R/qc_read.R +++ b/R/qc_read.R @@ -100,9 +100,12 @@ qc_read <- function(file, modules = "all", verbose = TRUE){ res <- lapply(modules, function(module, all.data){ index <- grep(module, all.data, ignore.case = TRUE) - skip <- ifelse(module == "Sequence Duplication Levels", 2, 1) - if(length(index) >0) readr::read_tsv(all.data[index[1]], skip = skip) - else tibble::tibble() + skip <- ifelse(module == "Sequence Duplication Levels", 3, 2) + if(length(index) >0) { + readr::read_tsv(all.data[index[1]], skip = skip) + } else { + tibble::tibble() + } }, all.data ) @@ -110,7 +113,7 @@ qc_read <- function(file, modules = "all", verbose = TRUE){ if("Sequence Duplication Levels" %in% modules){ index <- grep("Sequence Duplication Levels", all.data, ignore.case = TRUE) if(length(index) >0) - res$total_deduplicated_percentage <- readr::read_tsv(all.data[index[1]], skip = 1, n_max = 0) %>% + res$total_deduplicated_percentage <- readr::read_tsv(all.data[index[1]], skip = 2, n_max = 0) %>% colnames(.) %>% .[2] %>% as.numeric() %>% From 5a10faa09175c857a1ce2203ff6b142d9214328f Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Mon, 14 Jan 2019 22:11:01 +0800 Subject: [PATCH 2/6] fix length distribution convert data.frame Length to factor make bar plot --- R/qc_plot.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/qc_plot.R b/R/qc_plot.R index cac3d38..63cfd36 100644 --- a/R/qc_plot.R +++ b/R/qc_plot.R @@ -194,9 +194,13 @@ print.qctable <- function(x, ...){ d <- qc$sequence_length_distribution if(nrow(d) == 0) return(NULL) + + # convert Length to factor + d$Length <- factor(d$Length, levels = d$Length) ggplot(d, aes_string(x = "Length", y = "Count"))+ - geom_line() + + # geom_line() + + geom_bar(stat = "identity", color = "blue") + labs(title = "Sequence length distribution", x = "Sequence Length (pb)", y = "Count", subtitle = "Distribution of sequence lengths over all sequences", From d7a9c90fb210f2f45e4b2ec974d773c722254c17 Mon Sep 17 00:00:00 2001 From: bakerwm Date: Mon, 14 Jan 2019 23:46:09 +0800 Subject: [PATCH 3/6] change plot length distribution --- NAMESPACE | 1 + R/qc_plot.R | 88 ++++++++++++++++++++++++++--------------------------- 2 files changed, 45 insertions(+), 44 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 278e0d3..e97410f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ importFrom(ggplot2,element_text) importFrom(ggplot2,expand_limits) importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_bar) importFrom(ggplot2,geom_rect) importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) diff --git a/R/qc_plot.R b/R/qc_plot.R index 63cfd36..491cdb7 100644 --- a/R/qc_plot.R +++ b/R/qc_plot.R @@ -3,6 +3,7 @@ #' @importFrom ggplot2 aes #' @importFrom ggplot2 aes_string #' @importFrom ggplot2 geom_line +#' @importFrom ggplot2 geom_bar #' @importFrom ggplot2 theme_minimal #' @importFrom ggplot2 coord_cartesian #' @importFrom ggplot2 labs @@ -61,18 +62,18 @@ NULL #' #' @export qc_plot <- function(qc, modules = "all"){ - + if(inherits(qc, "character")) qc <- qc_read(qc) if(!inherits(qc, "qc_read")) stop("data should be an object of class qc_read") - + . <- NULL modules <- .valid_fastqc_modules(modules) %>% tolower() %>% gsub(" ", "_", .) - - res <- lapply(modules, + + res <- lapply(modules, function(module, qc){ plot.func <- .plot_funct(module) status <- .get_status(qc, gsub("_", " ", module)) @@ -80,7 +81,7 @@ qc_plot <- function(qc, modules = "all"){ }, qc ) - + names(res) <- modules if(length(res) == 1) res[[1]] else res @@ -99,7 +100,7 @@ print.qctable <- function(x, ...){ # Extrcat the plotting function according to the module .plot_funct <- function(module){ - + switch(module, per_sequence_gc_content = .plot_gc_content, per_base_sequence_quality = .plot_base_quality, @@ -114,7 +115,7 @@ print.qctable <- function(x, ...){ adapter_content = .plot_adapter_content, kmer_content = .plot_kmer_content, function(x){NULL} -) + ) } @@ -141,7 +142,7 @@ print.qctable <- function(x, ...){ .names <- names(qc) if(!("per_sequence_gc_content" %in% .names)) return(NULL) - + d <- qc$per_sequence_gc_content if(nrow(d) == 0) return(NULL) colnames(d) <- make.names(colnames(d)) @@ -158,22 +159,22 @@ print.qctable <- function(x, ...){ .plot_N_content <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ if(!("per_base_n_content" %in% names(qc))) return(NULL) - + . <- NULL - + d <- qc$per_base_n_content if(nrow(d) == 0) return(NULL) colnames(d) <- make.names(colnames(d)) d$Base <- factor(d$Base, levels = d$Base) - + # Select some breaks nlev <- nlevels(d$Base) breaks <- scales::extended_breaks()(1:nlev)[-1] %>% # index c(1, ., nlev) %>% # Add the minimum & the max d$Base[.] %>% # Values as.vector() - - + + ggplot(d, aes_string(x = "Base", y = "N.Count", group = 1))+ geom_line() + scale_x_discrete(breaks = breaks)+ @@ -191,17 +192,16 @@ print.qctable <- function(x, ...){ .plot_seq_length_distribution <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ if(!("sequence_length_distribution" %in% names(qc))) return(NULL) - + d <- qc$sequence_length_distribution if(nrow(d) == 0) return(NULL) # convert Length to factor d$Length <- factor(d$Length, levels = d$Length) - + ggplot(d, aes_string(x = "Length", y = "Count"))+ - # geom_line() + - geom_bar(stat = "identity", color = "blue") + - labs(title = "Sequence length distribution", x = "Sequence Length (pb)", + geom_bar(stat = "identity", fill = "blue") + + labs(title = "Sequence length distribution", x = "Sequence Length (bp)", y = "Count", subtitle = "Distribution of sequence lengths over all sequences", caption = paste0("Status: ", status))+ @@ -211,12 +211,12 @@ print.qctable <- function(x, ...){ # Per base sequence quality .plot_base_quality <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ - + .names <- names(qc) if(!("per_base_sequence_quality" %in% .names)) return(NULL) . <- NULL - + d <- qc$per_base_sequence_quality if(nrow(d) == 0) return(NULL) @@ -228,8 +228,8 @@ print.qctable <- function(x, ...){ c(1, ., nlev) %>% # Add the minimum & the max d$Base[.] %>% # Values as.vector() - - + + ggplot()+ geom_line(data = d, aes_string(x = "Base", y = "Median", group = 1)) + expand_limits(x = 0, y = 0)+ @@ -253,7 +253,7 @@ print.qctable <- function(x, ...){ .names <- names(qc) if(!("per_sequence_quality_scores" %in% .names)) return(NULL) - + d <- qc$per_sequence_quality_scores if(nrow(d) == 0) return(NULL) @@ -272,9 +272,9 @@ print.qctable <- function(x, ...){ .names <- names(qc) if(!("per_base_sequence_content" %in% .names)) return(NULL) - + . <- NULL - + Base <- NULL d <- qc$per_base_sequence_content if(nrow(d) == 0) return(NULL) @@ -283,15 +283,15 @@ print.qctable <- function(x, ...){ d <- d %>% tidyr::gather(key = "base_name", value = "Count", -Base) - + # Select some breaks nlev <- nlevels(d$Base) breaks <- scales::extended_breaks()(1:nlev)[-1] %>% # index c(1, ., nlev) %>% # Add the minimum & the max d$Base[.] %>% # Values as.vector() - - + + ggplot(d, aes_string(x = "Base", y = "Count", group = "base_name", color = "base_name"))+ geom_line() + scale_x_discrete(breaks = breaks)+ @@ -312,7 +312,7 @@ print.qctable <- function(x, ...){ .names <- names(qc) if(!("per_base_sequence_content" %in% .names)) return(NULL) - + . <- NULL Duplication.Level <- NULL d <- qc$sequence_duplication_levels @@ -321,15 +321,15 @@ print.qctable <- function(x, ...){ d$Duplication.Level <- factor(d$Duplication.Level, levels = d$Duplication.Level) d <- d %>% tidyr::gather(key = "Dup", value = "pct", -Duplication.Level) - + # Select some breaks nlev <- nlevels(d$Duplication.Level) breaks <- scales::extended_breaks()(1:nlev)[-1] %>% # index c(1, ., nlev) %>% # Add the minimum & the max d$Duplication.Level[.] %>% # Values as.vector() - - + + ggplot(d, aes_string(x = "Duplication.Level", y = "pct", group = "Dup", color = "Dup"))+ geom_line() + # scale_x_discrete(breaks = breaks)+ @@ -349,9 +349,9 @@ print.qctable <- function(x, ...){ .plot_overrepresented_sequences <- function(qc, status = NULL, ...){ if(!("overrepresented_sequences" %in% names(qc))) return(NULL) - + d <- qc$overrepresented_sequences - + if(nrow(d) == 0 ) ggplot(d)+ labs(title = "Overrepresented sequences")+ @@ -359,22 +359,22 @@ print.qctable <- function(x, ...){ size = 5, color = "steelblue")+ ggplot2::theme_void()+ theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) - + else { d <- qc$overrepresented_sequences # d <- structure(d, class = c("qctable", class(d))) d } - + } # Adapter Content .plot_adapter_content <- function(qc, status = NULL, ...){ if(!("adapter_content" %in% names(qc))) return(NULL) - + Position <- NULL - + d <- qc$adapter_content colnames(d) <- make.names(colnames(d)) d <- d %>% @@ -395,9 +395,9 @@ print.qctable <- function(x, ...){ .plot_kmer_content <- function(qc, status = NULL, ...){ if(!("kmer_content" %in% names(qc))) return(NULL) - + d <- qc$kmer_content - + if(nrow(d) == 0 ) ggplot(d)+ labs(title = "Kmer content")+ @@ -405,25 +405,25 @@ print.qctable <- function(x, ...){ size = 5, color = "steelblue")+ ggplot2::theme_void()+ theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) - + else { # d <- structure(d, class = c("qctable", class(d))) qc$kmer_content } - + } .plot_tile_seq_quality <- function(qc, status = NULL, ...){ if(!("per_tile_sequence_quality" %in% names(qc))) return(NULL) - + d <- qc$per_tile_sequence_quality if(nrow(d) == 0) return(NULL) d$Tile <- as.character(d$Tile) d$Base <- factor(d$Base, levels = d$Base) - + ggplot(d, aes_string(x = "Base", y = "Tile", fill = "Mean"))+ ggplot2::geom_tile() + labs(title = "Per tile sequence quality", From d996b5b46260ad4f78372b55a9754b92b959bd0d Mon Sep 17 00:00:00 2001 From: bakerwm Date: Tue, 15 Jan 2019 10:03:28 +0800 Subject: [PATCH 4/6] update qc plots --- NAMESPACE | 1 + R/qc_plot.R | 166 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 108 insertions(+), 59 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e97410f..daa0c60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(qc_stats) export(qc_unzip) export(qc_warns) import(dplyr) +importFrom(tibble,rowid_to_column) importFrom(ggplot2,aes) importFrom(ggplot2,aes_string) importFrom(ggplot2,coord_cartesian) diff --git a/R/qc_plot.R b/R/qc_plot.R index 491cdb7..7bee011 100644 --- a/R/qc_plot.R +++ b/R/qc_plot.R @@ -1,4 +1,5 @@ #' @include utilities.R +#' @importFrom tibble rowid_to_column #' @importFrom ggplot2 ggplot #' @importFrom ggplot2 aes #' @importFrom ggplot2 aes_string @@ -128,6 +129,7 @@ print.qctable <- function(x, ...){ d } + # Plot summary .plot_summary <- function(qc, ggtheme = theme_minimal(), ...){ if(!("summary" %in% names(qc))) @@ -137,6 +139,7 @@ print.qctable <- function(x, ...){ d } + # Per sequence GC content .plot_gc_content <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ .names <- names(qc) @@ -147,11 +150,15 @@ print.qctable <- function(x, ...){ if(nrow(d) == 0) return(NULL) colnames(d) <- make.names(colnames(d)) ggplot(d, aes_string(x = "GC.Content", y = "Count"))+ - geom_line() + + geom_line(color = "red", size = 1) + labs(title = "Per sequence GC content", x = "Mean GC Content (%)", caption = paste0("Status: ", status))+ theme_minimal()+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } @@ -176,7 +183,7 @@ print.qctable <- function(x, ...){ ggplot(d, aes_string(x = "Base", y = "N.Count", group = 1))+ - geom_line() + + geom_line(color = "red3", size = 1) + scale_x_discrete(breaks = breaks)+ coord_cartesian(ylim = c(0, 100))+ labs(title = "Per base N content", x = "Position in read (bp)", @@ -196,19 +203,31 @@ print.qctable <- function(x, ...){ d <- qc$sequence_length_distribution if(nrow(d) == 0) return(NULL) - # convert Length to factor - d$Length <- factor(d$Length, levels = d$Length) + # add x-axis + d <- rowid_to_column(d) %>% as.data.frame() + + # Select breaks + breaks <- seq.int(1, nrow(d), length.out = 6) + labels <- d$Base[breaks] - ggplot(d, aes_string(x = "Length", y = "Count"))+ - geom_bar(stat = "identity", fill = "blue") + + ggplot(d, aes(x = rowid, y = Count)) + + geom_line(color = "orange", size = 1) + + geom_point(color = "grey40", size = .5) + + scale_x_continuous(breaks = breaks, + labels = labels) + labs(title = "Sequence length distribution", x = "Sequence Length (bp)", y = "Count", subtitle = "Distribution of sequence lengths over all sequences", - caption = paste0("Status: ", status))+ - theme_minimal()+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + caption = paste0("Status: ", status)) + + theme_minimal() + + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } + # Per base sequence quality .plot_base_quality <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ @@ -221,52 +240,60 @@ print.qctable <- function(x, ...){ if(nrow(d) == 0) return(NULL) colnames(d) <- make.names(colnames(d)) - d$Base <- factor(d$Base, levels = d$Base) - # Select some breaks - nlev <- nlevels(d$Base) - breaks <- scales::extended_breaks()(1:nlev)[-1] %>% # index - c(1, ., nlev) %>% # Add the minimum & the max - d$Base[.] %>% # Values - as.vector() + # add x-axis + d <- rowid_to_column(d) %>% as.data.frame() + + # Select breaks + breaks <- seq.int(1, nrow(d), length.out = 6) + labels <- d$Base[breaks] - ggplot()+ - geom_line(data = d, aes_string(x = "Base", y = "Median", group = 1)) + - expand_limits(x = 0, y = 0)+ + ggplot() + + geom_line(data = d, aes(x = rowid, y = Median, group = 1), color = "red2", size = 1) + + expand_limits(x = 0, y = 0) + + scale_x_continuous(breaks = breaks, labels = labels) + geom_rect(aes(xmin = 0, ymin = 0, ymax = 20, xmax = Inf), - fill = "red", alpha = 0.2)+ + fill = "red", alpha = 0.2) + geom_rect(aes(xmin = 0, ymin = 20, ymax = 28, xmax = Inf), - fill = "yellow", alpha = 0.2)+ + fill = "yellow", alpha = 0.2) + geom_rect(aes(xmin = 0, ymin = 28, ymax = Inf, xmax = Inf), - fill = "#00AFBB", alpha = 0.2)+ - scale_x_discrete(breaks = breaks)+ - labs(title = "Per base sequence quality", x = "Position in read (pb)", + fill = "#00AFBB", alpha = 0.2) + + labs(title = "Per base sequence quality", x = "Position in read (bp)", y = "Median quality scores", subtitle = "Red: low quality zone", caption = paste0("Status: ", status))+ - theme_minimal()+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + theme_minimal() + + theme(plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } + # Per sequence quality scores .plot_sequence_quality <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ .names <- names(qc) if(!("per_sequence_quality_scores" %in% .names)) return(NULL) - d <- qc$per_sequence_quality_scores + d <- as.data.frame(qc$per_sequence_quality_scores) if(nrow(d) == 0) return(NULL) ggplot(d, aes_string(x = "Quality", y = "Count"))+ - geom_line() + + geom_line(color = "red3", size = 1) + labs(title = "Per sequence quality scores", subtitle = "Quality score distribution over all sequences", x = "Mean Sequence Quality (Phred Score)", caption = paste0("Status: ", status))+ theme_minimal()+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } + # Per base sequence content .plot_sequence_content <- function(qc, ggtheme = theme_minimal(), status = NULL, ...){ .names <- names(qc) @@ -276,34 +303,36 @@ print.qctable <- function(x, ...){ . <- NULL Base <- NULL - d <- qc$per_base_sequence_content + d <- as.data.frame(qc$per_base_sequence_content) if(nrow(d) == 0) return(NULL) - d$Base <- factor(d$Base, levels = d$Base) - d <- d %>% - tidyr::gather(key = "base_name", value = "Count", -Base) - - - # Select some breaks - nlev <- nlevels(d$Base) - breaks <- scales::extended_breaks()(1:nlev)[-1] %>% # index - c(1, ., nlev) %>% # Add the minimum & the max - d$Base[.] %>% # Values - as.vector() + # Select breaks + breaks <- as.integer(seq.int(1, nrow(d), length.out = 6)) + labels <- d$Base[breaks] + # add x-axis + d <- rowid_to_column(d) %>% + as.data.frame() %>% + tidyr::gather("base", "count", G:C) - ggplot(d, aes_string(x = "Base", y = "Count", group = "base_name", color = "base_name"))+ - geom_line() + - scale_x_discrete(breaks = breaks)+ + ggplot(d, aes(x = rowid, y = count, group = base, color = base)) + + geom_line(size = .5) + + scale_x_continuous(breaks = breaks, + labels = labels) + labs(title = "Per base sequence content", subtitle = "Sequence content across all bases", caption = paste0("Status: ", status), - x = "Position in read (pb)", y = "Nucleotide frequency (%)", + x = "Position in read (bp)", y = "Nucleotide frequency (%)", color = "Nucleotide")+ coord_cartesian(ylim = c(0, 100))+ theme_minimal() + - theme(legend.position = c(0.5, 0.7), legend.direction = "horizontal")+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + theme(legend.position = c(0.5, 0.7), + legend.direction = "horizontal", + plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } @@ -340,11 +369,14 @@ print.qctable <- function(x, ...){ color = "")+ theme_minimal() + theme(legend.position = c(0.5, 0.7))+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } - # Overrepresented sequences .plot_overrepresented_sequences <- function(qc, status = NULL, ...){ if(!("overrepresented_sequences" %in% names(qc))) @@ -358,8 +390,11 @@ print.qctable <- function(x, ...){ ggplot2::annotate("text", x = 0.5, y = 0.5, label = "No overrepresented sequences", size = 5, color = "steelblue")+ ggplot2::theme_void()+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) - + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) else { d <- qc$overrepresented_sequences # d <- structure(d, class = c("qctable", class(d))) @@ -383,20 +418,25 @@ print.qctable <- function(x, ...){ geom_line() + labs(title = "Adapter content", caption = paste0("Status: ", status), - x = "Position in read (pb)", y = "% Adapter", + x = "Position in read (bp)", y = "% Adapter", color = "")+ theme_minimal() + coord_cartesian(ylim = c(0, 100))+ theme(legend.position = c(0.5, 0.8))+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } + # Overrepresented sequences .plot_kmer_content <- function(qc, status = NULL, ...){ if(!("kmer_content" %in% names(qc))) return(NULL) - d <- qc$kmer_content + d <- qc$kmer_content if(nrow(d) == 0 ) ggplot(d)+ @@ -404,8 +444,11 @@ print.qctable <- function(x, ...){ ggplot2::annotate("text", x = 0.5, y = 0.5, label = "No overrepresented kmers", size = 5, color = "steelblue")+ ggplot2::theme_void()+ - theme(plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) - + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) else { # d <- structure(d, class = c("qctable", class(d))) qc$kmer_content @@ -429,8 +472,13 @@ print.qctable <- function(x, ...){ labs(title = "Per tile sequence quality", subtitle = "Quality per tile", caption = paste0("Status: ", status), - x = "Position in read (pb)")+ - theme_minimal() + x = "Position in read (bp)")+ + theme_minimal() + + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, + PASS = "#00AFBB", + WARN = "#E7B800", + FAIL = "#FC4E07"))) } From 14800adf53b7d27b339ff662e7b6a5df413a8026 Mon Sep 17 00:00:00 2001 From: bakerwm Date: Wed, 16 Jan 2019 08:28:08 +0800 Subject: [PATCH 5/6] change plots --- NAMESPACE | 2 ++ R/qc_plot.R | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index daa0c60..31b70bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,11 +24,13 @@ importFrom(ggplot2,element_text) importFrom(ggplot2,expand_limits) importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_bar) importFrom(ggplot2,geom_rect) importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) importFrom(ggplot2,scale_x_discrete) +importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,theme) importFrom(ggplot2,theme_minimal) importFrom(magrittr,"%>%") diff --git a/R/qc_plot.R b/R/qc_plot.R index 7bee011..663057d 100644 --- a/R/qc_plot.R +++ b/R/qc_plot.R @@ -4,9 +4,11 @@ #' @importFrom ggplot2 aes #' @importFrom ggplot2 aes_string #' @importFrom ggplot2 geom_line +#' @importFrom ggplot2 geom_point #' @importFrom ggplot2 geom_bar #' @importFrom ggplot2 theme_minimal #' @importFrom ggplot2 coord_cartesian +#' @importFrom ggplot2 scale_x_continuous #' @importFrom ggplot2 labs #' @importFrom ggplot2 theme #' @importFrom ggplot2 expand_limits @@ -150,7 +152,7 @@ print.qctable <- function(x, ...){ if(nrow(d) == 0) return(NULL) colnames(d) <- make.names(colnames(d)) ggplot(d, aes_string(x = "GC.Content", y = "Count"))+ - geom_line(color = "red", size = 1) + + geom_line(color = "red", size = 0.7) + labs(title = "Per sequence GC content", x = "Mean GC Content (%)", caption = paste0("Status: ", status))+ theme_minimal()+ @@ -183,7 +185,7 @@ print.qctable <- function(x, ...){ ggplot(d, aes_string(x = "Base", y = "N.Count", group = 1))+ - geom_line(color = "red3", size = 1) + + geom_line(color = "red3", size = .7) + scale_x_discrete(breaks = breaks)+ coord_cartesian(ylim = c(0, 100))+ labs(title = "Per base N content", x = "Position in read (bp)", @@ -207,12 +209,12 @@ print.qctable <- function(x, ...){ d <- rowid_to_column(d) %>% as.data.frame() # Select breaks - breaks <- seq.int(1, nrow(d), length.out = 6) - labels <- d$Base[breaks] + breaks <- as.integer(seq.int(1, nrow(d), length.out = 6)) + labels <- d$Length[breaks] ggplot(d, aes(x = rowid, y = Count)) + - geom_line(color = "orange", size = 1) + - geom_point(color = "grey40", size = .5) + + geom_line(color = "red3", size = .7) + + # geom_point(color = "blue", size = .5) + scale_x_continuous(breaks = breaks, labels = labels) + labs(title = "Sequence length distribution", x = "Sequence Length (bp)", @@ -263,7 +265,8 @@ print.qctable <- function(x, ...){ subtitle = "Red: low quality zone", caption = paste0("Status: ", status))+ theme_minimal() + - theme(plot.caption = element_text(color = switch(status, + theme(plot.title = element_text(hjust = .5), + plot.caption = element_text(color = switch(status, PASS = "#00AFBB", WARN = "#E7B800", FAIL = "#FC4E07"))) @@ -280,7 +283,7 @@ print.qctable <- function(x, ...){ if(nrow(d) == 0) return(NULL) ggplot(d, aes_string(x = "Quality", y = "Count"))+ - geom_line(color = "red3", size = 1) + + geom_line(color = "red3", size = 0.7) + labs(title = "Per sequence quality scores", subtitle = "Quality score distribution over all sequences", x = "Mean Sequence Quality (Phred Score)", @@ -363,7 +366,8 @@ print.qctable <- function(x, ...){ geom_line() + # scale_x_discrete(breaks = breaks)+ labs(title = "Sequence Duplication Levels", - subtitle = paste0("Percentage of distinct reads: ", qc$total_deduplicated_percentage, "%"), + subtitle = paste0("Percentage of distinct reads: ", + qc$total_deduplicated_percentage, "%"), x = "Sequence Duplication Level", y = "Percentage", caption = paste0("Status: ", status), color = "")+ @@ -387,7 +391,8 @@ print.qctable <- function(x, ...){ if(nrow(d) == 0 ) ggplot(d)+ labs(title = "Overrepresented sequences")+ - ggplot2::annotate("text", x = 0.5, y = 0.5, label = "No overrepresented sequences", + ggplot2::annotate("text", x = 0.5, y = 0.5, + label = "No overrepresented sequences", size = 5, color = "steelblue")+ ggplot2::theme_void()+ theme(plot.title = element_text(hjust = .5), From 564d25760dad9d14c6980d6606d6353fb6ce1683 Mon Sep 17 00:00:00 2001 From: bakerwm Date: Tue, 19 Feb 2019 09:16:32 +0800 Subject: [PATCH 6/6] fix len_distribution --- R/qc_plot.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/qc_plot.R b/R/qc_plot.R index 663057d..a10c29c 100644 --- a/R/qc_plot.R +++ b/R/qc_plot.R @@ -203,10 +203,18 @@ print.qctable <- function(x, ...){ return(NULL) d <- qc$sequence_length_distribution - if(nrow(d) == 0) return(NULL) + if(nrow(d) == 0) { + return(NULL) + } else if(nrow(d) == 1) { + d <- tibble::add_row(d, .before = 1, + Length = d$Length - 1, + Count = 0) %>% + tibble::add_row(Length = d$Length + 1, + Count = 0) + } # add x-axis - d <- rowid_to_column(d) %>% as.data.frame() + d <- tibble::rowid_to_column(d) %>% as.data.frame() # Select breaks breaks <- as.integer(seq.int(1, nrow(d), length.out = 6))