Merge pull request #48 from christopher-mohr/add_params

Add parameter to specify id column for heatmap
nf-core · Aug 23, 2023 · 390fe60 · 390fe60
2 parents 84ff170 + 2f90252
commit 390fe60
Show file tree

Hide file tree

Showing 6 changed files with 33 additions and 22 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,14 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
+- [#48](https://github.com/nf-core/nanostring/pull/48) - Allow users to specify id column for heatmap [#39](https://github.com/nf-core/nanostring/issues/39)
 - [#46](https://github.com/nf-core/nanostring/pull/46) - Update to nf-core template `2.9`
 - [#42](https://github.com/nf-core/nanostring/pull/42) - Allow users to specify normalization method: `GEO` (default) or `GLM`
 
 ### `Fixed`
 
 - [#46](https://github.com/nf-core/nanostring/pull/46) - Publish `NACHO` QC reports [#44](https://github.com/nf-core/nanostring/issues/44)
 - [#47](https://github.com/nf-core/nanostring/pull/47) - Update `NACHO` R package including bug fix [#45](https://github.com/nf-core/nanostring/issues/45)
-- [#48](https://github.com/nf-core/nanostring/pull/48) - Set correct `conda` environment for `COMPUTE_GENE_SCORES` process
+- [#47](https://github.com/nf-core/nanostring/pull/47) - Set correct `conda` environment for `COMPUTE_GENE_SCORES` process
 
 ### `Dependencies`
 

diff --git a/bin/compute_gene_heatmap.R b/bin/compute_gene_heatmap.R
@@ -11,31 +11,26 @@ library(tidylog)
 ###Command line argument parsing###
 args = commandArgs(trailingOnly=TRUE)
 if (length(args) < 1) {
-    stop("Usage: compute_gene_heatmap.R <annotated_counts.tsv> or compute_gene_heatmap.R <annotated_counts.tsv> <genes.yaml>", call.=FALSE)
+    stop("Usage: compute_gene_heatmap.R <annotated_counts.tsv> <sample_id_col> or compute_gene_heatmap.R <annotated_counts.tsv> <genes.yaml> <sample_id_col>", call.=FALSE)
 }
 input_counts <- args[1]
+id_col       <- tail(args, 1)
 
 #Read annotated counts
 # HEADER is always RCC_FILE + GENES + SAMPLE_ID and additional metadata such as GROUP TREATMENT OTHER_METADATA
 counts <- read.table(input_counts, sep="\t", check.names = FALSE, header=TRUE, stringsAsFactors = FALSE)
 
-if (length(args) == 2) {
+if (length(args) == 3) {
     input_genes <- args[2]
     genes <- read_yaml(input_genes)
 } else {
-    gene_cols <- counts %>% dplyr::select(- any_of(c("RCC_FILE", "SAMPLE_ID", "TIME", "TREATMENT", "OTHER_METADATA")))
+    gene_cols <- counts %>% dplyr::select(- any_of(c(unique(c("SAMPLE_ID", id_col)), "RCC_FILE", "TIME", "TREATMENT", "OTHER_METADATA")))
     genes <- colnames(gene_cols)
 }
 
 #Select counts of interest
 counts_selected <- counts %>% dplyr::select(all_of(genes))
 
-#Add proper Rownames
-rownames(counts_selected) <- counts$RCC_FILE_NAME
-
-#sort dataframe by rownames to make it easier comparable across heatmaps
-counts_selected[order(row.names(counts_selected)), ]
-
 #log2+1
 counts_selected <- log2(counts_selected + 1)
 
@@ -48,16 +43,28 @@ max_value <- max(colMax(counts_selected))
 min_value <- min(colMin(counts_selected))
 
 #Save as PDF
-
 prefix <- ""
 if (grepl("wo_HKnorm",input_counts)) {
     prefix <- "wo_HKnorm_"
 }
 
 agg_png(file = paste0(prefix, "gene_heatmap_mqc.png"), width = 1200, height = 2000, unit = "px")
 
-Heatmap(counts_selected, name = "Gene-Count Heatmap", column_title = "Gene (log2 +1)",
-        row_title_rot = 90, row_title = "SampleID",row_dend_reorder = FALSE, show_row_dend = FALSE, row_names_side = "left",
-        show_column_dend = FALSE, col = colorRamp2(c(min_value, max_value), c("#f7f7f7", "#67a9cf")))
+#Add proper row names
+counts_matrix <- as.matrix(counts_selected)
+row.names(counts_matrix) <- counts[[id_col]]
+
+Heatmap(counts_matrix,
+        name = "Gene-Count Heatmap",
+        column_title = "Gene (log2 +1)",
+        row_order = order(row.names(counts_matrix)),
+        row_title_rot = 90,
+        row_title = "SampleID",
+        row_dend_reorder = FALSE,
+        show_row_dend = FALSE,
+        row_names_side = "left",
+        show_column_dend = FALSE,
+        col = colorRamp2(c(min_value, max_value), c("#f7f7f7", "#67a9cf"))
+    )
 
 dev.off()
diff --git a/docs/usage.md b/docs/usage.md
@@ -105,6 +105,10 @@ The pipeline will generate one heatmap each, for the Housekeeping-normalized and
 
 > ⚠️ If you want to use other metadata in your samplesheet than the one shown in the section [Full samplesheet](#full-samplesheet), please make sure to specify the `yml` file with all endogenous genes or a subset of it.
 
+By default, the `SAMPLE_ID` column is used to label the rows of the generated heatmap, so its values are expected to be unique. If they are not unique, or if you want to use different row names for the heatmap, you can specify another column from the samplesheet with the parameter `--heatmap_id_column`.
+
+You can also skip the heatmap generation step entirely by specifying the parameter `--skip_heatmap`.
+
 ### Normalization
 
 The normalization can be adjusted with the parameter `--normalization_method` and choosing either `GEO` or `GLM` as the method for normalization. The default is `GEO`. Future additions will incorporate possibilities to adjust further normalization parameters.

diff --git a/modules/local/create_gene_heatmap.nf b/modules/local/create_gene_heatmap.nf
@@ -22,7 +22,7 @@ process CREATE_GENE_HEATMAP {
     def gene_filter = params.heatmap_genes_to_filter ?: ""
 
     """
-    compute_gene_heatmap.R $annotated_counts $gene_filter
+    compute_gene_heatmap.R $annotated_counts $gene_filter $params.heatmap_id_column
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/nextflow.config b/nextflow.config
@@ -17,6 +17,7 @@ params {
 
     // Pipeline options
     heatmap_genes_to_filter    = null
+    heatmap_id_column          = "SAMPLE_ID"
 
     //Normalization options
     normalization_method       = "GEO"

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -49,6 +49,11 @@
             "default": "",
             "fa_icon": "fas fa-exchange-alt",
             "properties": {
+                "heatmap_id_column": {
+                    "type": "string",
+                    "default": "SAMPLE_ID",
+                    "description": "The column used for heatmap generation, specifying the rows. The values in this column have to be unique."
+                },
                 "heatmap_genes_to_filter": {
                     "type": "string",
                     "description": "Path to yml file (list, one item per line) to specify which genes should be used for the gene-count heatmap."
@@ -227,14 +232,12 @@
                     "type": "boolean",
                     "description": "Display help text.",
                     "fa_icon": "fas fa-question-circle",
-                    "default": false,
                     "hidden": true
                 },
                 "version": {
                     "type": "boolean",
                     "description": "Display version and exit.",
                     "fa_icon": "fas fa-question-circle",
-                    "default": false,
                     "hidden": true
                 },
                 "publish_dir_mode": {
@@ -258,7 +261,6 @@
                     "type": "boolean",
                     "description": "Send plain-text email instead of HTML.",
                     "fa_icon": "fas fa-remove-format",
-                    "default": false,
                     "hidden": true
                 },
                 "max_multiqc_email_size": {
@@ -273,7 +275,6 @@
                     "type": "boolean",
                     "description": "Do not use coloured log outputs.",
                     "fa_icon": "fas fa-palette",
-                    "default": false,
                     "hidden": true
                 },
                 "hook_url": {
@@ -312,23 +313,20 @@
                     "type": "boolean",
                     "fa_icon": "far fa-eye-slash",
                     "description": "Show all params when using `--help`",
-                    "default": false,
                     "hidden": true,
                     "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
                 },
                 "validationFailUnrecognisedParams": {
                     "type": "boolean",
                     "fa_icon": "far fa-check-circle",
                     "description": "Validation of parameters fails when an unrecognised parameter is found.",
-                    "default": false,
                     "hidden": true,
                     "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
                 },
                 "validationLenientMode": {
                     "type": "boolean",
                     "fa_icon": "far fa-check-circle",
                     "description": "Validation of parameters in lenient mode.",
-                    "default": false,
                     "hidden": true,
                     "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
                 },