zavolanlab · deliaBlue · Jan 29, 2024 · Jan 24, 2024 · Jan 24, 2024 · Jan 24, 2024
diff --git a/README.md b/README.md
@@ -209,18 +209,23 @@ There are 4 files you must provide:
    resource][chrMap] provides such files for various organisms, and in the
    expected format.
 
+5. **OPTIONAL**: A **BED6** file with regions for which to produce
+   [ASCII-style alignment pileups][ascii-pileups]. If not provided, no pileups
+   will be generated. See [here][bed-format] for the expected format.
+
 > General note: If you want to process the genome resources before use (e.g.,
 > filtering), you can do that, but make sure the formats of any modified
 > resource files meet the formatting expectations outlined above!
 
+
 #### 3. Prepare a configuration file
 
 We recommend creating a copy of the
 [configuration file template](config/config_template.yaml):
 
 ```bash
 cp  config/config_template.yaml  path/to/config.yaml
-```
+```
 
 Open the new copy in your editor of choice and adjust the configuration
 parameters to your liking. The template explains what each of the
@@ -278,6 +283,13 @@ represents a sample library. Each read is counted towards all the annotated
 miRNA species it aligns to, with 1/n, where n is the number of genomic and/or
 transcriptomic loci that read aligns to.
 
+5. **OPTIONAL**. ASCII-style pileups of read alignments produced for individual
+libraries, combinations of libraries and/or all libraries of a given run. The
+exact number and nature of the outputs depend on the workflow
+inputs/parameters. See the
+[pileups section](pipeline_documentation.md#pileup-workflow) for a detailed
+description.
+
 To retain all intermediate files, include `--no-hooks` in the workflow call.
 
 ```bash
@@ -319,10 +331,11 @@ be aligned separately against the genome and transcriptome. For increased
 fidelity, two separate aligners, [Segemehl][segemehl] and our in-house tool 
 [Oligomap][oligomap], are used. All the resulting alignments are merged such 
 that only the best alignments of each read are kept (smallest edit distance).
-Finally, alignments are intersected with the user-provided, pre-processed
-miRNA annotation file using [BEDTools][bedtools]. Counts are tabulated 
-separately for reads consistent with either miRNA precursors, mature miRNA
-and/or isomiRs.
+Alignments are intersected with the user-provided, pre-processed miRNA
+annotation file using [BEDTools][bedtools]. Counts are tabulated separately for
+reads consistent with either miRNA precursors, mature miRNA and/or isomiRs.
+Finally, ASCII-style alignment pileups are optionally generated for
+user-defined regions of interest.
 
 > **NOTE:** For a detailed description of each rule, please refer to the
 > [workflow documentation](pipeline_documentation.md)
@@ -350,6 +363,8 @@ For questions or suggestions regarding the code, please use the [issue tracker][
 
 &copy; 2023 [Zavolab, Biozentrum, University of Basel][zavolab]
 
+[ascii-pileups]: <https://git.scicore.unibas.ch/zavolan_group/tools/ascii-alignment-pileup>
+[bed-format]: <https://gist.github.com/deliaBlue/19ad3740c95937378bd9281bd9d1bc72>
 [bedtools]: <https://github.com/arq5x/bedtools2>
 [chrMap]: <https://github.com/dpryan79/ChromosomeMappings>
 [conda]: <https://docs.conda.io/projects/conda/en/latest/index.html>

diff --git a/config/README.md b/config/README.md
@@ -100,6 +100,10 @@ There are 4 files you must provide:
    resource][chrMap] provides such files for various organisms, and in the
    expected format.
 
+5. **OPTIONAL**: A **BED6** file with regions for which to produce
+   [ASCII-style alignment pileups][ascii-pileups]. If not provided, no pileups
+   will be generated. See [here][bed-format] for the expected format.
+
 > General note: If you want to process the genome resources before use (e.g.,
 > filtering), you can do that, but make sure the formats of any modified
 > resource files meet the formatting expectations outlined above!
@@ -118,8 +122,10 @@ Open the new copy in your editor of choice and adjust the configuration
 parameters to your liking. The template explains what each of the parameters
 means and how you can meaningfully adjust them.
 
-
+[ascii-pileups]: <https://git.scicore.unibas.ch/zavolan_group/tools/ascii-alignment-pileup>
+[bed-format]: <https://gist.github.com/deliaBlue/19ad3740c95937378bd9281bd9d1bc72>
 [chrMap]: <https://github.com/dpryan79/ChromosomeMappings>
 [ensembl]: <https://ensembl.org/>
+[ensembl-bed]: <https://www.ensembl.org/info/website/upload/bed.html>
 [mamba]: <https://github.com/mamba-org/mamba>
 [mirbase]: <https://mirbase.org/>
diff --git a/config/config_schema.json b/config/config_schema.json
@@ -9,6 +9,11 @@
             "type": "string",
             "description": "Path to the samples table."
         },
+        "bed_file":{
+            "type": "string",
+            "default": "",
+            "description": "Path to the BED file with the genomic regions to generate pileups for."
+        },
         "genome_file":{
             "type": "string",
             "description": "Path to the reference genome file."
@@ -35,6 +40,11 @@
             "default": "results/intermediates",
             "description": "Path to the directory storing the intermediate files."
         },
+        "pileups_dir":{
+            "type": "string",
+            "default": "results/pileups",
+            "description": "Path to the directory storing the ASCII-style pileups."
+        },
         "local_log":{
             "type": "string",
             "default": "logs/local/",
@@ -101,7 +111,16 @@
                 "type": "string",
                 "enum": ["isomir", "mirna", "pri-mir"]
             },
-            "default": ["isomir", "mirna", "pri-mir"]
+            "default": ["isomir", "mirna", "pri-mir"],
+            "description": "miRNA species to be quantified."
+        },
+        "lib_dict":{
+            "type": "object",
+            "additionalProperties":{
+                "type": "array"
+            },
+            "default": {},
+            "description": "Dictionary of arbitrary condition names (keys) and library names to aggregate alignment pileups for (values; MUST correspond to names in samples table)."
         }
     }
 }
diff --git a/config/config_template.yaml b/config/config_template.yaml
@@ -15,6 +15,7 @@ samples: path/to/samples_table.tsv
 genome_file: path/to/gzipped/ensembl/genome.fa.gz
 gtf_file: path/to/gzipped/ensembl/gene_annotations.gtf.gz
 mirna_file: path/to/unzipped/mirbase/mirna_annotations.gff3
+bed_file: path/to/pileups/genomic_regions.bed
 
 # Tab-separated mappings table between UCSC (column 1)
 # and Ensembl (column 2) chromosome names 
@@ -32,6 +33,7 @@ map_chr_file: path/to/ucsc_ensembl_mappings.tsv
 #### DIRECTORIES ####
 
 output_dir: results/
+pileups_dir: results/pileups
 intermediates_dir: results/intermediates
 local_log: logs/local/
 cluster_log: logs/cluster/
@@ -63,4 +65,15 @@ nh: 100 # discard reads with more mappings than the indicated number
 # If 'isomir' and 'mirna' are both in the list, a single table with both types
 # is made.
 mir_list: ['isomir', 'mirna', 'pri-mir']
+
+#### ASCII-STYLE ALIGNMENT PILEUPS PARAMETERS ####
+
+# Dictionary mapping condition names (keys) to the lists of library names
+# (values) to aggregate when performing the pileups. Library names must match
+# the ones in the samples table `sample` column. The dictionary keys will be
+# used as the pileups' output directory names.
+# e.g. lib_dict: {"group_A": ["lib_1", "lib_3"], "group_B": ["lib_2"]}
+#
+# Leave as an empty dictionary if no pileups are desired.
+lib_dict: {}
 ...