From 537648d273f5d883c190a171326c776f76653d2c Mon Sep 17 00:00:00 2001
From: Arjun Arkal Rao
Date: Sun, 5 Mar 2017 15:22:16 -0800
Subject: [PATCH] Cleanup codebase (resolves #169)

resolves #169
resolves #163

Multiple docstrings were outdated and very few were sphinx-compatible.
Made the entire codebase PEP8 compliant (except for 100-char lines).
Renamed pipeline_launchpad to launch_protect.
---
 src/protect/addons.py                         |  28 ++-
 src/protect/alignment/common.py               |  19 +-
 src/protect/alignment/dna.py                  | 100 ++++----
 src/protect/alignment/rna.py                  |  61 +++--
 src/protect/binding_prediction/__init__.py    |   2 +-
 src/protect/binding_prediction/common.py      | 155 +++++-------
 src/protect/binding_prediction/mhci.py        |  19 +-
 src/protect/binding_prediction/mhcii.py       |  24 +-
 src/protect/common.py                         | 197 ++++++---------
 src/protect/expression_profiling/__init__.py  |   2 +-
 src/protect/expression_profiling/rsem.py      |  42 ++--
 src/protect/haplotyping/__init__.py           |   2 +-
 src/protect/haplotyping/phlat.py              |  76 +++---
 src/protect/mutation_annotation/__init__.py   |   2 +-
 src/protect/mutation_annotation/snpeff.py     |  35 ++-
 src/protect/mutation_calling/__init__.py      |   2 +-
 src/protect/mutation_calling/common.py        |  80 +++---
 src/protect/mutation_calling/fusion.py        |   9 +-
 src/protect/mutation_calling/indel.py         |  10 +-
 src/protect/mutation_calling/muse.py          | 128 +++++-----
 src/protect/mutation_calling/mutect.py        | 116 ++++-----
 src/protect/mutation_calling/radia.py         | 163 ++++++------
 src/protect/mutation_calling/somaticsniper.py | 140 +++++------
 src/protect/mutation_calling/strelka.py       | 130 +++++-----
 src/protect/mutation_translation.py           |  45 ++--
 src/protect/pipeline/ProTECT.py               | 237 +++++++++---------
 src/protect/qc/__init__.py                    |   2 +-
 src/protect/qc/rna.py                         |  24 +-
 src/protect/rankboost.py                      |  37 ++-
 src/protect/test/ci/test_protect.py           |   3 +-
 30 files changed, 915 insertions(+), 975 deletions(-)

diff --git a/src/protect/addons.py b/src/protect/addons.py
index df5a6430..e2a4dd9b 100644
--- a/src/protect/addons.py
+++ b/src/protect/addons.py
@@ -13,7 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
+
 from collections import Counter
+
 from protect.common import export_results, get_files_from_filestore, untargz
 from protect.haplotyping.phlat import parse_phlat_file
 
@@ -23,13 +25,14 @@ def run_mhc_gene_assessment(job, rsem_files, rna_haplotype, univ_options, mhc_ge
 
 def run_mhc_gene_assessment(job, rsem_files, rna_haplotype, univ_options, mhc_genes_options):
     """
-    This is a convenience function that runs assess_mhc_genes.
-    :param job job:
+    A wrapper for assess_mhc_genes.
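The `addChildJobFn(...).rv()` idiom these wrappers rely on is easiest to see in isolation. A minimal, self-contained sketch with a hypothetical child job (not pipeline code) follows:

    from toil.job import Job

    def child(job, x):
        # Stand-in for assess_mhc_genes: any function whose first argument
        # is the Toil job object can be wrapped this way.
        return x * 2

    def wrapper(job, x):
        # .rv() is a promise for the child's return value; Toil resolves it
        # only once the child job has actually run.
        return job.addChildJobFn(child, x).rv()

    if __name__ == '__main__':
        options = Job.Runner.getDefaultOptions('./toil_jobstore')
        print(Job.Runner.startToil(Job.wrapJobFn(wrapper, 21), options))  # 42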
+
     :param dict rsem_files: Results from running rsem
     :param str rna_haplotype: The job store id for the rna haplotype file
-    :param dict univ_options: Universal Options
+    :param dict univ_options: Dict of universal options used by almost all tools
     :param dict mhc_genes_options: Options specific to assessing the MHC genes
     :return: The results of running assess_mhc_genes
+    :rtype: toil.fileStore.FileID
     """
     return job.addChildJobFn(assess_mhc_genes, rsem_files['rsem.isoforms.results'], rna_haplotype,
                              univ_options, mhc_genes_options).rv()
@@ -37,12 +40,15 @@ def run_mhc_gene_assessment(job, rsem_files, rna_haplotype, univ_options, mhc_ge
 
 def assess_mhc_genes(job, isoform_expression, rna_haplotype, univ_options, mhc_genes_options):
     """
-    This module will assess the prevalence of the various genes in the MHC pathway and return a
-    report in the tsv format
-    :param isoform_expression: Isoform expression from run_rsem
-    :param rna_haplotype: PHLAT output from running on rna
-    :param univ_options: Universal options for the pipeline
-    :param mhc_genes_options: options specific to this module
+    Assess the prevalence of the various genes in the MHC pathway and return a report in the tsv
+    format.
+
+    :param toil.fileStore.FileID isoform_expression: fsID for the rsem isoform expression file
+    :param toil.fileStore.FileID rna_haplotype: fsID for the RNA PHLAT file
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict mhc_genes_options: Options specific to assessing the MHC genes
+    :return: The fsID for the mhc pathway report file
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running mhc gene assessment on %s' % univ_options['patient'])
     work_dir = os.getcwd()
@@ -91,7 +97,9 @@ def assess_mhc_genes(job, isoform_expression, rna_haplotype, univ_options, mhc_g
     for mhcii_allele in ('HLA_DQA', 'HLA_DQB', 'HLA_DRA', 'HLA_DRB'):
         if mhcii_allele != 'HLA_DRA':
             num_alleles = len(mhc_alleles[mhcii_allele])
-            result = 'FAIL' if num_alleles == 0 else 'LOW' if num_alleles == 1 else 'PASS'
+            result = ('FAIL' if num_alleles == 0 else
+                      'LOW' if num_alleles == 1 else
+                      'PASS')
             print("{:12}{:<12}{:<12}{:12}".format(mhcii_allele, 2, num_alleles, result),
                   file=mpr)
         else:
diff --git a/src/protect/alignment/common.py b/src/protect/alignment/common.py
index 12e63760..0d07468c 100644
--- a/src/protect/alignment/common.py
+++ b/src/protect/alignment/common.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 from __future__ import absolute_import
 from math import ceil
-from protect.common import docker_call, get_files_from_filestore, export_results
+from protect.common import docker_call, export_results, get_files_from_filestore
 
 import os
 
@@ -28,14 +28,15 @@ def index_bamfile(job, bamfile, sample_type, univ_options, samtools_options):
     """
     This module indexes BAMFILE
     ARGUMENTS
-    1. bamfile:
-    2. sample_type: string of 'tumor_dna' or 'normal_dna'
-    3. univ_options: Dict of universal arguments used by almost all tools
-         univ_options
-              +- 'dockerhub':
-    RETURN VALUES
-    1. output_files: REFER output_files in run_bwa(). This module is the one is
-                     the one that generates the files.
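The samtools call this function wraps can be sketched on its own, assuming the docker_call helper from protect.common later in this patch; the version string is illustrative, not a pinned default:

    import os
    from protect.common import docker_call

    def index_bam(bam_path, samtools_version='1.2'):
        # samtools writes <bam>.bai next to the input, inside the mounted
        # /data directory that docker_call maps onto work_dir.
        work_dir = os.path.dirname(os.path.abspath(bam_path))
        parameters = ['index', '/data/' + os.path.basename(bam_path)]
        docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                    tool_version=samtools_version)
        return bam_path + '.bai'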
+ :param toil.fileStore.FileID bamfile: fsID for the bam file + :param str sample_type: Description of the sample to inject into the filename + :param dict univ_options: Dict of universal options used by almost all tools + :param dict samtools_options: Options specific to samtools + :return: Dict containing input bam and the generated index (.bam.bai) + output_files: + |- '_fix_pg_sorted.bam': fsID + +- '_fix_pg_sorted.bam.bai': fsID + :rtype: dict """ job.fileStore.logToMaster('Running samtools-index on %s:%s' % (univ_options['patient'], sample_type)) diff --git a/src/protect/alignment/dna.py b/src/protect/alignment/dna.py index 586d3578..f529d533 100644 --- a/src/protect/alignment/dna.py +++ b/src/protect/alignment/dna.py @@ -14,6 +14,7 @@ # limitations under the License. from __future__ import absolute_import, print_function from math import ceil + from protect.alignment.common import index_bamfile, index_disk from protect.common import docker_call, docker_path, get_files_from_filestore, is_gzipfile, untargz from toil.job import PromisedRequirement @@ -41,7 +42,17 @@ def regroup_disk(reheader_bam): def align_dna(job, fastqs, sample_type, univ_options, bwa_options): """ - This is a convenience function that runs the entire dna alignment subgraph + A wrapper for the entire dna alignment subgraph. + + :param list fastqs: The input fastqs for alignment + :param str sample_type: Description of the sample to inject into the filename + :param dict univ_options: Dict of universal options used by almost all tools + :param dict bwa_options: Options specific to bwa + :return: Dict containing output bam and bai + output_files: + |- '_fix_pg_sorted.bam': fsID + +- '_fix_pg_sorted.bam.bai': fsID + :rtype: dict """ bwa = job.wrapJobFn(run_bwa, fastqs, sample_type, univ_options, bwa_options, disk=PromisedRequirement(bwa_disk, fastqs, bwa_options['index']), @@ -69,28 +80,14 @@ def align_dna(job, fastqs, sample_type, univ_options, bwa_options): def run_bwa(job, fastqs, sample_type, univ_options, bwa_options): """ - This module aligns the SAMPLE_TYPE dna fastqs to the reference - - ARGUMENTS -- depicts the sample type. Substitute with 'tumor'/'normal' - 1. fastqs: Dict of list of input WGS/WXS fastqs - fastqs - +- '_dna': [ , ] - 2. sample_type: string of 'tumor_dna' or 'normal_dna' - 3. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 4. bwa_options: Dict of parameters specific to bwa - bwa_options - |- 'index': - +- 'n': - - RETURN VALUES - 1. output_files: Dict of aligned bam + reference (nested return) - output_files - |- '_fix_pg_sorted.bam': - +- '_fix_pg_sorted.bam.bai': - - This module corresponds to nodes 3 and 4 on the tree + Align a pair of fastqs with bwa. + + :param list fastqs: The input fastqs for alignment + :param str sample_type: Description of the sample to inject into the filename + :param dict univ_options: Dict of universal options used by almost all tools + :param dict bwa_options: Options specific to bwa + :return: fsID for the generated sam + :rtype: toil.fileStore.FileID """ job.fileStore.logToMaster('Running bwa on %s:%s' % (univ_options['patient'], sample_type)) work_dir = os.getcwd() @@ -126,16 +123,14 @@ def run_bwa(job, fastqs, sample_type, univ_options, bwa_options): def bam_conversion(job, samfile, sample_type, univ_options, samtools_options): """ - This module converts SAMFILE from sam to bam - - ARGUMENTS - 1. samfile: - 2. sample_type: string of 'tumor_dna' or 'normal_dna' - 3. 
univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - RETURN VALUES - 1. output_files: REFER output_files in run_bwa() + Convert a sam to a bam. + + :param dict samfile: The input sam file + :param str sample_type: Description of the sample to inject into the filename + :param dict univ_options: Dict of universal options used by almost all tools + :param dict samtools_options: Options specific to samtools + :return: fsID for the generated bam + :rtype: toil.fileStore.FileID """ job.fileStore.logToMaster('Running sam2bam on %s:%s' % (univ_options['patient'], sample_type)) work_dir = os.getcwd() @@ -159,16 +154,15 @@ def bam_conversion(job, samfile, sample_type, univ_options, samtools_options): def fix_bam_header(job, bamfile, sample_type, univ_options, samtools_options): """ - This module modified the header in BAMFILE - - ARGUMENTS - 1. bamfile: - 2. sample_type: string of 'tumor_dna' or 'normal_dna' - 3. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - RETURN VALUES - 1. output_files: REFER output_files in run_bwa() + Fix the bam header to remove the command line call. Failing to do this causes Picard to reject + the bam. + + :param dict bamfile: The input bam file + :param str sample_type: Description of the sample to inject into the filename + :param dict univ_options: Dict of universal options used by almost all tools + :param dict samtools_options: Options specific to samtools + :return: fsID for the output bam + :rtype: toil.fileStore.FileID """ job.fileStore.logToMaster('Running reheader on %s:%s' % (univ_options['patient'], sample_type)) work_dir = os.getcwd() @@ -203,16 +197,14 @@ def fix_bam_header(job, bamfile, sample_type, univ_options, samtools_options): def add_readgroups(job, bamfile, sample_type, univ_options, picard_options): """ - This module adds the appropriate read groups to the bam file - ARGUMENTS - 1. bamfile: - 2. sample_type: string of 'tumor_dna' or 'normal_dna' - 3. univ_options: Dict of universal arguments used by almost all tools - univ_options - |- 'dockerhub': - +- 'java_Xmx': value for max heap passed to java - RETURN VALUES - 1. output_files: REFER output_files in run_bwa() + Add read groups to the bam. 
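Note that docker_call's java_opts keyword becomes java_xmx in this patch (see protect/common.py below). A hedged sketch of the picard call this function makes, mirroring the parameter list in the hunk that follows; the heap size and tool version are illustrative values:

    import os
    from protect.common import docker_call

    def add_read_groups_sketch(bam_path, sample_type, java_xmx='20G',
                               picard_version='1.135'):
        work_dir = os.path.dirname(os.path.abspath(bam_path))
        out_bam = sample_type + '_aligned_fixpg_sorted_reheader.bam'
        parameters = ['AddOrReplaceReadGroups',
                      'CREATE_INDEX=false',
                      'I=/data/' + os.path.basename(bam_path),
                      'O=/data/' + out_bam,
                      'SO=coordinate', 'ID=1',
                      'LB=' + sample_type,
                      'PL=ILLUMINA', 'PU=12345',
                      # rstrip('_dna') drops the '_dna' suffix for the
                      # 'tumor_dna'/'normal_dna' names used in this pipeline.
                      'SM=' + sample_type.rstrip('_dna')]
        docker_call(tool='picard', tool_parameters=parameters, work_dir=work_dir,
                    java_xmx=java_xmx, tool_version=picard_version)
        return os.path.join(work_dir, out_bam)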
+ + :param dict bamfile: The input bam file + :param str sample_type: Description of the sample to inject into the filename + :param dict univ_options: Dict of universal options used by almost all tools + :param dict picard_options: Options specific to picard + :return: fsID for the output bam + :rtype: toil.fileStore.FileID """ job.fileStore.logToMaster('Running add_read_groups on %s:%s' % (univ_options['patient'], sample_type)) @@ -231,7 +223,7 @@ def add_readgroups(job, bamfile, sample_type, univ_options, picard_options): 'PU=12345', ''.join(['SM=', sample_type.rstrip('_dna')])] docker_call(tool='picard', tool_parameters=parameters, work_dir=work_dir, - dockerhub=univ_options['dockerhub'], java_opts=univ_options['java_Xmx'], + dockerhub=univ_options['dockerhub'], java_xmx=univ_options['java_Xmx'], tool_version=picard_options['version']) output_file = job.fileStore.writeGlobalFile( '/'.join([work_dir, sample_type + '_aligned_fixpg_sorted_reheader.bam'])) diff --git a/src/protect/alignment/rna.py b/src/protect/alignment/rna.py index eeab3880..fed2cc83 100644 --- a/src/protect/alignment/rna.py +++ b/src/protect/alignment/rna.py @@ -15,8 +15,14 @@ from __future__ import absolute_import, print_function from collections import defaultdict from math import ceil + from protect.alignment.common import index_bamfile, index_disk -from protect.common import docker_call, get_files_from_filestore, is_gzipfile, untargz, docker_path +from protect.common import (docker_call, + docker_path, + export_results, + get_files_from_filestore, + is_gzipfile, + untargz) from toil.job import PromisedRequirement import os @@ -31,7 +37,13 @@ def star_disk(rna_fastqs, star_tar): def align_rna(job, fastqs, univ_options, star_options): """ - This is a convenience function that runs the entire rna alignment subgraph + A wrapper for the entire rna alignment subgraph. + + :param list fastqs: The input fastqs for alignment + :param dict univ_options: Dict of universal options used by almost all tools + :param dict star_options: Options specific to star + :return: Dict containing input bam and the generated index (.bam.bai) + :rtype: dict """ star = job.wrapJobFn(run_star, fastqs, univ_options, star_options, cores=star_options['n'], @@ -47,26 +59,17 @@ def align_rna(job, fastqs, univ_options, star_options): def run_star(job, fastqs, univ_options, star_options): """ - This module uses STAR to align the RNA fastqs to the reference - - ARGUMENTS - 1. fastqs: REFER RETURN VALUE of run_cutadapt() - 2. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 3. star_options: Dict of parameters specific to STAR - star_options - |- 'index': - +- 'n': - RETURN VALUES - 1. output_files: Dict of aligned bams - output_files - |- 'rnaAligned.toTranscriptome.out.bam': - +- 'rnaAligned.sortedByCoord.out.bam': Dict of genome bam + bai - |- 'rna_fix_pg_sorted.bam': - +- 'rna_fix_pg_sorted.bam.bai': + Align a pair of fastqs with STAR. 
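The nested return value documented below is easiest to read as a literal; the placeholder strings here stand in for fsIDs:

    star_output = {
        'rnaAligned.toTranscriptome.out.bam': 'fsID-transcriptome-bam',
        # After index_star runs, the genome-coordinate entry is itself a dict:
        'rnaAligned.sortedByCoord.out.bam': {
            'rna_fix_pg_sorted.bam': 'fsID-genome-bam',
            'rna_fix_pg_sorted.bam.bai': 'fsID-genome-bai',
        },
    }
    # rsem consumes only the transcriptome-space bam:
    transcriptome_bam = star_output['rnaAligned.toTranscriptome.out.bam']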
- This module corresponds to node 9 on the tree + :param list fastqs: The input fastqs for alignment + :param dict univ_options: Dict of universal options used by almost all tools + :param dict star_options: Options specific to star + :return: Dict containing output genome bam, genome bai, and transcriptome bam + output_files: + |- 'rnaAligned.toTranscriptome.out.bam': fsID + +- 'rnaAligned.sortedByCoord.out.bam': + +- 'rna_fix_pg_sorted.bam': fsID + :rtype: dict """ assert star_options['type'] in ('star', 'starlong') job.fileStore.logToMaster('Running STAR on %s' % univ_options['patient']) @@ -110,13 +113,27 @@ def run_star(job, fastqs, univ_options, star_options): 'rnaAligned.sortedByCoord.out.bam']: output_files[bam_file] = job.fileStore.writeGlobalFile('/'.join([ work_dir, bam_file])) + export_results(job, output_files['rnaAligned.toTranscriptome.out.bam'], 'rna_transcriptome.bam', + univ_options, subfolder='alignments') return output_files def index_star(job, star_bams, univ_options, star_options): """ - This is a wrapper functiion for index_bamfile in protect.common which is required since run_star + A wrapper for indexing the genomic star bam generated by run_star. It is required since run_star returns a dict of 2 bams + + :param dict star_bams: The bams from run_star + :param dict univ_options: Dict of universal options used by almost all tools + :param dict star_options: Options specific to star + :return: Dict containing input bam and the generated index (.bam.bai) + output_files: + |- 'rnaAligned.toTranscriptome.out.bam': fsID + +- 'rnaAligned.sortedByCoord.out.bam': + |- 'rna_fix_pg_sorted.bam': fsID + +- 'rna_fix_pg_sorted.bam.bai': fsID + + :rtype: dict """ index = job.wrapJobFn(index_bamfile, star_bams['rnaAligned.sortedByCoord.out.bam'], 'rna', univ_options, samtools_options=star_options['samtools'], diff --git a/src/protect/binding_prediction/__init__.py b/src/protect/binding_prediction/__init__.py index c592ef89..6656c66f 100644 --- a/src/protect/binding_prediction/__init__.py +++ b/src/protect/binding_prediction/__init__.py @@ -12,4 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import \ No newline at end of file +from __future__ import absolute_import diff --git a/src/protect/binding_prediction/common.py b/src/protect/binding_prediction/common.py index 308d4cdd..1002d7f2 100644 --- a/src/protect/binding_prediction/common.py +++ b/src/protect/binding_prediction/common.py @@ -13,62 +13,49 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import, print_function - -import json -import os -import re from collections import defaultdict -import pandas from protect.binding_prediction.mhci import predict_mhci_binding from protect.binding_prediction.mhcii import predict_mhcii_binding, predict_netmhcii_binding -from protect.common import get_files_from_filestore, untargz, export_results, read_peptide_file +from protect.common import get_files_from_filestore, export_results, read_peptide_file, untargz + +import json +import os +import pandas +import re def spawn_antigen_predictors(job, transgened_files, phlat_files, univ_options, mhc_options): """ - Based on the number of alleles obtained from node 14, this module will spawn callers to predict - MHCI:peptide and MHCII:peptide binding on the peptide files from node 17. 
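A schematic of that per-allele fan-out (a sketch, not the pipeline's exact code; the peptfiles dict keyed by peptide length is a hypothetical stand-in for the transgened files):

    def spawn_mhci_predictions(job, mhci_alleles, peptfiles, univ_options,
                               mhci_options):
        # One child job per allele x peptide-length; the .rv() promises are
        # gathered into a dict keyed the way the merge step expects.
        mhci_preds = {}
        for allele in mhci_alleles:
            for peplen in ('9', '10'):
                key = '_'.join([allele, peplen, 'mer.faa'])
                mhci_preds[key] = job.addChildJobFn(
                    predict_mhci_binding, peptfiles[peplen], allele, peplen,
                    univ_options, mhci_options).rv()
        return mhci_preds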
Once all MHC:peptide - predictions are made, merge them via a follow-on job. - - ARGUMENTS - 1. transgened_files: REFER RETURN VALUE of run_transgene() - 2. phlat_files: REFER RETURN VALUE of merge_phlat_calls() - 3. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 4. mhc_options: Dict of dicts of parameters specific to mhci and mhcii - respectively - mhc_options - |- 'mhci' - | |- 'method_file': - | +- 'pred': String describing prediction method to use - +- 'mhcii' - |- 'method_file': - +- 'pred': String describing prediction method to use - - RETURN VALUES - 1. tuple of (mhci_preds, mhcii_preds) - mhci_preds: Dict of return value from running predictions on a given - mhc for all peptides of length 9 and 10. - mhci_preds - |- _9_mer.faa: - |- _10_mer.faa: - | - .. - +- _10_mer.faa: - mhcii_preds: Dict of return value from running predictions on a given - mhc for all peptides of length 15. - mhci_preds - |- _15_mer.faa: - | - .. - +- _15_mer.faa: - - This module corresponds to node 18 on the tree + Spawn a job to predict MHCI:peptide and MHCII:peptide binding on the input peptide file for each + allele in in the input haplotype (phlat) files. + + :param dict transgened_files: Dict of tumor and normal peptide fsIDs and the tumor .map fsIDs + :param dict phlat_files: Dict of MHCI and MHCII haplotypes + :param dict univ_options: Dict of universal options used by almost all tools + :param tuple mhc_options: Options specific to mhci and mhcii binding predictions + :return: Tuple of dicts of mhci and mhcii predictions + (mhci_preds, mhcii_preds) + | |- : + | | |- 'tumor': fsID + | | +- 'normal': (fsID, str) + | | + | |- ... + | | + | +- : + | |- 'tumor': fsID + | +- 'normal': (fsID, str) + | + |- : + | |- 'tumor': fsID + | +- 'normal': fsID + | + |- ... + | + +- : + |- 'tumor': fsID + +- 'normal': fsID + :rtype: tuple(dict, dict) """ job.fileStore.logToMaster('Running spawn_anti on %s' % univ_options['patient']) work_dir = os.getcwd() @@ -151,9 +138,9 @@ def spawn_antigen_predictors(job, transgened_files, phlat_files, univ_options, m def read_fastas(input_files): """ - Reads the tumor and normal fastas to disk + Read the tumor and normal fastas into a joint dict. - :param dict input_files: A dict containing filename:filepath for T_ and N_ transgened files. + :param dict input_files: A dict containing filename: filepath for T_ and N_ transgened files. :return: The read fastas in a dictionary of tuples :rtype: dict """ @@ -169,7 +156,7 @@ def read_fastas(input_files): def _read_fasta(fasta_file, output_dict): """ - Actually read the peptide fasta. + Read the peptide fasta into an existing dict. :param str fasta_file: The peptide file :param dict output_dict: The dict to appends results to. @@ -192,8 +179,8 @@ def _read_fasta(fasta_file, output_dict): def _process_consensus_mhcii(mhc_file, normal=False): """ - Process the results from running IEDB binding predictions using the consensus method into a - pandas dataframe. + Process the results from running IEDB MHCII binding predictions using the consensus method into + a pandas dataframe. :param str mhc_file: Output file containing consensus mhcii:peptide binding predictions :param bool normal: Is this processing the results of a normal? @@ -235,8 +222,8 @@ def _process_consensus_mhcii(mhc_file, normal=False): def _process_sturniolo_mhcii(mhc_file, normal=False): """ - Process the results from running IEDB binding predictions using the sturniolo method into a - pandas dataframe. 
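These _process_* helpers all funnel a tab-separated predictor output into pandas. A generic, hedged sketch; the column names here are illustrative, not the tools' documented schemas:

    import pandas

    def predictions_to_frame(pred_file, normal=False):
        # Each real _process_* helper maps its tool's actual columns onto the
        # pept/pred vocabulary used downstream; 'normal_' prefixes mark the
        # normal-sample counterpart columns.
        df = pandas.read_table(pred_file, sep='\t')
        prefix = 'normal_' if normal else ''
        return df.rename(columns={'peptide': prefix + 'pept',
                                  'percentile_rank': prefix + 'pred'})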
+ Process the results from running IEDB MHCII binding predictions using the sturniolo method into + a pandas dataframe. :param str mhc_file: Output file containing sturniolo mhcii:peptide binding predictions :param bool normal: Is this processing the results of a normal? @@ -292,7 +279,7 @@ def _process_net_mhcii(mhc_file, normal=False): def _process_mhci(mhc_file, normal=False): """ - Process the results from running IEDB mhci binding predictions into a pandas dataframe + Process the results from running IEDB MHCI binding predictions into a pandas dataframe. :param str mhc_file: Output file containing netmhciipan mhci:peptide binding predictions :param bool normal: Is this processing the results of a normal? @@ -354,28 +341,18 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen Predict the binding score for the normal counterparts of the peptides in mhc_dict and then return the results in a properly formatted structure. - :param job: Job :param str binding_result: The results from running predict_mhci_binding or predict_mhcii_binding on a single allele :param dict transgened_files: A dictionary containing the jobstore IDs for "T__mer.faa" and "N__mer.faa" :param str allele: The allele to get binding for :param str peplen: The peptide length - :param dict univ_options: Universal options for ProTECT - univ_options - +- 'dockerhub': - :param dict mhc_options: MHC specific options - mhc_options - |- 'mhci' - | |- 'method_file': - | +- 'pred': String describing prediction method to use - +- 'mhcii' - |- 'method_file': - +- 'pred': String describing prediction method to use + :param dict univ_options: Dict of universal options used by almost all tools + :param dict mhc_options: Options specific to mhci or mhcii binding predictions :return: A fully filled out mhc_dict with normal information + output_dict: + |- 'tumor': fsID + +- 'normal': fsID or (fsID, str) -- Depending on MHCI or MHCII :rtype: dict """ job.fileStore.logToMaster('Running predict_normal_binding on %s for allele %s and length %s' % @@ -454,15 +431,15 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files, univ_options): """ - Merge all the calls from nodes 18 and 19. - - This module corresponds to node 19 on the tree + Merge all the calls generated by spawn_antigen_predictors. - :param job: Job :param dict antigen_predictions: The return value from running :meth:`spawn_antigen_predictors` :param dict transgened_files: The transgened peptide files :param dict univ_options: Universal options for ProTECT :return: merged binding predictions + output_files: + |- 'mhcii_merged_files.list': fsID + +- 'mhci_merged_files.list': fsID :rtype: dict """ job.fileStore.logToMaster('Merging MHC calls') @@ -542,24 +519,16 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files, univ_opt def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile, netmhc=False): """ - To reduce code redundancy, this module will accept data from merge_mhc_peptide_calls for a given - neoepitope and print it to outfile - ARGUMENTS - 1. neoepitope_info: pandas.core.frame with allele, pept, pred, core, normal_pept, normal_pred - 2. peptides: Dict of all IARS considered - peptides - |- 'neoepitope_1': - .. - |- 'neoepitope_n': - 3. pepmap: Info correlating neoepitope with the gene and transcript level - mutations. - peptides - |- 'neoepitope_1': - | 'ensembl_gene\thugo_gene\tcomma_sep_transcript_mutations' - .. 
-                 +- 'neoepitope_n':
-                        'ensembl_gene\thugo_gene\tcomma_sep_transcript_mutations'
-
+    Accept data about one neoepitope from merge_mhc_peptide_calls and print it to outfile. This is
+    a generic module to reduce code redundancy.
+
+    :param pandas.core.frame neoepitope_info: object containing allele, pept, pred, core,
+           normal_pept, normal_pred
+    :param dict peptides: Dict of pepname: pep sequence for all IARS considered
+    :param dict pepmap: Dict containing the contents from the peptide map file.
+    :param file outfile: An open file descriptor to the output file
+    :param bool netmhc: Does this record correspond to a netmhcIIpan record? These are processed
+           differently.
     """
     if netmhc:
         peptide_names = [neoepitope_info.peptide_name]
diff --git a/src/protect/binding_prediction/mhci.py b/src/protect/binding_prediction/mhci.py
index 148c9797..7d1c84c8 100644
--- a/src/protect/binding_prediction/mhci.py
+++ b/src/protect/binding_prediction/mhci.py
@@ -14,18 +14,23 @@
 # limitations under the License.
 from __future__ import absolute_import, print_function
 
-import os
-
 from protect.common import docker_call, get_files_from_filestore, read_peptide_file
 
+import os
+
 
-def predict_mhci_binding(job, peptfile, allele, peplen, univ_options,
-                         mhci_options):
+def predict_mhci_binding(job, peptfile, allele, peplen, univ_options, mhci_options):
     """
-    This module will predict MHC:peptide binding for peptides in the files created in node XX to
-    ALLELE. ALLELE represents an MHCI allele.
+    Predict binding for each peptide in `peptfile` to `allele` using the IEDB mhci binding
+    prediction tool.
 
-    This module corresponds to node 18 on the tree
+    :param toil.fileStore.FileID peptfile: The input peptide fasta
+    :param str allele: Allele to predict binding against
+    :param str peplen: Length of peptides to process
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict mhci_options: Options specific to mhci binding prediction
+    :return: fsID for file containing the predictions
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running mhci on %s:%s:%s' % (univ_options['patient'], allele,
                                                             peplen))
diff --git a/src/protect/binding_prediction/mhcii.py b/src/protect/binding_prediction/mhcii.py
index 874095ba..07498fdf 100644
--- a/src/protect/binding_prediction/mhcii.py
+++ b/src/protect/binding_prediction/mhcii.py
@@ -22,13 +22,15 @@
 
 def predict_mhcii_binding(job, peptfile, allele, univ_options, mhcii_options):
     """
-    This module will predict MHC:peptide binding for peptides in the files created in node YY to
-    ALLELE. ALLELE represents an MHCII allele.
+    Predict binding for each peptide in `peptfile` to `allele` using the IEDB mhcii binding
+    prediction tool.
 
-    The module returns (PREDFILE, PREDICTOR) where PREDFILE contains the predictions and PREDICTOR
-    is the predictor used (Consensus, NetMHCIIpan, or Sturniolo).
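That (file, predictor) pairing survives into the new interface; downstream code unpacks it, for example (placeholder values standing in for a real result):

    pred_file, predictor = ('fsID-of-predictions', 'Consensus')
    if predictor == 'Sturniolo':
        # Sturniolo output needs its own parser (_process_sturniolo_mhcii above).
        pass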
-
-    This module corresponds to node 19 on the tree
+    :param toil.fileStore.FileID peptfile: The input peptide fasta
+    :param str allele: Allele to predict binding against
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict mhcii_options: Options specific to mhcii binding prediction
+    :return: tuple of fsID for file containing the predictions and the predictor used
+    :rtype: tuple(toil.fileStore.FileID, str)
     """
     job.fileStore.logToMaster('Running mhcii on %s:%s' % (univ_options['patient'], allele))
     work_dir = os.getcwd()
@@ -72,10 +74,14 @@ def predict_mhcii_binding(job, peptfile, allele, univ_options, mhcii_options):
 
 def predict_netmhcii_binding(job, peptfile, allele, univ_options, netmhciipan_options):
     """
-    This module will predict MHC:peptide binding for peptides in the files created in node YY to
-    ALLELE. ALLELE represents an MHCII allele.
+    Predict binding for each peptide in `peptfile` to `allele` using netMHCIIpan.
 
-    This module corresponds to node 19 on the tree
+    :param toil.fileStore.FileID peptfile: The input peptide fasta
+    :param str allele: Allele to predict binding against
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict netmhciipan_options: Options specific to netmhciipan binding prediction
+    :return: tuple of fsID for file containing the predictions and the predictor used (netMHCIIpan)
+    :rtype: tuple(toil.fileStore.FileID, str)
     """
     job.fileStore.logToMaster('Running netmhciipan on %s' % allele)
     work_dir = os.getcwd()
diff --git a/src/protect/common.py b/src/protect/common.py
index 3a303978..a2548501 100644
--- a/src/protect/common.py
+++ b/src/protect/common.py
@@ -28,29 +28,23 @@
 import errno
 import gzip
 import os
-import re
-import shutil
 import subprocess
 import sys
 import tarfile
-import time
 import urllib2
 import uuid
 
 
 def get_files_from_filestore(job, files, work_dir, docker=False):
     """
-    This is adapted from John Vivian's return_input_paths from the RNA-Seq pipeline.
+    Download a dict of files to the given directory and modify the path to a docker-friendly one if
+    requested.
 
-    Returns the paths of files from the FileStore if they are not present.
-    If docker=True, return the docker path for the file.
-    If the file extension is tar.gz, then tar -zxvf it.
-
-    files is a dict with:
-        keys = the name of the file to be returned in toil space
-        value = the input value for the file (can be toil temp file)
-    work_dir is the location where the file should be stored
-    cache indiciates whether caching should be used
+    :param dict files: A dictionary of filenames: fsIDs
+    :param str work_dir: The destination directory
+    :param bool docker: Should the file path be converted to our standard docker '/data/filename'?
+    :return: Dict of files: (optionally docker-friendly) filepaths
+    :rtype: dict
     """
     for name in files.keys():
         outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]))
@@ -65,17 +59,29 @@ def get_files_from_filestore(job, files, work_dir, docker=False):
 
 def docker_path(filepath):
     """
-    Given a path, returns that files path inside the docker mount directory
-    (/data).
+    Given a path, return that file's path inside the docker mount directory (/data).
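The mapping is a one-liner, so a worked example is enough to pin down the behaviour:

    import os

    def docker_path(filepath):
        return os.path.join('/data', os.path.basename(filepath))

    # Any host file under the mounted work_dir maps to a flat /data path:
    assert docker_path('/home/user/work/sample.bam') == '/data/sample.bam'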
+ + :param str filepath: The path to a file + :return: The docker-friendly path for `filepath` + :rtype: str """ return os.path.join('/data', os.path.basename(filepath)) -def docker_call(tool, tool_parameters, work_dir, java_opts=None, outfile=None, +def docker_call(tool, tool_parameters, work_dir, java_xmx=None, outfile=None, dockerhub='aarjunrao', interactive=False, tool_version='latest'): """ - Makes subprocess call of a command to a docker container. work_dir MUST BE AN ABSOLUTE PATH or - the call will fail. outfile is an open file descriptor to a writeable file. + Make a subprocess call of a command to a docker container. + + :param str tool: The tool to run + :param list tool_parameters: Parameters passed to `tool` + :param str work_dir: The absolute path to the working directory to be mounted into the container + :param str java_xmx: The heap space in human readable format to provide java ('20G' will pass + -Xmx20G to java) + :param file outfile: The file object to dump stdout + :param str dockerhub: The dockerhub from where the tool will be pulled + :param bool interactive: Should the docker container be run in interactive mode? + :param str tool_version: What dockerised tool version should be used? """ # If an outifle has been provided, then ensure that it is of type file, it is writeable, and # that it is open. @@ -106,8 +112,8 @@ def docker_call(tool, tool_parameters, work_dir, java_opts=None, outfile=None, raise RuntimeError('docker not found on system. Install on all' + ' nodes.') # If java options have been provided, it needs to be in the docker call - if java_opts: - base_docker_call = ' docker run -e JAVA_OPTS=-Xmx{} '.format(java_opts) + '--rm=true ' + \ + if java_xmx: + base_docker_call = ' docker run -e JAVA_OPTS=-Xmx{} '.format(java_xmx) + '--rm=true ' + \ '-v {}:/data --log-driver=none '.format(work_dir) + interactive else: base_docker_call = ' docker run --rm=true -v {}:/data '.format(work_dir) + \ @@ -124,12 +130,14 @@ def docker_call(tool, tool_parameters, work_dir, java_opts=None, outfile=None, def untargz(input_targz_file, untar_to_dir): """ - This module accepts a tar.gz archive and untars it. + Accept a tar.gz archive and untar it to the given location. The archive can have either one + file, or many files in a single directory. - RETURN VALUE: path to the untar-ed directory/file + :param str input_targz_file: Path to a tar.gz archive + :param str untar_to_dir: The directory where untared files will be dumped - NOTE: this module expects the multiple files to be in a directory before - being tar-ed. + :return: path to the untar-ed directory/file + :rtype: str """ assert tarfile.is_tarfile(input_targz_file), 'Not a tar file.' tarball = tarfile.open(input_targz_file) @@ -142,8 +150,10 @@ def untargz(input_targz_file, untar_to_dir): def gunzip(input_gzip_file, block_size=1024): """ Gunzips the input file to the same directory + :param input_gzip_file: File to be gunzipped :return: path to the gunzipped file + :rtype: str """ assert os.path.splitext(input_gzip_file)[1] == '.gz' assert is_gzipfile(input_gzip_file) @@ -160,10 +170,15 @@ def gunzip(input_gzip_file, block_size=1024): def is_gzipfile(filename): """ - This function attempts to ascertain the gzip status of a file based on the "magic signatures" of - the file. This was taken from the stack overflow + Attempt to ascertain the gzip status of a file based on the "magic signatures" of the file. 
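The check itself is tiny; a self-contained sketch of the magic-byte test described here:

    def looks_gzipped(filename):
        # Gzip streams always begin with the two magic bytes 0x1f 0x8b.
        with open(filename, 'rb') as handle:
            return handle.read(2) == b'\x1f\x8b'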
+
+    This was taken from the stack overflow
     post http://stackoverflow.com/questions/13044562/python-mechanism-to-identify-compressed-file-type\
     -and-uncompress
+
+    :param str filename: A path to a file
+    :return: True if the file appears to be gzipped else false
+    :rtype: bool
     """
     assert os.path.exists(filename), 'Input {} does not '.format(filename) + \
         'point to a file.'
@@ -178,21 +193,24 @@ def is_gzipfile(filename):
 
 def get_file_from_s3(job, s3_url, encryption_key=None, per_file_encryption=True,
                      write_to_jobstore=True):
     """
-    Downloads a supplied URL that points to an unencrypted, unprotected file on Amazon S3. The file
-    is downloaded and a subsequently written to the jobstore and the return value is a the path to
-    the file in the jobstore.
+    Download a supplied URL that points to a file on Amazon S3. If the file is encrypted using
+    sse-c (with the user-provided key or with a hash of the user-provided master key) then the
+    encryption keys will be used when downloading. The file is downloaded and written to the
+    jobstore if requested.
 
-    :param str s3_url: URL for the file (can be s3 or https)
+    :param str s3_url: URL for the file (can be s3, S3 or https)
     :param str encryption_key: Path to the master key
     :param bool per_file_encryption: If encrypted, was the file encrypted using the per-file
            method?
     :param bool write_to_jobstore: Should the file be written to the job store?
+    :return: Path to the downloaded file or fsID (if write_to_jobstore was True)
+    :rtype: str|toil.fileStore.FileID
     """
     work_dir = job.fileStore.getLocalTempDir()
 
     parsed_url = urlparse(s3_url)
     if parsed_url.scheme == 'https':
         download_url = 'S3:/' + parsed_url.path  # path contains the second /
-    elif parsed_url.scheme in ('s3','S3'):
+    elif parsed_url.scheme in ('s3', 'S3'):
         download_url = s3_url
     else:
         raise RuntimeError('Unexpected url scheme: %s' % s3_url)
@@ -261,14 +279,18 @@ def get_file_from_s3(job, s3_url, encryption_key=None, per_file_encryption=True,
 
 def get_file_from_url(job, any_url, encryption_key=None, per_file_encryption=True,
                       write_to_jobstore=True):
     """
-    Downloads a supplied URL (ftp,http,https) that points to a file. The file
-    is downloaded and a subsequently written to the jobstore and the return value is a the path to
-    the file in the jobstore. If URL link is invalid the function will raise an error.
+    Download a supplied URL that points to a file on an http, https or ftp server. If the file is
+    found to be an https s3 link then the file is downloaded using `get_file_from_s3`. The file is
+    downloaded and written to the jobstore if requested.
+
+    Encryption arguments are passed through to `get_file_from_s3` if required.
 
     :param str any_url: URL for the file
+    :param str encryption_key: Path to the master key
+    :param bool per_file_encryption: If encrypted, was the file encrypted using the per-file
+           method?
     :param bool write_to_jobstore: Should the file be written to the job store?
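The https-to-s3am rewrite used above can be isolated into a sketch (python 2 imports, matching this codebase):

    from urlparse import urlparse  # urllib.parse in python 3

    def s3am_url(s3_url):
        parsed = urlparse(s3_url)
        if parsed.scheme == 'https':
            # parsed.path keeps its leading '/', so 'S3:/' + path is well formed.
            return 'S3:/' + parsed.path
        elif parsed.scheme in ('s3', 'S3'):
            return s3_url
        raise RuntimeError('Unexpected url scheme: %s' % s3_url)

    # Path-style https URLs drop the endpoint host in the rewrite:
    assert s3am_url('https://s3-us-west-2.amazonaws.com/bucket/key') == 'S3://bucket/key'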
+    :return: Path to the downloaded file or fsID (if write_to_jobstore was True)
+    :rtype: str|toil.fileStore.FileID
     """
-
     work_dir = job.fileStore.getLocalTempDir()
 
     filename = '/'.join([work_dir, str(uuid.uuid4())])
     parsed_url = urlparse(any_url)
     try:
         response = urllib2.urlopen(url)
     except urllib2.HTTPError:
-        if (parsed_url.netloc).startswith(('s3', 'S3')):
+        if parsed_url.netloc.startswith(('s3', 'S3')):
             job.fileStore.logToMaster("Detected https link is for an encrypted s3 file.")
             return get_file_from_s3(job, any_url, encryption_key=encryption_key,
                                     per_file_encryption=per_file_encryption,
                                     write_to_jobstore=write_to_jobstore)
@@ -296,14 +317,13 @@ def get_file_from_url(job, any_url, encryption_key=None, per_file_encryption=Tru
 
 def bam2fastq(bamfile, univ_options, picard_options):
     """
-    split an input bam to paired fastqs.
-
-    ARGUMENTS
-    1. bamfile: Path to a bam file
-    2. univ_options: Dict of universal arguments used by almost all tools
-         univ_options
-                |- 'dockerhub':
-                +- 'java_Xmx': value for max heap passed to java
+    Split an input bam to paired fastqs.
+
+    :param str bamfile: Path to a bam file
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict picard_options: Dict of options specific to Picard
+    :return: Path to the _1.fastq file
+    :rtype: str
     """
     work_dir = os.path.split(bamfile)[0]
     base_name = os.path.split(os.path.splitext(bamfile)[0])[1]
@@ -313,31 +333,27 @@ def bam2fastq(bamfile, univ_options, picard_options):
                   ''.join(['F2=/data/', base_name, '_2.fastq']),
                   ''.join(['FU=/data/', base_name, '_UP.fastq'])]
     docker_call(tool='picard', tool_parameters=parameters, work_dir=work_dir,
-                dockerhub=univ_options['dockerhub'], java_opts=univ_options['java_Xmx'],
+                dockerhub=univ_options['dockerhub'], java_xmx=univ_options['java_Xmx'],
                 tool_version=picard_options['version'])
     first_fastq = ''.join([work_dir, '/', base_name, '_1.fastq'])
     assert os.path.exists(first_fastq)
     return first_fastq
 
 
-def export_results(job, fsid, file_path, univ_options, subfolder=None):
+def export_results(job, fsid, file_name, univ_options, subfolder=None):
     """
     Write out a file to a given location. The location can be either a directory on the local
     machine, or a folder with a bucket on AWS.
-    TODO: Azure support
+
     :param str fsid: The file store id for the file to be exported
-    :param str file_path: The path to the file that neeeds to be exported
-    :param dict univ_options: A dict of the universal options passed to this script. The important
-           dict entries are ouput_folder and storage_location.
-                * storage_location: 'Local' or an 'aws:'.
-                * output_folder: The folder to store the file. This must exist on the local machine
-                                 if storage_location is 'Local'. If the storage_location is an aws
-                                 bucket, this string represents the path to the file in the bucket.
-                                 To keep it in the base directory, specify 'NA'.
+    :param str file_name: The name of the file that needs to be exported (path to file is also
+           acceptable)
+    :param dict univ_options: Dict of universal options used by almost all tools
     :param str subfolder: A sub folder within the main folder where this data should go
     :return: None
     """
     job.fileStore.logToMaster('Exporting %s to output location' % fsid)
+    file_name = os.path.basename(file_name)
     try:
         assert univ_options['output_folder'], 'Need a path to a folder to write out files'
         assert univ_options['storage_location'], 'Need to know where the files need to go. 
' + \ @@ -361,75 +377,24 @@ def export_results(job, fsid, file_path, univ_options, subfolder=None): except OSError as err: if err.errno != errno.EEXIST: raise - output_url = 'file://' + os.path.join(output_folder, os.path.basename(file_path)) + output_url = 'file://' + os.path.join(output_folder, file_name) elif univ_options['storage_location'].startswith('aws'): # Handle AWS bucket_name = univ_options['storage_location'].split(':')[-1] - - file_name = os.path.basename(file_path) output_url = os.path.join('S3://', bucket_name, output_folder.strip('/'), file_name) # Can't do Azure or google yet. else: + # TODO: Azure support print("Currently doesn't support anything but Local and aws.") return job.fileStore.exportFile(fsid, output_url) -def write_to_s3(file_path, key_path, bucket_name, output_folder, overwrite=True): - """ - Write the file to S3. - - :param file_path: The file to be written - :param key_path: Path to the encryption Key - :param bucket_name: The bucket where the data will be written - :param output_folder: The location in the bucket for the output data - :param overwrite: Should the data be overwritten if it exists in the bucket? - """ - assert overwrite in (True, False) - overwrite = 'overwrite' if overwrite else 'skip' - file_name = os.path.basename(file_path) - output_file = os.path.join('S3://', bucket_name, output_folder.strip('/'), file_name) - # Adding resume doesn't affect a new upload, but protects in failed upload cases. - subprocess.check_call(['s3am', 'upload', '--exists=' + overwrite, '--resume', - '--sse-key-file', key_path, file_path, output_file]) - - -def file_xext(filepath): - """ - Get the file extension wrt compression from the filename (is it tar or targz) - :param str filepath: Path to the file - :return str ext: Compression extension name - """ - ext = os.path.splitext(filepath)[1] - if ext == '.gz': - xext = os.path.splitext(os.path.splitext(filepath)[0])[1] - if xext == '.tar': - ext = xext + ext - elif ext == '.tar': - pass # ext is already .tar - else: - ext = '' - return ext - - -def strip_xext(filepath): - """ - Strips the compression extension from the filename - :param filepath: Path to compressed file. - :return str filepath: Path to the file with the compression extension stripped off. - """ - ext_size = len(file_xext(filepath).split('.')) - 1 - for i in xrange(0, ext_size): - filepath = os.path.splitext(filepath)[0] - return filepath - - def delete_fastqs(job, fastqs): """ - This module will delete the fastqs from the job Store once their purpose has been achieved (i.e. - after all mapping steps) + This module will delete the fastqs from the job Store once their purpose has been achieved + (i.e. 
after all mapping steps) - ARGUMENTS :param dict fastqs: Dict of list of input fastqs """ for key in fastqs.keys(): @@ -451,12 +416,14 @@ class ParameterError(Exception): def read_peptide_file(in_peptfile): """ - This module reads an input peptide fasta file into memory in the form of a dict with - key = fasta record name - value = corresponding peptide sequence + Reads an input peptide fasta file into memory in the form of a dict of fasta record: sequence + + :param str in_peptfile: Path to a peptide fasta + :return: Dict of fasta record: sequence + :rtype: dict """ peptides = defaultdict() - pept=None + pept = None with open(in_peptfile, 'r') as peptfile: for line in peptfile: if line.startswith('>'): diff --git a/src/protect/expression_profiling/__init__.py b/src/protect/expression_profiling/__init__.py index c592ef89..6656c66f 100644 --- a/src/protect/expression_profiling/__init__.py +++ b/src/protect/expression_profiling/__init__.py @@ -12,4 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import \ No newline at end of file +from __future__ import absolute_import diff --git a/src/protect/expression_profiling/rsem.py b/src/protect/expression_profiling/rsem.py index b2ce9279..7e3b6511 100644 --- a/src/protect/expression_profiling/rsem.py +++ b/src/protect/expression_profiling/rsem.py @@ -14,8 +14,12 @@ # limitations under the License. from __future__ import print_function from math import ceil -from protect.common import (docker_call, get_files_from_filestore, untargz, docker_path, - export_results) + +from protect.common import (docker_call, + docker_path, + export_results, + get_files_from_filestore, + untargz) from toil.job import PromisedRequirement import os @@ -30,13 +34,16 @@ def rsem_disk(star_bams, rsem_index): def wrap_rsem(job, star_bams, univ_options, rsem_options): """ - This is a convenience function that runs rsem from the star outputs. + A wrapper for run_rsem using the results from run_star as input. - :param job job: job :param dict star_bams: dict of results from star - :param dict univ_options: Universal Options + :param dict univ_options: Dict of universal options used by almost all tools :param dict rsem_options: Options specific to rsem - :return: + :return: Dict of gene- and isoform-level expression calls + output_files: + |- 'rsem.genes.results': fsID + +- 'rsem.isoforms.results': fsID + :rtype: dict """ rsem = job.addChildJobFn(run_rsem, star_bams['rnaAligned.toTranscriptome.out.bam'], univ_options, rsem_options, cores=rsem_options['n'], @@ -48,22 +55,17 @@ def wrap_rsem(job, star_bams, univ_options, rsem_options): def run_rsem(job, rna_bam, univ_options, rsem_options): """ - This module will run rsem on the RNA Bam file. + Run rsem on the input RNA bam. ARGUMENTS - 1. rna_bam: - 2. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 3. rsem_options: Dict of parameters specific to rsem - rsem_options - |- 'index': - +- 'n': - - RETURN VALUES - 1. 
output_file:
-
-    This module corresponds to node 9 on the tree
+    :param toil.fileStore.FileID rna_bam: fsID of a transcriptome bam generated by STAR
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict rsem_options: Options specific to rsem
+    :return: Dict of gene- and isoform-level expression calls
+             output_files:
+                 |- 'rsem.genes.results': fsID
+                 +- 'rsem.isoforms.results': fsID
+    :rtype: dict
     """
     job.fileStore.logToMaster('Running rsem on %s' % univ_options['patient'])
     work_dir = os.getcwd()
diff --git a/src/protect/haplotyping/__init__.py b/src/protect/haplotyping/__init__.py
index c592ef89..6656c66f 100644
--- a/src/protect/haplotyping/__init__.py
+++ b/src/protect/haplotyping/__init__.py
@@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
\ No newline at end of file
+from __future__ import absolute_import
diff --git a/src/protect/haplotyping/phlat.py b/src/protect/haplotyping/phlat.py
index 151361ee..1979ceaf 100644
--- a/src/protect/haplotyping/phlat.py
+++ b/src/protect/haplotyping/phlat.py
@@ -15,12 +15,16 @@
 from __future__ import absolute_import, print_function
 from collections import defaultdict
 from math import ceil
-from protect.common import (docker_call, get_files_from_filestore, is_gzipfile, untargz,
-                            docker_path, export_results)
+
+from protect.common import (docker_call,
+                            docker_path,
+                            export_results,
+                            get_files_from_filestore,
+                            is_gzipfile,
+                            untargz)
 
 import os
 import re
-import sys
 
 
 # disk for phlat
@@ -31,26 +35,14 @@
 def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
     """
-    This module will run PHLAT on SAMPLE_TYPE fastqs.
-
-    ARGUMENTS -- depicts the sample type. Substitute with 'tumor_dna',
-                  'normal_dna', or 'tumor_rna'
-    1. fastqs: Dict of list of input WGS/WXS fastqs
-         fastqs
-              +- '': [ ,  ]
-    2. sample_type: string of 'tumor' or 'normal'
-    3. univ_options: Dict of universal arguments used by almost all tools
-         univ_options
-              +- 'dockerhub':
-    4. phlat_options: Dict of parameters specific to phlat
-         phlat_options
-              |- 'index':
-              +- 'n':
-
-    RETURN VALUES
-    1. output_file:
-
-    This module corresponds to nodes 5, 6 and 7 on the tree
+    Run PHLAT on a pair of input fastqs of type `sample_type`.
+
+    :param list fastqs: List of input fastq files
+    :param str sample_type: Description of the sample type to inject into the file name.
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict phlat_options: Options specific to PHLAT
+    :return: fsID for the HLA haplotype called from the input fastqs
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running phlat on %s:%s' % (univ_options['patient'], sample_type))
     work_dir = os.getcwd()
@@ -85,21 +77,17 @@ def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
 
 def merge_phlat_calls(job, tumor_phlat, normal_phlat, rna_phlat, univ_options):
     """
-    This module will merge the results form running PHLAT on the 3 input fastq
-    pairs.
-
-    ARGUMENTS
-    1. tumor_phlat:
-    2. normal_phlat:
-    3. rna_phlat:
+    Merge tumor, normal and tumor rna haplotypes into consensus calls.
 
-    RETURN VALUES
-    1. 
output_files: Dict of JSids for consensus MHCI and MHCII alleles + :param toil.fileStore.FileID tumor_phlat: fsID for HLA haplotypes called from tumor DNA + :param toil.fileStore.FileID normal_phlat: fsID for HLA haplotypes called from normal DNA + :param toil.fileStore.FileID rna_phlat: fsID for HLA haplotypes called from tumor RNA + :param dict univ_options: Dict of universal options used by almost all tools + :return: Dict of fsIDs for consensus MHCI and MHCII alleles output_files - |- 'mhci_alleles.list': - +- 'mhcii_alleles.list': - - This module corresponds to node 14 on the tree + |- 'mhci_alleles.list': fsID + +- 'mhcii_alleles.list': fsID + :rtype: dict """ job.fileStore.logToMaster('Merging Phlat calls') work_dir = os.getcwd() @@ -140,9 +128,12 @@ def merge_phlat_calls(job, tumor_phlat, normal_phlat, rna_phlat, univ_options): def parse_phlat_file(phlatfile, mhc_alleles): """ - Parse the input phlat file to pull out the alleles it contains - :param phlatfile: Open file descriptor for a phlat output sum file - :param mhc_alleles: dictionary of alleles. + Parse an input phlat file to identify predicted HLA alleles. + + :param file phlatfile: Open file descriptor for a phlat output sum file + :param dict mhc_alleles: Dict of alleles. + :return: Updated dict of alleles + :rtype: dict """ for line in phlatfile: if line.startswith('Locus'): @@ -168,8 +159,11 @@ def parse_phlat_file(phlatfile, mhc_alleles): def most_probable_alleles(allele_list): """ - This module accepts a list of tuples of (allele, p_value) pairs. It returns the 2 most probable - alleles for that group. + Identify the most probable haplotype in a list + + :param list allele_list: List of tuples of (allele, p_value) + :return: List of 2 most probable alleles in the group + :rtype: list[str] """ all_alleles = defaultdict() # First collect all the keys. Make a dict with allele as key and list of pvalues as value diff --git a/src/protect/mutation_annotation/__init__.py b/src/protect/mutation_annotation/__init__.py index c592ef89..6656c66f 100644 --- a/src/protect/mutation_annotation/__init__.py +++ b/src/protect/mutation_annotation/__init__.py @@ -12,4 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import \ No newline at end of file +from __future__ import absolute_import diff --git a/src/protect/mutation_annotation/snpeff.py b/src/protect/mutation_annotation/snpeff.py index 151fdbd2..b00301ae 100644 --- a/src/protect/mutation_annotation/snpeff.py +++ b/src/protect/mutation_annotation/snpeff.py @@ -14,9 +14,12 @@ # limitations under the License. from __future__ import absolute_import, print_function from math import ceil -from protect.common import (docker_call, get_files_from_filestore, export_results, untargz, - docker_path) +from protect.common import (docker_call, + docker_path, + export_results, + get_files_from_filestore, + untargz) import os @@ -27,23 +30,13 @@ def snpeff_disk(snpeff_index): def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options): """ - This module will run snpeff on the aggregated mutation calls. Currently the only mutations - called are SNPs hence SnpEff suffices. This node will be replaced in the future with another - translator. + Run snpeff on an input vcf. - ARGUMENTS - 1. merged_mutation_file: - 2. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 3. 
snpeff_options: Dict of parameters specific to snpeff - snpeff_options - +- 'index': - - RETURN VALUES - 1. output_file: - - This node corresponds to node 16 on the tree + :param toil.fileStore.FileID merged_mutation_file: fsID for input vcf + :param dict univ_options: Dict of universal options used by almost all tools + :param dict snpeff_options: Options specific to snpeff + :return: fsID for the snpeffed vcf + :rtype: toil.fileStore.FileID """ job.fileStore.logToMaster('Running snpeff on %s' % univ_options['patient']) work_dir = os.getcwd() @@ -56,8 +49,8 @@ def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options): parameters = ['eff', '-dataDir', input_files['snpeff_index'], - '-c', '/'.join([input_files['snpeff_index'], 'snpEff_' + univ_options['ref'] + - '_gencode.config']), + '-c', '/'.join([input_files['snpeff_index'], + 'snpEff_' + univ_options['ref'] + '_gencode.config']), '-no-intergenic', '-no-downstream', '-no-upstream', @@ -68,7 +61,7 @@ def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options): xmx = snpeff_options['java_Xmx'] if snpeff_options['java_Xmx'] else univ_options['java_Xmx'] with open('/'.join([work_dir, 'mutations.vcf']), 'w') as snpeff_file: docker_call(tool='snpeff', tool_parameters=parameters, work_dir=work_dir, - dockerhub=univ_options['dockerhub'], java_opts=xmx, outfile=snpeff_file, + dockerhub=univ_options['dockerhub'], java_xmx=xmx, outfile=snpeff_file, tool_version=snpeff_options['version']) output_file = job.fileStore.writeGlobalFile(snpeff_file.name) export_results(job, output_file, snpeff_file.name, univ_options, subfolder='mutations/snpeffed') diff --git a/src/protect/mutation_calling/__init__.py b/src/protect/mutation_calling/__init__.py index c592ef89..6656c66f 100644 --- a/src/protect/mutation_calling/__init__.py +++ b/src/protect/mutation_calling/__init__.py @@ -12,4 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import \ No newline at end of file +from __future__ import absolute_import diff --git a/src/protect/mutation_calling/common.py b/src/protect/mutation_calling/common.py index 3b92098d..b1fedd38 100644 --- a/src/protect/mutation_calling/common.py +++ b/src/protect/mutation_calling/common.py @@ -14,6 +14,7 @@ # limitations under the License. from __future__ import absolute_import, print_function from collections import defaultdict + from protect.common import export_results, get_files_from_filestore, untargz import itertools @@ -22,11 +23,11 @@ def sample_chromosomes(job, genome_fai_file): """ - Get a list of chromosomes in the input data + Get a list of chromosomes in the input data. - :param job: job :param string genome_fai_file: Job store file ID for the genome fai file - :returns list: Chromosomes in the sample + :return: Chromosomes in the sample + :rtype: list[str] """ work_dir = os.getcwd() genome_fai = untargz(job.fileStore.readGlobalFile(genome_fai_file), work_dir) @@ -34,6 +35,13 @@ def sample_chromosomes(job, genome_fai_file): def chromosomes_from_fai(genome_fai): + """ + Read a fasta index (fai) file and parse the input chromosomes. + + :param str genome_fai: Path to the fai file. 
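For context, a .fai line is five tab-separated columns with the sequence name first, so the chromosome list is just the first field of each line (the numeric values here are illustrative):

    fai_line = 'chr1\t248956422\t112\t70\t71'
    # name, length, byte offset, bases per line, bytes per line
    assert fai_line.split('\t')[0] == 'chr1'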
+    :return: list of input chromosomes
+    :rtype: list[str]
+    """
     chromosomes = []
     with open(genome_fai) as fai_file:
         for line in fai_file:
@@ -44,20 +52,17 @@ def chromosomes_from_fai(genome_fai):
 
 def run_mutation_aggregator(job, mutation_results, univ_options):
     """
-    This module will aggregate all the mutations called in the previous steps
+    Aggregate all the called mutations.
 
-    :param job: job
     :param dict mutation_results: Dict of dicts of the various mutation callers in a per chromosome
-    format
-    :param dict univ_options: Universal Options
-    :param int numthreads: number of threads to use in the merge step
-    :returns: job store file id fo rthe merged mutations file
-
-    This module corresponds to node 15 on the tree
+           format
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :returns: fsID for the merged mutations file
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Aggregating mutations for %s' % univ_options['patient'])
-    # Setup an input data structure for the merge function
+    # Setup an input data structure for the merge function
     out = {}
     for chrom in mutation_results['mutect'].keys():
         out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
@@ -68,15 +73,14 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
 
 def merge_perchrom_mutations(job, chrom, mutations, univ_options):
     """
-    This module will accept job store ids for vcf files for all snvs, and will merge the calls for a
-    single provided chromosome.
+    Merge the mutation calls for a single chromosome.
 
-    :param job: job
     :param str chrom: Chromosome to process
     :param dict mutations: dict of dicts of the various mutation caller names as keys, and a dict of
-    per chromosome job store ids for vcfs as value
-    :param dict univ_options: Universal Options
-    :returns dict of merged vcf
+           per chromosome job store ids for vcfs as value
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: fsID for vcf containing merged calls for the given chromosome
+    :rtype: toil.fileStore.FileID
     """
     work_dir = os.getcwd()
     from protect.mutation_calling.muse import process_muse_vcf
@@ -125,10 +129,11 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
 
 def read_vcf(vcf_file):
     """
-    Reads a vcf file to a list of lists
+    Read a vcf file into a list of lists.
 
-    :param vcf_file:
-    :return:
+    :param str vcf_file: Path to a vcf file.
+    :return: list of lists of vcf records
+    :rtype: list
     """
     vcf_dict = []
     with open(vcf_file, 'r') as invcf:
@@ -142,17 +147,18 @@ def read_vcf(vcf_file):
 
 def chrom_sorted(in_chroms):
     """
-    This module will sort a list of chromosomes in the order 1..22, X, Y, M.
-    :param list in_chroms: input chromsomes
-    :return: sorted chromosomes
-    :rtype: list
+    Sort a list of chromosomes in the order 1..22, X, Y, M.
+
+    :param list in_chroms: Input chromosomes
+    :return: Sorted chromosomes
+    :rtype: list[str]
     """
     chr_prefix = False
     if in_chroms[0].startswith('chr'):
         in_chroms = [x.lstrip('chr') for x in in_chroms]
         chr_prefix = True
-    assert in_chroms[0] in [str(x) for x in range(1,23)] + ['X', 'Y', 'M']
-    in_chroms = sorted(in_chroms, key=lambda x: int(x) if x not in ('X', 'Y', 'M') else x)
+    assert in_chroms[0] in [str(x) for x in range(1, 23)] + ['X', 'Y', 'M']
+    in_chroms = sorted(in_chroms, key=lambda c: int(c) if c not in ('X', 'Y', 'M') else c)
     try:
         m_index = in_chroms.index('M')
     except ValueError:
@@ -168,13 +174,13 @@ def chrom_sorted(in_chroms):
 
 def merge_perchrom_vcfs(job, perchrom_vcfs, tool_name, univ_options):
     """
-    This module will merge per-chromosome vcf files into a single genome level vcf.
+    Merge per-chromosome vcf files into a single genome level vcf.
 
-    :param dict perchrom_vcfs: Dictionary with chromosome name as key and jobstore ID of
-    corresponding vcf as value
+    :param dict perchrom_vcfs: Dictionary with chromosome name as key and fsID of the corresponding
+           vcf as value
     :param str tool_name: Name of the tool that generated the vcfs
-
-    :returns: Job Store File ID for the merged vcf
+    :returns: fsID for the merged vcf
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running merge_perchrom_vcfs for %s' % tool_name)
     work_dir = os.getcwd()
@@ -199,13 +205,13 @@ def merge_perchrom_vcfs(job, perchrom_vcfs, tool_name, univ_options):
 
 def unmerge(job, input_vcf, tool_name, tool_options, univ_options):
     """
-    Un-merges a vcf file into a file per chromosome.
+    Un-merge a vcf file into per-chromosome vcfs.
 
     :param str input_vcf: Input vcf
    :param str tool_name: The name of the mutation caller
-    :param dict tool_options: Options specific to Somatic Sniper
-    :param dict univ_options: Universal options
-    :returns: dict of jsIDs, onr for each chromosomal vcf
+    :param dict tool_options: Options specific to the mutation caller
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: dict of fsIDs, one for each chromosomal vcf
     :rtype: dict
     """
     work_dir = os.getcwd()
@@ -243,4 +249,4 @@ def unmerge(job, input_vcf, tool_name, tool_options, univ_options):
         outdict[chrom] = job.fileStore.writeGlobalFile(chromvcf.name)
         export_results(job, outdict[chrom], chromvcf.name, univ_options,
                        subfolder='mutations/' + tool_name)
-    return outdict
\ No newline at end of file
+    return outdict
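For reference, the ordering `chrom_sorted` enforces can be reproduced with the small standalone sketch below. `sort_chromosomes` is an illustrative name, not a ProTECT function; unlike `chrom_sorted` (which leans on Python 2's int-before-str comparison and then repositions 'M' by hand) it uses an explicit rank map:

```python
# Standalone sketch of the 1..22, X, Y, M ordering enforced by chrom_sorted.
def sort_chromosomes(chroms):
    has_prefix = chroms[0].startswith('chr')
    # Strip any 'chr' prefix, sort on a numeric key, then restore the prefix.
    names = [c[3:] if c.startswith('chr') else c for c in chroms]
    rank = {'X': 23, 'Y': 24, 'M': 25}
    names.sort(key=lambda c: rank[c] if c in rank else int(c))
    return ['chr' + c for c in names] if has_prefix else names


assert sort_chromosomes(['chrM', 'chr2', 'chrX', 'chr10', 'chr1']) == \
    ['chr1', 'chr2', 'chr10', 'chrX', 'chrM']
```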
diff --git a/src/protect/mutation_calling/fusion.py b/src/protect/mutation_calling/fusion.py
index 8d723879..d5ee9beb 100644
--- a/src/protect/mutation_calling/fusion.py
+++ b/src/protect/mutation_calling/fusion.py
@@ -17,10 +17,13 @@
 
 def run_fusion_caller(job, star_bam, univ_options, fusion_options):
     """
-    This module will run a fusion caller on DNA bams. This module will be
-    implemented in the future.
+    Run a fusion caller on DNA bams. This module will be implemented in the future.
 
-    This module corresponds to node 10 on the tree
+    :param toil.fileStore.FileID star_bam: The input RNA-Seq bam
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict fusion_options: Options specific to fusion calling
+    :return: fsID to the merged fusion calls
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running FUSION on %s' % univ_options['patient'])
     job.fileStore.logToMaster('Fusions are currently unsupported.... Skipping.')
diff --git a/src/protect/mutation_calling/indel.py b/src/protect/mutation_calling/indel.py
index 35b07f0e..41f6624d 100644
--- a/src/protect/mutation_calling/indel.py
+++ b/src/protect/mutation_calling/indel.py
@@ -17,10 +17,14 @@
 
 def run_indel_caller(job, tumor_bam, normal_bam, univ_options, indel_options):
     """
-    This module will run an indel caller on the DNA bams. This module will be
-    implemented in the future.
+    Run an indel caller on the DNA bams. This module will be implemented in the future.
 
-    This module corresponds to node 13 on the tree
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict indel_options: Options specific to indel calling
+    :return: fsID to the merged indel calls
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running INDEL on %s' % univ_options['patient'])
     job.fileStore.logToMaster('INDELs are currently unsupported.... Skipping.')
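All of the `run_*_with_merge` wrappers touched below share one Toil idiom: the dynamically-spawning job is `encapsulate()`-d so a merge step can be attached after its yet-unknown children. A minimal sketch of the pattern, with `run_caller` and `merge_results` as illustrative stand-ins rather than real ProTECT functions:

```python
# encapsulate() wraps the spawning job and everything it dynamically adds into
# a single node, so a child of the encapsulated job runs only after the whole
# per-chromosome sub-graph has finished.
def run_caller(job, inputs):
    # In ProTECT this would add one child per chromosome and return promises.
    return {'chr1': 'fsid_1', 'chr2': 'fsid_2'}


def merge_results(job, perchrom):
    return sorted(perchrom.values())


def run_caller_with_merge(job, inputs):
    spawn = job.wrapJobFn(run_caller, inputs).encapsulate()
    merge = job.wrapJobFn(merge_results, spawn.rv())
    job.addChild(spawn)
    spawn.addChild(merge)
    return merge.rv()
```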
diff --git a/src/protect/mutation_calling/muse.py b/src/protect/mutation_calling/muse.py
index d6503520..a634782f 100644
--- a/src/protect/mutation_calling/muse.py
+++ b/src/protect/mutation_calling/muse.py
@@ -15,14 +15,17 @@
 from __future__ import print_function
 from collections import defaultdict
 from math import ceil
-from protect.common import (get_files_from_filestore, docker_path, docker_call, untargz,
-                            export_results)
-from protect.mutation_calling.common import sample_chromosomes, merge_perchrom_vcfs
+
+from protect.common import (docker_call,
+                            docker_path,
+                            export_results,
+                            get_files_from_filestore,
+                            untargz)
+from protect.mutation_calling.common import merge_perchrom_vcfs, sample_chromosomes
 from toil.job import PromisedRequirement
 
 import os
 import shutil
-import sys
 import time
 
@@ -39,9 +42,16 @@ def muse_sump_disk(dbsnp):
 
 def run_muse_with_merge(job, tumor_bam, normal_bam, univ_options, muse_options):
     """
-    This is a convenience function that runs the entire muse sub-graph.
+    A wrapper for the entire MuSE sub-graph.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict muse_options: Options specific to MuSE
+    :return: fsID to the merged MuSE calls
+    :rtype: toil.fileStore.FileID
     """
-    spawn = job.wrapJobFn(run_muse, tumor_bam, normal_bam, univ_options,muse_options,
+    spawn = job.wrapJobFn(run_muse, tumor_bam, normal_bam, univ_options, muse_options,
                           disk='100M').encapsulate()
     merge = job.wrapJobFn(merge_perchrom_vcfs, spawn.rv(), disk='100M')
     job.addChild(spawn)
@@ -51,51 +61,32 @@ def run_muse_with_merge(job, tumor_bam, normal_bam, univ_options, muse_options):
 
 def run_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
     """
-    This module will spawn a muse job for each chromosome on the DNA bams.
-
-    ARGUMENTS
-    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
-              tumor_bam
-              |- 'tumor_fix_pg_sorted.bam': 
-              +- 'tumor_fix_pg_sorted.bam.bai': 
-    2. normal_bam: Dict of input normal WGS/WSQ bam + bai
-              normal_bam
-              |- 'normal_fix_pg_sorted.bam': 
-              +- 'normal_fix_pg_sorted.bam.bai': 
-    3. univ_options: Dict of universal arguments used by almost all tools
-                     univ_options
-                     +- 'dockerhub': 
-    4. muse_options: Dict of parameters specific to muse
-                     muse_options
-                     |- 'dbsnp_vcf': 
-                     |- 'dbsnp_idx': 
-                     |- 'cosmic_vcf': 
-                     |- 'cosmic_idx': 
-                     |- 'genome_fasta': 
-                     +- 'genome_dict': 
-                     +- 'genome_fai': 
-
-    RETURN VALUES
-    1. perchrom_muse: Dict of results of muse per chromosome
-             perchrom_muse
-             |- 'chr1'
-             |  +- 
-             |- 'chr2'
-             |  +- 
-             etc...
-
-    This module corresponds to node 11 on the tree
+    Spawn a MuSE job for each chromosome on the DNA bams.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict muse_options: Options specific to MuSE
+    :return: Dict of results from running MuSE on every chromosome
+             perchrom_muse:
+                 |- 'chr1': fsID
+                 |- 'chr2': fsID
+                 |
+                 |- ...
+                 |
+                 +- 'chrM': fsID
+    :rtype: dict
     """
     # Get a list of chromosomes to handle
     chromosomes = sample_chromosomes(job, muse_options['genome_fai'])
     perchrom_muse = defaultdict()
     for chrom in chromosomes:
         call = job.addChildJobFn(run_muse_perchrom, tumor_bam, normal_bam, univ_options,
-                                 muse_options, chrom,
-                                 disk=PromisedRequirement(muse_disk,
-                                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
-                                                          normal_bam['normal_dna_fix_pg_sorted.bam'],
-                                                          muse_options['genome_fasta']),
+                                 muse_options, chrom, disk=PromisedRequirement(
+                muse_disk,
+                tumor_bam['tumor_dna_fix_pg_sorted.bam'],
+                normal_bam['normal_dna_fix_pg_sorted.bam'],
+                muse_options['genome_fasta']),
                                  memory='6G')
         sump = call.addChildJobFn(run_muse_sump_perchrom, call.rv(), univ_options, muse_options,
                                   chrom,
@@ -108,21 +99,17 @@ def run_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
 
 def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options, chrom):
     """
-    This module will run muse on the DNA bams
-
-    ARGUMENTS
-    1. tumor_bam: REFER ARGUMENTS of spawn_muse()
-    2. normal_bam: REFER ARGUMENTS of spawn_muse()
-    3. univ_options: REFER ARGUMENTS of spawn_muse()
-    4. muse_options: REFER ARGUMENTS of spawn_muse()
-    5. chrom: String containing chromosome name with chr appended
-
-    RETURN VALUES
-    1. output_files: 
-
-    This module corresponds to node 12 on the tree
+    Run MuSE call on a single chromosome in the input bams.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict muse_options: Options specific to MuSE
+    :param str chrom: Chromosome to process
+    :return: fsID for the chromosome vcf
+    :rtype: toil.fileStore.FileID
     """
-    job.fileStore.logToMaster('Running muse on %s:%s' % (univ_options['patient'], chrom))
+    job.fileStore.logToMaster('Running MuSE on %s:%s' % (univ_options['patient'], chrom))
     work_dir = os.getcwd()
     input_files = {
         'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
@@ -153,9 +140,16 @@ def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options, ch
 
 def run_muse_sump_perchrom(job, muse_output, univ_options, muse_options, chrom):
     """
-    This module will run muse sump on the muse output
+    Run MuSE sump on the MuSE call generated vcf.
+
+    :param toil.fileStore.FileID muse_output: vcf generated by MuSE call
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict muse_options: Options specific to MuSE
+    :param str chrom: Chromosome to process
+    :return: fsID for the chromosome vcf
+    :rtype: toil.fileStore.FileID
     """
-    job.fileStore.logToMaster('Running muse sump on %s:%s' % (univ_options['patient'], chrom))
+    job.fileStore.logToMaster('Running MuSE sump on %s:%s' % (univ_options['patient'], chrom))
     work_dir = os.getcwd()
     input_files = {
         'MuSE.txt': muse_output,
@@ -185,11 +179,13 @@ def run_muse_sump_perchrom(job, muse_output, univ_options, muse_options, chrom):
 
 def process_muse_vcf(job, muse_vcf, work_dir, univ_options):
     """
-    This does nothing for now except to download and return the muse vcfs
-    :param job: job
-    :param str muse_vcf: Job Store ID corresponding to a muse vcf for 1 chromosome
-    :param univ_options: Universal options
-    :returns dict: Dict with chromosomes as keys and path to the corresponding muse vcfs as values
+    Process the MuSE vcf for accepted calls.
+
+    :param toil.fileStore.FileID muse_vcf: fsID for a MuSE generated chromosome vcf
+    :param str work_dir: Working directory
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: Path to the processed vcf
+    :rtype: str
     """
     muse_vcf = job.fileStore.readGlobalFile(muse_vcf)
 
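MuSE, MuTect and RADIA all size their per-chromosome jobs with `PromisedRequirement`, since the bam sizes are only known once the upstream alignment jobs finish. A hedged sketch of the idiom; `caller_disk`, `run_caller_perchrom` and `spawn` are illustrative names, not ProTECT functions:

```python
from math import ceil

from toil.job import PromisedRequirement


def run_caller_perchrom(job, tumor_bam, normal_bam, options, chrom):
    return None  # stands in for the real per-chromosome caller


def caller_disk(tumor_bam, normal_bam, fasta):
    # Mirrors the *_disk helpers in these modules: a conservative sum of the
    # (promised) input file sizes.
    return int(ceil(tumor_bam.size) + ceil(normal_bam.size) + 4 * ceil(fasta.size))


def spawn(job, tumor_bam, normal_bam, options, chromosomes):
    # The disk requirement is computed lazily, from promises, at schedule time.
    return {chrom: job.addChildJobFn(
                run_caller_perchrom, tumor_bam, normal_bam, options, chrom,
                disk=PromisedRequirement(caller_disk,
                                         tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                         normal_bam['normal_dna_fix_pg_sorted.bam'],
                                         options['genome_fasta']),
                memory='6G').rv()
            for chrom in chromosomes}
```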
diff --git a/src/protect/mutation_calling/mutect.py b/src/protect/mutation_calling/mutect.py
index be1cc1a2..4647e2f2 100644
--- a/src/protect/mutation_calling/mutect.py
+++ b/src/protect/mutation_calling/mutect.py
@@ -15,18 +15,20 @@
 from __future__ import print_function
 from collections import defaultdict
 from math import ceil
-from protect.common import get_files_from_filestore, docker_path, docker_call, export_results, \
-    untargz, gunzip
+
+from protect.common import (docker_call,
+                            docker_path,
+                            export_results,
+                            get_files_from_filestore,
+                            gunzip,
+                            untargz)
 from protect.mutation_calling.common import sample_chromosomes, merge_perchrom_vcfs
+from toil.job import PromisedRequirement
 
 import os
-import sys
 
 
 # disk for mutect.
-from toil.job import PromisedRequirement
-
-
 def mutect_disk(tumor_bam, normal_bam, fasta, dbsnp, cosmic):
     return int(ceil(tumor_bam.size) +
                ceil(normal_bam.size) +
@@ -37,7 +39,14 @@ def mutect_disk(tumor_bam, normal_bam, fasta, dbsnp, cosmic):
 
 def run_mutect_with_merge(job, tumor_bam, normal_bam, univ_options, mutect_options):
     """
-    This is a convenience function that runs the entire mutect sub-graph.
+    A wrapper for the entire MuTect sub-graph.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict mutect_options: Options specific to MuTect
+    :return: fsID to the merged MuTect calls
+    :rtype: toil.fileStore.FileID
     """
     spawn = job.wrapJobFn(run_mutect, tumor_bam, normal_bam, univ_options,
                           mutect_options).encapsulate()
@@ -49,42 +58,21 @@ def run_mutect_with_merge(job, tumor_bam, normal_bam, univ_options, mutect_optio
 
 def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
     """
-    This module will spawn a mutect job for each chromosome on the DNA bams.
-
-    ARGUMENTS
-    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
-              tumor_bam
-              |- 'tumor_fix_pg_sorted.bam': 
-              +- 'tumor_fix_pg_sorted.bam.bai': 
-    2. normal_bam: Dict of input normal WGS/WSQ bam + bai
-              normal_bam
-              |- 'normal_fix_pg_sorted.bam': 
-              +- 'normal_fix_pg_sorted.bam.bai': 
-    3. univ_options: Dict of universal arguments used by almost all tools
-                     univ_options
-                     +- 'dockerhub': 
-    4. mutect_options: Dict of parameters specific to mutect
-                     mutect_options
-                     |- 'dbsnp_vcf': 
-                     |- 'dbsnp_idx': 
-                     |- 'cosmic_vcf': 
-                     |- 'cosmic_idx': 
-                     |- 'genome_fasta': 
-                     +- 'genome_dict': 
-                     +- 'genome_fai': 
-
-    RETURN VALUES
-    1. perchrom_mutect: Dict of results of mutect per chromosome
-             perchrom_mutect
-             |- 'chr1'
-             |  +- 'mutect_chr1.vcf': 
-             |  +- 'mutect_chr1.out': 
-             |- 'chr2'
-             |  |- 'mutect_chr2.vcf': 
-             |  +- 'mutect_chr2.out': 
-             etc...
-
-    This module corresponds to node 11 on the tree
+    Spawn a MuTect job for each chromosome on the DNA bams.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict mutect_options: Options specific to MuTect
+    :return: Dict of results from running MuTect on every chromosome
+             perchrom_mutect:
+                 |- 'chr1': fsID
+                 |- 'chr2': fsID
+                 |
+                 |- ...
+                 |
+                 +- 'chrM': fsID
+    :rtype: dict
     """
     # Get a list of chromosomes to handle
     chromosomes = sample_chromosomes(job, mutect_options['genome_fai'])
@@ -103,24 +91,17 @@ def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
 
 def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options, chrom):
     """
-    This module will run mutect on the DNA bams
-
-    ARGUMENTS
-    1. tumor_bam: REFER ARGUMENTS of spawn_mutect()
-    2. normal_bam: REFER ARGUMENTS of spawn_mutect()
-    3. univ_options: REFER ARGUMENTS of spawn_mutect()
-    4. mutect_options: REFER ARGUMENTS of spawn_mutect()
-    5. chrom: String containing chromosome name with chr appended
-
-    RETURN VALUES
-    1. output_files: Dict of results of mutect for chromosome
-             output_files
-             |- 'mutect_CHROM.vcf': 
-             +- 'mutect_CHROM.out': 
-
-    This module corresponds to node 12 on the tree
+    Run MuTect call on a single chromosome in the input bams.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict mutect_options: Options specific to MuTect
+    :param str chrom: Chromosome to process
+    :return: fsID for the chromosome vcf
+    :rtype: toil.fileStore.FileID
     """
-    job.fileStore.logToMaster('Running mutect on %s:%s' % (univ_options['patient'], chrom))
+    job.fileStore.logToMaster('Running MuTect on %s:%s' % (univ_options['patient'], chrom))
     work_dir = os.getcwd()
     input_files = {
         'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
@@ -158,7 +139,7 @@ def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options
     java_xmx = mutect_options['java_Xmx'] if mutect_options['java_Xmx'] \
         else univ_options['java_Xmx']
     docker_call(tool='mutect', tool_parameters=parameters, work_dir=work_dir,
-                dockerhub=univ_options['dockerhub'], java_opts=java_xmx,
+                dockerhub=univ_options['dockerhub'], java_xmx=java_xmx,
                 tool_version=mutect_options['version'])
     output_file = job.fileStore.writeGlobalFile(mutvcf)
     export_results(job, output_file, mutvcf, univ_options, subfolder='mutations/mutect')
@@ -167,12 +148,13 @@ def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options
 
 def process_mutect_vcf(job, mutect_vcf, work_dir, univ_options):
     """
-    This will process all the mutect vcfs to return only passing calls
-    :param job: job
-    :param str mutect_vcf: Job Store ID corresponding to a mutect vcf for 1 chromosome
-    :param univ_options: Universal options
-    :returns dict: Dict with chromosomes as keys and path to the corresponding parsed mutect vcfs as
-    values
+    Process the MuTect vcf for accepted calls.
+
+    :param toil.fileStore.FileID mutect_vcf: fsID for a MuTect generated chromosome vcf
+    :param str work_dir: Working directory
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: Path to the processed vcf
+    :rtype: str
     """
     mutect_vcf = job.fileStore.readGlobalFile(mutect_vcf)
 
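The `process_*_vcf` helpers reduce each caller's raw output to its accepted calls. As a generic illustration of that kind of step (not necessarily MuTect's exact logic, which may consult caller-specific annotations), a minimal PASS filter over a vcf might look like:

```python
# Keep header lines and records whose FILTER column (column 7) is PASS.
# keep_passing_calls is an illustrative standalone sketch, not ProTECT code.
def keep_passing_calls(in_vcf, out_vcf):
    with open(in_vcf) as vcf_in, open(out_vcf, 'w') as vcf_out:
        for line in vcf_in:
            if line.startswith('#') or line.split('\t')[6] == 'PASS':
                vcf_out.write(line)
```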
diff --git a/src/protect/mutation_calling/radia.py b/src/protect/mutation_calling/radia.py
index 1456f873..6be03c40 100644
--- a/src/protect/mutation_calling/radia.py
+++ b/src/protect/mutation_calling/radia.py
@@ -15,18 +15,20 @@
 from __future__ import print_function
 from collections import defaultdict
 from math import ceil
-from protect.common import (get_files_from_filestore, docker_path, docker_call, export_results,
+
+from protect.common import (docker_path,
+                            docker_call,
+                            export_results,
+                            get_files_from_filestore,
                             untargz)
 from protect.mutation_calling.common import sample_chromosomes, merge_perchrom_vcfs
+from toil.job import PromisedRequirement
 
 import os
 import sys
 
 
 # disk for radia and filterradia.
-from toil.job import PromisedRequirement
-
-
 def radia_disk(tumor_bam, normal_bam, rna_bam, fasta):
     return int(ceil(tumor_bam.size) +
                ceil(normal_bam.size) +
@@ -36,10 +38,20 @@ def radia_disk(tumor_bam, normal_bam, rna_bam, fasta):
 
 def run_radia_with_merge(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
     """
-    This is a convenience function that runs the entire mutect sub-graph.
+    A wrapper for the entire RADIA sub-graph.
+
+    :param dict rna_bam: Dict of dicts of bam and bai for tumor RNA-Seq obtained by running STAR
+           within ProTECT.
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict radia_options: Options specific to RADIA
+    :return: fsID to the merged RADIA calls
+    :rtype: toil.fileStore.FileID
     """
-    spawn = job.wrapJobFn(run_radia, rna_bam, tumor_bam, normal_bam, univ_options,
-                          radia_options, disk='100M', memory='100M').encapsulate()
+    spawn = job.wrapJobFn(run_radia, rna_bam['rnaAligned.sortedByCoord.out.bam'], tumor_bam,
+                          normal_bam, univ_options, radia_options, disk='100M',
+                          memory='100M').encapsulate()
     merge = job.wrapJobFn(merge_perchrom_vcfs, spawn.rv(), univ_options, disk='100M',
                           memory='100M')
     job.addChild(spawn)
     spawn.addChild(merge)
@@ -48,45 +60,43 @@ def run_radia_with_merge(job, rna_bam, tumor_bam, normal_bam, univ_options, radi
 
 def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
     """
-    This module will spawn a radia job for each chromosome, on the RNA and DNA.
+    Spawn a RADIA job for each chromosome on the input bam trios.
 
-    ARGUMENTS
-    1. rna_bam: Dict of input STAR bams
-            rna_bam
-            |- 'rnaAligned.sortedByCoord.out.bam': REFER run_star()
-            |- 'rna_fix_pg_sorted.bam': 
-            +- 'rna_fix_pg_sorted.bam.bai': 
-    2. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
-              tumor_bam
-              |- 'tumor_fix_pg_sorted.bam': 
-              +- 'tumor_fix_pg_sorted.bam.bai': 
-    3. normal_bam: Dict of input normal WGS/WSQ bam + bai
-              normal_bam
-              |- 'normal_fix_pg_sorted.bam': 
-              +- 'normal_fix_pg_sorted.bam.bai': 
-    4. univ_options: Dict of universal arguments used by almost all tools
-                     univ_options
-                     +- 'dockerhub': 
-    5. radia_options: Dict of parameters specific to radia
-                     radia_options
-                     |- 'genome_fasta': 
-                     +- 'genome_fai': 
-
-    RETURN VALUES
-    1. perchrom_radia: Dict of results of radia per chromosome
-             perchrom_radia
-             |- 'chr1'
-             |  +- 'radia_filtered_chr1.vcf': 
-             |- 'chr2'
-             |  +- 'radia_filtered_chr2.vcf': 
-             etc...
-
-    This module corresponds to node 11 on the tree
+    :param dict rna_bam: Dict of bam and bai for tumor RNA-Seq. It can be one of two formats
+           rna_bam:    # Just the genomic bam and bai
+               |- 'rna_fix_pg_sorted.bam': fsID
+               +- 'rna_fix_pg_sorted.bam.bai': fsID
+           OR
+           rna_bam:    # The output from run_star
+               |- 'rnaAligned.toTranscriptome.out.bam': fsID
+               |- 'rnaAligned.sortedByCoord.out.bam':    # Only this part will be used
+                      |- 'rna_fix_pg_sorted.bam': fsID
+                      +- 'rna_fix_pg_sorted.bam.bai': fsID
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict radia_options: Options specific to RADIA
+    :return: Dict of results from running RADIA on every chromosome
+             perchrom_radia:
+                 |- 'chr1': fsID
+                 |- 'chr2': fsID
+                 |
+                 |- ...
+                 |
+                 +- 'chrM': fsID
+    :rtype: dict
     """
     job.fileStore.logToMaster('Running spawn_radia on %s' % univ_options['patient'])
-    rna_bam_key = 'rnaAligned.sortedByCoord.out.bam'  # to reduce next line size
-    bams = {'tumor_rna': rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam'],
-            'tumor_rnai': rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam.bai'],
+    if set(rna_bam.keys()) == {'rnaAligned.toTranscriptome.out.bam',
+                               'rnaAligned.sortedByCoord.out.bam'}:
+        rna_bam = rna_bam['rnaAligned.sortedByCoord.out.bam']
+    elif set(rna_bam.keys()) == {'rna_fix_pg_sorted.bam', 'rna_fix_pg_sorted.bam.bai'}:
+        pass
+    else:
+        raise RuntimeError('An improperly formatted dict was passed to rna_bam.')
+
+    bams = {'tumor_rna': rna_bam['rna_fix_pg_sorted.bam'],
+            'tumor_rnai': rna_bam['rna_fix_pg_sorted.bam.bai'],
             'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
             'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
            'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
@@ -100,14 +110,14 @@ def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
                                  disk=PromisedRequirement(
                                      radia_disk, tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                      normal_bam['normal_dna_fix_pg_sorted.bam'],
-                                     rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam'],
+                                     rna_bam['rna_fix_pg_sorted.bam'],
                                      radia_options['genome_fasta']))
         filter_radia = radia.addChildJobFn(run_filter_radia, bams, radia.rv(), univ_options,
                                            radia_options, chrom, memory='6G',
                                            disk=PromisedRequirement(
                                                radia_disk, tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                normal_bam['normal_dna_fix_pg_sorted.bam'],
-                                               rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam'],
+                                               rna_bam['rna_fix_pg_sorted.bam'],
                                                radia_options['genome_fasta']))
         perchrom_radia[chrom] = filter_radia.rv()
     return perchrom_radia
@@ -115,30 +125,14 @@ def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
 
 def run_radia_perchrom(job, bams, univ_options, radia_options, chrom):
     """
-    This module will run radia on the RNA and DNA bams
-
-    ARGUMENTS
-    1. bams: Dict of bams and their indexes
-            bams
-            |- 'tumor_rna': 
-            |- 'tumor_rnai': 
-            |- 'tumor_dna': 
-            |- 'tumor_dnai': 
-            |- 'normal_dna': 
-            +- 'normal_dnai': 
-    2. univ_options: Dict of universal arguments used by almost all tools
-                     univ_options
-                     +- 'dockerhub': 
-    3. radia_options: Dict of parameters specific to radia
-                     radia_options
-                     |- 'dbsnp_vcf': 
-                     +- 'genome': 
-    4. chrom: String containing chromosome name with chr appended
+    Run RADIA call on a single chromosome in the input bams.
 
-    RETURN VALUES
-    1. Dict of filtered radia output vcf and logfile (Nested return)
-        |- 'radia_filtered_CHROM.vcf': 
-        +- 'radia_filtered_CHROM_radia.log': 
+    :param dict bams: Dict of bam and bai for tumor DNA-Seq, normal DNA-Seq and tumor RNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict radia_options: Options specific to RADIA
+    :param str chrom: Chromosome to process
+    :return: fsID for the chromosome vcf
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running radia on %s:%s' % (univ_options['patient'], chrom))
     work_dir = os.getcwd()
@@ -183,17 +177,15 @@ def run_radia_perchrom(job, bams, univ_options, radia_options, chrom):
 
 def run_filter_radia(job, bams, radia_file, univ_options, radia_options, chrom):
     """
-    This module will run filterradia on the RNA and DNA bams.
+    Run filterradia on the RADIA output.
 
-    ARGUMENTS
-    1. bams: REFER ARGUMENTS of run_radia()
-    2. univ_options: REFER ARGUMENTS of run_radia()
-    3. radia_file: 
-    3. radia_options: REFER ARGUMENTS of run_radia()
-    4. chrom: REFER ARGUMENTS of run_radia()
-
-    RETURN VALUES
-    1. output_file: 
+    :param dict bams: Dict of bam and bai for tumor DNA-Seq, normal DNA-Seq and tumor RNA-Seq
+    :param toil.fileStore.FileID radia_file: The vcf from running RADIA
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict radia_options: Options specific to RADIA
+    :param str chrom: Chromosome to process
+    :return: fsID for the filtered chromosome vcf
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster('Running filter-radia on %s:%s' % (univ_options['patient'], chrom))
     work_dir = os.getcwd()
@@ -252,14 +244,14 @@ def run_filter_radia(job, bams, radia_file, univ_options, radia_options, chrom):
 
 def process_radia_vcf(job, radia_vcf, work_dir, univ_options):
     """
-    This function will parse the vcf to detect sites having multiple alt alleles and pick out on the
-    most likely ones.
+    Process the RADIA vcf for passing calls and, for sites having multiple alt alleles, pick out
+    the most likely ones.
 
-    :param job: job
-    :param str radia_vcf: Job Store ID corresponding to a radia vcf for 1 chromosome
-    :param univ_options: Universal options
-    :returns dict: Dict with chromosomes as keys and path to the corresponding parsed radia vcfs as
-    values
+    :param toil.fileStore.FileID radia_vcf: fsID for a RADIA generated chromosome vcf
+    :param str work_dir: Working directory
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: Path to the processed vcf
+    :rtype: str
     """
     radia_vcf = job.fileStore.readGlobalFile(radia_vcf)
     with open(radia_vcf, 'r') as infile, open(radia_vcf + 'radia_parsed.tmp', 'w') as outfile:
@@ -373,4 +365,3 @@ def process_radia_vcf(job, radia_vcf, work_dir, univ_options):
         else:
             pass
     return outfile.name
-
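The two accepted shapes of `rna_bam` that `run_radia` normalises can be exercised in isolation with the standalone restatement below; `normalise_rna_bam` is an illustrative name, not a ProTECT function:

```python
# Reduce either accepted rna_bam shape to the genomic bam/bai dict, exactly
# as run_radia does at its top.
def normalise_rna_bam(rna_bam):
    if set(rna_bam.keys()) == {'rnaAligned.toTranscriptome.out.bam',
                               'rnaAligned.sortedByCoord.out.bam'}:
        return rna_bam['rnaAligned.sortedByCoord.out.bam']
    elif set(rna_bam.keys()) == {'rna_fix_pg_sorted.bam',
                                 'rna_fix_pg_sorted.bam.bai'}:
        return rna_bam
    raise RuntimeError('An improperly formatted dict was passed to rna_bam.')
```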
diff --git a/src/protect/mutation_calling/somaticsniper.py b/src/protect/mutation_calling/somaticsniper.py
index 0d551c0e..f2b117eb 100644
--- a/src/protect/mutation_calling/somaticsniper.py
+++ b/src/protect/mutation_calling/somaticsniper.py
@@ -15,12 +15,13 @@
 from __future__ import print_function
 from collections import defaultdict
 from math import ceil
-from protect.common import (get_files_from_filestore,
-                            docker_path,
+
+from protect.common import (docker_path,
                             docker_call,
+                            get_files_from_filestore,
                             untargz)
-from protect.mutation_calling.common import (unmerge,
-                                             sample_chromosomes)
+from protect.mutation_calling.common import (sample_chromosomes,
+                                             unmerge)
 from toil.job import PromisedRequirement
 
 import os
@@ -43,10 +44,18 @@ def sniper_filter_disk(tumor_bam, fasta):
             2 * ceil(fasta.size))
 
 
-# Somatic Sniper is a different tool from the other callers because it is run in full
+# Somatic Sniper is a different tool from the other callers because it is run in full. Certain
+# redundant modules in this file are kept to maintain a similar naming schema to other callers.
 def run_somaticsniper_with_merge(job, tumor_bam, normal_bam, univ_options, somaticsniper_options):
     """
-    This is a convenience function that runs the entire somaticsniper sub-graph.
+    A wrapper for the entire SomaticSniper sub-graph.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict somaticsniper_options: Options specific to SomaticSniper
+    :return: fsID to the merged SomaticSniper calls
+    :rtype: toil.fileStore.FileID
     """
     spawn = job.wrapJobFn(run_somaticsniper, tumor_bam, normal_bam, univ_options,
                           somaticsniper_options, split=False).encapsulate()
@@ -56,40 +65,24 @@ def run_somaticsniper_with_merge(job, tumor_bam, normal_bam, univ_options, somat
 
 def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_options, split=True):
     """
-    This module will spawn a somaticsniper job for each chromosome on the DNA bams.
-
-    ARGUMENTS
-    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
-              tumor_bam
-              |- 'tumor_fix_pg_sorted.bam': 
-              +- 'tumor_fix_pg_sorted.bam.bai': 
-    2. normal_bam: Dict of input normal WGS/WSQ bam + bai
-              normal_bam
-              |- 'normal_fix_pg_sorted.bam': 
-              +- 'normal_fix_pg_sorted.bam.bai': 
-    3. univ_options: Dict of universal arguments used by almost all tools
-                     univ_options
-                     +- 'dockerhub': 
-    4. somaticsniper_options: Dict of parameters specific to somaticsniper
-                     somaticsniper_options
-                     |- 'dbsnp_vcf': 
-                     |- 'dbsnp_idx': 
-                     |- 'cosmic_vcf': 
-                     |- 'cosmic_idx': 
-                     |- 'genome_fasta': 
-                     +- 'genome_dict': 
-                     +- 'genome_fai': 
-
-    RETURN VALUES
-    1. perchrom_somaticsniper: Dict of results of somaticsniper per chromosome
-             perchrom_somaticsniper
-             |- 'chr1'
-             |  +- 
-             |- 'chr2'
-             |  +- 
-             etc...
-
-    This module corresponds to node 11 on the tree
+    Run the SomaticSniper subgraph on the DNA bams. Optionally split the results into
+    per-chromosome vcfs.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict somaticsniper_options: Options specific to SomaticSniper
+    :param bool split: Should the results be split into perchrom vcfs?
+    :return: Either the fsID to the genome-level vcf or a dict of results from running SomaticSniper
+             on every chromosome
+             perchrom_somaticsniper:
+                 |- 'chr1': fsID
+                 |- 'chr2': fsID
+                 |
+                 |- ...
+                 |
+                 +- 'chrM': fsID
+    :rtype: toil.fileStore.FileID|dict
     """
     # Get a list of chromosomes to handle
     chromosomes = sample_chromosomes(job, somaticsniper_options['genome_fai'])
@@ -128,19 +121,16 @@ def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_op
 
 def run_somaticsniper_full(job, tumor_bam, normal_bam, univ_options, somaticsniper_options):
     """
-    This module will run somaticsniper on the DNA bams.
-
-    ARGUMENTS
-    :param dict tumor_bam: REFER ARGUMENTS of spawn_somaticsniper()
-    :param dict normal_bam: REFER ARGUMENTS of spawn_somaticsniper()
-    :param dict univ_options: REFER ARGUMENTS of spawn_somaticsniper()
-    :param dict somaticsniper_options: REFER ARGUMENTS of spawn_somaticsniper()
-
-    RETURN VALUES
-    :returns: dict of output vcfs for each chromosome
-    :rtype: dict
+    Run SomaticSniper on the DNA bams.
+ + :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq + :param dict normal_bam: Dict of bam and bai for normal DNA-Seq + :param dict univ_options: Dict of universal options used by almost all tools + :param dict somaticsniper_options: Options specific to SomaticSniper + :return: fsID to the genome-level vcf + :rtype: toil.fileStore.FileID """ - job.fileStore.logToMaster('Running somaticsniper on %s' % univ_options['patient']) + job.fileStore.logToMaster('Running SomaticSniper on %s' % univ_options['patient']) work_dir = os.getcwd() input_files = { 'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'], @@ -174,18 +164,17 @@ def run_somaticsniper_full(job, tumor_bam, normal_bam, univ_options, somaticsnip def filter_somaticsniper(job, tumor_bam, somaticsniper_output, tumor_pileup, univ_options, somaticsniper_options): """ - This module will filter the somaticsniper output for a single chromosome - - :param toil.Job job: Job - :param dict tumor_bam: Tumor bam file and it's bai - :param str somaticsniper_output: jsID from somatic sniper - :param str tumor_pileup: jsID for pileup file for this chromsome - :param dict univ_options: Universal options - :param dict somaticsniper_options: Options specific to Somatic Sniper - :returns: filtered chromsome vcf - :rtype: str + Filter SomaticSniper calls. + + :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq + :param toil.fileStore.FileID somaticsniper_output: SomaticSniper output vcf + :param toil.fileStore.FileID tumor_pileup: Pileup generated for the tumor bam + :param dict univ_options: Dict of universal options used by almost all tools + :param dict somaticsniper_options: Options specific to SomaticSniper + :returns: fsID for the filtered genome-level vcf + :rtype: toil.fileStore.FileID """ - job.fileStore.logToMaster('Filtering somaticsniper for %s' % univ_options['patient']) + job.fileStore.logToMaster('Filtering SomaticSniper for %s' % univ_options['patient']) work_dir = os.getcwd() input_files = { 'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'], @@ -251,12 +240,15 @@ def filter_somaticsniper(job, tumor_bam, somaticsniper_output, tumor_pileup, uni def process_somaticsniper_vcf(job, somaticsniper_vcf, work_dir, univ_options): """ - This does nothing for now except to download and return the somaticsniper vcfs - :param job: job - :param str somaticsniper_vcf: Job Store ID corresponding to a somaticsniper vcf for 1 chromosome - :param univ_options: Universal options - :returns dict: Dict with chromosomes as keys and path to the corresponding somaticsniper vcfs as - values + Process the SomaticSniper vcf for accepted calls. Since the calls are pre-filtered, we just + obtain the file from the file store and return it. + + :param toil.fileStore.FileID somaticsniper_vcf: fsID for a SomaticSniper generated chromosome + vcf + :param str work_dir: Working directory + :param dict univ_options: Dict of universal options used by almost all tools + :return: Path to the processed vcf + :rtype: str """ return job.fileStore.readGlobalFile(somaticsniper_vcf) @@ -265,11 +257,11 @@ def run_pileup(job, tumor_bam, univ_options, somaticsniper_options): """ Runs a samtools pileup on the tumor bam. 
-    :param toil.Job job: job
-    :param dict tumor_bam: Tumor bam file
-    :param dict univ_options: Universal Options
-    :returns: jsID for the chromsome pileup file
-    :rtype: str
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict somaticsniper_options: Options specific to SomaticSniper
+    :return: fsID for the pileup file
+    :rtype: toil.fileStore.FileID
     """
     job.fileStore.logToMaster(
         'Running samtools pileup on %s' % univ_options['patient'])
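SomaticSniper and strelka run genome-wide rather than per chromosome; the `split` flag then decides whether the genome-level vcf is exploded into per-chromosome vcfs so the downstream merge machinery can treat all callers uniformly. A hedged sketch of that control flow, with all names illustrative rather than ProTECT functions:

```python
def call_genome_wide(job, bams, options):
    return 'genome_vcf_fsid'  # stand-in for the real genome-wide caller


def split_by_chromosome(job, vcf_id, options):
    return {'chr1': vcf_id, 'chr2': vcf_id}  # stand-in for unmerge


def run_full_caller(job, bams, options, split=True):
    # The caller runs once over the whole genome...
    full = job.wrapJobFn(call_genome_wide, bams, options)
    job.addChild(full)
    if not split:
        return full.rv()
    # ...and the genome-level vcf is optionally unmerged afterwards.
    unmerge_job = full.addChildJobFn(split_by_chromosome, full.rv(), options)
    return unmerge_job.rv()
```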
diff --git a/src/protect/mutation_calling/strelka.py b/src/protect/mutation_calling/strelka.py
index 67ee75db..eca70109 100644
--- a/src/protect/mutation_calling/strelka.py
+++ b/src/protect/mutation_calling/strelka.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 from __future__ import print_function
 from math import ceil
+
 from protect.common import (docker_call,
                             docker_path,
                             get_files_from_filestore,
@@ -32,10 +33,18 @@ def strelka_disk(tumor_bam, normal_bam, fasta):
             4 * ceil(fasta.size))
 
 
-# Strelka is a different tool from the other callers because it is run in full
+# Strelka is a different tool from the other callers because it is run in full. Certain
+# redundant modules in this file are kept to maintain a similar naming schema to other callers.
 def run_strelka_with_merge(job, tumor_bam, normal_bam, univ_options, strelka_options):
     """
-    This is a convenience function that runs the entire strelka sub-graph.
+    A wrapper for the entire strelka sub-graph.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict strelka_options: Options specific to strelka
+    :return: fsID to the merged strelka calls
+    :rtype: toil.fileStore.FileID
     """
     spawn = job.wrapJobFn(run_strelka, tumor_bam, normal_bam, univ_options,
                           strelka_options, split=False).encapsulate()
@@ -45,42 +54,29 @@ def run_strelka_with_merge(job, tumor_bam, normal_bam, univ_options, strelka_opt
 
 def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
     """
-    This module will spawn a strelka job for each chromosome on the DNA bams.
-
-    ARGUMENTS
-    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
-              tumor_bam
-              |- 'tumor_fix_pg_sorted.bam': 
-              +- 'tumor_fix_pg_sorted.bam.bai': 
-    2. normal_bam: Dict of input normal WGS/WSQ bam + bai
-              normal_bam
-              |- 'normal_fix_pg_sorted.bam': 
-              +- 'normal_fix_pg_sorted.bam.bai': 
-    3. univ_options: Dict of universal arguments used by almost all tools
-                     univ_options
-                     +- 'dockerhub': 
-    4. strelka_options: Dict of parameters specific to strelka
-                     strelka_options
-                     |- 'dbsnp_vcf': 
-                     |- 'dbsnp_idx': 
-                     |- 'cosmic_vcf': 
-                     |- 'cosmic_idx': 
-                     |- 'genome_fasta': 
-                     +- 'genome_dict': 
-                     +- 'genome_fai': 
-
-    RETURN VALUES
-    1. perchrom_strelka: Dict of results of strelka per chromosome
-             perchrom_strelka
-             |- 'chr1'
-             |  +- 'strelka_chr1.vcf': 
-             |  +- 'strelka_chr1.out': 
-             |- 'chr2'
-             |  |- 'strelka_chr2.vcf': 
-             |  +- 'strelka_chr2.out': 
-             etc...
-
-    This module corresponds to node 11 on the tree
+    Run the strelka subgraph on the DNA bams. Optionally split the results into per-chromosome
+    vcfs.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict strelka_options: Options specific to strelka
+    :param bool split: Should the results be split into perchrom vcfs?
+    :return: Either the fsID to the genome-level vcf or a dict of results from running strelka
+             on every chromosome
+             perchrom_strelka:
+                 |- 'chr1':
+                 |      |-'snvs': fsID
+                 |      +-'indels': fsID
+                 |- 'chr2':
+                 |      |-'snvs': fsID
+                 |      +-'indels': fsID
+                 |- ...
+                 |
+                 +- 'chrM':
+                        |-'snvs': fsID
+                        +-'indels': fsID
+    :rtype: toil.fileStore.FileID|dict
     """
     chromosomes = sample_chromosomes(job, strelka_options['genome_fai'])
     num_cores = min(len(chromosomes), univ_options['max_cores'])
@@ -104,16 +100,16 @@ def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split
 
 def run_strelka_full(job, tumor_bam, normal_bam, univ_options, strelka_options):
     """
-    This module will run strelka on the DNA bams.
-
-    ARGUMENTS
-    :param dict tumor_bam: REFER ARGUMENTS of spawn_strelka()
-    :param dict normal_bam: REFER ARGUMENTS of spawn_strelka()
-    :param dict univ_options: REFER ARGUMENTS of spawn_strelka()
-    :param dict strelka_options: REFER ARGUMENTS of spawn_strelka()
-
-    RETURN VALUES
-    :returns: dict of output vcfs for each chromosome
+    Run strelka on the DNA bams.
+
+    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
+    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict strelka_options: Options specific to strelka
+    :return: Dict of fsIDs for the snv and indel prediction files
+             output_dict:
+                 |-'snvs': fsID
+                 +-'indels': fsID
     :rtype: dict
     """
     job.fileStore.logToMaster('Running strelka on %s' % univ_options['patient'])
@@ -150,24 +146,38 @@ def run_strelka_full(job, tumor_bam, normal_bam, univ_options, strelka_options):
 
 def process_strelka_vcf(job, strelka_vcf, work_dir, univ_options):
     """
-    This will process all the strelka vcfs to return only passing calls
-    :param job: job
-    :param str strelka_vcf: Job Store ID corresponding to a strelka vcf for 1 chromosome
-    :param univ_options: Universal options
-    :returns dict: Dict with chromosomes as keys and path to the corresponding parsed strelka vcfs as
-    values
+    Process the strelka vcf for accepted calls. Since the calls are pre-filtered, we just obtain the
+    file from the file store and return it.
+
+    :param toil.fileStore.FileID strelka_vcf: fsID for a strelka generated chromosome vcf
+    :param str work_dir: Working directory
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: Path to the processed vcf
+    :rtype: str
     """
     return job.fileStore.readGlobalFile(strelka_vcf)
 
 
 def wrap_unmerge(job, strelka_out, strelka_options, univ_options):
     """
-    This is a convenience function that wraps the unmerge for strelkas snvs and indels
-
-    :param toil.Job job: job
-    :param dict strelka_out:
-    :param dict strelka_options:
-    :param dict univ_options:
+    A wrapper to unmerge the strelka snvs and indels.
+
+    :param dict strelka_out: Results from run_strelka
+    :param dict strelka_options: Options specific to strelka
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: Dict of dicts containing the fsIDs for the per-chromosome snv and indel calls
+             output:
+                 |- 'snvs':
+                 |      |- 'chr1': fsID
+                 |      |- 'chr2': fsID
+                 |      |- ...
+                 |      +- 'chrM': fsID
+                 +- 'indels':
+                        |- 'chr1': fsID
+                        |- 'chr2': fsID
+                        |- ...
+                        +- 'chrM': fsID
+    :rtype: dict
     """
     return {'snvs': job.addChildJobFn(unmerge, strelka_out['snvs'], 'strelka/snv',
                                       strelka_options, univ_options).rv(),
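For reference, the core of what `unmerge` does to a genome-level vcf can be restated in a few self-contained lines: replay the header into every per-chromosome file and route records on their first column. `split_vcf_by_chrom` is an illustrative sketch, not the ProTECT implementation (which also writes the outputs to the file store):

```python
from collections import defaultdict


def split_vcf_by_chrom(vcf_path):
    header, records = [], defaultdict(list)
    with open(vcf_path) as vcf:
        for line in vcf:
            if line.startswith('#'):
                header.append(line)
            else:
                records[line.split('\t', 1)[0]].append(line)
    out = {}
    for chrom, lines in records.items():
        out[chrom] = chrom + '.vcf'
        with open(out[chrom], 'w') as ofh:
            ofh.writelines(header + lines)
    return out
```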
+ | +- 'chrM': fsID + +- 'indels': + |- 'chr1': fsID + |- 'chr2': fsID + |- ... + +- 'chrM': fsID + :rtype: dict """ return {'snvs': job.addChildJobFn(unmerge, strelka_out['snvs'], 'strelka/snv', strelka_options, univ_options).rv(), diff --git a/src/protect/mutation_translation.py b/src/protect/mutation_translation.py index 534feda0..82c44306 100644 --- a/src/protect/mutation_translation.py +++ b/src/protect/mutation_translation.py @@ -13,35 +13,40 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import, print_function + from collections import defaultdict -from protect.common import docker_call, get_files_from_filestore, export_results, untargz, \ - docker_path + +from protect.common import (docker_call, + get_files_from_filestore, + export_results, + untargz, + docker_path) import os def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options): """ - This module will run transgene on the input vcf file from the aggregator and produce the - peptides for MHC prediction - - ARGUMENTS - 1. snpeffed_file: - 2. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 3. transgene_options: Dict of parameters specific to transgene - transgene_options - +- 'gencode_peptide_fasta': + Run transgene on an input snpeffed vcf filereturn the peptides for MHC prediction. - RETURN VALUES - 1. output_files: Dict of transgened n-mer peptide fastas - output_files - |- 'transgened_tumor_9_mer_snpeffed.faa': - |- 'transgened_tumor_10_mer_snpeffed.faa': - +- 'transgened_tumor_15_mer_snpeffed.faa': - This module corresponds to node 17 on the tree + :param toil.fileStore.FileID snpeffed_file: fsID for snpeffed vcf + :param dict rna_bam: The dict of bams returned by running star + :param dict univ_options: Dict of universal options used by almost all tools + :param dict transgene_options: Options specific to Transgene + :return: A dictionary of 9 files (9-, 10-, and 15-mer peptides each for Tumor and Normal and the + corresponding .map files for the 3 Tumor fastas) + output_files: + |- 'transgened_normal_10_mer_snpeffed.faa': fsID + |- 'transgened_normal_15_mer_snpeffed.faa': fsID + |- 'transgened_normal_9_mer_snpeffed.faa': fsID + |- 'transgened_tumor_10_mer_snpeffed.faa': fsID + |- 'transgened_tumor_10_mer_snpeffed.faa.map': fsID + |- 'transgened_tumor_15_mer_snpeffed.faa': fsID + |- 'transgened_tumor_15_mer_snpeffed.faa.map': fsID + |- 'transgened_tumor_9_mer_snpeffed.faa': fsID + +- 'transgened_tumor_9_mer_snpeffed.faa.map': fsID + :rtype: dict """ job.fileStore.logToMaster('Running transgene on %s' % univ_options['patient']) work_dir = os.getcwd() diff --git a/src/protect/pipeline/ProTECT.py b/src/protect/pipeline/ProTECT.py index 5a45ee18..9d102579 100644 --- a/src/protect/pipeline/ProTECT.py +++ b/src/protect/pipeline/ProTECT.py @@ -21,16 +21,13 @@ Details can also be obtained by running the script with -h . 
""" from __future__ import print_function - - from multiprocessing import cpu_count -from urlparse import urlparse from protect.addons import run_mhc_gene_assessment from protect.alignment.dna import align_dna from protect.alignment.rna import align_rna -from protect.binding_prediction.common import spawn_antigen_predictors, merge_mhc_peptide_calls -from protect.common import delete_fastqs, ParameterError +from protect.binding_prediction.common import merge_mhc_peptide_calls, spawn_antigen_predictors +from protect.common import delete_fastqs, get_file_from_s3, get_file_from_url, ParameterError from protect.expression_profiling.rsem import wrap_rsem from protect.haplotyping.phlat import merge_phlat_calls, phlat_disk, run_phlat from protect.mutation_annotation.snpeff import run_snpeff, snpeff_disk @@ -46,14 +43,11 @@ from protect.qc.rna import cutadapt_disk, run_cutadapt from protect.rankboost import wrap_rankboost from toil.job import Job, PromisedRequirement -from protect.common import get_file_from_s3 -from protect.common import get_file_from_url import argparse import os import pkg_resources import shutil -import subprocess import sys import yaml @@ -62,6 +56,7 @@ def _ensure_set_contains(test_object, required_object, test_set_name=None): """ Ensure that the required entries (set or keys of a dict) are present in the test set or keys of the test dict. + :param set|dict test_object: The test set or dict :param set|dict required_object: The entries that need to be present in the test set (keys of input dict if input is dict) @@ -121,8 +116,9 @@ def _process_group(input_group, required_group, groupname, append_subgroups=None :param dict input_group: The dict of values of the input group :param dict required_group: The dict of required values for the input group :param str groupname: The name of the group being processed - :param list append_group: list of subgroups to append to each, other subgroup in this group - :return: dict + :param list append_subgroups: list of subgroups to append to each, other subgroup in this group + :return: processed dict of entries for the group + :rtype: dict """ if append_subgroups is None: append_subgroups = [] @@ -144,17 +140,12 @@ def _process_group(input_group, required_group, groupname, append_subgroups=None def _parse_config_file(job, config_file, max_cores=None): """ - This module will parse the config file withing params and set up the variables that will be - passed to the various tools in the pipeline. - - ARGUMENTS - config_file: string containing path to a config file. An example config - file is available at - https://s3-us-west-2.amazonaws.com/pimmuno-references - /input_parameters.list + Parse the input yaml config file and download all tool inputs into the file store. - RETURN VALUES - None + :param str config_file: Path to the input config file + :param int max_cores: The maximum cores to use for any single high-compute job. 
+    :return: tuple of dict of sample dicts, dict of universal options, dict of dicts of tool
+             options
+    :rtype: tuple(dict, dict, dict)
     """
     job.fileStore.logToMaster('Parsing config file')
     config_file = os.path.abspath(config_file)
@@ -166,7 +157,6 @@ def _parse_config_file(job, config_file, max_cores=None):
     univ_options = {}
     tool_options = {}
 
-    # Read in the input yaml
     with open(config_file, 'r') as conf:
         input_config = yaml.load(conf.read())
 
@@ -264,17 +254,10 @@ def _parse_config_file(job, config_file, max_cores=None):
 
 def parse_config_file(job, config_file, max_cores=None):
     """
-    This module will parse the config file withing params and set up the variables that will be
-    passed to the various tools in the pipeline.
-
-    ARGUMENTS
-    config_file: string containing path to a config file.  An example config
-                 file is available at
-                        https://s3-us-west-2.amazonaws.com/pimmuno-references
-                        /input_parameters.list
+    Parse the config file and spawn a ProTECT job for every input sample.
 
-    RETURN VALUES
-    None
+    :param str config_file: Path to the input config file
+    :param int max_cores: The maximum cores to use for any single high-compute job.
     """
     sample_set, univ_options, processed_tool_inputs = _parse_config_file(job, config_file,
                                                                          max_cores)
@@ -282,12 +265,19 @@ def parse_config_file(job, config_file, max_cores=None):
     for patient_id in sample_set.keys():
         # Add the patient id to the sample set. Typecast to str so cat operations work later on.
         sample_set[patient_id]['patient_id'] = str(patient_id)
-        job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options,
+        job.addFollowOnJobFn(launch_protect, sample_set[patient_id], univ_options,
                              processed_tool_inputs)
     return None
 
 
 def ascertain_cpu_share(max_cores=None):
+    """
+    Ascertain the number of cpus allowed for each high-compute job instance (bwa, star, rsem,
+    phlat).
+
+    :param max_cores: The user-specified max
+    :return: The number of cpus allowed for each high-compute job instance
+    :rtype: int
+    """
     # Ascertain the number of available CPUs. Jobs will be given fractions of this value.
     num_cores = cpu_count()
     # The minimum number of cpus should be at least 6 if possible
@@ -298,15 +288,13 @@ def ascertain_cpu_share(max_cores=None):
     return cpu_share
 
 
-def pipeline_launchpad(job, fastqs, univ_options, tool_options):
+def launch_protect(job, fastqs, univ_options, tool_options):
     """
-    The precision immuno pipeline begins at this module. The DAG can be viewed in Flowchart.txt
+    The launchpad for ProTECT. The DAG for ProTECT can be viewed in Flowchart.txt.
 
-    :param job job: job
-    :param dict fastqs: Dict of lists of fastq files
-    :param univ_options: Universal Options
-    :param tool_options: Options for the various tools
-    :return: None
+    :param dict fastqs: Dict of lists of fastq files and the patient ID
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict tool_options: Options for the various tools
     """
     # Add Patient id to univ_options as is is passed to every major node in the DAG and can be used
     # as a prefix for the logfile.
     univ_options['patient'] = fastqs['patient_id']
@@ -391,90 +379,90 @@ def pipeline_launchpad(job, fastqs, univ_options, tool_options):
                               univ_options, tool_options['rankboost'], disk='100M',
                               memory='100M', cores=1)
     # Define the DAG in a static form
-    job.addChild(sample_prep)  # Edge  0->1
+    job.addChild(sample_prep)
 
     # A.
The first step is running the alignments and the MHC haplotypers - sample_prep.addChild(tumor_dna_fqs) # Edge 1->2 - sample_prep.addChild(normal_dna_fqs) # Edge 1->2 - sample_prep.addChild(tumor_rna_fqs) # Edge 1->2 + sample_prep.addChild(tumor_dna_fqs) + sample_prep.addChild(normal_dna_fqs) + sample_prep.addChild(tumor_rna_fqs) - tumor_rna_fqs.addChild(cutadapt) # Edge 1->2 - tumor_dna_fqs.addChild(bwa_tumor) # Edge 1->3 - normal_dna_fqs.addChild(bwa_normal) # Edge 1->4 + tumor_rna_fqs.addChild(cutadapt) + tumor_dna_fqs.addChild(bwa_tumor) + normal_dna_fqs.addChild(bwa_normal) - tumor_dna_fqs.addChild(phlat_tumor_dna) # Edge 1->5 - normal_dna_fqs.addChild(phlat_normal_dna) # Edge 1->6 - tumor_rna_fqs.addChild(phlat_tumor_rna) # Edge 1->7 + tumor_dna_fqs.addChild(phlat_tumor_dna) + normal_dna_fqs.addChild(phlat_normal_dna) + tumor_rna_fqs.addChild(phlat_tumor_rna) # B. cutadapt will be followed by star - cutadapt.addChild(star) # Edge 2->9 + cutadapt.addChild(star) # Ci. gene expression and fusion detection follow start alignment - star.addChild(rsem) # Edge 9->10 - star.addChild(fusions) # Edge 9->11 + star.addChild(rsem) + star.addChild(fusions) # Cii. Radia depends on all 3 alignments - star.addChild(radia) # Edge 9->12 - bwa_tumor.addChild(radia) # Edge 3->12 - bwa_normal.addChild(radia) # Edge 4->12 + star.addChild(radia) + bwa_tumor.addChild(radia) + bwa_normal.addChild(radia) # Ciii. mutect and indel calling depends on dna to have been aligned - bwa_tumor.addChild(mutect) # Edge 3->13 - bwa_normal.addChild(mutect) # Edge 4->13 - bwa_tumor.addChild(muse) # Edge 3->13 - bwa_normal.addChild(muse) # Edge 4->13 - bwa_tumor.addChild(somaticsniper) # Edge 3->13 - bwa_normal.addChild(somaticsniper) # Edge 4->13 - bwa_tumor.addChild(strelka) # Edge 3->13 - bwa_normal.addChild(strelka) # Edge 4->13 - bwa_tumor.addChild(indels) # Edge 3->14 - bwa_normal.addChild(indels) # Edge 4->14 + bwa_tumor.addChild(mutect) + bwa_normal.addChild(mutect) + bwa_tumor.addChild(muse) + bwa_normal.addChild(muse) + bwa_tumor.addChild(somaticsniper) + bwa_normal.addChild(somaticsniper) + bwa_tumor.addChild(strelka) + bwa_normal.addChild(strelka) + bwa_tumor.addChild(indels) + bwa_normal.addChild(indels) # D. MHC haplotypes will be merged once all 3 samples have been PHLAT-ed - phlat_tumor_dna.addChild(merge_phlat) # Edge 5->15 - phlat_normal_dna.addChild(merge_phlat) # Edge 6->15 - phlat_tumor_rna.addChild(merge_phlat) # Edge 7->15 + phlat_tumor_dna.addChild(merge_phlat) + phlat_normal_dna.addChild(merge_phlat) + phlat_tumor_rna.addChild(merge_phlat) # E. Delete the fastqs from the job store since all alignments are complete - sample_prep.addChild(fastq_deletion_1) # Edge 1->8 - cutadapt.addChild(fastq_deletion_1) # Edge 2->8 - bwa_normal.addChild(fastq_deletion_1) # Edge 3->8 - bwa_tumor.addChild(fastq_deletion_1) # Edge 4->8 - phlat_normal_dna.addChild(fastq_deletion_1) # Edge 5->8 - phlat_tumor_dna.addChild(fastq_deletion_1) # Edge 6>8 - phlat_tumor_rna.addChild(fastq_deletion_1) # Edge 7->8 + sample_prep.addChild(fastq_deletion_1) + cutadapt.addChild(fastq_deletion_1) + bwa_normal.addChild(fastq_deletion_1) + bwa_tumor.addChild(fastq_deletion_1) + phlat_normal_dna.addChild(fastq_deletion_1) + phlat_tumor_dna.addChild(fastq_deletion_1) + phlat_tumor_rna.addChild(fastq_deletion_1) star.addChild(fastq_deletion_2) # F. Mutation calls need to be merged before they can be used # G. 
     # G. All mutations get aggregated when they have finished running
-    fusions.addChild(merge_mutations)         # Edge 11->18
-    radia.addChild(merge_mutations)           # Edge 16->18
-    mutect.addChild(merge_mutations)          # Edge 17->18
-    muse.addChild(merge_mutations)            # Edge 17->18
-    somaticsniper.addChild(merge_mutations)   # Edge 17->18
-    strelka.addChild(merge_mutations)         # Edge 17->18
-    indels.addChild(merge_mutations)          # Edge 14->18
+    fusions.addChild(merge_mutations)
+    radia.addChild(merge_mutations)
+    mutect.addChild(merge_mutations)
+    muse.addChild(merge_mutations)
+    somaticsniper.addChild(merge_mutations)
+    strelka.addChild(merge_mutations)
+    indels.addChild(merge_mutations)
     # H. Aggregated mutations will be translated to protein space
-    merge_mutations.addChild(snpeff)          # Edge 18->19
+    merge_mutations.addChild(snpeff)
     # I. snpeffed mutations will be converted into peptides.
     # Transgene also accepts the RNA-seq bam and bai so that it can be rna-aware
-    snpeff.addChild(transgene)                # Edge 19->20
+    snpeff.addChild(transgene)
     star.addChild(transgene)
     # J. Merged haplotypes and peptides will be converted into jobs and submitted for mhc:peptide
     # binding prediction
-    merge_phlat.addChild(spawn_mhc)           # Edge 15->21
-    transgene.addChild(spawn_mhc)             # Edge 20->21
+    merge_phlat.addChild(spawn_mhc)
+    transgene.addChild(spawn_mhc)
     # K. The results from all the predictions will be merged. This is a follow-on job because
     # spawn_mhc will spawn an undetermined number of children.
-    spawn_mhc.addFollowOn(merge_mhc)          # Edges 21->XX->22 and 21->YY->22
+    spawn_mhc.addFollowOn(merge_mhc)
     # L. Finally, the merged mhc along with the gene expression will be used for rank boosting
-    rsem.addChild(rankboost)                  # Edge 10->23
-    merge_mhc.addChild(rankboost)             # Edge 22->23
+    rsem.addChild(rankboost)
+    merge_mhc.addChild(rankboost)
     # M. Assess the status of the MHC genes in the patient
-    phlat_tumor_rna.addChild(mhc_pathway_assessment)  # Edge 7->24
-    rsem.addChild(mhc_pathway_assessment)             # Edge 10->24
+    phlat_tumor_rna.addChild(mhc_pathway_assessment)
+    rsem.addChild(mhc_pathway_assessment)
     return None
 
 
 def get_all_tool_inputs(job, tools):
     """
-    This function will iterate through all the tool options and download the rquired file from their
-    remote locations.
+    Iterate through all the tool options and download required files from their remote locations.
 
     :param dict tools: A dict of dicts of all tools, and their options
-    :returns dict: The fully resolved tool dictionary
+    :return: The fully resolved tool dictionary
+    :rtype: dict
     """
     for tool in tools:
         for option in tools[tool]:
@@ -496,22 +484,23 @@ def get_all_tool_inputs(job, tools):
 
 def get_pipeline_inputs(job, input_flag, input_file, encryption_key=None,
                         per_file_encryption=False):
     """
-    Get the input file from s3 or disk, untargz if necessary and then write to file job store.
-    :param job: job
+    Get the input file from s3 or disk and write to file store.
+
+    :param str input_flag: The name of the flag
     :param str input_file: The value passed in the config file
-    :param str encryption_key: Path to the encryption key
+    :param str encryption_key: Path to the encryption key if encrypted with SSE-C
     :param bool per_file_encryption: If encrypted, was the file encrypted using the per-file
                                      method?
-    :return: The jobstore ID for the file
-    :rtype: str
+    :return: fsID for the file
+    :rtype: toil.fileStore.FileID
     """
     work_dir = os.getcwd()
     job.fileStore.logToMaster('Obtaining file (%s) to the file job store' %
                               os.path.basename(input_file))
     if input_file.startswith(('http', 'https', 'ftp')):
-        input_file=get_file_from_url(job, input_file,encryption_key=encryption_key,
-                                     per_file_encryption=per_file_encryption, write_to_jobstore=False)
-    elif input_file.startswith(('S3','s3')):
+        input_file = get_file_from_url(job, input_file, encryption_key=encryption_key,
+                                       per_file_encryption=per_file_encryption,
+                                       write_to_jobstore=False)
+    elif input_file.startswith(('S3', 's3')):
         input_file = get_file_from_s3(job, input_file, write_to_jobstore=False,
                                       encryption_key=encryption_key,
                                       per_file_encryption=per_file_encryption)
@@ -522,27 +511,28 @@ def get_pipeline_inputs(job, input_flag, input_file, encryption_key=None,
 
 def prepare_samples(job, fastqs, univ_options):
     """
-    This module will accept a dict object holding the 3 input prefixes and the patient id and will
-    attempt to store the fastqs to the jobstore. The input files must satisfy the following
-    1. File extensions can only be fastq or fq (.gz is also allowed)
-    2. Forward and reverse reads MUST be in the same folder with the same prefix
-    3. The files must be on the form
+    Obtain the 6 input fastqs and write them to the file store. The input files must satisfy the
+    following conditions:
+    1. Files must be of the form
        <prefix>1.<fastq/fq>[.gz]
       <prefix>2.<fastq/fq>[.gz]
-    The input dict is:
-    tumor_dna: prefix_to_tumor_dna
-    tumor_rna: prefix_to_tumor_rna
-    normal_dna: prefix_to_normal_dna
-    patient_id: patient ID
-
-    The input dict is updated to
-    tumor_dna: [jobStoreID_for_fastq1, jobStoreID_for_fastq2]
-    tumor_rna: [jobStoreID_for_fastq1, jobStoreID_for_fastq2]
-    normal_dna: [jobStoreID_for_fastq1, jobStoreID_for_fastq2]
-    patient_id: patient ID
-    gzipped: True/False
-
-    This module corresponds to node 1 in the tree
+    2. Forward and reverse reads MUST be in the same folder with the same prefix
+
+    :param dict fastqs: The input fastq dict
+           fastqs:
+               |- 'tumor_dna': str
+               |- 'tumor_rna': str
+               |- 'normal_dna': str
+               +- 'patient_id': str
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :return: Updated fastq dict
+           sample_fastqs:
+               |- 'tumor_dna': [fsID, fsID]
+               |- 'normal_dna': [fsID, fsID]
+               |- 'tumor_rna': [fsID, fsID]
+               |- 'gzipped': bool
+               +- 'patient_id': str
+    :rtype: dict
     """
     job.fileStore.logToMaster('Downloading Inputs for %s' % univ_options['patient'])
     # For each sample type, check if the prefix is an S3 link or a regular file
@@ -594,10 +584,10 @@ def get_fqs(job, fastqs, sample_type):
     """
     Convenience function to return only a list of tumor_rna, tumor_dna or normal_dna fqs
 
-    :param job: job
     :param dict fastqs: dict of list of fq files
     :param str sample_type: key in fastqs to return
     :return: fastqs[sample_type]
+    :rtype: list[toil.fileStore.FileID]
     """
     return fastqs[sample_type]
 
@@ -605,6 +595,7 @@ def generate_config_file():
     """
     Generate a config file for a ProTECT run on hg19.
+
     :return: None
     """
     shutil.copy(os.path.join(os.path.dirname(__file__), 'input_parameters.yaml'),
@@ -613,7 +604,7 @@
 
 def main():
     """
-    This is the main function for the UCSC Precision Immuno pipeline.
+    This is the main function for ProTECT.
""" parser = argparse.ArgumentParser(prog='ProTECT', description='Prediction of T-Cell Epitopes for Cancer Therapy', diff --git a/src/protect/qc/__init__.py b/src/protect/qc/__init__.py index c592ef89..6656c66f 100644 --- a/src/protect/qc/__init__.py +++ b/src/protect/qc/__init__.py @@ -12,4 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import \ No newline at end of file +from __future__ import absolute_import diff --git a/src/protect/qc/rna.py b/src/protect/qc/rna.py index 0c8a76c4..68e67db4 100644 --- a/src/protect/qc/rna.py +++ b/src/protect/qc/rna.py @@ -14,6 +14,7 @@ # limitations under the License. from __future__ import print_function from math import ceil + from protect.common import docker_call, docker_path, get_files_from_filestore, is_gzipfile import os @@ -26,24 +27,13 @@ def cutadapt_disk(rna_fastqs): def run_cutadapt(job, fastqs, univ_options, cutadapt_options): """ - This module runs cutadapt on the input RNA fastq files and then calls the RNA aligners. - - ARGUMENTS - 1. fastqs: List of input RNA-Seq fastqs [ , ] - 2. univ_options: Dict of universal arguments used by almost all tools - univ_options - +- 'dockerhub': - 3. cutadapt_options: Dict of parameters specific to cutadapt - cutadapt_options - |- 'a': - +- 'A': - RETURN VALUES - 1. output_files: Dict of cutadapted fastqs - output_files - |- 'rna_cutadapt_1.fastq': - +- 'rna_cutadapt_2.fastq': + Runs cutadapt on the input RNA fastq files. - This module corresponds to node 2 on the tree + :param list fastqs: List of fsIDs for input an RNA-Seq fastq pair + :param dict univ_options: Dict of universal options used by almost all tools + :param dict cutadapt_options: Options specific to cutadapt + :return: List of fsIDs of cutadapted fastqs + :rtype: list[toil.fileStore.FileID] """ job.fileStore.logToMaster('Running cutadapt on %s' % univ_options['patient']) work_dir = os.getcwd() diff --git a/src/protect/rankboost.py b/src/protect/rankboost.py index d773ff02..c5e0473e 100644 --- a/src/protect/rankboost.py +++ b/src/protect/rankboost.py @@ -20,15 +20,20 @@ def wrap_rankboost(job, rsem_files, merged_mhc_calls, transgene_out, univ_options, rankboost_options): """ - This is a convenience function that runs rankboost on pipeline outputs. + A wrapper for boost_ranks. 
-    :param job job: job
-    :param dict rsem_files: dict of results from rsem
-    :param dict merged_mhc_calls: dict of results from merging mhc peptide binding predictions
-    :param dict transgene_out: dict of results from running transgene
-    :param dict univ_options: Universal Options
-    :param dict rankboost_options: Options specific to rank boost
-    :return:
+    :param dict rsem_files: Dict of results from rsem
+    :param dict merged_mhc_calls: Dict of results from merging mhc peptide binding predictions
+    :param dict transgene_out: Dict of results from running Transgene
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict rankboost_options: Options specific to rankboost
+    :return: Dict of concise and detailed results for mhci and mhcii
+        output_files:
+            |- 'mhcii_rankboost_concise_results.tsv': fsID
+            |- 'mhcii_rankboost_detailed_results.txt': fsID
+            |- 'mhci_rankboost_concise_results.tsv': fsID
+            +- 'mhci_rankboost_detailed_results.txt': fsID
+    :rtype: dict
     """
     rankboost = job.addChildJobFn(boost_ranks, rsem_files['rsem.isoforms.results'],
                                   merged_mhc_calls, transgene_out, univ_options, rankboost_options)
@@ -39,10 +44,20 @@ def wrap_rankboost(job, rsem_files, merged_mhc_calls, transgene_out, univ_option
 
 def boost_ranks(job, isoform_expression, merged_mhc_calls, transgene_out, univ_options,
                 rankboost_options):
     """
-    This is the final module in the pipeline. It will call the rank boosting R
-    script.
+    Boost the ranks of the predicted peptide:MHC combinations.
 
-    This module corresponds to node 21 in the tree
+    :param toil.fileStore.FileID isoform_expression: fsID for the rsem isoform expression file
+    :param dict merged_mhc_calls: Dict of results from merging mhc peptide binding predictions
+    :param dict transgene_out: Dict of results from running Transgene
+    :param dict univ_options: Dict of universal options used by almost all tools
+    :param dict rankboost_options: Options specific to rankboost
+    :return: Dict of concise and detailed results for mhci and mhcii
+        output_files:
+            |- 'mhcii_rankboost_concise_results.tsv': fsID
+            |- 'mhcii_rankboost_detailed_results.txt': fsID
+            |- 'mhci_rankboost_concise_results.tsv': fsID
+            +- 'mhci_rankboost_detailed_results.txt': fsID
+    :rtype: dict
     """
     job.fileStore.logToMaster('Running boost_ranks on %s' % univ_options['patient'])
     work_dir = os.getcwd()
diff --git a/src/protect/test/ci/test_protect.py b/src/protect/test/ci/test_protect.py
index ca7d97dc..0b4c6db4 100644
--- a/src/protect/test/ci/test_protect.py
+++ b/src/protect/test/ci/test_protect.py
@@ -61,7 +61,8 @@ def _test_ran_successfully(self):
                           [], ['normal_dna_fix_pg_sorted.bam', 'normal_dna_fix_pg_sorted.bam.bai',
                                'rna_fix_pg_sorted.bam', 'rna_fix_pg_sorted.bam.bai',
-                               'tumor_dna_fix_pg_sorted.bam', 'tumor_dna_fix_pg_sorted.bam.bai']),
+                               'rna_transcriptome.bam', 'tumor_dna_fix_pg_sorted.bam',
+                               'tumor_dna_fix_pg_sorted.bam.bai']),
                          ('/mnt/ephemeral/done/TEST/binding_predictions',
                           [], ['mhci_merged_files.list', 'mhcii_merged_files.list']),