From 95dcf6d7030c6168f5ef93b34cf174e288b78066 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 21:55:55 -0400
Subject: [PATCH 01/13] add plot_coverage() and align_and_plot_coverage() to
 read_utils.py

add plot_coverage() and align_and_plot_coverage() to read_utils.py, WIP
---
 read_utils.py     | 256 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt  |   1 +
 tools/__init__.py |   2 +-
 tools/bwa.py      |  20 +++-
 tools/samtools.py |   6 ++
 5 files changed, 280 insertions(+), 5 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index 7639b4eb1..c05283ac6 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -14,8 +14,11 @@
 import os
 import tempfile
 import shutil
+import csv
+from collections import OrderedDict
 
 from Bio import SeqIO
+import matplotlib.pyplot as plt
 
 import util.cmd
 import util.file
@@ -1052,6 +1055,259 @@ def parser_bwamem_idxstats(parser=argparse.ArgumentParser()):
 
 # =========================
 
+def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # parser needs add_help=False?
+    parser.add_argument('in_bam', 
+                        help='Input reads, BAM format.')
+    parser.add_argument('out_plot_file', 
+                        help='The generated chart file')
+    parser.add_argument('--plotFormat',
+                        dest="plot_format",
+                        default="pdf",
+                        type=str,
+                        choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
+                        help="File format of the coverage plot")
+    parser.add_argument('--plotStyle',
+                        dest="plot_style",
+                        default="ggplot",
+                        type=str,
+                        choices=plt.style.available,
+                        help="The plot visual style")
+    parser.add_argument('--plotWidth',
+                        dest="plot_width",
+                        default=800,
+                        type=int,
+                        help="Width of the plot in pixels")
+    parser.add_argument('--plotHeight',
+                        dest="plot_height",
+                        default=600,
+                        type=int,
+                        help="Width of the plot in pixels")
+    parser.add_argument('--plotTitle',
+                        dest="plot_title",
+                        default="Coverage Plot",
+                        type=str,
+                        help="The title displayed on the coverage plot")
+    parser.add_argument('-q',
+                        dest="base_q_threshold",
+                        default=None,
+                        type=int,
+                        help="The minimum base quality threshold")
+    parser.add_argument('-Q',
+                        dest="mapping_q_threshold",
+                        default=None,
+                        type=int,
+                        help="The minimum mapping quality threshold")
+    parser.add_argument('-m',
+                        dest="max_coverage_depth",
+                        default=1000000,
+                        type=int,
+                        help="The max coverage depth (default: %(default)s)")
+    parser.add_argument('-l',
+                        dest="read_length_threshold",
+                        default=None,
+                        type=int,
+                        help="Read length threshold")
+    parser.add_argument('--outSummary',
+                        dest="out_summary",
+                        default=None,
+                        type=str,
+                        help="Coverage summary TSV file. Default is to write to temp.")
+    return parser
+
+def plot_coverage(in_bam, out_plot_file, plot_format, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary=None):
+    ''' 
+        Generate a coverage plot from an aligned bam file
+    '''
+    
+    # TODO: remove this:
+    #coverage_tsv_file = "/Users/tomkinsc/Downloads/plottest/test_multisegment.tsv"
+
+    samtools = tools.samtools.SamtoolsTool()
+
+    # check if in_bam is aligned, if not raise an error
+    num_mapped_reads = samtools.count(in_bam, opts=["-F", "4"])
+    if num_mapped_reads == 0:
+        raise Exception("""The bam file specified appears to have zero mapped reads. 'plot_coverage' requires an aligned bam file. You can try 'align_and_plot_coverage' if you don't mind a simple bwa alignment. \n File: %s""" % in_bam)
+
+
+    if out_summary is None:
+        coverage_tsv_file = mkstempfname('.summary.tsv')
+    else:
+        coverage_tsv_file = out_summary
+
+    bam_aligned = mkstempfname('.aligned.bam')
+
+    if in_bam[-4:] == ".sam":
+        # convert sam -> bam
+        samtools.view(["-b"], in_bam, bam_aligned)
+    elif in_bam[-4:] == ".bam":
+        shutil.copyfile(in_bam, bam_aligned)
+
+    # call samtools sort
+    bam_sorted = mkstempfname('.aligned.bam')
+    samtools.sort(bam_aligned, bam_sorted)
+    
+    # call samtools index
+    samtools.index(bam_sorted)
+    
+    # call samtools depth
+    opts = []
+    opts += ['-aa'] # report coverate at "absolutely all" positions
+    if base_q_threshold:
+        opts += ["-q", str(base_q_threshold)]
+    if mapping_q_threshold:
+        opts += ["-Q", str(mapping_q_threshold)]
+    if max_coverage_depth:
+        opts += ["-m", str(max_coverage_depth)]
+    if read_length_threshold:
+        opts += ["-l", str(read_length_threshold)]
+
+    samtools.depth(bam_sorted, coverage_tsv_file, opts)
+
+    # ---- create plot based on coverage_tsv_file ----
+
+    segment_depths = OrderedDict()
+    domain_max = 0
+    with open(coverage_tsv_file, "r") as tabfile:
+        for row in csv.reader(tabfile, delimiter='\t'):
+            segment_depths.setdefault(row[0],[]).append(int(row[2]))
+            domain_max += 1
+
+    domain_max = 0
+    with plt.style.context(plot_style):
+
+        fig = plt.gcf()
+        DPI = fig.get_dpi()
+        fig.set_size_inches(float(plot_width)/float(DPI),float(plot_height)/float(DPI))
+
+        font_size = math.sqrt((plot_width**2)+(plot_height**2))/float(DPI)*1.25
+
+        ax = plt.subplot() # Defines ax variable by creating an empty plot
+
+        # Set the tick labels font
+        for label in (ax.get_xticklabels() + ax.get_yticklabels()):
+            label.set_fontname('Arial')
+            label.set_fontsize(font_size)        
+
+        for segment_num, (segment_name, position_depths) in enumerate(segment_depths.items()):
+            prior_domain_max = domain_max
+            domain_max += len(position_depths)
+
+            segment_color = plt.rcParams['axes.color_cycle'][segment_num%len(plt.rcParams['axes.color_cycle'])]
+            plot = plt.fill_between(range(prior_domain_max, domain_max), position_depths, [0]*len(position_depths), linewidth=0, antialiased=True, color=segment_color)
+
+        plt.title(plot_title, fontsize=font_size*1.2)
+        plt.xlabel("bp", fontsize=font_size*1.1)
+        plt.ylabel("read depth", fontsize=font_size*1.1)
+        plt.tight_layout()
+        plt.savefig(out_plot_file, format=plot_format, dpi=DPI) #, bbox_inches='tight')
+        log.info("Coverage plot saved to: " + out_plot_file)
+
+    os.unlink(bam_aligned)
+    os.unlink(bam_sorted)
+    # TODO: uncomment when done testing
+    if not out_summary:
+        os.unlink(coverage_tsv_file)
+    
+
+def parser_plot_coverage(parser=argparse.ArgumentParser()):
+    parser = parser_plot_coverage_common(parser)
+    util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
+    util.cmd.attach_main(parser, plot_coverage, split_args=True)
+    return parser
+
+__commands__.append(('plot_coverage', parser_plot_coverage))
+
+def align_and_plot_coverage(out_plot_file, plot_format, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary,
+                            in_bam, ref_fasta, out_bam=None, sensitive=False, min_score_to_output=None
+                            ):
+    ''' 
+        Take reads, align to reference with BWA-MEM, and generate a coverage plot
+    '''
+    if out_bam is None:
+        bam_aligned = mkstempfname('.aligned.bam')
+    else:
+        bam_aligned = out_bam
+
+    ref_indexed = mkstempfname('.reference.fasta')
+    shutil.copyfile(ref_fasta, ref_indexed)
+
+    bwa = tools.bwa.Bwa()
+    samtools = tools.samtools.SamtoolsTool()
+
+    bwa.index(ref_indexed)
+
+    bwa_opts = []
+    if sensitive:
+        bwa_opts + "-k 12 -A 1 -B 1 -O 1 -E 1".split()
+
+    map_threshold = min_score_to_output or 30
+
+    bwa_opts + ["-T", str(map_threshold)]
+
+    aln_sam = mkstempfname('.sam')
+    aln_sam_filtered = mkstempfname('.filtered.sam')
+
+    bwa.mem(in_bam, ref_indexed, aln_sam, opts=bwa_opts)
+
+    # @haydenm says:
+    # For some reason (particularly when the --sensitive option is on), bwa
+    # doesn't listen to its '-T' flag and outputs alignments with score less
+    # than the '-T 30' threshold. So filter these:
+    os.system("grep \"^@\" " + aln_sam + " > " + aln_sam_filtered)
+    os.system("grep \"AS:i:\" " + aln_sam + " | awk -v threshold=" + str(map_threshold) + " '{split($14, subfield, \":\"); if(subfield[3]>=threshold) print $0}' >> " + aln_sam_filtered)
+    os.unlink(aln_sam)
+
+    # convert sam -> bam
+    aln_bam_filtered = mkstempfname('.reference.fasta')
+    samtools.view(["-b"], aln_sam_filtered, aln_bam_filtered)
+    os.unlink(aln_sam_filtered)
+
+    samtools.sort(aln_bam_filtered, bam_aligned)
+    os.unlink(aln_bam_filtered)
+
+    samtools.index(bam_aligned)
+    
+
+    # call plot function
+    plot_coverage(bam_aligned, out_plot_file, plot_format, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary)
+
+    # remove the output bam, unless it is needed
+    if out_bam is None:
+        os.unlink(bam_aligned)
+
+    # remove the files created by bwa index. 
+    # The empty extension causes the original fasta file to be removed
+    for ext in [".amb",".ann",".bwt",".bwa",".pac",".sa",""]:
+        file_to_remove = ref_indexed+ext
+        if os.path.isfile(file_to_remove):
+            os.unlink( file_to_remove )
+
+def parser_align_and_plot_coverage(parser=argparse.ArgumentParser()):
+    parser = parser_plot_coverage_common(parser)
+    parser.add_argument('ref_fasta', 
+                        default=None,
+                        help='Reference genome, FASTA format.')
+    parser.add_argument('--outBam', 
+                        dest="out_bam",
+                        default=None,
+                        help='Output aligned, indexed BAM file. Default is to write to temp.')
+    parser.add_argument('--sensitive',
+                        action="store_true",
+                        help="Equivalent to giving bwa: '-k 12 -A 1 -B 1 -O 1 -E 1' ")
+    parser.add_argument('-T',
+                        dest="min_score_to_output",
+                        default=30,
+                        type=int,
+                        help="The min score to output during alignment (default: %(default)s)")
+
+    util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
+    util.cmd.attach_main(parser, align_and_plot_coverage, split_args=True)
+    return parser
+
+__commands__.append(('align_and_plot_coverage', parser_align_and_plot_coverage))
+
+# =========================
 
 def full_parser():
     return util.cmd.make_parser(__commands__, __doc__)
diff --git a/requirements.txt b/requirements.txt
index ea56db617..8110b6f35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ future>=0.15.2
 futures==3.0.3; python_version == '2.6' or python_version=='2.7'
 pysam==0.8.3
 PyYAML==3.11
+matplotlib==1.5.1
\ No newline at end of file
diff --git a/tools/__init__.py b/tools/__init__.py
index bc27ea07a..6b0a8ca8c 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -285,7 +285,7 @@ def __init__(
                     self.env_path = os.path.dirname(last_path_component) if last_path_component == "bin" else conda_env_path
                 else: # if conda env is an environment name, infer the path
                     _log.debug('Conda env found is specified by name: %s' % conda_env_path)
-                    result = util.misc.run_and_print(["conda", "env", "list", "--json"], loglevel=logging.DEBUG, env=os.environ)
+                    result = util.misc.run_and_print(["conda", "env", "list", "--json"], silent=True, env=os.environ)
                     if result.returncode == 0:
                         command_output = result.stdout.decode("UTF-8")
                         data = json.loads(self._string_from_start_of_json(command_output))
diff --git a/tools/bwa.py b/tools/bwa.py
index 56760c23e..6581e5946 100644
--- a/tools/bwa.py
+++ b/tools/bwa.py
@@ -7,6 +7,8 @@
 import os
 import os.path
 import subprocess
+import shutil
+
 import tools
 import tools.samtools
 import util.file
@@ -46,18 +48,28 @@ def index(self, inFasta, algorithm=None):
         cmd.append(inFasta)
         self.execute('index', cmd)
 
-    def mem(self, inReads, refDb, outAlign, threads=None):
+    def mem(self, inReads, refDb, outAlign, opts=None, threads=None):
+        opts = [] if not opts else opts
+
         threads = threads or util.misc.available_cpu_count()
         samtools = tools.samtools.SamtoolsTool()
         fq1 = util.file.mkstempfname('.1.fastq')
         fq2 = util.file.mkstempfname('.2.fastq')
         aln_sam = util.file.mkstempfname('.sam')
+        aln_sam_sorted = util.file.mkstempfname('sorted.sam')
         samtools.bam2fq(inReads, fq1, fq2)
-        self.execute('mem', ['-t', str(threads), refDb, fq1, fq2], stdout=aln_sam)
+        self.execute('mem', opts + ['-t', str(threads), refDb, fq1, fq2], stdout=aln_sam)
         os.unlink(fq1)
         os.unlink(fq2)
-        samtools.sort(aln_sam, outAlign)
+        samtools.sort(aln_sam, aln_sam_sorted)
         os.unlink(aln_sam)
-        samtools.index(outAlign)
+        # cannot index sam files; only do so if a bam is desired
+        if outAlign.endswith(".bam"):
+            # convert sam -> bam
+            samtools.view(["-b"], aln_sam_sorted, outAlign)
+            samtools.index(outAlign)
+        elif outAlign.endswith(".sam"):
+            shutil.copyfile(aln_sam_sorted, outAlign)
+        os.unlink(aln_sam_sorted)
 
 
diff --git a/tools/samtools.py b/tools/samtools.py
index a34fe59dc..51899e70b 100644
--- a/tools/samtools.py
+++ b/tools/samtools.py
@@ -105,6 +105,12 @@ def faidx(self, inFasta, overwrite=False):
         #pysam.faidx(inFasta)
         self.execute('faidx', [inFasta])
 
+    def depth(self, inBam, outFile, options=None):
+        """ Write a TSV file with coverage depth by position """
+        options = options or ["-aa", "-m", "1000000"]
+
+        self.execute('depth', options + [inBam], stdout=outFile)
+
     def idxstats(self, inBam, statsFile):
         self.execute('idxstats', [inBam], stdout=statsFile)
 

From a1d073f5d8a5a1b051a88823fc3e12d006e86a7c Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 22:28:04 -0400
Subject: [PATCH 02/13] minor changes to fix matplotlib deprecation warnings

---
 read_utils.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index c05283ac6..05a2c6097 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -1074,12 +1074,12 @@ def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # parser need
                         help="The plot visual style")
     parser.add_argument('--plotWidth',
                         dest="plot_width",
-                        default=800,
+                        default=1024,
                         type=int,
                         help="Width of the plot in pixels")
     parser.add_argument('--plotHeight',
                         dest="plot_height",
-                        default=600,
+                        default=768,
                         type=int,
                         help="Width of the plot in pixels")
     parser.add_argument('--plotTitle',
@@ -1186,26 +1186,32 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_style, plot_width, pl
 
         # Set the tick labels font
         for label in (ax.get_xticklabels() + ax.get_yticklabels()):
-            label.set_fontname('Arial')
             label.set_fontsize(font_size)        
 
         for segment_num, (segment_name, position_depths) in enumerate(segment_depths.items()):
             prior_domain_max = domain_max
             domain_max += len(position_depths)
 
-            segment_color = plt.rcParams['axes.color_cycle'][segment_num%len(plt.rcParams['axes.color_cycle'])]
-            plot = plt.fill_between(range(prior_domain_max, domain_max), position_depths, [0]*len(position_depths), linewidth=0, antialiased=True, color=segment_color)
+            colors = list(plt.rcParams['axes.prop_cycle'].by_key()['color']) # get the colors for this style
+            segment_color = colors[segment_num%len(colors)] # pick a color, offset by the segment index
+            plt.fill_between(range(prior_domain_max, domain_max), position_depths, [0]*len(position_depths), linewidth=0, antialiased=True, color=segment_color)
 
         plt.title(plot_title, fontsize=font_size*1.2)
         plt.xlabel("bp", fontsize=font_size*1.1)
         plt.ylabel("read depth", fontsize=font_size*1.1)
-        plt.tight_layout()
+
+        # to squash a backend renderer error on OSX related to tight layout
+        if plt.get_backend().lower() in ['agg', 'macosx']:
+            fig.set_tight_layout(True)
+        else:
+            fig.tight_layout()
+
         plt.savefig(out_plot_file, format=plot_format, dpi=DPI) #, bbox_inches='tight')
         log.info("Coverage plot saved to: " + out_plot_file)
 
     os.unlink(bam_aligned)
     os.unlink(bam_sorted)
-    # TODO: uncomment when done testing
+
     if not out_summary:
         os.unlink(coverage_tsv_file)
     

From f5e45fda23d9c4e4c425b543fe357c82cf4fd8e2 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 22:31:25 -0400
Subject: [PATCH 03/13] explicitly set matplotlib backend to hopefully fix
 Travis $DISPLAY==null issue

---
 read_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/read_utils.py b/read_utils.py
index 05a2c6097..a72f8d6d6 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -18,6 +18,8 @@
 from collections import OrderedDict
 
 from Bio import SeqIO
+import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 
 import util.cmd

From db75236ce8aeebb44067af6c6078b8ca70ed6faf Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 23:18:21 -0400
Subject: [PATCH 04/13] improve parser defaults and help display

---
 read_utils.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index a72f8d6d6..f198f085d 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -1064,31 +1064,33 @@ def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # parser need
                         help='The generated chart file')
     parser.add_argument('--plotFormat',
                         dest="plot_format",
-                        default="pdf",
+                        default=None,
                         type=str,
                         choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
-                        help="File format of the coverage plot")
+                        metavar='',
+                        help="File format of the coverage plot. By default it is inferred from the file extension of out_plot_file, but it can be set explicitly via --plotFormat. Valid formats include: " + ", ".join( list(plt.gcf().canvas.get_supported_filetypes().keys())) )
     parser.add_argument('--plotStyle',
                         dest="plot_style",
                         default="ggplot",
                         type=str,
                         choices=plt.style.available,
-                        help="The plot visual style")
+                        metavar='',
+                        help="The plot visual style. Valid options: " + ", ".join(plt.style.available) + " (default: %(default)s)")
     parser.add_argument('--plotWidth',
                         dest="plot_width",
                         default=1024,
                         type=int,
-                        help="Width of the plot in pixels")
+                        help="Width of the plot in pixels (default: %(default)s)")
     parser.add_argument('--plotHeight',
                         dest="plot_height",
                         default=768,
                         type=int,
-                        help="Width of the plot in pixels")
+                        help="Width of the plot in pixels (default: %(default)s)")
     parser.add_argument('--plotTitle',
                         dest="plot_title",
                         default="Coverage Plot",
                         type=str,
-                        help="The title displayed on the coverage plot")
+                        help="The title displayed on the coverage plot (default: '%(default)s')")
     parser.add_argument('-q',
                         dest="base_q_threshold",
                         default=None,

From e0d862b32d32c1aa16d736c5f0e04ca3d43696dc Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 23:42:34 -0400
Subject: [PATCH 05/13] for bwa, change conditional to use bam or cram

---
 tools/bwa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bwa.py b/tools/bwa.py
index 6581e5946..37a870aeb 100644
--- a/tools/bwa.py
+++ b/tools/bwa.py
@@ -64,7 +64,7 @@ def mem(self, inReads, refDb, outAlign, opts=None, threads=None):
         samtools.sort(aln_sam, aln_sam_sorted)
         os.unlink(aln_sam)
         # cannot index sam files; only do so if a bam is desired
-        if outAlign.endswith(".bam"):
+        if outAlign.endswith(".bam") or outAlign.endswith(".cram"):
             # convert sam -> bam
             samtools.view(["-b"], aln_sam_sorted, outAlign)
             samtools.index(outAlign)

From 884d8bc12533c3d774a7948ce7a9a12acafba06e Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 23:42:59 -0400
Subject: [PATCH 06/13] add data display styles of dots, line, and filled to
 plot_coverage

---
 read_utils.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index f198f085d..d9fb43a17 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -1069,6 +1069,13 @@ def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # parser need
                         choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
                         metavar='',
                         help="File format of the coverage plot. By default it is inferred from the file extension of out_plot_file, but it can be set explicitly via --plotFormat. Valid formats include: " + ", ".join( list(plt.gcf().canvas.get_supported_filetypes().keys())) )
+    parser.add_argument('--plotDataStyle',
+                        dest="plot_data_style",
+                        default="filled",
+                        type=str,
+                        choices=["filled","line","dots"],
+                        metavar='',
+                        help="The plot data display style. Valid options: " + ", ".join(["filled","line","dots"]) + " (default: %(default)s)")
     parser.add_argument('--plotStyle',
                         dest="plot_style",
                         default="ggplot",
@@ -1118,7 +1125,7 @@ def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # parser need
                         help="Coverage summary TSV file. Default is to write to temp.")
     return parser
 
-def plot_coverage(in_bam, out_plot_file, plot_format, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary=None):
+def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary=None):
     ''' 
         Generate a coverage plot from an aligned bam file
     '''
@@ -1141,10 +1148,10 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_style, plot_width, pl
 
     bam_aligned = mkstempfname('.aligned.bam')
 
-    if in_bam[-4:] == ".sam":
+    if in_bam.endswith(".sam"):
         # convert sam -> bam
         samtools.view(["-b"], in_bam, bam_aligned)
-    elif in_bam[-4:] == ".bam":
+    elif in_bam.endswith(".bam") or in_bam.endswith(".cram"):
         shutil.copyfile(in_bam, bam_aligned)
 
     # call samtools sort
@@ -1198,7 +1205,14 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_style, plot_width, pl
 
             colors = list(plt.rcParams['axes.prop_cycle'].by_key()['color']) # get the colors for this style
             segment_color = colors[segment_num%len(colors)] # pick a color, offset by the segment index
-            plt.fill_between(range(prior_domain_max, domain_max), position_depths, [0]*len(position_depths), linewidth=0, antialiased=True, color=segment_color)
+            
+            if plot_data_style == "filled":
+                plt.fill_between(range(prior_domain_max, domain_max), position_depths, [0]*len(position_depths), linewidth=0, antialiased=True, color=segment_color)
+            elif plot_data_style == "line":
+                plt.plot(range(prior_domain_max, domain_max), position_depths, antialiased=True, color=segment_color)
+            elif plot_data_style == "dots":
+                plt.plot(range(prior_domain_max, domain_max), position_depths, 'ro', antialiased=True, color=segment_color)
+
 
         plt.title(plot_title, fontsize=font_size*1.2)
         plt.xlabel("bp", fontsize=font_size*1.1)
@@ -1228,7 +1242,7 @@ def parser_plot_coverage(parser=argparse.ArgumentParser()):
 
 __commands__.append(('plot_coverage', parser_plot_coverage))
 
-def align_and_plot_coverage(out_plot_file, plot_format, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary,
+def align_and_plot_coverage(out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary,
                             in_bam, ref_fasta, out_bam=None, sensitive=False, min_score_to_output=None
                             ):
     ''' 
@@ -1280,7 +1294,7 @@ def align_and_plot_coverage(out_plot_file, plot_format, plot_style, plot_width,
     
 
     # call plot function
-    plot_coverage(bam_aligned, out_plot_file, plot_format, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary)
+    plot_coverage(bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary)
 
     # remove the output bam, unless it is needed
     if out_bam is None:

From b4cc1643d175841dda60481dcdf3451e2396cc01 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Wed, 13 Jul 2016 23:57:02 -0400
Subject: [PATCH 07/13] refactor sam filtering to statement using samtools view

---
 read_utils.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index d9fb43a17..079f9b33b 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -1269,31 +1269,24 @@ def align_and_plot_coverage(out_plot_file, plot_format, plot_data_style, plot_st
 
     bwa_opts + ["-T", str(map_threshold)]
 
-    aln_sam = mkstempfname('.sam')
-    aln_sam_filtered = mkstempfname('.filtered.sam')
+    aln_bam = mkstempfname('.bam')
 
-    bwa.mem(in_bam, ref_indexed, aln_sam, opts=bwa_opts)
+    bwa.mem(in_bam, ref_indexed, aln_bam, opts=bwa_opts)
 
     # @haydenm says:
     # For some reason (particularly when the --sensitive option is on), bwa
     # doesn't listen to its '-T' flag and outputs alignments with score less
     # than the '-T 30' threshold. So filter these:
-    os.system("grep \"^@\" " + aln_sam + " > " + aln_sam_filtered)
-    os.system("grep \"AS:i:\" " + aln_sam + " | awk -v threshold=" + str(map_threshold) + " '{split($14, subfield, \":\"); if(subfield[3]>=threshold) print $0}' >> " + aln_sam_filtered)
-    os.unlink(aln_sam)
-
-    # convert sam -> bam
-    aln_bam_filtered = mkstempfname('.reference.fasta')
-    samtools.view(["-b"], aln_sam_filtered, aln_bam_filtered)
-    os.unlink(aln_sam_filtered)
+    aln_bam_filtered = mkstempfname('.filtered.bam')
+    samtools.view(["-b", "-h", "-q", str(map_threshold)], aln_bam, aln_bam_filtered)
+    os.unlink(aln_bam)
 
     samtools.sort(aln_bam_filtered, bam_aligned)
     os.unlink(aln_bam_filtered)
 
     samtools.index(bam_aligned)
     
-
-    # call plot function
+    # -- call plot function --
     plot_coverage(bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary)
 
     # remove the output bam, unless it is needed

From 2687cf2e0fdd54ee8a0f29f6bafa3453aae4062e Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 14 Jul 2016 00:07:33 -0400
Subject: [PATCH 08/13] yapf formatting

---
 read_utils.py | 821 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 493 insertions(+), 328 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index 079f9b33b..137b9fc01 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -62,9 +62,11 @@ def parser_purge_unmated(parser=argparse.ArgumentParser()):
     parser.add_argument('inFastq2', help='Input fastq file; 2nd end of paired-end reads.')
     parser.add_argument('outFastq1', help='Output fastq file; 1st end of paired-end reads.')
     parser.add_argument('outFastq2', help='Output fastq file; 2nd end of paired-end reads.')
-    parser.add_argument("--regex",
-                        help="Perl regular expression to parse paired read IDs (default: %(default)s)",
-                        default=r'^@(\S+)/[1|2]$')
+    parser.add_argument(
+        "--regex",
+        help="Perl regular expression to parse paired read IDs (default: %(default)s)",
+        default=r'^@(\S+)/[1|2]$'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, purge_unmated, split_args=True)
     return parser
@@ -131,13 +133,17 @@ def main_index_fasta_samtools(args):
 
 def parser_index_fasta_picard(parser=argparse.ArgumentParser()):
     parser.add_argument('inFasta', help='Input reference genome, FASTA format.')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.CreateSequenceDictionaryTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s CreateSequenceDictionary, OPTIONNAME=value ...')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.CreateSequenceDictionaryTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s CreateSequenceDictionary, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_index_fasta_picard)
     return parser
@@ -146,10 +152,10 @@ def parser_index_fasta_picard(parser=argparse.ArgumentParser()):
 def main_index_fasta_picard(args):
     '''Create an index file for a reference genome suitable for Picard/GATK.'''
     tools.picard.CreateSequenceDictionaryTool().execute(
-        args.inFasta,
-        overwrite=True,
+        args.inFasta, overwrite=True,
         picardOptions=args.picardOptions,
-        JVMmemory=args.JVMmemory)
+        JVMmemory=args.JVMmemory
+    )
     return 0
 
 
@@ -164,18 +170,24 @@ def parser_mkdup_picard(parser=argparse.ArgumentParser()):
     parser.add_argument('inBams', help='Input reads, BAM format.', nargs='+')
     parser.add_argument('outBam', help='Output reads, BAM format.')
     parser.add_argument('--outMetrics', help='Output metrics file. Default is to dump to a temp file.', default=None)
-    parser.add_argument("--remove",
-                        help="Instead of marking duplicates, remove them entirely (default: %(default)s)",
-                        default=False,
-                        action="store_true",
-                        dest="remove")
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.MarkDuplicatesTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s MarkDuplicates, OPTIONNAME=value ...')
+    parser.add_argument(
+        "--remove",
+        help="Instead of marking duplicates, remove them entirely (default: %(default)s)",
+        default=False,
+        action="store_true",
+        dest="remove"
+    )
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.MarkDuplicatesTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s MarkDuplicates, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_mkdup_picard)
     return parser
@@ -187,11 +199,10 @@ def main_mkdup_picard(args):
     if args.remove:
         opts = ['REMOVE_DUPLICATES=true'] + opts
     tools.picard.MarkDuplicatesTool().execute(
-        args.inBams,
-        args.outBam,
-        args.outMetrics,
-        picardOptions=opts,
-        JVMmemory=args.JVMmemory)
+        args.inBams, args.outBam,
+        args.outMetrics, picardOptions=opts,
+        JVMmemory=args.JVMmemory
+    )
     return 0
 
 
@@ -205,13 +216,17 @@ def main_mkdup_picard(args):
 def parser_revert_bam_picard(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input reads, BAM format.')
     parser.add_argument('outBam', help='Output reads, BAM format.')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.RevertSamTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s RevertSam, OPTIONNAME=value ...')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.RevertSamTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s RevertSam, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_revert_bam_picard)
     return parser
@@ -233,13 +248,16 @@ def main_revert_bam_picard(args):
 
 def parser_picard(parser=argparse.ArgumentParser()):
     parser.add_argument('command', help='picard command')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.PicardTools.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard, OPTIONNAME=value ...')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.PicardTools.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[], nargs='*',
+        help='Optional arguments to Picard, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_picard)
     return parser
@@ -261,27 +279,37 @@ def main_picard(args):
 def parser_sort_bam(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input bam file.')
     parser.add_argument('outBam', help='Output bam file, sorted.')
-    parser.add_argument('sortOrder',
-                        help='How to sort the reads. [default: %(default)s]',
-                        choices=tools.picard.SortSamTool.valid_sort_orders,
-                        default=tools.picard.SortSamTool.default_sort_order)
-    parser.add_argument("--index",
-                        help="Index outBam (default: %(default)s)",
-                        default=False,
-                        action="store_true",
-                        dest="index")
-    parser.add_argument("--md5",
-                        help="MD5 checksum outBam (default: %(default)s)",
-                        default=False,
-                        action="store_true",
-                        dest="md5")
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.SortSamTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s SortSam, OPTIONNAME=value ...')
+    parser.add_argument(
+        'sortOrder',
+        help='How to sort the reads. [default: %(default)s]',
+        choices=tools.picard.SortSamTool.valid_sort_orders,
+        default=tools.picard.SortSamTool.default_sort_order
+    )
+    parser.add_argument(
+        "--index",
+        help="Index outBam (default: %(default)s)",
+        default=False,
+        action="store_true",
+        dest="index"
+    )
+    parser.add_argument(
+        "--md5",
+        help="MD5 checksum outBam (default: %(default)s)",
+        default=False,
+        action="store_true",
+        dest="md5"
+    )
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.SortSamTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s SortSam, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_sort_bam)
     return parser
@@ -295,11 +323,9 @@ def main_sort_bam(args):
     if args.md5:
         opts = ['CREATE_MD5_FILE=true'] + opts
     tools.picard.SortSamTool().execute(
-        args.inBam,
-        args.outBam,
-        args.sortOrder,
-        picardOptions=opts,
-        JVMmemory=args.JVMmemory)
+        args.inBam, args.outBam, args.sortOrder,
+        picardOptions=opts, JVMmemory=args.JVMmemory
+    )
     return 0
 
 
@@ -313,13 +339,17 @@ def main_sort_bam(args):
 def parser_merge_bams(parser=argparse.ArgumentParser()):
     parser.add_argument('inBams', help='Input bam files.', nargs='+')
     parser.add_argument('outBam', help='Output bam file.')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.MergeSamFilesTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s MergeSamFiles, OPTIONNAME=value ...')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.MergeSamFilesTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s MergeSamFiles, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_merge_bams)
     return parser
@@ -343,20 +373,26 @@ def parser_filter_bam(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input bam file.')
     parser.add_argument('readList', help='Input file of read IDs.')
     parser.add_argument('outBam', help='Output bam file.')
-    parser.add_argument("--exclude",
-                        help="""If specified, readList is a list of reads to remove from input.
+    parser.add_argument(
+        "--exclude",
+        help="""If specified, readList is a list of reads to remove from input.
             Default behavior is to treat readList as an inclusion list (all unnamed
             reads are removed).""",
-                        default=False,
-                        action="store_true",
-                        dest="exclude")
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.FilterSamReadsTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s FilterSamReads, OPTIONNAME=value ...')
+        default=False,
+        action="store_true",
+        dest="exclude"
+    )
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.FilterSamReadsTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s FilterSamReads, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_filter_bam)
     return parser
@@ -370,7 +406,8 @@ def main_filter_bam(args):
         args.readList,
         args.outBam,
         picardOptions=args.picardOptions,
-        JVMmemory=args.JVMmemory)
+        JVMmemory=args.JVMmemory
+    )
     return 0
 
 
@@ -381,18 +418,20 @@ def main_filter_bam(args):
 # =======================
 
 
-def bam_to_fastq(inBam, outFastq1, outFastq2, outHeader=None,
-                 JVMmemory=tools.picard.SamToFastqTool.jvmMemDefault, picardOptions=None):
+def bam_to_fastq(
+    inBam,
+    outFastq1,
+    outFastq2,
+    outHeader=None,
+    JVMmemory=tools.picard.SamToFastqTool.jvmMemDefault,
+    picardOptions=None
+):
     ''' Convert a bam file to a pair of fastq paired-end read files and optional
         text header.
     '''
     picardOptions = picardOptions or []
 
-    tools.picard.SamToFastqTool().execute(inBam,
-                                          outFastq1,
-                                          outFastq2,
-                                          picardOptions=picardOptions,
-                                          JVMmemory=JVMmemory)
+    tools.picard.SamToFastqTool().execute(inBam, outFastq1, outFastq2, picardOptions=picardOptions, JVMmemory=JVMmemory)
     if outHeader:
         tools.samtools.SamtoolsTool().dumpHeader(inBam, outHeader)
     return 0
@@ -403,13 +442,17 @@ def parser_bam_to_fastq(parser=argparse.ArgumentParser()):
     parser.add_argument('outFastq1', help='Output fastq file; 1st end of paired-end reads.')
     parser.add_argument('outFastq2', help='Output fastq file; 2nd end of paired-end reads.')
     parser.add_argument('--outHeader', help='Optional text file name that will receive bam header.', default=None)
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.SamToFastqTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='Optional arguments to Picard\'s SamToFastq, OPTIONNAME=value ...')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.SamToFastqTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='Optional arguments to Picard\'s SamToFastq, OPTIONNAME=value ...'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, bam_to_fastq, split_args=True)
     return parser
@@ -422,8 +465,15 @@ def parser_bam_to_fastq(parser=argparse.ArgumentParser()):
 # =======================
 
 
-def fastq_to_bam(inFastq1, inFastq2, outBam, sampleName=None, header=None,
-                 JVMmemory=tools.picard.FastqToSamTool.jvmMemDefault, picardOptions=None):
+def fastq_to_bam(
+    inFastq1,
+    inFastq2,
+    outBam,
+    sampleName=None,
+    header=None,
+    JVMmemory=tools.picard.FastqToSamTool.jvmMemDefault,
+    picardOptions=None
+):
     ''' Convert a pair of fastq paired-end read files and optional text header
         to a single bam file.
     '''
@@ -434,7 +484,7 @@ def fastq_to_bam(inFastq1, inFastq2, outBam, sampleName=None, header=None,
     else:
         fastqToSamOut = outBam
     if sampleName is None:
-        sampleName = 'Dummy'  # Will get overwritten by rehead command
+        sampleName = 'Dummy'    # Will get overwritten by rehead command
     if header:
         # With the header option, rehead will be called after FastqToSam.
         # This will invalidate any md5 file, which would be a slow to construct
@@ -442,12 +492,11 @@ def fastq_to_bam(inFastq1, inFastq2, outBam, sampleName=None, header=None,
         if any(opt.lower() == 'CREATE_MD5_FILE=True'.lower() for opt in picardOptions):
             raise Exception("""CREATE_MD5_FILE is not allowed with '--header.'""")
     tools.picard.FastqToSamTool().execute(
-        inFastq1,
-        inFastq2,
-        sampleName,
-        fastqToSamOut,
+        inFastq1, inFastq2,
+        sampleName, fastqToSamOut,
         picardOptions=picardOptions,
-        JVMmemory=JVMmemory)
+        JVMmemory=JVMmemory
+    )
 
     if header:
         tools.samtools.SamtoolsTool().reheader(fastqToSamOut, header, outBam)
@@ -462,15 +511,19 @@ def parser_fastq_to_bam(parser=argparse.ArgumentParser()):
     headerGroup = parser.add_mutually_exclusive_group(required=True)
     headerGroup.add_argument('--sampleName', help='Sample name to insert into the read group header.')
     headerGroup.add_argument('--header', help='Optional text file containing header.')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.FastqToSamTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
-    parser.add_argument('--picardOptions',
-                        default=[],
-                        nargs='*',
-                        help='''Optional arguments to Picard\'s FastqToSam,
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.FastqToSamTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--picardOptions',
+        default=[],
+        nargs='*',
+        help='''Optional arguments to Picard\'s FastqToSam,
                 OPTIONNAME=value ...  Note that header-related options will be
-                overwritten by HEADER if present.''')
+                overwritten by HEADER if present.'''
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, fastq_to_bam, split_args=True)
     return parser
@@ -486,9 +539,15 @@ def parser_fastq_to_bam(parser=argparse.ArgumentParser()):
 defaultFormat = 'fastq'
 
 
-def split_reads(inFileName, outPrefix, outSuffix="",
-                maxReads=None, numChunks=None,
-                indexLen=defaultIndexLen, fmt=defaultFormat):
+def split_reads(
+    inFileName,
+    outPrefix,
+    outSuffix="",
+    maxReads=None,
+    numChunks=None,
+    indexLen=defaultIndexLen,
+    fmt=defaultFormat
+):
     '''Split fasta or fastq file into chunks of maxReads reads or into
            numChunks chunks named outPrefix01, outPrefix02, etc.
        If both maxReads and numChunks are None, use defaultMaxReads.
@@ -529,34 +588,45 @@ def split_reads(inFileName, outPrefix, outSuffix="",
 
 def parser_split_reads(parser=argparse.ArgumentParser()):
     parser.add_argument('inFileName', help='Input fastq or fasta file.')
-    parser.add_argument('outPrefix',
-                        help='Output files will be named ${outPrefix}01${outSuffix}, ${outPrefix}02${outSuffix}...')
+    parser.add_argument(
+        'outPrefix',
+        help='Output files will be named ${outPrefix}01${outSuffix}, ${outPrefix}02${outSuffix}...'
+    )
     group = parser.add_mutually_exclusive_group(required=False)
-    group.add_argument('--maxReads',
-                       type=int,
-                       default=None,
-                       help='''Maximum number of reads per chunk (default {:d} if neither
-               maxReads nor numChunks is specified).'''.format(defaultMaxReads))
-    group.add_argument('--numChunks',
-                       type=int,
-                       default=None,
-                       help='Number of output files, if maxReads is not specified.')
-    parser.add_argument('--indexLen',
-                        type=int,
-                        default=defaultIndexLen,
-                        help='''Number of characters to append to outputPrefix for each
+    group.add_argument(
+        '--maxReads',
+        type=int,
+        default=None,
+        help='''Maximum number of reads per chunk (default {:d} if neither
+               maxReads nor numChunks is specified).'''.format(defaultMaxReads)
+    )
+    group.add_argument(
+        '--numChunks',
+        type=int, default=None,
+        help='Number of output files, if maxReads is not specified.'
+    )
+    parser.add_argument(
+        '--indexLen',
+        type=int,
+        default=defaultIndexLen,
+        help='''Number of characters to append to outputPrefix for each
                output file (default %(default)s).
-               Number of files must not exceed 10^INDEXLEN.''')
-    parser.add_argument('--format',
-                        dest="fmt",
-                        choices=['fastq', 'fasta'],
-                        default=defaultFormat,
-                        help='Input fastq or fasta file (default: %(default)s).')
-    parser.add_argument('--outSuffix',
-                        default='',
-                        help='''Output filename suffix (e.g. .fastq or .fastq.gz).
+               Number of files must not exceed 10^INDEXLEN.'''
+    )
+    parser.add_argument(
+        '--format',
+        dest="fmt",
+        choices=['fastq', 'fasta'],
+        default=defaultFormat,
+        help='Input fastq or fasta file (default: %(default)s).'
+    )
+    parser.add_argument(
+        '--outSuffix',
+        default='',
+        help='''Output filename suffix (e.g. .fastq or .fastq.gz).
                   A suffix ending in .gz will cause the output file
-                  to be gzip compressed. Default is no suffix.''')
+                  to be gzip compressed. Default is no suffix.'''
+    )
     util.cmd.attach_main(parser, split_reads, split_args=True)
     return parser
 
@@ -603,11 +673,12 @@ def split_bam(inBam, outBams):
                 if outBam == outBams[-1]:
                     for line in inf:
                         outf.write(line)
-            picard.execute("SamFormatConverter",
-                           [
-                               'INPUT=' + tmp_sam_reads, 'OUTPUT=' + outBam, 'VERBOSITY=WARNING'
-                           ],
-                           JVMmemory='512m')
+            picard.execute(
+                "SamFormatConverter", [
+                    'INPUT=' + tmp_sam_reads, 'OUTPUT=' + outBam, 'VERBOSITY=WARNING'
+                ],
+                JVMmemory='512m'
+            )
             os.unlink(tmp_sam_reads)
     os.unlink(bigsam)
 
@@ -622,7 +693,6 @@ def parser_split_bam(parser=argparse.ArgumentParser()):
 
 __commands__.append(('split_bam', parser_split_bam))
 
-
 # =======================
 # ***  reheader_bam   ***
 # =======================
@@ -648,14 +718,14 @@ def main_reheader_bam(args):
             CN  broad   BI
     '''
     # read mapping file
-    mapper = dict((a+':'+b, a+':'+c) for a,b,c in util.file.read_tabfile(args.rgMap))
+    mapper = dict((a + ':' + b, a + ':' + c) for a, b, c in util.file.read_tabfile(args.rgMap))
     # read and convert bam header
     header_file = mkstempfname('.sam')
     with open(header_file, 'wt') as outf:
         for row in tools.samtools.SamtoolsTool().getHeader(args.inBam):
             if row[0] == '@RG':
                 row = [mapper.get(x, x) for x in row]
-            outf.write('\t'.join(row)+'\n')
+            outf.write('\t'.join(row) + '\n')
     # write new bam with new header
     tools.samtools.SamtoolsTool().reheader(args.inBam, header_file, args.outBam)
     os.unlink(header_file)
@@ -670,6 +740,8 @@ def parser_reheader_bams(parser=argparse.ArgumentParser()):
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_reheader_bams)
     return parser
+
+
 def main_reheader_bams(args):
     ''' Copy BAM files while renaming elements of the BAM header.
         The mapping file specifies which (key, old value, new value) mappings. For
@@ -683,8 +755,8 @@ def main_reheader_bams(args):
             FN  in2.bam out2.bam
     '''
     # read mapping file
-    mapper = dict((a+':'+b, a+':'+c) for a,b,c in util.file.read_tabfile(args.rgMap) if a != 'FN')
-    files = list((b,c) for a,b,c in util.file.read_tabfile(args.rgMap) if a == 'FN')
+    mapper = dict((a + ':' + b, a + ':' + c) for a, b, c in util.file.read_tabfile(args.rgMap) if a != 'FN')
+    files = list((b, c) for a, b, c in util.file.read_tabfile(args.rgMap) if a == 'FN')
     header_file = mkstempfname('.sam')
     # read and convert bam headers
     for inBam, outBam in files:
@@ -693,14 +765,15 @@ def main_reheader_bams(args):
                 for row in tools.samtools.SamtoolsTool().getHeader(inBam):
                     if row[0] == '@RG':
                         row = [mapper.get(x, x) for x in row]
-                    outf.write('\t'.join(row)+'\n')
+                    outf.write('\t'.join(row) + '\n')
             # write new bam with new header
             tools.samtools.SamtoolsTool().reheader(inBam, header_file, outBam)
     os.unlink(header_file)
     return 0
-__commands__.append(('reheader_bams', parser_reheader_bams))
 
 
+__commands__.append(('reheader_bams', parser_reheader_bams))
+
 # ============================
 # ***  dup_remove_mvicuna  ***
 # ============================
@@ -767,11 +840,13 @@ def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None):
                                 outf.write(line)
                         os.unlink(fn)
                     else:
-                        log.warn("""no reads found in %s,
-                                    assuming that's because there's no reads in that read group""", fn)
+                        log.warn(
+                            """no reads found in %s,
+                                    assuming that's because there's no reads in that read group""", fn
+                        )
 
         # M-Vicuna DupRm to see what we should keep (append IDs to running file)
-        if os.path.getsize(infastqs[0])>0 or os.path.getsize(infastqs[1])>0:
+        if os.path.getsize(infastqs[0]) > 0 or os.path.getsize(infastqs[1]) > 0:
             mvicuna_fastqs_to_readlist(infastqs[0], infastqs[1], readList)
         for fn in infastqs:
             os.unlink(fn)
@@ -784,9 +859,11 @@ def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None):
 def parser_rmdup_mvicuna_bam(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input reads, BAM format.')
     parser.add_argument('outBam', help='Output reads, BAM format.')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.FilterSamReadsTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.FilterSamReadsTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, rmdup_mvicuna_bam, split_args=True)
     return parser
@@ -809,7 +886,8 @@ def parser_dup_remove_mvicuna(parser=argparse.ArgumentParser()):
 def main_dup_remove_mvicuna(args):
     '''Run mvicuna's duplicate removal operation on paired-end reads.'''
     tools.mvicuna.MvicunaTool().rmdup(
-        (args.inFastq1, args.inFastq2), (args.pairedOutFastq1, args.pairedOutFastq2), args.unpairedOutFastq)
+        (args.inFastq1, args.inFastq2), (args.pairedOutFastq1, args.pairedOutFastq2), args.unpairedOutFastq
+    )
     return 0
 
 
@@ -866,12 +944,12 @@ def parser_novoalign(parser=argparse.ArgumentParser()):
     parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Novoindex.')
     parser.add_argument('outBam', help='Output reads, BAM format (aligned).')
     parser.add_argument('--options', default='-r Random', help='Novoalign options (default: %(default)s)')
-    parser.add_argument('--min_qual',
-                        default=0,
-                        help='Filter outBam to minimum mapping quality (default: %(default)s)')
-    parser.add_argument('--JVMmemory',
-                        default=tools.picard.SortSamTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
+    parser.add_argument('--min_qual', default=0, help='Filter outBam to minimum mapping quality (default: %(default)s)')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.picard.SortSamTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_novoalign)
     return parser
@@ -885,7 +963,8 @@ def main_novoalign(args):
         args.outBam,
         options=args.options.split(),
         min_qual=args.min_qual,
-        JVMmemory=args.JVMmemory)
+        JVMmemory=args.JVMmemory
+    )
     return 0
 
 
@@ -907,15 +986,21 @@ def parser_novoindex(parser=argparse.ArgumentParser()):
 def parser_gatk_ug(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input reads, BAM format.')
     parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Picard.')
-    parser.add_argument('outVcf',
-                        help='''Output calls in VCF format. If this filename ends with .gz,
-        GATK will BGZIP compress the output and produce a Tabix index file as well.''')
-    parser.add_argument('--options',
-                        default='--min_base_quality_score 15 -ploidy 4',
-                        help='UnifiedGenotyper options (default: %(default)s)')
-    parser.add_argument('--JVMmemory',
-                        default=tools.gatk.GATKTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
+    parser.add_argument(
+        'outVcf',
+        help='''Output calls in VCF format. If this filename ends with .gz,
+        GATK will BGZIP compress the output and produce a Tabix index file as well.'''
+    )
+    parser.add_argument(
+        '--options',
+        default='--min_base_quality_score 15 -ploidy 4',
+        help='UnifiedGenotyper options (default: %(default)s)'
+    )
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.gatk.GATKTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_gatk_ug)
     return parser
@@ -923,11 +1008,11 @@ def parser_gatk_ug(parser=argparse.ArgumentParser()):
 
 def main_gatk_ug(args):
     '''Call genotypes using the GATK UnifiedGenotyper.'''
-    tools.gatk.GATKTool().ug(args.inBam,
-                             args.refFasta,
-                             args.outVcf,
-                             options=args.options.split(),
-                             JVMmemory=args.JVMmemory)
+    tools.gatk.GATKTool().ug(
+        args.inBam, args.refFasta,
+        args.outVcf, options=args.options.split(),
+        JVMmemory=args.JVMmemory
+    )
     return 0
 
 
@@ -938,9 +1023,11 @@ def parser_gatk_realign(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input reads, BAM format, aligned to refFasta.')
     parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Picard.')
     parser.add_argument('outBam', help='Realigned reads.')
-    parser.add_argument('--JVMmemory',
-                        default=tools.gatk.GATKTool.jvmMemDefault,
-                        help='JVM virtual memory size (default: %(default)s)')
+    parser.add_argument(
+        '--JVMmemory',
+        default=tools.gatk.GATKTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)'
+    )
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, main_gatk_realign)
     parser.add_argument('--threads', default=1, help='Number of threads (default: %(default)s)')
@@ -950,11 +1037,10 @@ def parser_gatk_realign(parser=argparse.ArgumentParser()):
 def main_gatk_realign(args):
     '''Local realignment of BAM files with GATK IndelRealigner.'''
     tools.gatk.GATKTool().local_realign(
-        args.inBam,
-        args.refFasta,
-        args.outBam,
-        JVMmemory=args.JVMmemory,
-        threads=args.threads)
+        args.inBam, args.refFasta,
+        args.outBam, JVMmemory=args.JVMmemory,
+        threads=args.threads
+    )
     return 0
 
 
@@ -962,8 +1048,15 @@ def main_gatk_realign(args):
 
 # =========================
 
-def align_and_fix(inBam, refFasta, outBamAll=None, outBamFiltered=None,
-                  novoalign_options='', JVMmemory=None, threads=1):
+
+def align_and_fix(
+    inBam, refFasta,
+    outBamAll=None,
+    outBamFiltered=None,
+    novoalign_options='',
+    JVMmemory=None,
+    threads=1
+):
     ''' Take reads, align to reference with Novoalign, mark duplicates
         with Picard, realign indels with GATK, and optionally filter
         final file to mapped/non-dupe reads.
@@ -974,18 +1067,16 @@ def align_and_fix(inBam, refFasta, outBamAll=None, outBamFiltered=None,
 
     bam_aligned = mkstempfname('.aligned.bam')
     tools.novoalign.NovoalignTool().execute(
-        inBam,
-        refFasta,
-        bam_aligned,
+        inBam, refFasta, bam_aligned,
         options=novoalign_options.split(),
-        JVMmemory=JVMmemory)
+        JVMmemory=JVMmemory
+    )
 
     bam_mkdup = mkstempfname('.mkdup.bam')
     tools.picard.MarkDuplicatesTool().execute(
-        [bam_aligned],
-        bam_mkdup,
-        picardOptions=['CREATE_INDEX=true'],
-        JVMmemory=JVMmemory)
+        [bam_aligned], bam_mkdup, picardOptions=['CREATE_INDEX=true'],
+        JVMmemory=JVMmemory
+    )
     os.unlink(bam_aligned)
 
     bam_realigned = mkstempfname('.realigned.bam')
@@ -1004,14 +1095,18 @@ def align_and_fix(inBam, refFasta, outBamAll=None, outBamFiltered=None,
 def parser_align_and_fix(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input unaligned reads, BAM format.')
     parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Picard and Novoalign.')
-    parser.add_argument('--outBamAll',
-                        default=None,
-                        help='''Aligned, sorted, and indexed reads.  Unmapped reads are
-                retained and duplicate reads are marked, not removed.''')
-    parser.add_argument('--outBamFiltered',
-                        default=None,
-                        help='''Aligned, sorted, and indexed reads.  Unmapped reads and
-                duplicate reads are removed from this file.''')
+    parser.add_argument(
+        '--outBamAll',
+        default=None,
+        help='''Aligned, sorted, and indexed reads.  Unmapped reads are
+                retained and duplicate reads are marked, not removed.'''
+    )
+    parser.add_argument(
+        '--outBamFiltered',
+        default=None,
+        help='''Aligned, sorted, and indexed reads.  Unmapped reads and
+                duplicate reads are removed from this file.'''
+    )
     parser.add_argument('--novoalign_options', default='-r Random', help='Novoalign options (default: %(default)s)')
     parser.add_argument('--JVMmemory', default='4g', help='JVM virtual memory size (default: %(default)s)')
     parser.add_argument('--threads', default=1, help='Number of threads (default: %(default)s)')
@@ -1022,9 +1117,9 @@ def parser_align_and_fix(parser=argparse.ArgumentParser()):
 
 __commands__.append(('align_and_fix', parser_align_and_fix))
 
-
 # =========================
 
+
 def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None):
     ''' Take reads, align to reference with BWA-MEM and perform samtools idxstats.
     '''
@@ -1044,6 +1139,7 @@ def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None):
     if outBam is None:
         os.unlink(bam_aligned)
 
+
 def parser_bwamem_idxstats(parser=argparse.ArgumentParser()):
     parser.add_argument('inBam', help='Input unaligned reads, BAM format.')
     parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Picard and Novoalign.')
@@ -1053,83 +1149,112 @@ def parser_bwamem_idxstats(parser=argparse.ArgumentParser()):
     util.cmd.attach_main(parser, bwamem_idxstats, split_args=True)
     return parser
 
+
 __commands__.append(('bwamem_idxstats', parser_bwamem_idxstats))
 
 # =========================
 
-def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # parser needs add_help=False?
-    parser.add_argument('in_bam', 
-                        help='Input reads, BAM format.')
-    parser.add_argument('out_plot_file', 
-                        help='The generated chart file')
-    parser.add_argument('--plotFormat',
-                        dest="plot_format",
-                        default=None,
-                        type=str,
-                        choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
-                        metavar='',
-                        help="File format of the coverage plot. By default it is inferred from the file extension of out_plot_file, but it can be set explicitly via --plotFormat. Valid formats include: " + ", ".join( list(plt.gcf().canvas.get_supported_filetypes().keys())) )
-    parser.add_argument('--plotDataStyle',
-                        dest="plot_data_style",
-                        default="filled",
-                        type=str,
-                        choices=["filled","line","dots"],
-                        metavar='',
-                        help="The plot data display style. Valid options: " + ", ".join(["filled","line","dots"]) + " (default: %(default)s)")
-    parser.add_argument('--plotStyle',
-                        dest="plot_style",
-                        default="ggplot",
-                        type=str,
-                        choices=plt.style.available,
-                        metavar='',
-                        help="The plot visual style. Valid options: " + ", ".join(plt.style.available) + " (default: %(default)s)")
-    parser.add_argument('--plotWidth',
-                        dest="plot_width",
-                        default=1024,
-                        type=int,
-                        help="Width of the plot in pixels (default: %(default)s)")
-    parser.add_argument('--plotHeight',
-                        dest="plot_height",
-                        default=768,
-                        type=int,
-                        help="Width of the plot in pixels (default: %(default)s)")
-    parser.add_argument('--plotTitle',
-                        dest="plot_title",
-                        default="Coverage Plot",
-                        type=str,
-                        help="The title displayed on the coverage plot (default: '%(default)s')")
-    parser.add_argument('-q',
-                        dest="base_q_threshold",
-                        default=None,
-                        type=int,
-                        help="The minimum base quality threshold")
-    parser.add_argument('-Q',
-                        dest="mapping_q_threshold",
-                        default=None,
-                        type=int,
-                        help="The minimum mapping quality threshold")
-    parser.add_argument('-m',
-                        dest="max_coverage_depth",
-                        default=1000000,
-                        type=int,
-                        help="The max coverage depth (default: %(default)s)")
-    parser.add_argument('-l',
-                        dest="read_length_threshold",
-                        default=None,
-                        type=int,
-                        help="Read length threshold")
-    parser.add_argument('--outSummary',
-                        dest="out_summary",
-                        default=None,
-                        type=str,
-                        help="Coverage summary TSV file. Default is to write to temp.")
+
+def parser_plot_coverage_common(parser=argparse.ArgumentParser()):    # parser needs add_help=False?
+    parser.add_argument('in_bam', help='Input reads, BAM format.')
+    parser.add_argument('out_plot_file', help='The generated chart file')
+    parser.add_argument(
+        '--plotFormat',
+        dest="plot_format",
+        default=None,
+        type=str,
+        choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
+        metavar='',
+        help="File format of the coverage plot. By default it is inferred from the file extension of out_plot_file, but it can be set explicitly via --plotFormat. Valid formats include: "
+        + ", ".join(list(plt.gcf().canvas.get_supported_filetypes().keys()))
+    )
+    parser.add_argument(
+        '--plotDataStyle',
+        dest="plot_data_style",
+        default="filled",
+        type=str,
+        choices=["filled", "line", "dots"],
+        metavar='',
+        help="The plot data display style. Valid options: " + ", ".join(["filled", "line", "dots"]) +
+        " (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotStyle',
+        dest="plot_style",
+        default="ggplot",
+        type=str,
+        choices=plt.style.available,
+        metavar='',
+        help="The plot visual style. Valid options: " + ", ".join(plt.style.available) + " (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotWidth',
+        dest="plot_width",
+        default=1024,
+        type=int,
+        help="Width of the plot in pixels (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotHeight',
+        dest="plot_height",
+        default=768,
+        type=int,
+        help="Width of the plot in pixels (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotTitle',
+        dest="plot_title",
+        default="Coverage Plot",
+        type=str,
+        help="The title displayed on the coverage plot (default: '%(default)s')"
+    )
+    parser.add_argument(
+        '-q', dest="base_q_threshold",
+        default=None, type=int,
+        help="The minimum base quality threshold"
+    )
+    parser.add_argument(
+        '-Q', dest="mapping_q_threshold",
+        default=None,
+        type=int, help="The minimum mapping quality threshold"
+    )
+    parser.add_argument(
+        '-m',
+        dest="max_coverage_depth",
+        default=1000000,
+        type=int,
+        help="The max coverage depth (default: %(default)s)"
+    )
+    parser.add_argument('-l', dest="read_length_threshold", default=None, type=int, help="Read length threshold")
+    parser.add_argument(
+        '--outSummary',
+        dest="out_summary",
+        default=None,
+        type=str,
+        help="Coverage summary TSV file. Default is to write to temp."
+    )
     return parser
 
-def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary=None):
+
+def plot_coverage(
+    in_bam,
+    out_plot_file,
+    plot_format,
+    plot_data_style,
+    plot_style,
+    plot_width,
+    plot_height,
+    plot_title,
+    base_q_threshold,
+    mapping_q_threshold,
+    max_coverage_depth,
+    read_length_threshold,
+    out_summary=None
+):
     ''' 
         Generate a coverage plot from an aligned bam file
     '''
-    
+
     # TODO: remove this:
     #coverage_tsv_file = "/Users/tomkinsc/Downloads/plottest/test_multisegment.tsv"
 
@@ -1138,8 +1263,10 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_styl
     # check if in_bam is aligned, if not raise an error
     num_mapped_reads = samtools.count(in_bam, opts=["-F", "4"])
     if num_mapped_reads == 0:
-        raise Exception("""The bam file specified appears to have zero mapped reads. 'plot_coverage' requires an aligned bam file. You can try 'align_and_plot_coverage' if you don't mind a simple bwa alignment. \n File: %s""" % in_bam)
-
+        raise Exception(
+            """The bam file specified appears to have zero mapped reads. 'plot_coverage' requires an aligned bam file. You can try 'align_and_plot_coverage' if you don't mind a simple bwa alignment. \n File: %s"""
+            % in_bam
+        )
 
     if out_summary is None:
         coverage_tsv_file = mkstempfname('.summary.tsv')
@@ -1157,13 +1284,13 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_styl
     # call samtools sort
     bam_sorted = mkstempfname('.aligned.bam')
     samtools.sort(bam_aligned, bam_sorted)
-    
+
     # call samtools index
     samtools.index(bam_sorted)
-    
+
     # call samtools depth
     opts = []
-    opts += ['-aa'] # report coverate at "absolutely all" positions
+    opts += ['-aa']    # report coverate at "absolutely all" positions
     if base_q_threshold:
         opts += ["-q", str(base_q_threshold)]
     if mapping_q_threshold:
@@ -1181,7 +1308,7 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_styl
     domain_max = 0
     with open(coverage_tsv_file, "r") as tabfile:
         for row in csv.reader(tabfile, delimiter='\t'):
-            segment_depths.setdefault(row[0],[]).append(int(row[2]))
+            segment_depths.setdefault(row[0], []).append(int(row[2]))
             domain_max += 1
 
     domain_max = 0
@@ -1189,34 +1316,45 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_styl
 
         fig = plt.gcf()
         DPI = fig.get_dpi()
-        fig.set_size_inches(float(plot_width)/float(DPI),float(plot_height)/float(DPI))
+        fig.set_size_inches(float(plot_width) / float(DPI), float(plot_height) / float(DPI))
 
-        font_size = math.sqrt((plot_width**2)+(plot_height**2))/float(DPI)*1.25
+        font_size = math.sqrt((plot_width**2) + (plot_height**2)) / float(DPI) * 1.25
 
-        ax = plt.subplot() # Defines ax variable by creating an empty plot
+        ax = plt.subplot()    # Defines ax variable by creating an empty plot
 
         # Set the tick labels font
         for label in (ax.get_xticklabels() + ax.get_yticklabels()):
-            label.set_fontsize(font_size)        
+            label.set_fontsize(font_size)
 
         for segment_num, (segment_name, position_depths) in enumerate(segment_depths.items()):
             prior_domain_max = domain_max
             domain_max += len(position_depths)
 
-            colors = list(plt.rcParams['axes.prop_cycle'].by_key()['color']) # get the colors for this style
-            segment_color = colors[segment_num%len(colors)] # pick a color, offset by the segment index
-            
+            colors = list(plt.rcParams['axes.prop_cycle'].by_key()['color'])    # get the colors for this style
+            segment_color = colors[segment_num % len(colors)]    # pick a color, offset by the segment index
+
             if plot_data_style == "filled":
-                plt.fill_between(range(prior_domain_max, domain_max), position_depths, [0]*len(position_depths), linewidth=0, antialiased=True, color=segment_color)
+                plt.fill_between(
+                    range(prior_domain_max, domain_max),
+                    position_depths, [0] * len(position_depths),
+                    linewidth=0,
+                    antialiased=True,
+                    color=segment_color
+                )
             elif plot_data_style == "line":
                 plt.plot(range(prior_domain_max, domain_max), position_depths, antialiased=True, color=segment_color)
             elif plot_data_style == "dots":
-                plt.plot(range(prior_domain_max, domain_max), position_depths, 'ro', antialiased=True, color=segment_color)
-
-
-        plt.title(plot_title, fontsize=font_size*1.2)
-        plt.xlabel("bp", fontsize=font_size*1.1)
-        plt.ylabel("read depth", fontsize=font_size*1.1)
+                plt.plot(
+                    range(prior_domain_max, domain_max),
+                    position_depths,
+                    'ro',
+                    antialiased=True,
+                    color=segment_color
+                )
+
+        plt.title(plot_title, fontsize=font_size * 1.2)
+        plt.xlabel("bp", fontsize=font_size * 1.1)
+        plt.ylabel("read depth", fontsize=font_size * 1.1)
 
         # to squash a backend renderer error on OSX related to tight layout
         if plt.get_backend().lower() in ['agg', 'macosx']:
@@ -1224,7 +1362,7 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_styl
         else:
             fig.tight_layout()
 
-        plt.savefig(out_plot_file, format=plot_format, dpi=DPI) #, bbox_inches='tight')
+        plt.savefig(out_plot_file, format=plot_format, dpi=DPI)    #, bbox_inches='tight')
         log.info("Coverage plot saved to: " + out_plot_file)
 
     os.unlink(bam_aligned)
@@ -1232,7 +1370,7 @@ def plot_coverage(in_bam, out_plot_file, plot_format, plot_data_style, plot_styl
 
     if not out_summary:
         os.unlink(coverage_tsv_file)
-    
+
 
 def parser_plot_coverage(parser=argparse.ArgumentParser()):
     parser = parser_plot_coverage_common(parser)
@@ -1240,11 +1378,29 @@ def parser_plot_coverage(parser=argparse.ArgumentParser()):
     util.cmd.attach_main(parser, plot_coverage, split_args=True)
     return parser
 
+
 __commands__.append(('plot_coverage', parser_plot_coverage))
 
-def align_and_plot_coverage(out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary,
-                            in_bam, ref_fasta, out_bam=None, sensitive=False, min_score_to_output=None
-                            ):
+
+def align_and_plot_coverage(
+    out_plot_file,
+    plot_format,
+    plot_data_style,
+    plot_style,
+    plot_width,
+    plot_height,
+    plot_title,
+    base_q_threshold,
+    mapping_q_threshold,
+    max_coverage_depth,
+    read_length_threshold,
+    out_summary,
+    in_bam,
+    ref_fasta,
+    out_bam=None,
+    sensitive=False,
+    min_score_to_output=None
+):
     ''' 
         Take reads, align to reference with BWA-MEM, and generate a coverage plot
     '''
@@ -1285,9 +1441,12 @@ def align_and_plot_coverage(out_plot_file, plot_format, plot_data_style, plot_st
     os.unlink(aln_bam_filtered)
 
     samtools.index(bam_aligned)
-    
+
     # -- call plot function --
-    plot_coverage(bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title, base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary)
+    plot_coverage(
+        bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title,
+        base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary
+    )
 
     # remove the output bam, unless it is needed
     if out_bam is None:
@@ -1295,37 +1454,43 @@ def align_and_plot_coverage(out_plot_file, plot_format, plot_data_style, plot_st
 
     # remove the files created by bwa index. 
     # The empty extension causes the original fasta file to be removed
-    for ext in [".amb",".ann",".bwt",".bwa",".pac",".sa",""]:
-        file_to_remove = ref_indexed+ext
+    for ext in [".amb", ".ann", ".bwt", ".bwa", ".pac", ".sa", ""]:
+        file_to_remove = ref_indexed + ext
         if os.path.isfile(file_to_remove):
-            os.unlink( file_to_remove )
+            os.unlink(file_to_remove)
+
 
 def parser_align_and_plot_coverage(parser=argparse.ArgumentParser()):
     parser = parser_plot_coverage_common(parser)
-    parser.add_argument('ref_fasta', 
-                        default=None,
-                        help='Reference genome, FASTA format.')
-    parser.add_argument('--outBam', 
-                        dest="out_bam",
-                        default=None,
-                        help='Output aligned, indexed BAM file. Default is to write to temp.')
-    parser.add_argument('--sensitive',
-                        action="store_true",
-                        help="Equivalent to giving bwa: '-k 12 -A 1 -B 1 -O 1 -E 1' ")
-    parser.add_argument('-T',
-                        dest="min_score_to_output",
-                        default=30,
-                        type=int,
-                        help="The min score to output during alignment (default: %(default)s)")
+    parser.add_argument('ref_fasta', default=None, help='Reference genome, FASTA format.')
+    parser.add_argument(
+        '--outBam',
+        dest="out_bam",
+        default=None,
+        help='Output aligned, indexed BAM file. Default is to write to temp.'
+    )
+    parser.add_argument(
+        '--sensitive', action="store_true",
+        help="Equivalent to giving bwa: '-k 12 -A 1 -B 1 -O 1 -E 1' "
+    )
+    parser.add_argument(
+        '-T',
+        dest="min_score_to_output",
+        default=30,
+        type=int,
+        help="The min score to output during alignment (default: %(default)s)"
+    )
 
     util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
     util.cmd.attach_main(parser, align_and_plot_coverage, split_args=True)
     return parser
 
+
 __commands__.append(('align_and_plot_coverage', parser_align_and_plot_coverage))
 
 # =========================
 
+
 def full_parser():
     return util.cmd.make_parser(__commands__, __doc__)
 

From 0f1310e9f89a5f24674d2b0e0fd599505a52b8c6 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 14 Jul 2016 01:09:15 -0400
Subject: [PATCH 09/13] remove sam->bam conversion prior to samtools.sort call

---
 read_utils.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index 137b9fc01..fae290110 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -1273,17 +1273,9 @@ def plot_coverage(
     else:
         coverage_tsv_file = out_summary
 
-    bam_aligned = mkstempfname('.aligned.bam')
-
-    if in_bam.endswith(".sam"):
-        # convert sam -> bam
-        samtools.view(["-b"], in_bam, bam_aligned)
-    elif in_bam.endswith(".bam") or in_bam.endswith(".cram"):
-        shutil.copyfile(in_bam, bam_aligned)
-
     # call samtools sort
-    bam_sorted = mkstempfname('.aligned.bam')
-    samtools.sort(bam_aligned, bam_sorted)
+    bam_sorted = mkstempfname('.sorted.bam')
+    samtools.sort(in_bam, bam_sorted, args=["-O", "bam"])
 
     # call samtools index
     samtools.index(bam_sorted)
@@ -1301,6 +1293,7 @@ def plot_coverage(
         opts += ["-l", str(read_length_threshold)]
 
     samtools.depth(bam_sorted, coverage_tsv_file, opts)
+    os.unlink(bam_sorted)
 
     # ---- create plot based on coverage_tsv_file ----
 
@@ -1365,9 +1358,6 @@ def plot_coverage(
         plt.savefig(out_plot_file, format=plot_format, dpi=DPI)    #, bbox_inches='tight')
         log.info("Coverage plot saved to: " + out_plot_file)
 
-    os.unlink(bam_aligned)
-    os.unlink(bam_sorted)
-
     if not out_summary:
         os.unlink(coverage_tsv_file)
 

From 7b80bd6408b2f6e4422be234fbdef93fbfda0f88 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 14 Jul 2016 01:17:40 -0400
Subject: [PATCH 10/13] move plot_coverage() and align_and_plot_coverage() to
 reports.py

move plot_coverage() and align_and_plot_coverage() to reports.py, along
with their respective parser functions
---
 read_utils.py | 332 -------------------------------------------------
 reports.py    | 333 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 333 insertions(+), 332 deletions(-)

diff --git a/read_utils.py b/read_utils.py
index fae290110..601b39f66 100755
--- a/read_utils.py
+++ b/read_utils.py
@@ -14,13 +14,8 @@
 import os
 import tempfile
 import shutil
-import csv
-from collections import OrderedDict
 
 from Bio import SeqIO
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
 
 import util.cmd
 import util.file
@@ -1154,333 +1149,6 @@ def parser_bwamem_idxstats(parser=argparse.ArgumentParser()):
 
 # =========================
 
-
-def parser_plot_coverage_common(parser=argparse.ArgumentParser()):    # parser needs add_help=False?
-    parser.add_argument('in_bam', help='Input reads, BAM format.')
-    parser.add_argument('out_plot_file', help='The generated chart file')
-    parser.add_argument(
-        '--plotFormat',
-        dest="plot_format",
-        default=None,
-        type=str,
-        choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
-        metavar='',
-        help="File format of the coverage plot. By default it is inferred from the file extension of out_plot_file, but it can be set explicitly via --plotFormat. Valid formats include: "
-        + ", ".join(list(plt.gcf().canvas.get_supported_filetypes().keys()))
-    )
-    parser.add_argument(
-        '--plotDataStyle',
-        dest="plot_data_style",
-        default="filled",
-        type=str,
-        choices=["filled", "line", "dots"],
-        metavar='',
-        help="The plot data display style. Valid options: " + ", ".join(["filled", "line", "dots"]) +
-        " (default: %(default)s)"
-    )
-    parser.add_argument(
-        '--plotStyle',
-        dest="plot_style",
-        default="ggplot",
-        type=str,
-        choices=plt.style.available,
-        metavar='',
-        help="The plot visual style. Valid options: " + ", ".join(plt.style.available) + " (default: %(default)s)"
-    )
-    parser.add_argument(
-        '--plotWidth',
-        dest="plot_width",
-        default=1024,
-        type=int,
-        help="Width of the plot in pixels (default: %(default)s)"
-    )
-    parser.add_argument(
-        '--plotHeight',
-        dest="plot_height",
-        default=768,
-        type=int,
-        help="Width of the plot in pixels (default: %(default)s)"
-    )
-    parser.add_argument(
-        '--plotTitle',
-        dest="plot_title",
-        default="Coverage Plot",
-        type=str,
-        help="The title displayed on the coverage plot (default: '%(default)s')"
-    )
-    parser.add_argument(
-        '-q', dest="base_q_threshold",
-        default=None, type=int,
-        help="The minimum base quality threshold"
-    )
-    parser.add_argument(
-        '-Q', dest="mapping_q_threshold",
-        default=None,
-        type=int, help="The minimum mapping quality threshold"
-    )
-    parser.add_argument(
-        '-m',
-        dest="max_coverage_depth",
-        default=1000000,
-        type=int,
-        help="The max coverage depth (default: %(default)s)"
-    )
-    parser.add_argument('-l', dest="read_length_threshold", default=None, type=int, help="Read length threshold")
-    parser.add_argument(
-        '--outSummary',
-        dest="out_summary",
-        default=None,
-        type=str,
-        help="Coverage summary TSV file. Default is to write to temp."
-    )
-    return parser
-
-
-def plot_coverage(
-    in_bam,
-    out_plot_file,
-    plot_format,
-    plot_data_style,
-    plot_style,
-    plot_width,
-    plot_height,
-    plot_title,
-    base_q_threshold,
-    mapping_q_threshold,
-    max_coverage_depth,
-    read_length_threshold,
-    out_summary=None
-):
-    ''' 
-        Generate a coverage plot from an aligned bam file
-    '''
-
-    # TODO: remove this:
-    #coverage_tsv_file = "/Users/tomkinsc/Downloads/plottest/test_multisegment.tsv"
-
-    samtools = tools.samtools.SamtoolsTool()
-
-    # check if in_bam is aligned, if not raise an error
-    num_mapped_reads = samtools.count(in_bam, opts=["-F", "4"])
-    if num_mapped_reads == 0:
-        raise Exception(
-            """The bam file specified appears to have zero mapped reads. 'plot_coverage' requires an aligned bam file. You can try 'align_and_plot_coverage' if you don't mind a simple bwa alignment. \n File: %s"""
-            % in_bam
-        )
-
-    if out_summary is None:
-        coverage_tsv_file = mkstempfname('.summary.tsv')
-    else:
-        coverage_tsv_file = out_summary
-
-    # call samtools sort
-    bam_sorted = mkstempfname('.sorted.bam')
-    samtools.sort(in_bam, bam_sorted, args=["-O", "bam"])
-
-    # call samtools index
-    samtools.index(bam_sorted)
-
-    # call samtools depth
-    opts = []
-    opts += ['-aa']    # report coverate at "absolutely all" positions
-    if base_q_threshold:
-        opts += ["-q", str(base_q_threshold)]
-    if mapping_q_threshold:
-        opts += ["-Q", str(mapping_q_threshold)]
-    if max_coverage_depth:
-        opts += ["-m", str(max_coverage_depth)]
-    if read_length_threshold:
-        opts += ["-l", str(read_length_threshold)]
-
-    samtools.depth(bam_sorted, coverage_tsv_file, opts)
-    os.unlink(bam_sorted)
-
-    # ---- create plot based on coverage_tsv_file ----
-
-    segment_depths = OrderedDict()
-    domain_max = 0
-    with open(coverage_tsv_file, "r") as tabfile:
-        for row in csv.reader(tabfile, delimiter='\t'):
-            segment_depths.setdefault(row[0], []).append(int(row[2]))
-            domain_max += 1
-
-    domain_max = 0
-    with plt.style.context(plot_style):
-
-        fig = plt.gcf()
-        DPI = fig.get_dpi()
-        fig.set_size_inches(float(plot_width) / float(DPI), float(plot_height) / float(DPI))
-
-        font_size = math.sqrt((plot_width**2) + (plot_height**2)) / float(DPI) * 1.25
-
-        ax = plt.subplot()    # Defines ax variable by creating an empty plot
-
-        # Set the tick labels font
-        for label in (ax.get_xticklabels() + ax.get_yticklabels()):
-            label.set_fontsize(font_size)
-
-        for segment_num, (segment_name, position_depths) in enumerate(segment_depths.items()):
-            prior_domain_max = domain_max
-            domain_max += len(position_depths)
-
-            colors = list(plt.rcParams['axes.prop_cycle'].by_key()['color'])    # get the colors for this style
-            segment_color = colors[segment_num % len(colors)]    # pick a color, offset by the segment index
-
-            if plot_data_style == "filled":
-                plt.fill_between(
-                    range(prior_domain_max, domain_max),
-                    position_depths, [0] * len(position_depths),
-                    linewidth=0,
-                    antialiased=True,
-                    color=segment_color
-                )
-            elif plot_data_style == "line":
-                plt.plot(range(prior_domain_max, domain_max), position_depths, antialiased=True, color=segment_color)
-            elif plot_data_style == "dots":
-                plt.plot(
-                    range(prior_domain_max, domain_max),
-                    position_depths,
-                    'ro',
-                    antialiased=True,
-                    color=segment_color
-                )
-
-        plt.title(plot_title, fontsize=font_size * 1.2)
-        plt.xlabel("bp", fontsize=font_size * 1.1)
-        plt.ylabel("read depth", fontsize=font_size * 1.1)
-
-        # to squash a backend renderer error on OSX related to tight layout
-        if plt.get_backend().lower() in ['agg', 'macosx']:
-            fig.set_tight_layout(True)
-        else:
-            fig.tight_layout()
-
-        plt.savefig(out_plot_file, format=plot_format, dpi=DPI)    #, bbox_inches='tight')
-        log.info("Coverage plot saved to: " + out_plot_file)
-
-    if not out_summary:
-        os.unlink(coverage_tsv_file)
-
-
-def parser_plot_coverage(parser=argparse.ArgumentParser()):
-    parser = parser_plot_coverage_common(parser)
-    util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
-    util.cmd.attach_main(parser, plot_coverage, split_args=True)
-    return parser
-
-
-__commands__.append(('plot_coverage', parser_plot_coverage))
-
-
-def align_and_plot_coverage(
-    out_plot_file,
-    plot_format,
-    plot_data_style,
-    plot_style,
-    plot_width,
-    plot_height,
-    plot_title,
-    base_q_threshold,
-    mapping_q_threshold,
-    max_coverage_depth,
-    read_length_threshold,
-    out_summary,
-    in_bam,
-    ref_fasta,
-    out_bam=None,
-    sensitive=False,
-    min_score_to_output=None
-):
-    ''' 
-        Take reads, align to reference with BWA-MEM, and generate a coverage plot
-    '''
-    if out_bam is None:
-        bam_aligned = mkstempfname('.aligned.bam')
-    else:
-        bam_aligned = out_bam
-
-    ref_indexed = mkstempfname('.reference.fasta')
-    shutil.copyfile(ref_fasta, ref_indexed)
-
-    bwa = tools.bwa.Bwa()
-    samtools = tools.samtools.SamtoolsTool()
-
-    bwa.index(ref_indexed)
-
-    bwa_opts = []
-    if sensitive:
-        bwa_opts + "-k 12 -A 1 -B 1 -O 1 -E 1".split()
-
-    map_threshold = min_score_to_output or 30
-
-    bwa_opts + ["-T", str(map_threshold)]
-
-    aln_bam = mkstempfname('.bam')
-
-    bwa.mem(in_bam, ref_indexed, aln_bam, opts=bwa_opts)
-
-    # @haydenm says:
-    # For some reason (particularly when the --sensitive option is on), bwa
-    # doesn't listen to its '-T' flag and outputs alignments with score less
-    # than the '-T 30' threshold. So filter these:
-    aln_bam_filtered = mkstempfname('.filtered.bam')
-    samtools.view(["-b", "-h", "-q", str(map_threshold)], aln_bam, aln_bam_filtered)
-    os.unlink(aln_bam)
-
-    samtools.sort(aln_bam_filtered, bam_aligned)
-    os.unlink(aln_bam_filtered)
-
-    samtools.index(bam_aligned)
-
-    # -- call plot function --
-    plot_coverage(
-        bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title,
-        base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary
-    )
-
-    # remove the output bam, unless it is needed
-    if out_bam is None:
-        os.unlink(bam_aligned)
-
-    # remove the files created by bwa index. 
-    # The empty extension causes the original fasta file to be removed
-    for ext in [".amb", ".ann", ".bwt", ".bwa", ".pac", ".sa", ""]:
-        file_to_remove = ref_indexed + ext
-        if os.path.isfile(file_to_remove):
-            os.unlink(file_to_remove)
-
-
-def parser_align_and_plot_coverage(parser=argparse.ArgumentParser()):
-    parser = parser_plot_coverage_common(parser)
-    parser.add_argument('ref_fasta', default=None, help='Reference genome, FASTA format.')
-    parser.add_argument(
-        '--outBam',
-        dest="out_bam",
-        default=None,
-        help='Output aligned, indexed BAM file. Default is to write to temp.'
-    )
-    parser.add_argument(
-        '--sensitive', action="store_true",
-        help="Equivalent to giving bwa: '-k 12 -A 1 -B 1 -O 1 -E 1' "
-    )
-    parser.add_argument(
-        '-T',
-        dest="min_score_to_output",
-        default=30,
-        type=int,
-        help="The min score to output during alignment (default: %(default)s)"
-    )
-
-    util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
-    util.cmd.attach_main(parser, align_and_plot_coverage, split_args=True)
-    return parser
-
-
-__commands__.append(('align_and_plot_coverage', parser_align_and_plot_coverage))
-
-# =========================
-
-
 def full_parser():
     return util.cmd.make_parser(__commands__, __doc__)
 
diff --git a/reports.py b/reports.py
index 158e2114f..9b3d78527 100755
--- a/reports.py
+++ b/reports.py
@@ -12,16 +12,21 @@
 import time
 from collections import OrderedDict
 import csv
+import math
 
 import pysam
 import Bio.SeqIO
 import Bio.AlignIO
 from Bio.Alphabet.IUPAC import IUPACUnambiguousDNA
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
 
 import util.cmd
 import util.file
 import util.misc
 import tools.samtools
+import tools.bwa
 import assembly
 import interhost
 from util.stats import mean, median
@@ -373,6 +378,334 @@ def parser_consolidate_spike_count(parser=argparse.ArgumentParser()):
 
 __commands__.append(('consolidate_spike_count', parser_consolidate_spike_count))
 
+
+# =========================
+
+
+def parser_plot_coverage_common(parser=argparse.ArgumentParser()):    # parser needs add_help=False?
+    parser.add_argument('in_bam', help='Input reads, BAM format.')
+    parser.add_argument('out_plot_file', help='The generated chart file')
+    parser.add_argument(
+        '--plotFormat',
+        dest="plot_format",
+        default=None,
+        type=str,
+        choices=list(plt.gcf().canvas.get_supported_filetypes().keys()),
+        metavar='',
+        help="File format of the coverage plot. By default it is inferred from the file extension of out_plot_file, but it can be set explicitly via --plotFormat. Valid formats include: "
+        + ", ".join(list(plt.gcf().canvas.get_supported_filetypes().keys()))
+    )
+    parser.add_argument(
+        '--plotDataStyle',
+        dest="plot_data_style",
+        default="filled",
+        type=str,
+        choices=["filled", "line", "dots"],
+        metavar='',
+        help="The plot data display style. Valid options: " + ", ".join(["filled", "line", "dots"]) +
+        " (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotStyle',
+        dest="plot_style",
+        default="ggplot",
+        type=str,
+        choices=plt.style.available,
+        metavar='',
+        help="The plot visual style. Valid options: " + ", ".join(plt.style.available) + " (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotWidth',
+        dest="plot_width",
+        default=1024,
+        type=int,
+        help="Width of the plot in pixels (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotHeight',
+        dest="plot_height",
+        default=768,
+        type=int,
+        help="Width of the plot in pixels (default: %(default)s)"
+    )
+    parser.add_argument(
+        '--plotTitle',
+        dest="plot_title",
+        default="Coverage Plot",
+        type=str,
+        help="The title displayed on the coverage plot (default: '%(default)s')"
+    )
+    parser.add_argument(
+        '-q', dest="base_q_threshold",
+        default=None, type=int,
+        help="The minimum base quality threshold"
+    )
+    parser.add_argument(
+        '-Q', dest="mapping_q_threshold",
+        default=None,
+        type=int, help="The minimum mapping quality threshold"
+    )
+    parser.add_argument(
+        '-m',
+        dest="max_coverage_depth",
+        default=1000000,
+        type=int,
+        help="The max coverage depth (default: %(default)s)"
+    )
+    parser.add_argument('-l', dest="read_length_threshold", default=None, type=int, help="Read length threshold")
+    parser.add_argument(
+        '--outSummary',
+        dest="out_summary",
+        default=None,
+        type=str,
+        help="Coverage summary TSV file. Default is to write to temp."
+    )
+    return parser
+
+
+def plot_coverage(
+    in_bam,
+    out_plot_file,
+    plot_format,
+    plot_data_style,
+    plot_style,
+    plot_width,
+    plot_height,
+    plot_title,
+    base_q_threshold,
+    mapping_q_threshold,
+    max_coverage_depth,
+    read_length_threshold,
+    out_summary=None
+):
+    ''' 
+        Generate a coverage plot from an aligned bam file
+    '''
+
+    # TODO: remove this:
+    #coverage_tsv_file = "/Users/tomkinsc/Downloads/plottest/test_multisegment.tsv"
+
+    samtools = tools.samtools.SamtoolsTool()
+
+    # check if in_bam is aligned, if not raise an error
+    num_mapped_reads = samtools.count(in_bam, opts=["-F", "4"])
+    if num_mapped_reads == 0:
+        raise Exception(
+            """The bam file specified appears to have zero mapped reads. 'plot_coverage' requires an aligned bam file. You can try 'align_and_plot_coverage' if you don't mind a simple bwa alignment. \n File: %s"""
+            % in_bam
+        )
+
+    if out_summary is None:
+        coverage_tsv_file = util.file.mkstempfname('.summary.tsv')
+    else:
+        coverage_tsv_file = out_summary
+
+    # call samtools sort
+    bam_sorted = util.file.mkstempfname('.sorted.bam')
+    samtools.sort(in_bam, bam_sorted, args=["-O", "bam"])
+
+    # call samtools index
+    samtools.index(bam_sorted)
+
+    # call samtools depth
+    opts = []
+    opts += ['-aa']    # report coverate at "absolutely all" positions
+    if base_q_threshold:
+        opts += ["-q", str(base_q_threshold)]
+    if mapping_q_threshold:
+        opts += ["-Q", str(mapping_q_threshold)]
+    if max_coverage_depth:
+        opts += ["-m", str(max_coverage_depth)]
+    if read_length_threshold:
+        opts += ["-l", str(read_length_threshold)]
+
+    samtools.depth(bam_sorted, coverage_tsv_file, opts)
+    os.unlink(bam_sorted)
+
+    # ---- create plot based on coverage_tsv_file ----
+
+    segment_depths = OrderedDict()
+    domain_max = 0
+    with open(coverage_tsv_file, "r") as tabfile:
+        for row in csv.reader(tabfile, delimiter='\t'):
+            segment_depths.setdefault(row[0], []).append(int(row[2]))
+            domain_max += 1
+
+    domain_max = 0
+    with plt.style.context(plot_style):
+
+        fig = plt.gcf()
+        DPI = fig.get_dpi()
+        fig.set_size_inches(float(plot_width) / float(DPI), float(plot_height) / float(DPI))
+
+        font_size = math.sqrt((plot_width**2) + (plot_height**2)) / float(DPI) * 1.25
+
+        ax = plt.subplot()    # Defines ax variable by creating an empty plot
+
+        # Set the tick labels font
+        for label in (ax.get_xticklabels() + ax.get_yticklabels()):
+            label.set_fontsize(font_size)
+
+        for segment_num, (segment_name, position_depths) in enumerate(segment_depths.items()):
+            prior_domain_max = domain_max
+            domain_max += len(position_depths)
+
+            colors = list(plt.rcParams['axes.prop_cycle'].by_key()['color'])    # get the colors for this style
+            segment_color = colors[segment_num % len(colors)]    # pick a color, offset by the segment index
+
+            if plot_data_style == "filled":
+                plt.fill_between(
+                    range(prior_domain_max, domain_max),
+                    position_depths, [0] * len(position_depths),
+                    linewidth=0,
+                    antialiased=True,
+                    color=segment_color
+                )
+            elif plot_data_style == "line":
+                plt.plot(range(prior_domain_max, domain_max), position_depths, antialiased=True, color=segment_color)
+            elif plot_data_style == "dots":
+                plt.plot(
+                    range(prior_domain_max, domain_max),
+                    position_depths,
+                    'ro',
+                    antialiased=True,
+                    color=segment_color
+                )
+
+        plt.title(plot_title, fontsize=font_size * 1.2)
+        plt.xlabel("bp", fontsize=font_size * 1.1)
+        plt.ylabel("read depth", fontsize=font_size * 1.1)
+
+        # to squash a backend renderer error on OSX related to tight layout
+        if plt.get_backend().lower() in ['agg', 'macosx']:
+            fig.set_tight_layout(True)
+        else:
+            fig.tight_layout()
+
+        plt.savefig(out_plot_file, format=plot_format, dpi=DPI)    #, bbox_inches='tight')
+        log.info("Coverage plot saved to: " + out_plot_file)
+
+    if not out_summary:
+        os.unlink(coverage_tsv_file)
+
+
+def parser_plot_coverage(parser=argparse.ArgumentParser()):
+    parser = parser_plot_coverage_common(parser)
+    util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
+    util.cmd.attach_main(parser, plot_coverage, split_args=True)
+    return parser
+
+
+__commands__.append(('plot_coverage', parser_plot_coverage))
+
+
+def align_and_plot_coverage(
+    out_plot_file,
+    plot_format,
+    plot_data_style,
+    plot_style,
+    plot_width,
+    plot_height,
+    plot_title,
+    base_q_threshold,
+    mapping_q_threshold,
+    max_coverage_depth,
+    read_length_threshold,
+    out_summary,
+    in_bam,
+    ref_fasta,
+    out_bam=None,
+    sensitive=False,
+    min_score_to_output=None
+):
+    ''' 
+        Take reads, align to reference with BWA-MEM, and generate a coverage plot
+    '''
+    if out_bam is None:
+        bam_aligned = util.file.mkstempfname('.aligned.bam')
+    else:
+        bam_aligned = out_bam
+
+    ref_indexed = util.file.mkstempfname('.reference.fasta')
+    shutil.copyfile(ref_fasta, ref_indexed)
+
+    bwa = tools.bwa.Bwa()
+    samtools = tools.samtools.SamtoolsTool()
+
+    bwa.index(ref_indexed)
+
+    bwa_opts = []
+    if sensitive:
+        bwa_opts + "-k 12 -A 1 -B 1 -O 1 -E 1".split()
+
+    map_threshold = min_score_to_output or 30
+
+    bwa_opts + ["-T", str(map_threshold)]
+
+    aln_bam = util.file.mkstempfname('.bam')
+
+    bwa.mem(in_bam, ref_indexed, aln_bam, opts=bwa_opts)
+
+    # @haydenm says:
+    # For some reason (particularly when the --sensitive option is on), bwa
+    # doesn't listen to its '-T' flag and outputs alignments with score less
+    # than the '-T 30' threshold. So filter these:
+    aln_bam_filtered = util.file.mkstempfname('.filtered.bam')
+    samtools.view(["-b", "-h", "-q", str(map_threshold)], aln_bam, aln_bam_filtered)
+    os.unlink(aln_bam)
+
+    samtools.sort(aln_bam_filtered, bam_aligned)
+    os.unlink(aln_bam_filtered)
+
+    samtools.index(bam_aligned)
+
+    # -- call plot function --
+    plot_coverage(
+        bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title,
+        base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary
+    )
+
+    # remove the output bam, unless it is needed
+    if out_bam is None:
+        os.unlink(bam_aligned)
+
+    # remove the files created by bwa index. 
+    # The empty extension causes the original fasta file to be removed
+    for ext in [".amb", ".ann", ".bwt", ".bwa", ".pac", ".sa", ""]:
+        file_to_remove = ref_indexed + ext
+        if os.path.isfile(file_to_remove):
+            os.unlink(file_to_remove)
+
+
+def parser_align_and_plot_coverage(parser=argparse.ArgumentParser()):
+    parser = parser_plot_coverage_common(parser)
+    parser.add_argument('ref_fasta', default=None, help='Reference genome, FASTA format.')
+    parser.add_argument(
+        '--outBam',
+        dest="out_bam",
+        default=None,
+        help='Output aligned, indexed BAM file. Default is to write to temp.'
+    )
+    parser.add_argument(
+        '--sensitive', action="store_true",
+        help="Equivalent to giving bwa: '-k 12 -A 1 -B 1 -O 1 -E 1' "
+    )
+    parser.add_argument(
+        '-T',
+        dest="min_score_to_output",
+        default=30,
+        type=int,
+        help="The min score to output during alignment (default: %(default)s)"
+    )
+
+    util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
+    util.cmd.attach_main(parser, align_and_plot_coverage, split_args=True)
+    return parser
+
+
+__commands__.append(('align_and_plot_coverage', parser_align_and_plot_coverage))
+
+
 # =======================
 
 

From a07cd3ef981fd3520185579a416d92d2a17ad380 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 14 Jul 2016 01:19:57 -0400
Subject: [PATCH 11/13] missed hitting save before last commit; add missing
 shutil import

---
 reports.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/reports.py b/reports.py
index 9b3d78527..99bf159da 100755
--- a/reports.py
+++ b/reports.py
@@ -13,6 +13,7 @@
 from collections import OrderedDict
 import csv
 import math
+import shutil
 
 import pysam
 import Bio.SeqIO

From bfdcccc623d92ef2e7399fda3141b14fa7a2a456 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 14 Jul 2016 17:41:17 -0400
Subject: [PATCH 12/13] refactor bwa.py to remove the sam->bam conversion added
 earlier in this branch

---
 tools/bwa.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/tools/bwa.py b/tools/bwa.py
index 37a870aeb..3fe19af20 100644
--- a/tools/bwa.py
+++ b/tools/bwa.py
@@ -7,7 +7,6 @@
 import os
 import os.path
 import subprocess
-import shutil
 
 import tools
 import tools.samtools
@@ -56,20 +55,18 @@ def mem(self, inReads, refDb, outAlign, opts=None, threads=None):
         fq1 = util.file.mkstempfname('.1.fastq')
         fq2 = util.file.mkstempfname('.2.fastq')
         aln_sam = util.file.mkstempfname('.sam')
-        aln_sam_sorted = util.file.mkstempfname('sorted.sam')
         samtools.bam2fq(inReads, fq1, fq2)
-        self.execute('mem', opts + ['-t', str(threads), refDb, fq1, fq2], stdout=aln_sam)
+
+        if '-t' not in opts:
+            threads = threads or utils.misc.available_cpu_count()
+            opts.extend( ('-t', str(threads)) )
+
+        self.execute('mem', opts + [refDb, fq1, fq2], stdout=aln_sam)
+
         os.unlink(fq1)
         os.unlink(fq2)
-        samtools.sort(aln_sam, aln_sam_sorted)
+        samtools.sort(aln_sam, outAlign)
         os.unlink(aln_sam)
-        # cannot index sam files; only do so if a bam is desired
+        # cannot index sam files; only do so if a bam/cram is desired
         if outAlign.endswith(".bam") or outAlign.endswith(".cram"):
-            # convert sam -> bam
-            samtools.view(["-b"], aln_sam_sorted, outAlign)
             samtools.index(outAlign)
-        elif outAlign.endswith(".sam"):
-            shutil.copyfile(aln_sam_sorted, outAlign)
-        os.unlink(aln_sam_sorted)
-
-

From 0f47ccc6ef1f7c4930a18788edfcfaf571993a4f Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 14 Jul 2016 18:55:23 -0400
Subject: [PATCH 13/13] add DPI option as parameter for plot_coverage

---
 reports.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/reports.py b/reports.py
index 99bf159da..99ff054b1 100755
--- a/reports.py
+++ b/reports.py
@@ -429,6 +429,13 @@ def parser_plot_coverage_common(parser=argparse.ArgumentParser()):    # parser n
         type=int,
         help="Width of the plot in pixels (default: %(default)s)"
     )
+    parser.add_argument(
+        '--plotDPI',
+        dest="plot_dpi",
+        default=plt.gcf().get_dpi(),
+        type=int,
+        help="dots per inch for rendered output, more useful for vector modes (default: %(default)s)"
+    )
     parser.add_argument(
         '--plotTitle',
         dest="plot_title",
@@ -472,6 +479,7 @@ def plot_coverage(
     plot_style,
     plot_width,
     plot_height,
+    plot_dpi,
     plot_title,
     base_q_threshold,
     mapping_q_threshold,
@@ -534,12 +542,11 @@ def plot_coverage(
 
     domain_max = 0
     with plt.style.context(plot_style):
-
         fig = plt.gcf()
-        DPI = fig.get_dpi()
+        DPI = plot_dpi or fig.get_dpi()
         fig.set_size_inches(float(plot_width) / float(DPI), float(plot_height) / float(DPI))
 
-        font_size = math.sqrt((plot_width**2) + (plot_height**2)) / float(DPI) * 1.25
+        font_size = (2.5 * plot_height) / float(DPI)
 
         ax = plt.subplot()    # Defines ax variable by creating an empty plot
 
@@ -607,6 +614,7 @@ def align_and_plot_coverage(
     plot_style,
     plot_width,
     plot_height,
+    plot_dpi,
     plot_title,
     base_q_threshold,
     mapping_q_threshold,
@@ -662,7 +670,7 @@ def align_and_plot_coverage(
 
     # -- call plot function --
     plot_coverage(
-        bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_title,
+        bam_aligned, out_plot_file, plot_format, plot_data_style, plot_style, plot_width, plot_height, plot_dpi, plot_title,
         base_q_threshold, mapping_q_threshold, max_coverage_depth, read_length_threshold, out_summary
     )