diff --git a/assembly.py b/assembly.py index 079ab01ed..5ae556724 100755 --- a/assembly.py +++ b/assembly.py @@ -95,7 +95,7 @@ def assemble_trinity(inBam, outFasta, clipDb, n_reads=100000, outReads=None): def parser_assemble_trinity(parser=argparse.ArgumentParser()): parser.add_argument('inBam', - help='Input reads, BAM format.') + help='Input unaligned reads, BAM format.') parser.add_argument('clipDb', help='Trimmomatic clip DB.') parser.add_argument('outFasta', @@ -153,13 +153,13 @@ def order_and_orient(inFasta, inReference, outFasta, inReads=None): def parser_order_and_orient(parser=argparse.ArgumentParser()): parser.add_argument('inFasta', - help='Input assembly/contigs, FASTA format.') + help='Input de novo assembly/contigs, FASTA format.') parser.add_argument('inReference', - help='Reference genome, FASTA format.') + help='Reference genome for ordering, orienting, and merging contigs, FASTA format.') parser.add_argument('outFasta', - help='Output assembly, FASTA format.') + help='Output assembly, FASTA format, with the same number of chromosomes as inReference, and in the same order.') parser.add_argument('--inReads', default=None, - help='Input reads in BAM format.') + help='Input reads in unaligned BAM format. 
These can be used to improve the merge process.') util.cmd.common_args(parser, (('loglevel',None), ('version',None), ('tmpDir',None))) util.cmd.attach_main(parser, order_and_orient, split_args=True) return parser @@ -239,9 +239,9 @@ def impute_from_reference(inFasta, inReference, outFasta, def parser_impute_from_reference(parser=argparse.ArgumentParser()): parser.add_argument('inFasta', - help='Input assembly/contigs, FASTA format.') + help='Input assembly/contigs, FASTA format, already ordered, oriented and merged with inReference.') parser.add_argument('inReference', - help='Reference genome, FASTA format.') + help='Reference genome to impute with, FASTA format.') parser.add_argument('outFasta', help='Output assembly, FASTA format.') parser.add_argument("--newName", default=None, @@ -329,7 +329,7 @@ def parser_refine_assembly(parser=argparse.ArgumentParser()): parser.add_argument('inFasta', help='Input assembly, FASTA format, pre-indexed for Picard, Samtools, and Novoalign.') parser.add_argument('inBam', - help='Input reads, BAM format.') + help='Input reads, unaligned BAM format.') parser.add_argument('outFasta', help='Output refined assembly, FASTA format, indexed for Picard, Samtools, and Novoalign.') parser.add_argument('--outBam', diff --git a/docs/install.rst b/docs/install.rst index fdaee3d9e..3639a51cd 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -40,6 +40,9 @@ as well:: pip install snakemake==3.2 yappi==0.94 +However, most of the real functionality is encapsulated in the command line +tools, which can be used without any of the pipeline infrastructure. + You should either sudo pip install or use a virtualenv (recommended). 
diff --git a/taxon_filter.py b/taxon_filter.py index fa316aefd..dfea893f8 100755 --- a/taxon_filter.py +++ b/taxon_filter.py @@ -25,32 +25,33 @@ def parser_deplete_human(parser=argparse.ArgumentParser()): parser.add_argument('inBam', help='Input BAM file.') parser.add_argument('revertBam', - help='Output BAM file.') + help='Output BAM: read markup reverted with Picard.') parser.add_argument('bmtaggerBam', - help='Output BAM file.') + help='Output BAM: depleted of human reads with BMTagger.') parser.add_argument('rmdupBam', - help='Output BAM file.') + help='Output BAM: bmtaggerBam run through M-Vicuna duplicate removal.') parser.add_argument('blastnBam', - help='Output BAM file.') + help='Output BAM: rmdupBam run through another depletion of human reads with BLASTN.') parser.add_argument('--taxfiltBam', - help='Output BAM file.', + help='Output BAM: blastnBam run through taxonomic selection via LASTAL.', default=None) parser.add_argument('--bmtaggerDbs', nargs='+', required=True, help='''Reference databases (one or more) to deplete from input. For each db, requires prior creation of db.bitmask by bmtool, and db.srprism.idx, db.srprism.map, etc. 
by srprism mkindex.''') parser.add_argument('--blastDbs', nargs='+', required=True, - help='One or more reference databases for blast.') + help='One or more reference databases for blast to deplete from input.') parser.add_argument('--lastDb', - help='One reference database for last.', + help='One reference database for last (required if --taxfiltBam is specified).', default=None) parser.add_argument('--JVMmemory', default = tools.picard.FilterSamReadsTool.jvmMemDefault, - help='JVM virtual memory size (default: %(default)s)') + help='JVM virtual memory size for Picard FilterSamReads (default: %(default)s)') util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmpDir', None))) util.cmd.attach_main(parser, main_deplete_human) return parser def main_deplete_human(args): - '''Run the entire depletion pipeline: bmtagger, mvicuna, blastn, and maybe lastal''' + ''' Run the entire depletion pipeline: bmtagger, mvicuna, blastn. + Optionally, use lastal to select a specific taxon of interest.''' tools.picard.RevertSamTool().execute(args.inBam, args.revertBam, picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true']) multi_db_deplete_bam(args.revertBam, args.bmtaggerDbs, deplete_bmtagger_bam, args.bmtaggerBam, JVMmemory=args.JVMmemory)