Skip to content

Commit

Permalink
MCM Stitch: Make contig input optional and add final sequence ID param
Browse files Browse the repository at this point in the history
  • Loading branch information
Nolan Woods committed Sep 6, 2019
1 parent 49d5674 commit 17f5948
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 20 deletions.
9 changes: 7 additions & 2 deletions mauve_contig_mover/mcm_stitch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,17 @@
</requirements>
<version_command><![CDATA[ $__tool_directory__/stitch.py -v ]]></version_command>
<command detect_errors="aggressive"><![CDATA[
python $__tool_directory__/stitch.py $pad_len $contigs $draft $draft.ext $draft.element_identifier.replace(' ', '_') > $output
python $__tool_directory__/stitch.py
#if $seqid
-s '$seqid'
#end if
$pad_len $draft $draft.ext $draft.element_identifier.replace(' ', '_') $contigs > $output
]]></command>
<inputs>
<param name="draft" type="data" format="abi,abi-trim,ace,cif-atom,cif-seqres,clustal,embl,fasta,fasta-2line,fastq-sanger,fastq,fastq-solexa,fastq-illumina,genbank,gb,ig,imgt,nexus,pdb-seqres,pdb-atom,phd,phylip,pir,seqxml,sff,sff-trim,stockholm,swiss,tab,qual,uniprot-xml,gff3" label="Draft" />
<param name="contigs" type="data" format="tabular" label="Contigs" help="Contigs output from Mauve Contig Mover" />
<param name="contigs" type="data" format="tabular" label="Contigs" optional="true" help="Contigs output from Mauve Contig Mover" />
<param name="pad_len" type="integer" min="0" value="1000" label="Pad length" help="Specify the length of padding to put between contigs" />
<!-- Optional ID passed to stitch.py via -s; left empty, the option is omitted from the command line -->
<param name="seqid" type="text" value="" label="Stitched string ID" help="Specify an ID to use for the stitched record; otherwise the first record ID is used" />
</inputs>
<outputs>
<data name="output" format_source="draft" />
Expand Down
52 changes: 34 additions & 18 deletions mauve_contig_mover/stitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
from Bio import SeqIO, Alphabet
from Bio.Seq import Seq
import csv
import getopt

usage = """
Mauve Contig Mover - Stitch
Stitch contigs into a single contig.
Compliments reversed sequences and rewrites all feature coordinates.
Use: stitch.py [-v] <padding length> <MauveCM contigs.tab path> <draft file path> <draft file format> [final sequence id]
Use: stitch.py [-v] [-s 'final sequence id'] <padding length> <draft file path> <draft file format> [MauveCM contigs.tab path]
\t-v Print version and exit
\t-s Provide an ID for the final sequence, the first sequence ID will be used otherwise
Valid draft file formats:
abi, abi-trim, ace, cif-atom, cif-seqres, clustal, embl, fasta, fasta-2line, fastq-sanger, fastq, fastq-solexa, fastq-illumina,
genbank, gb, ig, imgt, nexus, pdb-seqres, pdb-atom, phd, phylip, pir, seqxml, sff, sff-trim, stockholm, swiss, tab, qual, uniprot-xml, gff3
Expand Down Expand Up @@ -79,26 +81,39 @@ def stitch(pad, contigs, order):


if __name__ == '__main__':
if '-v' in sys.argv:
print('1.0')
exit(0)

if len(sys.argv) < 5:
print("Missing arguments", file=sys.stderr)
print(help, file=sys.stderr)
exit(1)

pad_len = int(sys.argv[1])
# ID to assign to the stitched record; stays None unless -s is supplied
seqid = None

# Parse arguments.
# gnu_getopt permits options to be mixed in with positional arguments.
# It must be given the argument list without the program name, and '-s'
# needs a trailing ':' in the option string so that it consumes a value.
try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], 'vs:')
except getopt.GetoptError as err:
    print("Argument error(" + str(err.opt) + "): " + err.msg, file=sys.stderr)
    # Leave no positional arguments so the usage text is printed below
    opts, args = [], []

for opt, val in opts:
    if opt == '-v':
        print('1.0')
        exit(0)
    elif opt == '-s':
        seqid = val

# Check for minimum number of arguments:
# <padding length> <draft file path> <draft file format>
if len(args) < 3:
    print(usage, file=sys.stderr)
    exit(1)

# First positional argument: the padding length, a non-negative integer
try:
    pad_len = int(args[0])
except ValueError:
    print("Padding length must be an integer", file=sys.stderr)
    print(usage, file=sys.stderr)
    exit(1)

if pad_len < 0:
    print("Padding length must be >= 0", file=sys.stderr)
    # Print the module's usage text (the builtin `help` is not the help message)
    print(usage, file=sys.stderr)
    exit(1)

contig_path = sys.argv[2]
draft_path = sys.argv[3]
draft_format = sys.argv[4]

order = getOrder(contig_path)
# Second and third positional arguments: draft file location and format
draft_path, draft_format = args[1], args[2]

# The contigs file is optional; without it the order is empty
order = getOrder(args[3]) if len(args) >= 4 else ()

pad = Seq('N' * pad_len)
# Index every record of the draft by its name
contigs = dict((seq.name, seq) for seq in SeqIO.parse(draft_path, draft_format))

Expand All @@ -107,9 +122,10 @@ def stitch(pad, contigs, order):
if result:
# Ensure there is only one 'source' feature
# TODO
pass

if result and len(sys.argv) > 5:
result.id = sys.argv[5]
# When an ID was supplied with -s, apply it to the stitched record
if result:
    if seqid:
        result.id = seqid
        # Blank the description alongside the replaced ID
        result.description = ""

# Emit the record on stdout in the same format as the draft input
SeqIO.write(result, sys.stdout, draft_format)

0 comments on commit 17f5948

Please sign in to comment.