Skip to content

Commit

Permalink
update to v0.3.8
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Palmer authored and Jon Palmer committed Sep 16, 2016
1 parent 7b32564 commit 1ba402a
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 41 deletions.
74 changes: 37 additions & 37 deletions bin/funannotate-predict.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
#!/usr/bin/env python

import sys, os, subprocess, inspect, multiprocessing, shutil, argparse, time, re, platform
import sys, os, subprocess, inspect, shutil, argparse, re
from Bio import SeqIO
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
sys.path.insert(0, parentdir)
import lib.library as lib

#setup menu with argparse
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
def __init__(self,prog):
super(MyFormatter,self).__init__(prog,max_help_position=48)
parser=argparse.ArgumentParser(prog='funannotate-predict.py', usage="%(prog)s [options] -i genome.fasta",
description='''Script that does it all...''',
epilog="""Written by Jon Palmer (2016) [email protected]""",
def __init__(self, prog):
super(MyFormatter, self).__init__(prog, max_help_position=48)
parser = argparse.ArgumentParser(prog='funannotate-predict.py', usage="%(prog)s [options] -i genome.fasta",
description = '''Script that does it all.''',
epilog = """Written by Jon Palmer (2016) [email protected]""",
formatter_class = MyFormatter)
parser.add_argument('-i','--input', required=True, help='Genome in FASTA format')
parser.add_argument('-o','--out', required=True, help='Basename of output files')
parser.add_argument('-s','--species', required=True, help='Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space')
parser.add_argument('-i', '--input', required=True, help='Genome in FASTA format')
parser.add_argument('-o', '--out', required=True, help='Basename of output files')
parser.add_argument('-s', '--species', required=True, help='Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space')
parser.add_argument('--isolate', help='Isolate/strain name (e.g. Af293)')
parser.add_argument('--header_length', default=16, type=int, help='Max length for fasta headers')
parser.add_argument('--name', default="FUN_", help='Shortname for genes, perhaps assigned by NCBI, eg. VC83')
Expand Down Expand Up @@ -52,7 +52,7 @@ def __init__(self,prog):
conflict = ['busco', 'busco_proteins', 'RepeatMasker', 'RepeatModeler', 'genemark', 'EVM_tmp', 'braker']
if args.out in conflict:
lib.log.error("%s output folder conflicts with a hard coded tmp folder, please change -o parameter" % args.out)
os._exit(1)
sys.exit(1)

#create folder structure
if not os.path.exists(args.out):
Expand Down Expand Up @@ -87,19 +87,18 @@ def __init__(self,prog):
blastdb = os.path.join(parentdir,'DB','REPEATS.psq')
if not os.path.isfile(blastdb):
lib.log.error("funannotate database is not properly configured, please run `./setup.sh` in the %s directory" % parentdir)
os._exit(1)
sys.exit(1)
#check buscos, download if necessary
if not os.path.isdir(os.path.join(parentdir, 'DB', args.busco_db)):
lib.download_buscos(args.busco_db)


#do some checks and balances
try:
EVM = os.environ["EVM_HOME"]
except KeyError:
if not args.EVM_HOME:
lib.log.error("$EVM_HOME environmental variable not found, Evidence Modeler is not properly configured. You can use the --EVM_HOME argument to specifiy a path at runtime")
os._exit(1)
sys.exit(1)
else:
EVM = args.EVM_HOME

Expand All @@ -108,7 +107,7 @@ def __init__(self,prog):
except KeyError:
if not args.AUGUSTUS_CONFIG_PATH:
lib.log.error("$AUGUSTUS_CONFIG_PATH environmental variable not found, Augustus is not properly configured. You can use the --AUGUSTUS_CONFIG_PATH argument to specify a path at runtime.")
os._exit(1)
sys.exit(1)
else:
AUGUSTUS = args.AUGUSTUS_CONFIG_PATH

Expand All @@ -119,7 +118,7 @@ def __init__(self,prog):
if not lib.which('gmes_petap.pl'):
if not args.GENEMARK_PATH:
lib.log.error("GeneMark not found and $GENEMARK_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --GENEMARK_PATH argument to specify a path at runtime.")
os._exit(1)
sys.exit(1)
else:
GENEMARK_PATH = args.GENEMARK_PATH

Expand All @@ -130,7 +129,7 @@ def __init__(self,prog):
if not lib.which('bamtools'):
if not args.BAMTOOLS_PATH:
lib.log.error("Bamtools not found and $BAMTOOLS_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --BAMTOOLS_PATH argument to specify a path at runtime.")
os._exit(1)
sys.exit(1)
else:
BAMTOOLS_PATH = args.BAMTOOLS_PATH

Expand All @@ -141,7 +140,7 @@ def __init__(self,prog):
AutoAug = os.path.join(AUGUSTUS_BASE, 'scripts', 'autoAug.pl')
GeneMark2GFF = os.path.join(parentdir, 'util', 'genemark_gtf2gff3.pl')

programs = ['tblastn', 'exonerate', 'makeblastdb','dustmasker','gag.py','tbl2asn','gmes_petap.pl', 'BuildDatabase', 'RepeatModeler', 'RepeatMasker', GeneMark2GFF, AutoAug, 'bedtools', 'gmap', 'gmap_build', 'blat', 'pslCDnaFilter', 'augustus', 'etraining', 'rmOutToGFF3.pl']
programs = ['tblastn', 'exonerate', 'makeblastdb', 'dustmasker', 'gag.py', 'tbl2asn', 'gmes_petap.pl', 'BuildDatabase', 'RepeatModeler', 'RepeatMasker', GeneMark2GFF, AutoAug, 'bedtools', 'gmap', 'gmap_build', 'blat', 'pslCDnaFilter', 'augustus', 'etraining', 'rmOutToGFF3.pl']
lib.CheckDependencies(programs)

#check augustus species now, so that you don't get through script and then find out it is already in DB
Expand All @@ -155,10 +154,11 @@ def __init__(self,prog):

#check augustus functionality
augustuscheck = lib.checkAugustusFunc(AUGUSTUS_BASE)
system_os = lib.systemOS()
if args.rna_bam:
if augustuscheck[1] == 0:
lib.log.error("ERROR: %s is not installed properly for BRAKER1 (check bam2hints compilation)" % augustuscheck[0])
os._exit(1)
sys.exit(1)
if not augspeciescheck: #means training needs to be done
if augustuscheck[2] == 0:
if 'MacOSX' in system_os:
Expand All @@ -170,7 +170,7 @@ def __init__(self,prog):
else:
lib.log.error("ERROR: %s is not installed properly and this version not work with BUSCO, this is a problem with Augustus compliatation, you may need to compile manually on %s." % (augustuscheck[0], system_os))
if not args.pasa_gff: #first training will use pasa, otherwise BUSCO
os._exit(1)
sys.exit(1)
else:
lib.log.info("Will proceed with PASA models to train Augustus")

Expand Down Expand Up @@ -204,7 +204,7 @@ def __init__(self,prog):
header_test = lib.checkFastaHeaders(args.input, args.header_length)
if not header_test:
lib.log.error("Fasta headers on your input have more characters than the max (16), reformat headers to continue.")
os._exit(1)
sys.exit(1)

#setup augustus parallel command
AUGUSTUS_PARALELL = os.path.join(parentdir, 'bin', 'augustus_parallel.py')
Expand Down Expand Up @@ -248,7 +248,7 @@ def __init__(self,prog):
#check for masked genome here
if not os.path.isfile(MaskGenome) or lib.getSize(MaskGenome) < 10:
lib.log.error("RepeatMasking failed, check log files.")
os._exit(1)
sys.exit(1)

#load contig names and sizes into dictionary.
ContigSizes = {}
Expand All @@ -258,7 +258,7 @@ def __init__(self,prog):
ContigSizes[rec.id] = len(rec.seq)
else:
lib.log.error("Error, duplicate contig names, exiting")
os._exit(1)
sys.exit(1)

#check for previous files and setup output files
Predictions = os.path.join(args.out, 'predict_misc', 'gene_predictions.gff3')
Expand Down Expand Up @@ -293,7 +293,7 @@ def __init__(self,prog):
genesources.append(source)
if not genesources:
lib.log.error("Maker2 GFF not parsed correctly, no gene models found, exiting.")
os._exit(1)
sys.exit(1)
for i in genesources:
if i == 'maker':
output.write("ABINITIO_PREDICTION\t%s\t1\n" % i)
Expand Down Expand Up @@ -373,7 +373,7 @@ def __init__(self,prog):
#check for protein evidence/format as needed
p2g_out = os.path.join(args.out, 'predict_misc', 'exonerate.out')
prot_temp = os.path.join(args.out, 'predict_misc', 'proteins.combined.fa')
P2G = os.path.join(parentdir, 'bin','funannotate-p2g.py')
P2G = os.path.join(parentdir, 'bin', 'funannotate-p2g.py')
if not args.exonerate_proteins:
if args.protein_evidence:
if os.path.isfile(prot_temp):
Expand Down Expand Up @@ -412,7 +412,7 @@ def __init__(self,prog):
subprocess.call([ExoConverter, exonerate_out], stdout = output, stderr = FNULL)
except OSError:
lib.log.error("$EVM_HOME variable is incorrect, please double-check: %s" % EVM)
os._exit(1)
sys.exit(1)
Exonerate = os.path.abspath(Exonerate)
#now run exonerate2 hints for Augustus
exonerate2hints = os.path.join(AUGUSTUS_BASE, 'scripts', 'exonerate2hints.pl')
Expand Down Expand Up @@ -448,7 +448,7 @@ def __init__(self,prog):
GeneMark = os.path.join(args.out, 'predict_misc', 'genemark.evm.gff3')
with open(GeneMark, 'w') as output:
with open(GeneMarkTemp, 'rU') as input:
lines = input.read().replace("Augustus","GeneMark")
lines = input.read().replace("Augustus", "GeneMark")
output.write(lines)

if args.augustus_gff:
Expand Down Expand Up @@ -556,7 +556,7 @@ def __init__(self,prog):
GeneMark = os.path.join(args.out, 'predict_misc', 'genemark.evm.gff3')
with open(GeneMark, 'w') as output:
with open(GeneMarkTemp, 'rU') as input:
lines = input.read().replace("Augustus","GeneMark")
lines = input.read().replace("Augustus", "GeneMark")
output.write(lines)
else: #have training parameters file, so just run genemark with
GeneMarkGFF3 = os.path.join(args.out, 'predict_misc', 'genemark.gff')
Expand Down Expand Up @@ -586,7 +586,7 @@ def __init__(self,prog):
subprocess.call(['perl', Converter, GeneMarkTemp], stdout = output, stderr = FNULL)
with open(GeneMark, 'w') as output:
with open(GeneMarkTemp2, 'rU') as input:
lines = input.read().replace("Augustus","GeneMark")
lines = input.read().replace("Augustus", "GeneMark")
output.write(lines)

else:
Expand All @@ -602,7 +602,7 @@ def __init__(self,prog):
GeneMark = os.path.join(args.out, 'predict_misc', 'genemark.evm.gff3')
with open(GeneMark, 'w') as output:
with open(GeneMarkTemp, 'rU') as input:
lines = input.read().replace("Augustus","GeneMark")
lines = input.read().replace("Augustus", "GeneMark")
output.write(lines)

if not Augustus:
Expand Down Expand Up @@ -644,7 +644,7 @@ def __init__(self,prog):
lib.log.error("BUSCO training of Augusus failed, check busco logs, exiting")
#remove the augustus training config folder
shutil.rmtree(os.path.join(AUGUSTUS, 'species', aug_species))
os._exit(1)
sys.exit(1)
#proper training files exist, now run EVM on busco models to get high quality predictions.
lib.log.info("BUSCO predictions complete, now formatting for EVM")
#move the busco folder now where it should reside
Expand Down Expand Up @@ -735,12 +735,12 @@ def __init__(self,prog):
total = lib.countGFFgenes(EVM_busco)
except IOError:
lib.log.error("EVM did not run correctly, output file missing")
os._exit(1)
sys.exit(1)
#check number of gene models, if 0 then failed, delete output file for re-running
if total < 1:
lib.log.error("Evidence modeler has failed, exiting")
os.remove(EVM_busco)
os._exit(1)
sys.exit(1)
else:
lib.log.info('{0:,}'.format(total) + ' total gene models from EVM')
#move EVM folder to predict folder
Expand Down Expand Up @@ -787,7 +787,7 @@ def __init__(self,prog):
#just double-check that you've gotten here and both Augustus/GeneMark are finished
if not any([Augustus, GeneMark]):
lib.log.error("Augustus or GeneMark prediction is missing, check log files for errors")
os._exit(1)
sys.exit(1)

#GeneMark can fail if you try to pass a single contig, check file length
GM_check = lib.line_count(GeneMark)
Expand Down Expand Up @@ -902,12 +902,12 @@ def __init__(self,prog):
total = lib.countGFFgenes(EVM_out)
except IOError:
lib.log.error("EVM did not run correctly, output file missing")
os._exit(1)
sys.exit(1)
#check number of gene models, if 0 then failed, delete output file for re-running
if total < 1:
lib.log.error("Evidence modeler has failed, exiting")
os.remove(EVM_out)
os._exit(1)
sys.exit(1)
else:
lib.log.info('{0:,}'.format(total) + ' total gene models from EVM')

Expand Down Expand Up @@ -1037,4 +1037,4 @@ def __init__(self,prog):
os.rename('funannotate-EVM.log', os.path.join(args.out, 'logfiles', 'funannotate-EVM.log'))
if os.path.isfile('funannotate-p2g.log'):
os.rename('funannotate-p2g.log', os.path.join(args.out, 'logfiles', 'funannotate-p2g.log'))
os._exit(1)
sys.exit(1)
4 changes: 2 additions & 2 deletions funannotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
for i in range(0,num_lines))
return "\n".join(lines)

version = '0.3.7'
version = '0.3.8'

default_help = """
Usage: funannotate <command> <arguments>
Expand Down Expand Up @@ -95,6 +95,7 @@ def fmtcols(mylist, cols):
Arguments: -i, --input Multi-fasta genome file. (Required)
-o, --output Sorted by size and relabeled output file. (Required)
-b, --base Base name to relabel contigs. Default: scaffold
--minlen Shorter contigs are discarded. Default: 0
Written by Jon Palmer (2016) [email protected]
""" % (sys.argv[1], version)
Expand Down Expand Up @@ -316,7 +317,6 @@ def fmtcols(mylist, cols):
if len(arguments) > 0:
cmd = os.path.join(script_path, 'setup.sh')
arguments.insert(0, cmd)
print [cmd, 'dep']
if '--all' in arguments:
subprocess.call(cmd, cwd = script_path)
elif '--dep' in arguments:
Expand Down
8 changes: 6 additions & 2 deletions lib/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,15 +842,19 @@ def MemoryCheck():
mem = psutil.virtual_memory()
RAM = int(mem.total)
return round(RAM / 1024000000)
def SystemInfo():

def systemOS():
    """Return a short, human-readable description of the host operating system.

    Returns:
        'MacOSX <version>' on macOS, '<distribution> <release>' on Linux when
        the distribution can be queried, otherwise the raw ``sys.platform``
        string.
    """
    if sys.platform == 'darwin':
        system_os = 'MacOSX ' + platform.mac_ver()[0]
    elif sys.platform.startswith('linux'):
        # Python 2 reports 'linux2' (or 'linux3'), Python 3.3+ reports 'linux';
        # an exact comparison against 'linux' never matches on Python 2.
        linux_distribution = getattr(platform, 'linux_distribution', None)
        if linux_distribution is None:
            # platform.linux_distribution() is not available on newer
            # interpreters; fall back to the platform identifier.
            system_os = sys.platform
        else:
            linux_version = linux_distribution()
            system_os = linux_version[0] + ' ' + linux_version[1]
    else:
        system_os = sys.platform
    return system_os

def SystemInfo():
    """Log one line describing the host: OS, core count, RAM estimate and Python version."""
    host_os = systemOS()
    # major.minor.micro of the running interpreter
    interpreter_version = '.'.join(str(part) for part in sys.version_info[:3])
    summary_fields = (host_os, multiprocessing.cpu_count(), MemoryCheck(), interpreter_version)
    log.info("OS: %s, %i cores, ~ %i GB RAM. Python: %s" % summary_fields)

Expand Down

0 comments on commit 1ba402a

Please sign in to comment.