-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jon Palmer
authored and
Jon Palmer
committed
Dec 13, 2016
1 parent
0d7cdf5
commit 0abf4d9
Showing
3 changed files
with
2,125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
seqfile = INPUTFILEHERE | ||
treefile = INPUTTREEHERE | ||
outfile = OUTPUTFILEHERE | ||
noisy = 9 * 0,1,2,3,9: how much rubbish on the screen | ||
verbose = 1 * 1: detailed output, 0: concise output | ||
runmode = 0 * 0: user tree; 1: semi-automatic; 2: automatic | ||
* 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise | ||
|
||
seqtype = 1 * 1:codons; 2:AAs; 3:codons-->AAs | ||
CodonFreq = 2 * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table | ||
clock = 0 * 0: no clock, unrooted tree, 1: clock, rooted tree | ||
aaDist = 0 * 0:equal, +:geometric; -:linear, {1-5:G1974,Miyata,c,p,v} | ||
model = 0 | ||
|
||
NSsites = 0 | ||
* 0:one w; 1:NearlyNeutral; 2:PositiveSelection; 3:discrete; | ||
* 4:freqs; 5:gamma;6:2gamma;7:beta;8:beta&w;9:betaγ10:3normal | ||
icode = 0 * 0:standard genetic code; 1:mammalian mt; 2-10:see below | ||
Mgene = 0 * 0:rates, 1:separate; 2:pi, 3:kappa, 4:all | ||
|
||
fix_kappa = 0 * 1: kappa fixed, 0: kappa to be estimated | ||
kappa = 1 * initial or fixed kappa | ||
fix_omega = 0 * 1: omega or omega_1 fixed, 0: estimate | ||
omega = 1 * initial or fixed omega, for codons or codon-based AAs | ||
ncatG = 10 * # of categories in the dG or AdG models of rates | ||
|
||
getSE = 0 * 0: don't want them, 1: want S.E.s of estimates | ||
RateAncestor = 0 * (0,1,2): rates (alpha>0) or ancestral states (1 or 2) | ||
Small_Diff = .45e-6 | ||
cleandata = 1 * remove sites with ambiguity data (1:yes, 0:no)? | ||
fix_blength = 0 * 0: ignore, -1: random, 1: initial, 2: fixed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys, os, subprocess, shutil, argparse, inspect | ||
from Bio import SeqIO | ||
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) | ||
parentdir = os.path.dirname(currentdir) | ||
sys.path.insert(0, parentdir) | ||
import lib.library as lib | ||
|
||
|
||
#setup menu with argparse | ||
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter): | ||
def __init__(self, prog): | ||
super(MyFormatter, self).__init__(prog, max_help_position=48) | ||
parser = argparse.ArgumentParser(prog='funannotate-predict.py', usage="%(prog)s [options] -i genome.fasta", | ||
description = '''Script that adds a proteome to the outgroups.''', | ||
epilog = """Written by Jon Palmer (2016) [email protected]""", | ||
formatter_class = MyFormatter) | ||
parser.add_argument('-i', '--input', required=True, help='Proteome in FASTA format') | ||
parser.add_argument('--species', required=True, help='Species name "binomial in quotes"') | ||
parser.add_argument('--busco_db', default='dikarya', required=True, help='BUSCO database to use') | ||
parser.add_argument('--cpus', default=2, type=int, help='Number of CPUs to use') | ||
args=parser.parse_args() | ||
|
||
#get base name | ||
species = args.species.replace(' ', '_').lower()+'.'+args.busco_db | ||
OUTGROUPS = os.path.join(parentdir, 'DB', 'outgroups') | ||
|
||
#create log file | ||
log_name = species+'-add2outgroups.log' | ||
if os.path.isfile(log_name): | ||
os.remove(log_name) | ||
|
||
#initialize script, log system info and cmd issue at runtime | ||
lib.setupLogging(log_name) | ||
FNULL = open(os.devnull, 'w') | ||
cmd_args = " ".join(sys.argv)+'\n' | ||
lib.log.debug(cmd_args) | ||
print "-------------------------------------------------------" | ||
lib.SystemInfo() | ||
|
||
#get version of funannotate | ||
version = lib.get_version() | ||
lib.log.info("Running %s" % version) | ||
|
||
#check buscos, download if necessary | ||
if not os.path.isdir(os.path.join(parentdir, 'DB', args.busco_db)): | ||
lib.download_buscos(args.busco_db) | ||
|
||
ProtCount = lib.countfasta(args.input) | ||
lib.log.info('{0:,}'.format(ProtCount) + ' protein records loaded') | ||
|
||
#convert to proteins and screen with busco | ||
lib.log.info("Looking for BUSCO models with %s DB" % args.busco_db) | ||
BUSCODB = os.path.join(parentdir, 'DB', args.busco_db) | ||
BUSCO = os.path.join(parentdir, 'util', 'funannotate-BUSCO2.py') | ||
cmd = [sys.executable, BUSCO, '-i', os.path.abspath(args.input), '-m', 'proteins', '--lineage', BUSCODB, '-o', species, '--cpu', str(args.cpus), '-f'] | ||
lib.runSubprocess(cmd, '.', lib.log) | ||
|
||
#check that it ran correctly | ||
busco_results = os.path.join('run_'+species, 'full_table_'+species+'.tsv') | ||
if not lib.checkannotations(busco_results): | ||
lib.log.error("BUSCO failed, check logfile") | ||
sys.exit(1) | ||
nameChange = {} | ||
with open(busco_results, 'rU') as input: | ||
for line in input: | ||
if line.startswith('#'): | ||
continue | ||
cols = line.split('\t') | ||
if cols[1] == 'Complete': | ||
if not cols[2] in nameChange: | ||
nameChange[cols[2]] = cols[0] | ||
else: | ||
lib.log.error("Duplicate ID found: %s %s. Removing from results" % (cols[2], cols[0])) | ||
del nameChange[cols[2]] | ||
|
||
#output counts | ||
lib.log.info('{0:,}'.format(len(nameChange)) + ' BUSCO models found') | ||
|
||
#index the proteome for parsing | ||
SeqRecords = SeqIO.index(args.input, 'fasta') | ||
|
||
#setup output proteome | ||
busco_out = os.path.join(OUTGROUPS, species+'_buscos.fa') | ||
with open(busco_out, 'w') as output: | ||
for k,v in nameChange.items(): | ||
rec = SeqRecords[k] | ||
output.write('>%s\n%s\n' % (v, rec.seq)) | ||
lib.log.info("Results written to: %s" % busco_out) | ||
|
||
#clean up your mess | ||
shutil.rmtree('run_'+species) | ||
shutil.rmtree('tmp') | ||
|
||
|
Oops, something went wrong.