Skip to content

Commit

Permalink
fixes and updates to v0.5.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Palmer authored and Jon Palmer committed Dec 18, 2016
1 parent f3e03fd commit 5899e90
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 41 deletions.
20 changes: 10 additions & 10 deletions bin/funannotate-compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self,prog):
parser.add_argument('--num_orthos', default=500, type=int, help='Number of Single-copy orthologs to run with RAxML')
parser.add_argument('--outgroup', help='Name of species for RAxML outgroup')
parser.add_argument('--eggnog_db', default='fuNOG', help='EggNog database')
parser.add_argument('--run_dnds', action='store_true', help='Run dN/dS analysis with codeML for each ortholog (long runtime)')
parser.add_argument('--run_dnds', choices=['estimate', 'full'], help='Run dN/dS analysis with codeML for each ortholog (long runtime)')
parser.add_argument('--proteinortho', help='Pre-computed ProteinOrtho POFF')
args=parser.parse_args()

Expand Down Expand Up @@ -753,13 +753,6 @@ def __init__(self,prog):
with open(KaKstranscript, 'w') as tranout:
for i in proteins:
SeqIO.write(SeqTranscripts[i], tranout, 'fasta')
#calculate dN/dS
#DnDs,M1M2p,M7M8p = lib.rundNdS(KaKstranscript, ID, ortho_folder)
#else:
#DnDs = 'NC'
#M1M2p = 'NC'
#M7M8p = 'NC'

#write to output
if len(eggs) > 0:
eggs = ', '.join(str(v) for v in eggs)
Expand All @@ -776,7 +769,10 @@ def __init__(self,prog):
if args.run_dnds:
#multiprocessing dN/dS on list of folders
dNdSList = lib.get_subdirs(ortho_folder)
lib.runMultiProgress(lib.rundNdS, dNdSList, args.cpus)
if args.run_dnds == 'estimate':
lib.runMultiProgress(lib.rundNdSestimate, dNdSList, args.cpus)
else:
lib.runMultiProgress(lib.rundNdSexhaustive, dNdSList, args.cpus)

#after all data is run, then parse result log files, return dictionary
dNdSresults = lib.parsedNdS(ortho_folder)
Expand All @@ -791,7 +787,11 @@ def __init__(self,prog):
dNdS = dNdSresults.get(cols[0])
else:
dNdS = ('NC', 'NC', 'NC')
output.write("%s\t%s (%.4f,%.4f)\t%s\t%s\t%s\n" % (cols[0], dNdS[0], round(float(dNdS[1]),4), round(float(dNdS[2]),4), cols[1], cols[2], cols[3]))
if args.run_dnds == 'estimate':
output.write("%s\t%s (NC,NC)\t%s\t%s\t%s\n" % (cols[0], dNdS[0], cols[1], cols[2], cols[3]))
else:
output.write("%s\t%s (%.4f,%.4f)\t%s\t%s\t%s\n" % (cols[0], dNdS[0], round(float(dNdS[1]),4), round(float(dNdS[2]),4), cols[1], cols[2], cols[3]))

#cleanup
os.remove(orthologstmp)

Expand Down
10 changes: 9 additions & 1 deletion bin/funannotate-contig_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ def Sortbysize(input):
contigs.append(rec.id)
return contigs

def countfasta(input):
count = 0
with open(input, 'rU') as f:
for line in f:
if line.startswith (">"):
count += 1
return count

def getFasta(sequences, header):
with open('query.fa', 'w') as fasta:
with open(sequences, 'rU') as input:
Expand Down Expand Up @@ -109,7 +117,7 @@ def runNucmer(query, reference, output):
os.remove('reference.fa')

print"------------------------------------"
print"%i input contigs, %i duplicated, %i written to file" % (len(scaffolds), (len(scaffolds) - len(keepers)), len(keepers))
print"%i input contigs, %i duplicated, %i written to file" % (countfasta(args.input), (len(scaffolds) - len(keepers)), len(keepers))

#finally write a new reference based on list of keepers
with open(args.out, 'w') as output:
Expand Down
4 changes: 2 additions & 2 deletions funannotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def flatten(l):
flatList.append(elem)
return flatList

version = '0.5.0'
version = '0.5.2'

default_help = """
Usage: funannotate <command> <arguments>
Expand Down Expand Up @@ -217,7 +217,7 @@ def flatten(l):
Optional: -o, --out Output folder name. Default: funannotate_compare
--cpus Number of CPUs to use. Default: 2
--run_dnds Calculate dN/dS ratio on all orthologs. Very long runtime.
--run_dnds Calculate dN/dS ratio on all orthologs. [estimate,full]
--go_fdr P-value for FDR GO-enrichment. Default: 0.05
--heatmap_stdev Cut-off for heatmap. Default: 1.0
--num_orthos Number of Single-copy orthologs to use for RAxML. Default: 500
Expand Down
74 changes: 48 additions & 26 deletions lib/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,27 +326,21 @@ def countGFFgenes(input):
return count

def runMultiProgress(function, inputList, cpus):
from progressbar import ProgressBar, Percentage
try:
from progressbar import AdaptiveETA
eta = AdaptiveETA()
except ImportError:
from progressbar import ETA
eta = ETA()
from time import sleep
#setup pool
p = multiprocessing.Pool(cpus)
#setup progress bar
widgets = [' Progress: ', Percentage(),' || ', eta]
pbar = ProgressBar(widgets=widgets, term_width=30, maxval=len(inputList)).start()
#setup results and split over cpus
tasks = len(inputList)
results = []
r = [p.apply_async(function, (x,), callback=results.append) for x in inputList]
for i in inputList:
results.append(p.apply_async(function, [i]))
#refresh pbar every 5 seconds
while len(results) != len(inputList):
pbar.update(len(results))
sleep(5)
pbar.finish()
while True:
incomplete_count = sum(1 for x in results if not x.ready())
if incomplete_count == 0:
break
sys.stdout.write(" Progress: %.2f%% \r" % (float(tasks - incomplete_count) / tasks * 100))
sys.stdout.flush()
time.sleep(1)
p.close()
p.join()

Expand Down Expand Up @@ -1988,9 +1982,8 @@ def simplestTreeEver(fasta, tree):
for rec in SeqIO.parse(input, 'fasta'):
ids.append(rec.id)
outfile.write('(%s,%s);' % (ids[0], ids[1]))


def rundNdS(folder):
def rundNdSexhaustive(folder):
FNULL = open(os.devnull, 'w')
#setup intermediate files
tmpdir = os.path.dirname(folder)
Expand Down Expand Up @@ -2025,6 +2018,43 @@ def rundNdS(folder):
for file in os.listdir(tmpdir):
if file.startswith(name+'.'):
os.rename(os.path.join(tmpdir, file), os.path.join(tmpdir, name, file))


def rundNdSestimate(folder):
FNULL = open(os.devnull, 'w')
#setup intermediate files
tmpdir = os.path.dirname(folder)
name = os.path.basename(folder)
transcripts = os.path.join(tmpdir, name+'.transcripts.fa')
prots = os.path.join(tmpdir, name+'.proteins.fa')
aln = os.path.join(tmpdir, name+'.aln')
codon = os.path.join(tmpdir, name+'.codon.aln')
tree = os.path.join(tmpdir, name+'.tree')
log = os.path.join(tmpdir, name+'.log')
finallog = os.path.join(tmpdir, name, name+'.log')
if not checkannotations(finallog):
num_seqs = countfasta(transcripts)
#Translate to protein space
translatemRNA(transcripts, prots)
#align protein sequences
alignMAFFT(prots, aln)
#convert to codon alignment
align2Codon(aln, transcripts, codon)
if checkannotations(codon):
if num_seqs > 2:
#now generate a tree using phyml
drawPhyMLtree(codon, tree)
else:
simplestTreeEver(transcripts, tree)
#now run codeml through ete3
etecmd = ['ete3', 'evol', '--alg', os.path.abspath(codon), '-t', os.path.abspath(tree), '--models', 'M0', '-o', name, '--clear_all', '--codeml_param', 'cleandata,1']
with open(log, 'w') as logfile:
logfile.write('\n%s\n' % ' '.join(etecmd))
subprocess.call(etecmd, cwd = tmpdir, stdout = logfile, stderr = logfile)
#clean up
for file in os.listdir(tmpdir):
if file.startswith(name+'.'):
os.rename(os.path.join(tmpdir, file), os.path.join(tmpdir, name, file))

def get_subdirs(a_dir):
return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
Expand Down Expand Up @@ -2069,14 +2099,6 @@ def chunkIt(seq, num):
last += avg
return out

def countKaks(folder, num):
allfiles = []
for file in os.listdir(folder):
if file.endswith('.fasta.axt'):
f = os.path.join(folder, file)
allfiles.append(f)
#split files by x chunks
return chunkIt(allfiles, num)

HEADER = '''
<!DOCTYPE html>
Expand Down
2 changes: 1 addition & 1 deletion sample_data/run_unit_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ cmd='funannotate annotate -i genome3 -e [email protected] --cpus 6 --iprscan iprs
echo $cmd; eval $cmd

#now run compare
cmd='funannotate compare -i genome1 genome2 genome3 --cpus 6 --outgroup botrytis_cinerea.dikarya'
cmd='funannotate compare -i genome1 genome2 genome3 --cpus 6 --outgroup botrytis_cinerea.dikarya --run_dnds estimate'
echo $cmd; eval $cmd

#clean up augustus training
Expand Down
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ else
fi
else
echo "HomeBrew installation not detected, specify DB installation directory"
outputdir='/usr/local/share/funannotate'
outputdir="$HOME/funannotate/"
echo -n "Default DB directory set to ($outputdir), continue [y/n]: "
read question1
if [ $question1 == 'n' ]; then
Expand Down

0 comments on commit 5899e90

Please sign in to comment.