diff --git a/README.md b/README.md index e29df83..aa646b5 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ This would perform a full search for the optimal (lowest-scoring) MUL-tree on th | Option | Description | | ------ | ----------- | | -s | A file or string containing a bifurcating, rooted species tree in Newick format. This tree can either be singly-labeled or MUL. | -| -g | A file containing one or more rooted, Newick formatted gene trees. | +| -g | A file containing one or more bifurcating, rooted, Newick formatted gene trees. Gene trees with polytomies will be removed from the dataset. | | -h1 | A space separated list of nodes to search as the polyploid clade. Only used with `-t s`. If nothing is entered all nodes will be considered. | | -h2 | A space separated list of nodes to search as possible parental lineages for all nodes specified with `-h1`. If nothing is entered all possible nodes for the current `h1` will be considered. | | -d | An option to specify whether to do reconciliations to MUL-trees only (0), the singly-labeled tree only (1), or both (2). Default: 2 | diff --git a/data/test_data/gt-poly-test.txt b/data/test_data/gt-poly-test.txt new file mode 100644 index 0000000..8b96b3b --- /dev/null +++ b/data/test_data/gt-poly-test.txt @@ -0,0 +1,3 @@ +((((((1_x,1_y),1_z),1_B),1_A),1_C),1_D) +((((((1_x,1_y),1_z),1_B),1_A),(((2_x,2_y),2_z),1_C)),1_D) +(((((1_x,1_y,1_z),1_B),1_A),1_C),1_D) \ No newline at end of file diff --git a/docs/readme.html b/docs/readme.html index a98abc4..4e129eb 100644 --- a/docs/readme.html +++ b/docs/readme.html @@ -228,7 +228,8 @@
-g
: A file containing newick formatted gene trees.This file should contain one or more Newick formatted gene trees, with one tree per line in the file.
+This file should contain one or more bifurcating, Newick formatted gene trees, with one tree per line in the file. Currently, gene trees with + unresolved nodes (polytomies) are not supported as they falsely increase the number of losses counted in that tree.
The tip labels in the gene trees must end with _[species label] where [species label] matches a tip label in the species tree This is necessary so GRAMPA can initialize the mappings correctly.
Alternatively, if you wish to reconcile to only a single gene tree, you can simply paste the tree string into the command line.
@@ -482,7 +484,7 @@-h1
and -h2
are not speci
-g
: A file containing newick formatted gene trees.This file should contain one or more Newick formatted gene trees, with one tree per line in the file.
+This file should contain one or more bifurcating, Newick formatted gene trees, with one tree per line in the file. Currently, gene trees with + unresolved nodes (polytomies) are not supported as they falsely increase the number of losses counted in that tree.
The tip labels in the gene trees must end with _[species label] where [species label] matches a tip label in the species tree This is necessary so GRAMPA can initialize the mappings correctly.
Alternatively, if you wish to reconcile to only a single gene tree, you can simply paste the tree string into the command line.
diff --git a/grampa.py b/grampa.py index 2e62e01..db4ee64 100755 --- a/grampa.py +++ b/grampa.py @@ -16,7 +16,7 @@ import pickle sys.path.append("lib/"); -import reconcore as RC, mul_recon as ALG, opt_parse as OP, mul_tree as MT, spec_tree as ST, gene_tree as GT, global_vars as globs, mul_out as OUT +import lib.reconcore as RC, mul_recon as ALG, opt_parse as OP, mul_tree as MT, spec_tree as ST, gene_tree as GT, global_vars as globs, mul_out as OUT def grampa(starttime): ########################### @@ -56,6 +56,8 @@ def grampa(starttime): step = RC.printStep(step, "# " + RC.getDateTime() + " --> STEP " + str(step) + " " + RC.getLogTime() + ": Counting MUL-trees"); num_mul_trees = MT.countMULTrees(hybrid_nodes, copy_nodes, st, sinfo, starttime); + else: + num_mul_trees = 0; ########################### ### Gene trees @@ -83,7 +85,7 @@ def grampa(starttime): if result[2]: num_skipped += 1; # Parsing the gene trees to get info about each node in each tree. - + #sys.exit(); if num_skipped == len(gene_trees): RC.errorOut(14, "Couldn't find any gene trees in your gene tree input file (-g)!"); # If the input is a file, we assume each line contains one gene tree. @@ -92,7 +94,7 @@ def grampa(starttime): ## Reading the input files. 
if globs.stats: step_start_time = RC.report_stats("Read gene trees", pids, step_start_time, prog_start_time, globs.output_directory); - + ########################### OUT.logOut(st, str(len(gene_trees_filtered)), hybrid_nodes, copy_nodes, globs.gene_tree_input, globs.h1_input, globs.h2_input); diff --git a/lib/gene_tree.py b/lib/gene_tree.py index d806997..dfd1d56 100644 --- a/lib/gene_tree.py +++ b/lib/gene_tree.py @@ -1,4 +1,4 @@ -import recontree as RT, reconcore as RC, global_vars as globs +import recontree as RT, reconcore as RC, global_vars as globs, sys ############################################################################# def readGeneTree(gene_tree_input): @@ -22,7 +22,7 @@ def readGeneTree(gene_tree_input): # Tries the gene tree parsing code and if anything goes wrong, catches exception and filters the tree. if len([g for g in ginfo if ginfo[g][2] != 'tip']) != len([g for g in ginfo if ginfo[g][2] == 'tip']) - 1: - gene_tree = ["# This line may not contain a tree, or if so it may be unrooted -- Filtering."]; + gene_tree = ["# This line may not contain a tree, or if so it may be unrooted or contain a polytomy -- Filtering."]; return gene_num, gene_tree, True; # Another check for gene tree parsing and formatting errors. diff --git a/lib/opt_parse.py b/lib/opt_parse.py index f737fea..978cebb 100644 --- a/lib/opt_parse.py +++ b/lib/opt_parse.py @@ -14,7 +14,7 @@ def optParse(errorflag): parser = argparse.ArgumentParser(description="GRAMPA: Gene-tree Reconciliation Algorithm with MUL-trees for Polyploid Analysis."); parser.add_argument("-s", dest="spec_tree", help="A file or string containing a ROOTED, bifurcating, newick formatted species tree in newick format on which to search for polyploid events."); - parser.add_argument("-g", dest="gene_input", help="A file containing one or more ROOTED, bifurcating, newick formatted gene trees to reconcile. The labels in the gene tree MUST end with '_[species name]' and contain no other underscores. 
Also accepts a single tree string."); + parser.add_argument("-g", dest="gene_input", help="A file containing one or more ROOTED, bifurcating (no polytomies), newick formatted gene trees to reconcile. The labels in the gene tree MUST end with '_[species name]' and contain no other underscores. Also accepts a single tree string."); parser.add_argument("-h1", dest="h1_spec", help="A space separated list of species labels or internal nodes that define the polyploid clade. Example: 'x,y,z y,z' or '2 4'", default=False); parser.add_argument("-h2", dest="h2_spec", help="A space separated list of species labels or internal node labels that make up the clade that you wish to place the second polyploid clade sister to. Example: 'c'", default=False); parser.add_argument("-c", dest="group_cap", help="The maxmimum number of groups to consider for any gene tree. Default: 8. Max value: 15.", type=int, default=8);