From 04b7f6121761c13a301cbf5d6309ee8fe6b67c40 Mon Sep 17 00:00:00 2001 From: kozlov Date: Tue, 28 Nov 2017 12:25:41 +0100 Subject: [PATCH] add alignment validation command which also removes gap-only sites (--check) --- src/CommandLineParser.cpp | 23 +++++++++++++++++------ src/Options.cpp | 6 ++++++ src/main.cpp | 22 ++++++++++++++++++++++ src/types.hpp | 3 ++- src/version.h | 2 +- 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/CommandLineParser.cpp b/src/CommandLineParser.cpp index 79e20fa0..a21a8985 100644 --- a/src/CommandLineParser.cpp +++ b/src/CommandLineParser.cpp @@ -49,6 +49,7 @@ static struct option long_options[] = {"support", no_argument, 0, 0 }, /* 30 */ {"terrace", no_argument, 0, 0 }, /* 31 */ {"terrace-maxsize", required_argument, 0, 0 }, /* 32 */ + {"check", no_argument, 0, 0 }, /* 33 */ { 0, 0, 0, 0 } }; @@ -402,6 +403,10 @@ void CommandLineParser::parse_options(int argc, char** argv, Options &opts) + string(optarg) + ", please provide a positive integer number!"); } break; + case 33: /* check */ + opts.command = Command::check; + num_commands++; + break; default: throw OptionException("Internal error in option parsing"); } @@ -417,11 +422,16 @@ void CommandLineParser::parse_options(int argc, char** argv, Options &opts) /* check for mandatory options for each command */ if (opts.command == Command::evaluate || opts.command == Command::search || opts.command == Command::bootstrap || opts.command == Command::all || - opts.command == Command::terrace) + opts.command == Command::terrace || opts.command == Command::check) { if (opts.msa_file.empty()) throw OptionException("You must specify a multiple alignment file with --msa switch"); + } + if (opts.command == Command::evaluate || opts.command == Command::search || + opts.command == Command::bootstrap || opts.command == Command::all || + opts.command == Command::terrace) + { if (opts.model_file.empty()) throw OptionException("You must specify an evolutionary model with --model switch"); } @@ -495,15 +505,16 @@ void CommandLineParser::print_help() cout << "\n" "Commands (mutually exclusive):\n" - " --help display help information.\n" - " --version display version information.\n" - " --evaluate evaluate the likelihood of a tree.\n" + " --help display help information\n" + " --version display version information\n" + " --evaluate evaluate the likelihood of a tree\n" " --search ML tree search.\n" - " --bootstrap bootstrapping.\n" + " --bootstrap bootstrapping\n" " --all all-in-one (ML search + bootstrapping).\n" " --support compute bipartition support for a given reference tree (e.g., best ML tree)\n" - " and a set of replicate trees (e.g., from a bootstrap analysis) \n" + " and a set of replicate trees (e.g., from a bootstrap analysis)\n" " --terrace check whether tree lies on a phylogenetic terrace \n" + " --check check alignment correctness and remove empty columns/rows\n" "\n" "Input and output options:\n" " --tree FILE | rand{N} | pars{N} starting tree: rand(om), pars(imony) or user-specified (newick file)\n" diff --git a/src/Options.cpp b/src/Options.cpp index b4d2cb79..534f2d69 100644 --- a/src/Options.cpp +++ b/src/Options.cpp @@ -125,6 +125,12 @@ std::ostream& operator<<(std::ostream& stream, const Options& opts) case Command::support: stream << "Compute bipartition support"; break; + case Command::terrace: + stream << "Count/enumerate trees on a phylogenetic terrace"; + break; + case Command::check: + stream << "Alignment validation"; + break; default: break; } diff --git a/src/main.cpp b/src/main.cpp index b6c2f57e..c3b3d63f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -385,10 +385,16 @@ void load_msa(RaxmlInstance& instance) /* check alignment */ if (!opts.force_mode) + { + LOG_VERB_TS << "Validating alignment... " << endl; check_msa(instance); + } if (opts.use_pattern_compression) + { + LOG_VERB_TS << "Compressing alignment patterns... " << endl; parted_msa.compress_patterns(); + } // if (parted_msa.part_count() > 1) // instance.terrace_wrapper.reset(new TerraceWrapper(parted_msa)); @@ -1172,6 +1178,22 @@ int main(int argc, char** argv) check_terrace(instance, tree); break; } + case Command::check: + { + instance.opts.use_pattern_compression = false; + init_part_info(instance); + load_msa(instance); + if (instance.opts.start_tree == StartingTree::user) + { + LOG_INFO << "Loading tree from: " << instance.opts.tree_file << endl << endl; + if (!sysutil_file_exists(instance.opts.tree_file)) + throw runtime_error("File not found: " + instance.opts.tree_file); + instance.start_tree_stream.reset(new NewickStream(instance.opts.tree_file, std::ios::in)); + Tree tree = generate_tree(instance, instance.opts.start_tree); + } + LOG_INFO << "Alignment can be successfully read by RAxML-NG." << endl << endl; + break; + } case Command::none: default: LOG_ERROR << "Unknown command!" << endl; diff --git a/src/types.hpp b/src/types.hpp index 8f9c37cf..e3d68462 100644 --- a/src/types.hpp +++ b/src/types.hpp @@ -18,7 +18,8 @@ enum class Command bootstrap, all, support, - terrace + terrace, + check }; enum class FileFormat diff --git a/src/version.h b/src/version.h index 42cb5f4f..8cbbc091 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1,2 @@ #define RAXML_VERSION "0.5.1 BETA" -#define RAXML_DATE "27.10.2017" +#define RAXML_DATE "28.11.2017"