From 42303e0317e0c1e47e82e7a3d761fdd3d3d46775 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Thu, 2 Apr 2020 19:06:11 -0400 Subject: [PATCH 01/13] Changing the way mapping codons to amino-acids deals with 'X' --- res/TemplateBatchFiles/SelectionAnalyses/FADE.bf | 8 ++++++-- .../libv3/models/model_functions.bf | 1 + res/TemplateBatchFiles/libv3/tasks/alignments.bf | 11 +++++++++-- src/core/batchlanruntime.cpp | 12 +++++++++++- src/core/strings.cpp | 12 +++++++----- 5 files changed, 34 insertions(+), 10 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf index 3eb44cb85..df470954c 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf @@ -167,6 +167,8 @@ KeywordArgument ("tree", "A rooted phylogenetic tree", null, "Please select fade.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (fade.alignment_info[utility.getGlobalValue("terms.data.partitions")], fade.alignment_info[utility.getGlobalValue("terms.data.name_mapping")] ); + + fade.partition_count = Abs (fade.partitions_and_trees); selection.io.json_store_key_value_pair (fade.json, terms.json.input, terms.json.partition_count,fade.partition_count); @@ -192,6 +194,7 @@ assert (((fade.partitions_and_trees[index])[terms.data.tree])[terms.trees.rooted fade.name_mapping = fade.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]; + if (utility.Has (fade.cache, terms.fade.cache.settings, "AssociativeList")) { fade.run_settings = fade.cache [terms.fade.cache.settings]; fade.prompts["grid"] = FALSE; @@ -247,6 +250,7 @@ if (utility.Has (fade.cache, terms.fade.cache.baseline, "AssociativeList")) { model [utility.getGlobalValue("terms.model.frequency_estimator")] = "frequencies.mle"; return model; } + fade.baseline_fit = estimators.FitSingleModel_Ext ( fade.filter_names, @@ -272,8 +276,6 @@ if (utility.Has (fade.run_settings, "method", "String")) { // =========== FIT BASELINE MODEL - - io.ReportProgressMessageMD ("FADE", "baseline", ">Fitted an alignment-wide model. " + selection.io.report_fit (fade.baseline_fit, 0, fade.alignment_sample_size ) + "\n\nTotal tree lengths by partition\n"); utility.ForEachPair (fade.baseline_fit[terms.branch_length], "_part_", "_value_", ' @@ -522,6 +524,8 @@ for (fade.residue = 0; fade.residue < 20; fade.residue += 1) { fade.trees.names = utility.Map (utility.Range (fade.partition_count, 1, 1), "_index_", "'fade.grid_tree_' + _index_"); fade.lf.components = {fade.partition_count * 2, 1}; + + utility.ForEachPair (fade.filter_names, "_index_", "_filter_", ' fade.model_assignment = { diff --git a/res/TemplateBatchFiles/libv3/models/model_functions.bf b/res/TemplateBatchFiles/libv3/models/model_functions.bf index bf1c15a24..d1085a84d 100644 --- a/res/TemplateBatchFiles/libv3/models/model_functions.bf +++ b/res/TemplateBatchFiles/libv3/models/model_functions.bf @@ -55,6 +55,7 @@ function model.ApplyModelToTree (id, tree, model_list, rules) { } + /* debug.log (tree); diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 3aa21a53b..01fae44f3 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -860,6 +860,9 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map seqPos = 0; codon = codon_sequence[seqPos][seqPos + 2]; currentAA = mapping[codon]; + if (currentAA == 0) { + currentAA = "X"; + } mismatch_count = 0; @@ -879,6 +882,7 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map if (mismatch_count == this_many_mm) { translString * 0; console.log (translString); + console.log ("\n"); console.log (codon_sequence); } assert(mismatch_count < this_many_mm, "A mismatch between codon and protein sequences at position " + aaPos + " (codon `seqPos`) : codon '" + codon_sequence[seqPos][seqPos + 2] + "'(`currentAA`) a.a. '`aa_sequence[aaPos]`'"); @@ -895,18 +899,21 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map if (advance) { if (copy_codon) { - if (currentAA == "X") { + if (currentAA == "X" && mapping[codon] == "X") { translString * "---"; } else { translString * codon; } } else { //fprintf (stdout, "Skipping codon ", codon, "\n"); - aaPos = aaPos - 1; + //aaPos = aaPos - 1; } seqPos += 3; codon = codon_sequence[seqPos][seqPos + 2]; currentAA = mapping[codon]; + if (currentAA == 0) { + currentAA = "X"; + } } } diff --git a/src/core/batchlanruntime.cpp b/src/core/batchlanruntime.cpp index d84fc0078..3de3aac2e 100644 --- a/src/core/batchlanruntime.cpp +++ b/src/core/batchlanruntime.cpp @@ -2424,7 +2424,17 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& current_progra try { source_object = _GetHBLObjectByTypeMutable (source_name, object_type, &object_index); } catch (const _String& error) { // handle cases when the source is not an HBL object - _CalcNode* tree_node = (_CalcNode*)FetchObjectFromVariableByType(&source_name, TREE_NODE); + + _CalcNode* tree_node = nil; + + + if (source_name.IsValidIdentifier(fIDAllowFirstNumeric|fIDAllowCompound)) { + tree_node = (_CalcNode*)FetchObjectFromVariableByType(&source_name, TREE_NODE); + } else{ + _String converted = source_name.ConvertToAnIdent(fIDAllowFirstNumeric|fIDAllowCompound); + tree_node = (_CalcNode*)FetchObjectFromVariableByType(&converted, TREE_NODE); + } + if (tree_node) { if (set_this_attribute == _String("MODEL")) { _String model_name = AppendContainerName(*GetIthParameter(2UL),current_program.nameSpacePrefix); diff --git a/src/core/strings.cpp b/src/core/strings.cpp index fc7a30b91..d06828ba7 100644 --- a/src/core/strings.cpp +++ b/src/core/strings.cpp @@ -1230,13 +1230,15 @@ const _String _String::ConvertToAnIdent(int options) const { first = false; } else { if ( hy_Valid_ID_Chars.is_valid(current_char)) { - if (allow_compounds && current_char == '.') { - first = true; - } converted << current_char; } else { - if (last_char != default_placeholder) { - converted << default_placeholder; + if (allow_compounds && current_char == '.') { + first = true; + converted << current_char; + } else { + if (last_char != default_placeholder) { + converted << default_placeholder; + } } } } From 9ee50efcc549ab6c631cbc4367029252133b9912 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Fri, 3 Apr 2020 19:32:47 -0400 Subject: [PATCH 02/13] Auto delete zero branch lengths --- .../modules/shared-load-file.bf | 28 +++++- res/TemplateBatchFiles/libv3/tasks/trees.bf | 96 +++++++++++++++---- src/core/alignment.cpp | 33 ++++--- 3 files changed, 123 insertions(+), 34 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index cb3a69b46..add841383 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -283,6 +283,33 @@ function doGTR (prefix) { trees, gtr_results); + + KeywordArgument ("kill-zero-lengths", "Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", "Yes"); + + kill0 = io.SelectAnOption ( + { + "Yes":"Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", + "No":"Keep all branches" + }, + "The set of properties to use in the model") == "Yes"; + + + if (kill0) { + for (index, tree; in; trees) { + deleted = {}; + if (^(prefix + ".selected_branches") / index) { + trees[index] = trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], (^(prefix + ".selected_branches"))[index], deleted); + } else { + trees[index] = trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], null, deleted); + } + + if (utility.Array1D (deleted)) { + io.ReportProgressMessageMD(prefix, 'selector', 'Deleted ' + Abs(deleted) + ' zero-length internal branches: \`' + Join (', ',utility.Values(deleted)) + '\`'); + } + } + } + + io.ReportProgressMessageMD (prefix, "nuc-fit", "* " + selection.io.report_fit (gtr_results, 0, 3*(^"`prefix`.sample_size"))); @@ -343,7 +370,6 @@ function doPartitionedMG (prefix, keep_lf) { utility.ForEach (scaler_variables, "_value_", "parameters.DeclareGlobal(_value_, None);parameters.SetValue(_value_, 3);"); - partitioned_mg_results = estimators.FitMGREV(filter_names, trees, codon_data_info [utility.getGlobalValue("terms.code")], { utility.getGlobalValue("terms.run_options.model_type"): utility.getGlobalValue("terms.local"), utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler"): scaler_variables, diff --git a/res/TemplateBatchFiles/libv3/tasks/trees.bf b/res/TemplateBatchFiles/libv3/tasks/trees.bf index e986dba35..1195fb80f 100644 --- a/res/TemplateBatchFiles/libv3/tasks/trees.bf +++ b/res/TemplateBatchFiles/libv3/tasks/trees.bf @@ -352,6 +352,46 @@ lfunction trees.branch_names(tree, respect_case) { } +/** + * @name trees.KillZeroBranches + * Given a tree dict and a list of matching parameter estimates + * this returns a modified tree dict with all zero-branch length + * internal branches removed and modifies in in place + * @param tree - dict of the tree object + * @param estimates - dict with branch length estimates + * @param branch_set - if not null, treat as test set and delete branches form it as well + * @param zero_internal -- store branches that were deleted here + * @return modified tree + */ + +lfunction trees.KillZeroBranches (tree, estimates, branch_set, zero_internal) { + + for (branch, value; in; tree [^"terms.trees.partitioned"]) { + if (value == ^"terms.tree_attributes.internal") { + if (estimates / branch) { + if ((estimates[branch])[^"terms.fit.MLE"] < 1e-10) { + zero_internal + branch; + } + } + } + } + + if (Abs (zero_internal) > 0) { // has zero internal branches + Topology T = tree[^"terms.trees.newick_annotated"]; + T - Columns(zero_internal); + if (None != branch_set) { + for (branch; in; zero_internal) { + branch_set - branch; + } + } + return trees.ExtractTreeInfoFromTopology (&T); + + } + + return tree; + +} + /** * @name trees.RootTree * @param {Dict} tree_info @@ -381,8 +421,8 @@ lfunction trees.RootTree(tree_info, root_on) { } /** - * @name trees.ExtractTreeInfo - * @param {String} tree_string + * @name trees.ExtractTreeInfoFromTopology + * @param {String} Topology * @returns a {Dictionary} of the following tree information : * - newick string * - newick string with branch lengths @@ -391,20 +431,11 @@ lfunction trees.RootTree(tree_info, root_on) { * - internal leaves * - list of models */ -lfunction trees.ExtractTreeInfo(tree_string) { - - if (Type (tree_string) == "AssociativeList") { - file_name = tree_string[^"terms.data.file"]; - tree_string = tree_string[^"terms.data.tree"]; - } else { - file_name = None; - } - - Topology T = tree_string; +lfunction trees.ExtractTreeInfoFromTopology(topology_object) { - branch_lengths = BranchLength(T, -1); - branch_names = BranchName(T, -1); + branch_lengths = BranchLength(^topology_object, -1); + branch_names = BranchName(^topology_object, -1); branch_count = utility.Array1D (branch_names) - 1; bls = {}; @@ -415,15 +446,15 @@ lfunction trees.ExtractTreeInfo(tree_string) { } } - GetInformation(modelMap, T); + GetInformation(modelMap, ^topology_object); leaves_internals = {}; - flat_tree = T ^ 0; + flat_tree = (^topology_object) ^ 0; trees.PartitionTree(flat_tree, leaves_internals); utility.ToggleEnvVariable("INCLUDE_MODEL_SPECS", TRUE); - T.str = "" + T; + T.str = "" + ^topology_object; utility.ToggleEnvVariable("INCLUDE_MODEL_SPECS", None); rooted = utility.Array1D ((flat_tree[(flat_tree[0])["Root"]])["Children"]) == 2; @@ -434,20 +465,45 @@ lfunction trees.ExtractTreeInfo(tree_string) { branch_count = None; return { - ^"terms.trees.newick": Format(T, 1, 0), - ^"terms.trees.newick_with_lengths": Format(T, 1, 1), + ^"terms.trees.newick": Format(^topology_object, 1, 0), + ^"terms.trees.newick_with_lengths": Format(^topology_object, 1, 1), ^"terms.branch_length": bls, ^"terms.trees.newick_annotated": T.str, ^"terms.trees.model_map": modelMap, ^"terms.trees.partitioned": leaves_internals, ^"terms.trees.model_list": Columns(modelMap), ^"terms.trees.rooted" : rooted, - ^"terms.trees.meta" : T.__meta, + ^"terms.trees.meta" : Eval(^(topology_object+".__meta")), ^"terms.data.file" : file_name }; } +/** + * @name trees.ExtractTreeInfo + * @param {String} tree_string + * @returns a {Dictionary} of the following tree information : + * - newick string + * - newick string with branch lengths + * - annotated string + * - model map + * - internal leaves + * - list of models + */ +lfunction trees.ExtractTreeInfo(tree_string) { + + if (Type (tree_string) == "AssociativeList") { + file_name = tree_string[^"terms.data.file"]; + tree_string = tree_string[^"terms.data.tree"]; + } else { + file_name = None; + } + + Topology T = tree_string; + return trees.ExtractTreeInfoFromTopology (&T); + +} + /** * @name trees.HasBranchLengths * @param {Dictionary} tree information object (e.g. as returned by LoadAnnotatedTopology) diff --git a/src/core/alignment.cpp b/src/core/alignment.cpp index 098e138a1..77c74ad9a 100644 --- a/src/core/alignment.cpp +++ b/src/core/alignment.cpp @@ -146,7 +146,7 @@ long CodonAlignStringsStep( double * const score_matrix best_choice = 0, i, choice, partial_codons[ 10 ]; // we need to multiply by 3 to get the NUC position // 3x5 codon specifications (negative indices) - long codon_spec_3x5[ 10 ][ 3 ] = { + static long const codon_spec_3x5[ 10 ][ 3 ] = { { 5, 4, 3 }, // 11100 { 5, 4, 2 }, // 11010 { 5, 4, 1 }, // 11001 @@ -159,7 +159,7 @@ long CodonAlignStringsStep( double * const score_matrix { 3, 2, 1 } // 00111 }; // 3x4 codon specifications (negative indices) - long codon_spec_3x4[ 4 ][ 3 ] = { + static long const long codon_spec_3x4[ 4 ][ 3 ] = { { 4, 3, 2 }, // 1110 { 4, 3, 1 }, // 1101 { 4, 2, 1 }, // 1011 @@ -168,15 +168,15 @@ long CodonAlignStringsStep( double * const score_matrix int local_shortcut_came_from_this_move = -1; - double choices[ HY_ALIGNMENT_TYPES_COUNT ], + double choices[ HY_ALIGNMENT_TYPES_COUNT ] = {-INFINITY}, max_score = -INFINITY, penalty; // store the scores of our choices in choices, // pre-initialize to -infinity - for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT; i++ ) { - choices[ i ] = -INFINITY; - } + //for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT; i++ ) { + // choices[ i ] = -INFINITY; + //} // if we're at least a CODON away from the edge... // (psst, r is CODONs remember?) @@ -388,13 +388,20 @@ long CodonAlignStringsStep( double * const score_matrix } // find the best possible choice - for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT - (!do_local); ++i ) { - /* if ( i > 0 ) - fprintf( stderr, ", " ); - fprintf( stderr, "( %ld, %.3g )", i, choices[ i ] ); */ - if ( choices[ i ] > max_score ) { - best_choice = i; - max_score = choices[ i ]; + + if (do_local) { + for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT ; ++i ) { + if ( choices[ i ] > max_score ) { + best_choice = i; + max_score = choices[ i ]; + } + } + } else { + for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT - 1; ++i ) { + if ( choices[ i ] > max_score ) { + best_choice = i; + max_score = choices[ i ]; + } } } From e8114a0c1f363bcaf7b514ed758712205b1d7ecd Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 4 Apr 2020 07:59:51 -0400 Subject: [PATCH 03/13] Fixing an initialization bug --- src/core/alignment.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/alignment.cpp b/src/core/alignment.cpp index 77c74ad9a..1ac9923a9 100644 --- a/src/core/alignment.cpp +++ b/src/core/alignment.cpp @@ -168,15 +168,15 @@ long CodonAlignStringsStep( double * const score_matrix int local_shortcut_came_from_this_move = -1; - double choices[ HY_ALIGNMENT_TYPES_COUNT ] = {-INFINITY}, + double choices[ HY_ALIGNMENT_TYPES_COUNT ], max_score = -INFINITY, penalty; // store the scores of our choices in choices, // pre-initialize to -infinity - //for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT; i++ ) { - // choices[ i ] = -INFINITY; - //} + for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT; i++ ) { + choices[ i ] = -INFINITY; + } // if we're at least a CODON away from the edge... // (psst, r is CODONs remember?) @@ -388,7 +388,6 @@ long CodonAlignStringsStep( double * const score_matrix } // find the best possible choice - if (do_local) { for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT ; ++i ) { if ( choices[ i ] > max_score ) { @@ -397,7 +396,7 @@ long CodonAlignStringsStep( double * const score_matrix } } } else { - for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT - 1; ++i ) { + for ( i = 0; i < HY_ALIGNMENT_TYPES_COUNT - 1 ; ++i ) { if ( choices[ i ] > max_score ) { best_choice = i; max_score = choices[ i ]; @@ -405,6 +404,7 @@ long CodonAlignStringsStep( double * const score_matrix } } + //fprintf( stderr, "\nscore: %.3g best: %ld\n", max_score, best_choice ); // assign the score to the current position From f1ed96911bce507448eb5cf072a37ffd38d7a2f6 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 4 Apr 2020 09:03:06 -0400 Subject: [PATCH 04/13] Update JSON objects if internal zero length branches got deleted --- .../SelectionAnalyses/modules/shared-load-file.bf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index add841383..1230d50d4 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -202,6 +202,7 @@ function load_file (prefix) { function store_tree_information () { // Place in own attribute called `tested` + selection.io.json_store_key_value_pair (json, None, utility.getGlobalValue("terms.json.tested"), selected_branches); /** this will return a dictionary of selected branches; one set per partition, like in @@ -305,8 +306,15 @@ function doGTR (prefix) { if (utility.Array1D (deleted)) { io.ReportProgressMessageMD(prefix, 'selector', 'Deleted ' + Abs(deleted) + ' zero-length internal branches: \`' + Join (', ',utility.Values(deleted)) + '\`'); + for (i,v; in; deleted) { + (gtr_results[^"terms.branch_length"])[index] - v; + } } } + for (i = 0; i < partition_count; i+=1) { + (partitions_and_trees[i])[^"terms.data.tree"] = trees[i]; + } + store_tree_information (); } From 0f23194af3bbaae6eb4282f26739ae83d84fdd20 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 4 Apr 2020 18:53:35 -0400 Subject: [PATCH 05/13] Fixing ambiguity handling in HyPhy SLAC; correctly record amino-acid codons for mixed codons --- res/TemplateBatchFiles/SelectionAnalyses/FADE.bf | 2 +- .../SelectionAnalyses/modules/selection_lib.ibf | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf index df470954c..dc458182b 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf @@ -328,7 +328,7 @@ if (utility.Has (fade.cache, terms.fade.cache.substitutions, "AssociativeList") fade.baseline_fit, {terms.run_options.retain_lf_object: TRUE}, None - ) + ); fade.baseline_fit[terms.likelihood_function] = "fade.ancestral.rebuild.likelihoodFunction"; } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf b/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf index 7a1caf74a..ef41a361c 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf @@ -68,7 +68,12 @@ lfunction selection.substitution_mapper (matrix, tree, ambig_lookup, pairwise_co } else { collect_residues = {}; - utility.ForEach ( ambig_lookup[-code-2], "_for_each_", "`&collect_residues`[`&integer_mapping`[_for_each_]] = 1"); + for (index, acode; in; ambig_lookup[-code-2]) { + if (acode) { + collect_residues[integer_mapping[index]] = 1; + } + } + console.log (collect_residues); code_lookup [code] = Join ("", utility.Keys (collect_residues)); } } @@ -94,7 +99,8 @@ lfunction selection.substitution_mapper (matrix, tree, ambig_lookup, pairwise_co "(`&result`[slac.substituton_mapper.key])[`&bname`] = `&branch_info`[slac.substituton_mapper.key]"); } + - return result; + return result; } From 998b25572f000d62532594c5d57cc0267166b93c Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 4 Apr 2020 19:16:13 -0400 Subject: [PATCH 06/13] Fixing ambiguity handling in HyPhy SLAC; correctly record amino-acid codons for mixed codons --- .../SelectionAnalyses/modules/selection_lib.ibf | 1 - 1 file changed, 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf b/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf index ef41a361c..5ca09b170 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/selection_lib.ibf @@ -73,7 +73,6 @@ lfunction selection.substitution_mapper (matrix, tree, ambig_lookup, pairwise_co collect_residues[integer_mapping[index]] = 1; } } - console.log (collect_residues); code_lookup [code] = Join ("", utility.Keys (collect_residues)); } } From fde340e31e52bddd855f094930b8cc668d3a252b Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Wed, 8 Apr 2020 22:23:13 -0400 Subject: [PATCH 07/13] Fixing name normalization bug --- src/core/strings.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/strings.cpp b/src/core/strings.cpp index d06828ba7..3e1d3d189 100644 --- a/src/core/strings.cpp +++ b/src/core/strings.cpp @@ -1209,8 +1209,8 @@ const _String _String::ConvertToAnIdent(int options) const { const char default_placeholder = '_'; char last_char = '\0'; - bool allow_compounds = options | fIDAllowCompound, - allow_first_numeric = options | fIDAllowFirstNumeric; + bool allow_compounds = options & fIDAllowCompound, + allow_first_numeric = options & fIDAllowFirstNumeric; unsigned long current_index = 0UL; From 68b4941d0c3961a5be8997c9d23a4cb183c76e7b Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Thu, 9 Apr 2020 10:49:31 -0400 Subject: [PATCH 08/13] Bug fixes --- res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf | 7 +++---- src/core/batchlan.cpp | 2 +- src/core/likefunc.cpp | 7 ++++++- src/core/likefunc2.cpp | 3 ++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf index 7cccf307d..cd269c2cc 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf @@ -553,10 +553,9 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa ^l = 0; } - /* - Export (lfe, ^lf_prop); - fprintf ("/Users/sergei/Desktop/prime.bf",CLEAR_FILE,lfe); - */ + /*Export (lfe, ^lf_prop); + fprintf ("/Users/sergei/Desktop/prime.bf",CLEAR_FILE,lfe);*/ + Optimize (results, ^lf_prop, { //"OPTIMIZATION_METHOD" : "gradient-descent", diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index b4ff1d6d1..3a1906f32 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -4434,7 +4434,7 @@ bool _ElementaryCommand::ConstructDataSet (_String&source, _ExecutionList&tar } else { if (operation_type == kReconstructAncestors || operation_type == kSampleAncestors) { - if (arguments.countitems()>4UL || arguments.countitems()==1L) { + if (arguments.countitems()>5UL || arguments.countitems()==1L) { throw operation_type.Enquote() & " expects 1-4 parameters: likelihood function ident (mandatory), an matrix expression to specify the list of partition(s) to reconstruct/sample from (optional), and, for ReconstructAncestors, an optional MARGINAL flag, plus an optional DOLEAVES flag."; } _ElementaryCommand * dsc = new _ElementaryCommand (operation_type == kReconstructAncestors ? 38 : 50); diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index bf0402624..bfe27cc51 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -8138,10 +8138,15 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c } long np = 1; + long sitesPerP = df->GetPatternCount(); #ifdef _OPENMP np = MIN(GetThreadCount(),omp_get_max_threads()); + if (np > sitesPerP) { + np = sitesPerP; + sitesPerP = 1; + } else #endif - long sitesPerP = df->GetPatternCount() / np + 1L; + sitesPerP = sitesPerP / np + 1L; #ifdef _UBER_VERBOSE_LF_DEBUG fprintf (stderr, "NORMAL compute lf \n"); diff --git a/src/core/likefunc2.cpp b/src/core/likefunc2.cpp index 0b5994637..77a7d139d 100644 --- a/src/core/likefunc2.cpp +++ b/src/core/likefunc2.cpp @@ -888,6 +888,7 @@ _List* _LikelihoodFunction::RecoverAncestralSequencesMarginal (long index, _Ma if (scaleDiff > 0) { ratio *= acquireScalerMultiplier(scaleDiff); } + //printf ("%g\n", ratio); supportValues.theData[mappedBranchID*shiftForTheNode + siteID*alphabetDimension + currentChar] = ratio; } blockTree->AddBranchToForcedRecomputeList (branchID+leafCount); @@ -896,7 +897,7 @@ _List* _LikelihoodFunction::RecoverAncestralSequencesMarginal (long index, _Ma _SimpleList conversion; _AVLListXL conversionAVL (&conversion); - _String codeBuffer (unitLength); + _String codeBuffer ((unsigned long)unitLength); _List *result = new _List; for (long k = 0L; k < matrixSize; k++) { From 2a6bc23066b91a4e002e4d769dab780880576bd5 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Fri, 10 Apr 2020 06:46:44 -0400 Subject: [PATCH 09/13] Bug fixes; PRIME imputes leaf states --- .../SelectionAnalyses/PRIME.bf | 69 +++++++++++++------ .../libv3/tasks/alignments.bf | 4 +- 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf index cd269c2cc..978d803f9 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf @@ -63,6 +63,7 @@ prime.parameter_site_alpha = "Site relative synonymous rate"; prime.parameter_site_beta_nuisance = "Site relative non-synonymous rate (untested branches)"; prime.parameter_property_prefix = "Site importance for property "; prime.parameter_site_beta = "Site log (baseline beta)"; +terms.prime_imputed_states = "Imputed States"; // default cutoff for printing to screen prime.pvalue = 0.1; @@ -81,7 +82,6 @@ prime.display_orders = { selection.io.startTimer (prime.json [terms.json.timers], "Total time", 0); - /*------------------------------------------------------------------------------ Key word arguments */ @@ -132,6 +132,16 @@ prime.property_set = io.SelectAnOption ( "Custom":"Load the set of properties from a file" }, "The set of properties to use in the model"); + +KeywordArgument ("impute-states", "Use site-level model fits to impute likely character states for each sequence", "No"); + +prime.impute_states = io.SelectAnOption ( + { + "Yes":"Impute marginal likelihoods for each codon at each sequence and each site", + "No": "Do not impute marginal likelihoods for each codon at each sequence and each site", + }, + "Impute likely states for sequences") == "Yes"; + KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'prime.json')", prime.codon_data_info [terms.json.json]); prime.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to"); @@ -326,7 +336,8 @@ prime.report.significant_site = {{"" + (1+((prime.filter_specification[prime.rep }}; -prime.site_results = {}; +prime.site_results = {}; +prime.imputed_leaf_states = {}; for (prime.partition_index = 0; prime.partition_index < prime.partition_count; prime.partition_index += 1) { @@ -402,7 +413,7 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p prime.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"prime.site_likelihood","prime.site_likelihood_property"}}, terms.mpi.Models : {{"prime.site.background_fel","prime.site.property_model"}}, terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/"), - terms.mpi.Variables : {{"prime.selected_branches","prime.pairwise_counts","prime.codon_data_info","prime.lambdas"}} + terms.mpi.Variables : {{"prime.selected_branches","prime.pairwise_counts","prime.codon_data_info","prime.lambdas","prime.impute_states","terms.prime_imputed_states"}} }); @@ -456,8 +467,11 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p io.ClearProgressBar (); prime.json [terms.json.MLE ] = {terms.json.headers : prime.table_headers, - terms.json.content : prime.site_results }; + terms.json.content : prime.site_results }; +if (prime.impute_states) { + (prime.json [terms.json.MLE])[terms.prime_imputed_states] = prime.imputed_leaf_states; +} for (key, value; in; prime.report.count) { io.ReportProgressMessageMD ("PRIME", "results", "** Found _" + value + "_ sites where " + key + " property was important at p <= " + prime.pvalue + "**"); @@ -542,7 +556,7 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa }*/ - // fit the universal alternative + // fit the universal alternative ^"prime.site_beta" = Eval (^"prime.site_beta"); LFCompute (^lf_prop,LF_START_COMPUTE); LFCompute (^lf_prop,results); @@ -575,6 +589,24 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa }*/ }); + character_map = None; + if (^"prime.impute_states") { + DataSet anc = ReconstructAncestors ( ^lf_prop, {{0}}, MARGINAL, DOLEAVES); + GetString (names, anc, -1); + GetDataInfo (codon_chars, ^((lfInfo["Datafilters"])[0]) , "CHARACTERS"); + + character_map = {}; + for (seq_id, seq_name; in; names) { + character_map [seq_name] = {}; + for (char, char_support; in; (anc.marginal_support_matrix)[seq_id][-1]) { + if (char_support > 1e-6) { + (character_map [seq_name])[codon_chars[char]] = char_support; + } + } + } + + } + alternative = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {}); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; @@ -602,10 +634,13 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa - return {"fel" : fel, + return { + "fel" : fel, utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, - utility.getGlobalValue("terms.model.residue_properties") : constrained_models}; + utility.getGlobalValue("terms.model.residue_properties") : constrained_models, + utility.getGlobalValue("terms.prime_imputed_states") : character_map + }; } /* echo to screen calls */ @@ -639,6 +674,7 @@ function prime.report.echo (prime.report.site, prime.report.partition, prime.rep prime.table_output_options[utility.getGlobalValue("terms.table_options.header")] = FALSE; } + io.ClearProgressBar (); fprintf (stdout, io.FormatTableRow (prime.print_row,prime.table_output_options)); } @@ -702,12 +738,11 @@ lfunction prime.store_results (node, result, arguments) { result_row [property_index+1] = p_values[(^"prime.local_to_property_name")[key]]; } - } - - - + } utility.EnsureKey (^"prime.site_results", partition_index); + utility.EnsureKey (^"prime.imputed_leaf_states", partition_index); + sites_mapping_to_pattern = pattern_info[utility.getGlobalValue("terms.data.sites")]; sites_mapping_to_pattern.count = utility.Array1D (sites_mapping_to_pattern); @@ -715,17 +750,7 @@ lfunction prime.store_results (node, result, arguments) { for (i = 0; i < sites_mapping_to_pattern.count; i+=1) { site_index = sites_mapping_to_pattern[i]; ((^"prime.site_results")[partition_index])[site_index] = result_row; + ((^"prime.imputed_leaf_states")[partition_index])[site_index] = result[^"terms.prime_imputed_states"]; prime.report.echo (site_index, partition_index, result_row); } - - /* - utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_value_", - ' - (prime.site_results[`&partition_index`])[_value_] = `&result_row`; - prime.report.echo (_value_, `&partition_index`, `&result_row`); - ' - );*/ - - - //assert (0); } diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 01fae44f3..2d5a13c98 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -881,9 +881,9 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map if (this_many_mm == 1) { if (mismatch_count == this_many_mm) { translString * 0; - console.log (translString); + /*console.log (translString); console.log ("\n"); - console.log (codon_sequence); + console.log (codon_sequence);*/ } assert(mismatch_count < this_many_mm, "A mismatch between codon and protein sequences at position " + aaPos + " (codon `seqPos`) : codon '" + codon_sequence[seqPos][seqPos + 2] + "'(`currentAA`) a.a. '`aa_sequence[aaPos]`'"); } else { From 911bbff5326a81d8218bfe2f1dd6150a6cb0822a Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Mon, 13 Apr 2020 15:13:38 -0400 Subject: [PATCH 10/13] GARD tweaks; v0.2 --- res/TemplateBatchFiles/GARD.bf | 48 +++++++++++-------- .../libv3/convenience/math.bf | 6 ++- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/res/TemplateBatchFiles/GARD.bf b/res/TemplateBatchFiles/GARD.bf index fdc7a2f9c..d5791250d 100644 --- a/res/TemplateBatchFiles/GARD.bf +++ b/res/TemplateBatchFiles/GARD.bf @@ -63,8 +63,8 @@ gard.analysisDescription = {terms.io.info : "GARD : Genetic Algorithms for Recom approach to screening alignments of sequences for recombination, by using the CHC genetic algorithm to search for phylogenetic incongruence among different partitions of the data. The number of partitions is determined using a step-up procedure, while the placement of breakpoints is searched for with the GA. The best fitting model (based on c-AIC) is returned; and additional post-hoc - tests run to distinguish topological incongruence from rate-variation.", - terms.io.version : "0.1", + tests run to distinguish topological incongruence from rate-variation. v0.2 adds and spooling results to JSON after each breakpoint search conclusion", + terms.io.version : "0.2", terms.io.reference : "**Automated Phylogenetic Detection of Recombination Using a Genetic Algorithm**, _Mol Biol Evol 23(10), 1891–1901", terms.io.authors : "Sergei L Kosakovsky Pond", terms.io.contact : "spond@temple.edu", @@ -301,7 +301,7 @@ namespace gard { if (singleBreakPointBest_cAIC < baseline_cAIC) { io.ReportProgressBar ("GARD", "Breakpoint " + Format (1+breakPointIndex, 10, 0) + " of " + (variableSites-1) + ". Best cAIC = " + Format (singleBreakPointBest_cAIC, 12, 4) + " [delta = " + Format (baseline_cAIC - singleBreakPointBest_cAIC, 12, 4) + "] with breakpoint at site " + Format (singleBreakPointBestLocation, 10, 0)); } else { - io.ReportProgressBar ("GARD", "Breakpoint " + Format (1+breakPointIndex, 10, 0) + " of " + (variableSites-1) + ". Best cAIC = " + Format (baseline_cAIC, 12, 4) + " with no breakpoints"); + io.ReportProgressBar ("GARD", "Breakpoint " + Format (1+breakPointIndex, 10, 0) + " of " + (variableSites-1) + ". Best cAIC = " + Format (baseline_cAIC, 12, 4) + " with no breakpoints." ); } @@ -338,6 +338,8 @@ if (gard.singleBreakPointBest_cAIC < gard.bestOverall_cAIC_soFar) { gard.bestOverallModelSoFar = null; } +gard.concludeAnalysis(gard.bestOverallModelSoFar); + /* 2b. Evaluation of multiple break points with genetic algorithm ------------------------------------------------------------------------------*/ @@ -349,12 +351,12 @@ namespace gard { if(populationSize < mpi.NodeCount() -1 ) { populationSize = mpi.NodeCount() + 1; } - mutationRate = 0.8; // the GARD paper said "15% of randomly selected bits were toggled"... - rateOfMutationsTharAreSmallShifts = 0.5; // some mutations are a new random break point; some are small shifts of the break point to an adjacent location. + mutationRate = 0.2; // the GARD paper said "15% of randomly selected bits were toggled"... + rateOfMutationsTharAreSmallShifts = 0.8; // some mutations are a new random break point; some are small shifts of the break point to an adjacent location. maxFailedAttemptsToMakeNewModel = 7; - cAIC_diversityThreshold = 0.001; + cAIC_diversityThreshold = 0.01; cAIC_improvementThreshold = 0.01; // I think this was basically 0 in the gard paper - maxGenerationsAllowedWithNoNewModelsAdded = 5; // TODO: Not in the GARD paper. use 10? + maxGenerationsAllowedWithNoNewModelsAdded = 2; // TODO: Not in the GARD paper. use 10? maxGenerationsAllowedAtStagnant_cAIC = 100; // TODO: this is set to 100 in the GARD paper // GA.2: Loop over increasing number of break points @@ -401,7 +403,6 @@ namespace gard { currentBest_model = Eval(currentBest_individual["key"]); if ( (math.minNormalizedRange(selectedModels) < cAIC_diversityThreshold) || (generationsNoNewModelsAdded > maxGenerationsAllowedWithNoNewModelsAdded) ) { - //console.log ("UPDATING PARENT MODELS"); parentModels = gard.GA.generateNewGenerationOfModelsByMutatingModelSet(selectedModels, numberOfPotentialBreakPoints, mutationRate, rateOfMutationsTharAreSmallShifts); if (Abs(parentModels) == 1) { @@ -423,11 +424,10 @@ namespace gard { terminationCondition = TRUE; } } else { + //console.log ("RESETTING generationsAtCurrentBest_cAIC" + previousBest_cAIC + ", " + currentBest_cAIC + ", " + cAIC_improvementThreshold + "\n" + currentBest_model + "\n"); generationsAtCurrentBest_cAIC = 0; } - //console.log ("END OF LOOP"); - //console.log (parentModels); if (previousBest_cAIC > 0 && previousBest_cAIC < currentBest_cAIC) { io.CheckAssertion("gard.previousBest_cAIC >= gard.currentBest_cAIC", "Internal error in GARD -- c-AIC INCREASED between two consecutive generations"); @@ -438,13 +438,16 @@ namespace gard { generation += 1; if (bestOverall_cAIC_soFar-currentBest_cAIC > 0) { - io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", " + Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged]" + io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", " + + Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged, " + Format (Abs (masterList)/(Time(1)-startTime),5,2) + "/sec]" + ". Min (c-AIC) = " + Format (currentBest_cAIC, 12,4) + " [ delta = " + Format (bestOverall_cAIC_soFar-currentBest_cAIC, 8, 2) + "], breakpoints at " + Join (", ", currentBest_model)); } else { - io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", " + Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged]" - + ". Min (c-AIC) = " + Format (currentBest_cAIC, 12,4) + " [no improvement], breakpoints at " + Join (", ", bestOverallModelSoFar)); + io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", " + + Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged, " + Format (Abs (masterList)/(Time(1)-startTime),5,2) + "/sec]" + + ". Min (c-AIC) = " + Format (currentBest_cAIC, 12,4) + " [no improvement], breakpoints at " + Join (", ", currentBest_model)); } + } io.ClearProgressBar(); @@ -464,8 +467,8 @@ namespace gard { bestOverallModelSoFar = bestModel; } else { addingBreakPointsImproves_cAIC = FALSE; - gard.concludeAnalysis(bestOverallModelSoFar); } + gard.concludeAnalysis(bestOverallModelSoFar); } } @@ -497,6 +500,7 @@ namespace gard { lfunction gard.fitPartitionedModel (breakPoints, model, initialValues, saveToFile, constrainToOneTopology) { + currentIndex = 0; currentStart = 0; breakPointsCount = utility.Array1D (breakPoints); @@ -915,30 +919,32 @@ lfunction gard.GA.generateNewGenerationOfModelsByMutatingModelSet(parentModels, numberOfBreakPoints = utility.Array1D(firstModel); // Generate a new set of models + local_mutation_rate = 1 - (1-mutationRate) ^ (1/numberOfBreakPoints); // keep bp the same nextGenOfModels = {}; for(i=0; i= 0 && variableSiteMapIndexOfParentBreakPoint < (^"gard.variableSites")) { newBreakPoint = (^"gard.variableSiteMap")[variableSiteMapIndexOfParentBreakPoint]; - notValid = FALSE; + break; } } breakPoints[breakPointIndex] = newBreakPoint; diff --git a/res/TemplateBatchFiles/libv3/convenience/math.bf b/res/TemplateBatchFiles/libv3/convenience/math.bf index 5bc0f7aea..7f68fd12a 100644 --- a/res/TemplateBatchFiles/libv3/convenience/math.bf +++ b/res/TemplateBatchFiles/libv3/convenience/math.bf @@ -265,7 +265,11 @@ lfunction math.minNormalizedRange(object) { if (Type (object) == "AssociativeList") { object = utility.Values(object); } - return (Max(object, 0) - Min(object, 0) ) / Min(object, 0); + min_value = Min(object, 0); + if (min_value == 0) { + return ^"math.Infinity"; + } + return (Max(object, 0) - min_value ) / min_value; }; From d666e8f037f34db2492f5923552161945b993519 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 18 Apr 2020 13:55:47 -0400 Subject: [PATCH 11/13] Alignment fixes for edge cases --- res/TemplateBatchFiles/libv3/tasks/alignments.bf | 2 +- src/core/batchlanruntime.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 2d5a13c98..014870d66 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -877,7 +877,7 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map advance = 0; //} } else { - mismatch_count += (aa_sequence[aaPos] != currentAA); + mismatch_count += (aa_sequence[aaPos] != currentAA && currentAA != "X"); if (this_many_mm == 1) { if (mismatch_count == this_many_mm) { translString * 0; diff --git a/src/core/batchlanruntime.cpp b/src/core/batchlanruntime.cpp index 3de3aac2e..14472abe2 100644 --- a/src/core/batchlanruntime.cpp +++ b/src/core/batchlanruntime.cpp @@ -1115,8 +1115,9 @@ bool _ElementaryCommand::HandleAlignSequences(_ExecutionList& current_progr char * str1r = nil, * str2r = nil; - score = AlignStrings (reference_sequence->get_str(), - sequence2->get_str(), + + score = AlignStrings (reference_sequence->get_str() ? reference_sequence->get_str() : "", + sequence2->get_str() ? sequence2->get_str() : "", str1r, str2r, character_map_to_integers, From 15b43cc0147d2d2c3ef6037b151d4f9f041ea9dc Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Mon, 20 Apr 2020 22:40:00 -0400 Subject: [PATCH 12/13] 2.5.9rc; add support for MH in absrel --- CMakeLists.txt | 2 + .../SelectionAnalyses/aBSREL.bf | 398 +++++++++++++++--- src/core/global_things.cpp | 2 +- tests/hbltests/libv3/aBSREL-MH.wbf | 58 +++ tests/hbltests/libv3/data/ncov.fasta | 334 +++++++++++++++ 5 files changed, 739 insertions(+), 55 deletions(-) create mode 100644 tests/hbltests/libv3/aBSREL-MH.wbf create mode 100644 tests/hbltests/libv3/data/ncov.fasta diff --git a/CMakeLists.txt b/CMakeLists.txt index ce2d380c0..59f11e48b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -611,6 +611,8 @@ add_test (MEME HYPHYMP tests/hbltests/libv3/MEME.wbf) add_test (MEME-PARTITIONED HYPHYMP tests/hbltests/libv3/MEME.wbf) add_test (BUSTED HYPHYMP tests/hbltests/libv3/BUSTED.wbf) add_test (BUSTED-SRV HYPHYMP tests/hbltests/libv3/BUSTED-SRV.wbf) +add_test (ABSREL HYPHYMP tests/hbltests/libv3/aBSREL.wbf) +add_test (ABSREL-MH HYPHYMP tests/hbltests/libv3/aBSREL-MH.wbf) add_test (RELAX HYPHYMP tests/hbltests/libv3/RELAX.wbf) add_test (FUBAR HYPHYMP tests/hbltests/libv3/FUBAR.wbf) add_test (BGM HYPHYMP tests/hbltests/libv3/BGM.wbf) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf index efb3a7001..9d1bcf99c 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf @@ -1,4 +1,4 @@ -RequireVersion ("2.4.0"); +RequireVersion ("2.5.8"); LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4 @@ -15,7 +15,8 @@ LoadFunctionLibrary("modules/io_functions.ibf"); LoadFunctionLibrary("modules/selection_lib.ibf"); LoadFunctionLibrary("libv3/models/codon/BS_REL.bf"); LoadFunctionLibrary("libv3/convenience/math.bf"); - +LoadFunctionLibrary("libv3/models/codon/MG_REV_MH.bf"); +LoadFunctionLibrary("libv3/models/codon/MG_REV_TRIP.bf"); utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE); @@ -35,6 +36,8 @@ absrel.baseline_omega_ratio = "Baseline MG94xREV omega ratio"; absrel.full_adaptive_model = "Full adaptive model"; absrel.rate_classes = "Rate classes"; absrel.per_branch_omega = "Per-branch omega"; +absrel.per_branch_delta = "Per-branch delta"; +absrel.per_branch_psi = "Per-branch psi"; absrel.display_orders = {terms.original_name: -1, terms.json.nucleotide_gtr: 0, @@ -45,7 +48,9 @@ absrel.display_orders = {terms.original_name: -1, terms.json.rate_distribution: 3, terms.LRT: 4, terms.json.uncorrected_pvalue: 5, - terms.json.corrected_pvalue: 6 + terms.json.corrected_pvalue: 6, + terms.parameters.multiple_hit_rate: 7, + terms.parameters.triple_hit_rate : 8 }; @@ -57,11 +62,11 @@ absrel.analysis_description = {terms.io.info : "aBSREL (Adaptive branch-site ran uses an adaptive random effects branch-site model framework to test whether each branch has evolved under positive selection, using a procedure which infers an optimal number of rate categories per branch.", - terms.io.version : "2.1", - terms.io.reference : "Less Is More: An Adaptive Branch-Site Random Effects Model for Efficient Detection of Episodic Diversifying Selection (2015). Mol Biol Evol 32 (5): 1342-1353", - terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group", - terms.io.contact : "spond@temple.edu", - terms.io.requirements : "in-frame codon alignment and a phylogenetic tree" + terms.io.version : "2.2", + terms.io.reference : "Less Is More: An Adaptive Branch-Site Random Effects Model for Efficient Detection of Episodic Diversifying Selection (2015). Mol Biol Evol 32 (5): 1342-1353. v2.2 adds support for multiple-hit models", + terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group", + terms.io.contact : "spond@temple.edu", + terms.io.requirements : "in-frame codon alignment and a phylogenetic tree" }; @@ -98,6 +103,15 @@ namespace absrel { load_file ("absrel"); } +KeywordArgument ("multiple-hits", "Include support for multiple nucleotide substitutions", "None"); + +absrel.multi_hit = io.SelectAnOption ({ + {"Double", "Include branch-specific rates for double nucleotide substitutions"} + {"Double+Triple", "Include branch-specific rates for double and triple nucleotide substitutions"} + {"None", "[Default] Use standard models which permit only single nucleotide changes to occur instantly"} + }, "Include support for multiple nucleotide substitutions"); + + KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'ABSREL.json')", absrel.codon_data_info [terms.json.json]); absrel.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to"); @@ -109,13 +123,6 @@ utility.ForEachPair (absrel.selected_branches, "_partition_", "_selection_", io.ReportProgressMessageMD('RELAX', 'selector', '* Selected ' + Abs(_selection_) + ' branches for testing: \\\`' + Join (', ',utility.Keys(_selection_)) + '\\\`')"); -/* -absrel.srv = io.SelectAnOption ({ - {"Yes", "Both synonymous and non-synonymous rates vary in a branch-site fashion (~5x more computationally expensive)"} - {"No", "[Default] Synonymous rates vary from branch to branch, while the dN/dS ratio varies among branch-site combinations"} - }, "Enable synonymous rate variation?"); -*/ - selection.io.startTimer (absrel.json [terms.json.timers], "Preliminary model fitting", 1); @@ -130,7 +137,39 @@ estimators.fixSubsetOfEstimates(absrel.gtr_results, absrel.gtr_results[terms.glo io.ReportProgressMessageMD ("absrel", "base", "Fitting the baseline model with a single dN/dS class per branch, and no site-to-site variation. "); -absrel.base.results = estimators.FitMGREV (absrel.filter_names, absrel.trees, absrel.codon_data_info [terms.code], { + +// estimators.FitCodonModel (codon_data, tree, "models.codon.MG_REV.ModelDescription", genetic_code, option, initial_values); + +absrel.model_generator = "models.codon.MG_REV.ModelDescription"; + +if (absrel.multi_hit == "Double") { + absrel.model_generator = "models.codon.MG_REV_MH.ModelDescription"; +} else { + if (absrel.multi_hit == "Double+Triple") { + + lfunction absrel.codon.MG_REV_TRIP._GenerateRate(fromChar, toChar, namespace, model_type, model) { + return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, + model[utility.getGlobalValue("terms.translation_table")], + "alpha", utility.getGlobalValue("terms.parameters.synonymous_rate"), + "beta", utility.getGlobalValue("terms.parameters.nonsynonymous_rate"), + "omega", utility.getGlobalValue("terms.parameters.omega_ratio"), + "delta", utility.getGlobalValue("terms.parameters.multiple_hit_rate"), + "psi", utility.getGlobalValue("terms.parameters.triple_hit_rate"), + "psi", utility.getGlobalValue("terms.parameters.triple_hit_rate") + ); + } + + + lfunction absrel.model.MG_REV_no_islands (type, code) { + mdl = models.codon.MG_REV_TRIP.ModelDescription (type,code); + mdl[utility.getGlobalValue("terms.model.q_ij")] = "absrel.codon.MG_REV_TRIP._GenerateRate"; + return mdl; + } + absrel.model_generator = "absrel.model.MG_REV_no_islands"; + } +} + +absrel.base.results = estimators.FitCodonModel (absrel.filter_names, absrel.trees, absrel.model_generator, absrel.codon_data_info [terms.code], { terms.run_options.model_type: terms.local, terms.run_options.retain_lf_object: TRUE, terms.run_options.retain_model_object : TRUE @@ -140,16 +179,33 @@ absrel.base.results = estimators.FitMGREV (absrel.filter_names, absrel.trees, ab io.ReportProgressMessageMD("absrel", "base", "* " + selection.io.report_fit (absrel.base.results, 0, absrel.codon_data_info[terms.data.sample_size])); - - absrel.baseline.branch_lengths = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "selection.io.branch.length"); absrel.baseline.omegas = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "absrel.local.omega"); +if (absrel.multi_hit == "Double" || absrel.multi_hit == "Double+Triple") { + absrel.baseline.deltas = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "absrel.local.delta"); +} + + absrel.omega_stats = math.GatherDescriptiveStats (utility.Map (utility.UniqueValues (absrel.baseline.omegas), "_value_", "0+_value_")); io.ReportProgressMessageMD("absrel", "base", "* Branch-level `terms.parameters.omega_ratio` distribution has median " + Format (absrel.omega_stats[terms.math.median], 5,2) + ", and 95% of the weight in " + Format (absrel.omega_stats[terms.math._2.5], 5,2) + " - " + Format (absrel.omega_stats[terms.math._97.5], 5,2)); +if (absrel.multi_hit == "Double" || absrel.multi_hit == "Double+Triple") { + absrel.baseline.deltas = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "absrel.local.delta"); + absrel.delta_stats = math.GatherDescriptiveStats (utility.Map (utility.UniqueValues (absrel.baseline.deltas), "_value_", "0+_value_")); + io.ReportProgressMessageMD("absrel", "base", "* Branch-level `terms.parameters.multiple_hit_rate` distribution has median " + + Format (absrel.delta_stats[terms.math.median], 5,2) + ", and 95% of the weight in " + Format (absrel.delta_stats[terms.math._2.5], 5,2) + " - " + Format (absrel.delta_stats[terms.math._97.5], 5,2)); +} + +if (absrel.multi_hit == "Double+Triple") { + absrel.baseline.psi = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "absrel.local.psi"); + absrel.psi_stats = math.GatherDescriptiveStats (utility.Map (utility.UniqueValues (absrel.baseline.psi), "_value_", "0+_value_")); + io.ReportProgressMessageMD("absrel", "base", "* Branch-level `terms.parameters.triple_hit_rate` distribution has median " + + Format (absrel.psi_stats[terms.math.median], 5,2) + ", and 95% of the weight in " + Format (absrel.psi_stats[terms.math._2.5], 5,2) + " - " + Format (absrel.psi_stats[terms.math._97.5], 5,2)); +} + selection.io.stopTimer (absrel.json [terms.json.timers], "Baseline model fitting"); @@ -176,8 +232,28 @@ absrel.distribution_for_json = {absrel.per_branch_omega : terms.math._2.5 : absrel.omega_stats[terms.math._2.5], terms.math._97.5 : absrel.omega_stats[terms.math._97.5]} }; - - + + +if (absrel.multi_hit == "Double" || absrel.multi_hit == "Double+Triple") { + absrel.distribution_for_json [absrel.per_branch_delta] = + { + terms.math.mean : absrel.delta_stats[terms.math.mean], + terms.math.median : absrel.delta_stats[terms.math.median], + terms.math._2.5 : absrel.delta_stats[terms.math._2.5], + terms.math._97.5 : absrel.delta_stats[terms.math._97.5] + }; + + if (absrel.multi_hit == "Double+Triple") { + absrel.distribution_for_json [absrel.per_branch_psi] = + { + terms.math.mean : absrel.psi_stats[terms.math.mean], + terms.math.median : absrel.psi_stats[terms.math.median], + terms.math._2.5 : absrel.psi_stats[terms.math._2.5], + terms.math._97.5 : absrel.psi_stats[terms.math._97.5] + }; + + } +} //Store MG94 to JSON selection.io.json_store_lf_withEFV (absrel.json, @@ -246,18 +322,53 @@ absrel.branch.complexity = {}; utility.ToggleEnvVariable ("USE_LAST_RESULTS", TRUE); -absrel.complexity_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { - "0": 35, - "1": 10, - "2": 10, - "3": 20, - "4": 15, - "5": 15, - "6": 15 - }, - terms.number_precision : 2}; +if (absrel.multi_hit == "Double") { + absrel.complexity_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { + "0": 35, + "1": 10, + "2": 10, + "3": 20, + "4": 12, + "5": 15, + "6": 15, + "7": 15 + }, + terms.number_precision : 2}; + + fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Length", "Rates", "Max. dN/dS", "2x rate", "Log(L)", "AIC-c", "Best AIC-c so far"}}, absrel.complexity_table.settings)); +} else { + if (absrel.multi_hit == "Double+Triple") { + absrel.complexity_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { + "0": 35, + "1": 10, + "2": 10, + "3": 20, + "4": 12, + "5": 12, + "6": 15, + "7": 15, + "8": 15 + }, + terms.number_precision : 2}; + + fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Length", "Rates", "Max. dN/dS", "2x rate", "3x rate", "Log(L)", "AIC-c", "Best AIC-c so far"}}, absrel.complexity_table.settings)); + } else { + absrel.complexity_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { + "0": 35, + "1": 10, + "2": 10, + "3": 20, + "4": 15, + "5": 15, + "6": 15 + }, + terms.number_precision : 2}; + + fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Length", "Rates", "Max. dN/dS", "Log(L)", "AIC-c", "Best AIC-c so far"}}, absrel.complexity_table.settings)); + } + +} -fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Length", "Rates", "Max. dN/dS", "Log(L)", "AIC-c", "Best AIC-c so far"}}, absrel.complexity_table.settings)); absrel.complexity_table.settings [terms.table_options.header] = FALSE; utility.SetEnvVariable ("LF_SMOOTHING_SCALER", 0); @@ -280,9 +391,7 @@ for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch absrel.initial_guess = absrel.ComputeOnAGrid (absrel.PopulateInitialGrid (absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch, absrel.current_branch_estimates), absrel.likelihood_function_id); absrel.SetBranchConstraints (absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch); - - - + Optimize (absrel.stepup.mles, ^absrel.likelihood_function_id); absrel.current_test_score = math.GetIC (absrel.stepup.mles[1][0], absrel.current_parameter_count + 2, absrel.codon_data_info[terms.data.sample_size]); @@ -294,14 +403,24 @@ for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch } else { absrel.report.row [3] = ">1000 (" + Format (absrel.dn_ds.distro[absrel.current_rate_count-1][1]*100,5,2) + "%)"; } - absrel.report.row [4] = absrel.stepup.mles[1][0]; - absrel.report.row [5] = absrel.current_test_score; + + absrel.offset = 0; + if (absrel.multi_hit == "Double" || absrel.multi_hit == "Double+Triple") { + absrel.report.row [4] = (absrel.provisional_estimates[utility.getGlobalValue ("terms.parameters.multiple_hit_rate")])[utility.getGlobalValue ("terms.fit.MLE")]; + absrel.offset = 1; + } + if (absrel.multi_hit == "Double+Triple") { + absrel.report.row [5] = (absrel.provisional_estimates[utility.getGlobalValue ("terms.parameters.triple_hit_rate")])[utility.getGlobalValue ("terms.fit.MLE")]; + absrel.offset += 1; + } + absrel.report.row [4+absrel.offset] = absrel.stepup.mles[1][0]; + absrel.report.row [5+absrel.offset] = absrel.current_test_score; if (absrel.current_test_score < absrel.current_best_score) { absrel.current_branch_estimates = absrel.provisional_estimates; absrel.current_best_score = absrel.current_test_score; - absrel.report.row [6] = absrel.current_test_score; + absrel.report.row [6+absrel.offset] = absrel.current_test_score; fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.complexity_table.settings)); if (absrel.current_rate_count >= absrel.max_rate_classes) { @@ -310,7 +429,7 @@ for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch absrel.current_rate_count += 1; absrel.current_parameter_count += 2; } else { - absrel.report.row [6] = absrel.current_best_score; + absrel.report.row [6+absrel.offset] = absrel.current_best_score; fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.complexity_table.settings)); break; } @@ -375,6 +494,7 @@ selection.io.json_store_branch_attribute(absrel.json, terms.json.rate_distributi + selection.io.json_store_lf (absrel.json, absrel.full_adaptive_model, absrel.full_model.fit[terms.fit.log_likelihood], @@ -390,20 +510,54 @@ selection.io.json_store_lf (absrel.json, selection.io.startTimer (absrel.json [terms.json.timers], "Testing for selection", 5); io.ReportProgressMessageMD ("absrel", "testing", "Testing selected branches for selection"); -absrel.testing_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { - "0": 35, - "1": 10, - "2": 20, - "3": 20, - "4": 20, - }, - terms.number_precision : 2}; +if (absrel.multi_hit == "Double") { + absrel.testing_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { + "0": 35, + "1": 10, + "2": 20, + "3": 12, + "4": 20, + "5": 20, + }, + terms.number_precision : 2}; + + fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Rates", "Max. dN/dS", "2x rate", "Test LRT", "Uncorrected p-value"}}, absrel.testing_table.settings)); +} else { + if (absrel.multi_hit == "Double+Triple") { + absrel.testing_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { + "0": 35, + "1": 10, + "2": 20, + "3": 12, + "4": 12, + "5": 20, + "6": 20, + }, + terms.number_precision : 2}; + + fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Rates", "Max. dN/dS", "2x rate","3x rate", "Test LRT", "Uncorrected p-value"}}, absrel.testing_table.settings)); + } else { + absrel.testing_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: { + "0": 35, + "1": 10, + "2": 20, + "3": 20, + "4": 20, + }, + terms.number_precision : 2}; + + fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Rates", "Max. dN/dS", "Test LRT", "Uncorrected p-value"}}, absrel.testing_table.settings)); + } + +} + -fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Rates", "Max. dN/dS", "Test LRT", "Uncorrected p-value"}}, absrel.testing_table.settings)); absrel.testing_table.settings [terms.table_options.header] = FALSE; absrel.branch.p_values = {}; absrel.branch.lrt = {}; +absrel.branch.delta = {}; +absrel.branch.psi = {}; for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch_id += 1) { absrel.current_branch = absrel.names_sorted_by_length[absrel.branch_id]; @@ -419,9 +573,23 @@ for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch } else { absrel.report.row [2] = ">1000 (" + Format (absrel.dn_ds.distro[absrel.branch.complexity[absrel.current_branch]-1][1]*100,5,2) + "%)"; } + + absrel.offset = 0; + absrel.final_estimates = absrel.GetBranchEstimates(absrel.model_defintions [absrel.branch.complexity[absrel.current_branch]], absrel.tree_id, absrel.current_branch); + + if (absrel.multi_hit == "Double" || absrel.multi_hit == "Double+Triple") { + absrel.report.row [3] = (absrel.final_estimates[utility.getGlobalValue ("terms.parameters.multiple_hit_rate")])[utility.getGlobalValue ("terms.fit.MLE")]; + absrel.branch.delta [absrel.current_branch] = absrel.report.row [3]; + absrel.offset = 1; + } + if (absrel.multi_hit == "Double+Triple") { + absrel.report.row [4] = (absrel.final_estimates[utility.getGlobalValue ("terms.parameters.triple_hit_rate")])[utility.getGlobalValue ("terms.fit.MLE")]; + absrel.branch.psi [absrel.current_branch] = absrel.report.row [4]; + absrel.offset += 1; + } + if ((absrel.selected_branches[0])[absrel.current_branch] == terms.tree_attributes.test) { - if (absrel.dn_ds.distro [absrel.branch.complexity[absrel.current_branch]-1][0] > 1) { absrel.branch.ConstrainForTesting (absrel.model_defintions [absrel.branch.complexity[absrel.current_branch]], absrel.tree_id, absrel.current_branch); Optimize (absrel.null.mles, ^absrel.likelihood_function_id); @@ -432,18 +600,32 @@ for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch } absrel.branch.p_values [ absrel.current_branch ] = absrel.branch.test [terms.p_value]; absrel.branch.lrt [absrel.current_branch] = absrel.branch.test [terms.LRT]; - absrel.report.row [3] = absrel.branch.test [terms.LRT]; - absrel.report.row [4] = Format (absrel.branch.test [terms.p_value], 8, 5); + absrel.report.row [3+absrel.offset] = absrel.branch.test [terms.LRT]; + absrel.report.row [4+absrel.offset] = Format (absrel.branch.test [terms.p_value], 8, 5); } else { absrel.branch.lrt [absrel.current_branch] = None; absrel.branch.p_values [absrel.current_branch] = None; - absrel.report.row [3] = "Not selected"; - absrel.report.row [4] = "for testing"; + absrel.report.row [3+absrel.offset] = "Not selected"; + absrel.report.row [4+absrel.offset] = "for testing"; } fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.testing_table.settings)); } +if (Abs (absrel.branch.delta)) { + selection.io.json_store_branch_attribute(absrel.json, terms.parameters.multiple_hit_rate, terms.json.branch_label, absrel.display_orders[terms.parameters.multiple_hit_rate], + 0, + absrel.branch.delta); + +} + +if (Abs (absrel.branch.psi)) { + selection.io.json_store_branch_attribute(absrel.json, terms.parameters.triple_hit_rate, terms.json.branch_label, absrel.display_orders[terms.parameters.triple_hit_rate], + 0, + absrel.branch.psi); + +} + selection.io.json_store_branch_attribute(absrel.json, terms.LRT, terms.json.branch_label, absrel.display_orders[terms.LRT], 0, absrel.branch.lrt); @@ -517,8 +699,10 @@ lfunction absrel.GetBranchEstimates (model, tree_id, branch_id) { lfunction absrel.GetRateDistribution (local_parameters) { + + offset = (^"absrel.multi_hit" == "Double") + (^"absrel.multi_hit" == "Double+Triple")*2; result = None; - component_count = (Abs (local_parameters))$2; + component_count = (Abs (local_parameters)-offset)$2; if (component_count > 1) { rates = {"rates" : {component_count,1}, "weights" : {component_count,1}}; for (k = 1; k < component_count; k+=1) { @@ -684,15 +868,121 @@ lfunction absrel.local.omega(branch_info) { (branch_info[utility.getGlobalValue ("terms.parameters.synonymous_rate")])[utility.getGlobalValue("terms.fit.MLE")]); } +lfunction absrel.local.delta (branch_info) { + return (branch_info[utility.getGlobalValue ("terms.parameters.multiple_hit_rate")])[utility.getGlobalValue("terms.fit.MLE")]; +} + +lfunction absrel.local.psi (branch_info) { + return (branch_info[utility.getGlobalValue ("terms.parameters.triple_hit_rate")])[utility.getGlobalValue("terms.fit.MLE")]; +} + //------------------------------------------------------------------------------ lfunction absrel.BS_REL.ModelDescription (type, code, components) { model = models.codon.BS_REL.ModelDescription(type, code, components); - model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.BS_REL._DefineQ"; + if (^"absrel.multi_hit" == "Double") { + model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.codon.BS_REL._DefineQ.DH"; + + } else { + if (^"absrel.multi_hit" == "Double+Triple") { + model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.codon.BS_REL._DefineQ.TH"; + } else { + model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.BS_REL._DefineQ"; + + } + } return model; } +//------------------------------------------------------------------------------ + + +lfunction absrel.codon.BS_REL._DefineQ.TH (bs_rel, namespace) { + + + rate_matrices = {}; + + bs_rel [utility.getGlobalValue("terms.model.q_ij")] = &rate_generator; + bs_rel [utility.getGlobalValue("terms.mixture.mixture_components")] = {}; + + _aux = parameters.GenerateSequentialNames ("bsrel_mixture_aux", bs_rel[utility.getGlobalValue("terms.model.components")] - 1, "_"); + _wts = parameters.helper.stick_breaking (_aux, None); + + + for (component = 1; component <= bs_rel[utility.getGlobalValue("terms.model.components")]; component += 1) { + key = "component_" + component; + ExecuteCommands (" + function rate_generator (fromChar, toChar, namespace, model_type, model) { + return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')], + 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'), + 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component), + 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component), + 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'), + 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate'), + 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate') + ); + }" + ); + + if ( component < bs_rel[utility.getGlobalValue("terms.model.components")]) { + model.generic.AddLocal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component )); + parameters.SetRange (_aux[component-1], utility.getGlobalValue("terms.range_almost_01")); + } + models.codon.generic.DefineQMatrix(bs_rel, namespace); + rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")]; + (bs_rel [^'terms.mixture.mixture_components'])[key] = _wts [component-1]; + } + + bs_rel[utility.getGlobalValue("terms.model.rate_matrix")] = rate_matrices; + parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", ""); + + return bs_rel; +} + +//------------------------------------------------------------------------------ + + + +lfunction absrel.codon.BS_REL._DefineQ.DH (bs_rel, namespace) { + + rate_matrices = {}; + + bs_rel [utility.getGlobalValue("terms.model.q_ij")] = &rate_generator; + bs_rel [utility.getGlobalValue("terms.mixture.mixture_components")] = {}; + + _aux = parameters.GenerateSequentialNames ("bsrel_mixture_aux", bs_rel[utility.getGlobalValue("terms.model.components")] - 1, "_"); + _wts = parameters.helper.stick_breaking (_aux, None); + + + for (component = 1; component <= bs_rel[utility.getGlobalValue("terms.model.components")]; component += 1) { + key = "component_" + component; + ExecuteCommands (" + function rate_generator (fromChar, toChar, namespace, model_type, model) { + return models.codon.MG_REV_MH._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')], + 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'), + 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component), + 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component), + 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate') + ); + }" + ); + + if ( component < bs_rel[utility.getGlobalValue("terms.model.components")]) { + model.generic.AddLocal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component )); + parameters.SetRange (_aux[component-1], utility.getGlobalValue("terms.range_almost_01")); + } + + models.codon.generic.DefineQMatrix(bs_rel, namespace); + rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")]; + (bs_rel [^'terms.mixture.mixture_components'])[key] = _wts [component-1]; + } + + bs_rel[utility.getGlobalValue("terms.model.rate_matrix")] = rate_matrices; + parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", ""); + + return bs_rel; +} //------------------------------------------------------------------------------ diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp index 61a50470f..9183d7115 100644 --- a/src/core/global_things.cpp +++ b/src/core/global_things.cpp @@ -120,7 +120,7 @@ namespace hy_global { kErrorStringDatasetRefIndexError ("Dataset index reference out of range"), kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"), kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"), - kHyPhyVersion = _String ("2.5.8"), + kHyPhyVersion = _String ("2.5.9"), kNoneToken = "None", kNullToken = "null", diff --git a/tests/hbltests/libv3/aBSREL-MH.wbf b/tests/hbltests/libv3/aBSREL-MH.wbf new file mode 100644 index 000000000..f7e2436d2 --- /dev/null +++ b/tests/hbltests/libv3/aBSREL-MH.wbf @@ -0,0 +1,58 @@ +GetString (version, HYPHY_VERSION, 0); + +if (+version >= 2.4) { + LoadFunctionLibrary ("SelectionAnalyses/ABSREL.bf", {"--code" : "Universal", "--alignment" : PATH_TO_CURRENT_BF + "data/ncov.fasta", "--multiple-hits" : "Double", "--branches": "Internal"}); +} else { + return TRUE; +} +LoadFunctionLibrary ("shared.bf"); + + +assert (check_value ( + ((absrel.json["fits"])["Full adaptive model"])["Log Likelihood"], -12245.95, 0.001), "Incorrect log-likelihood for the full adaptive model"); + +assert (check_value ( + ((absrel.json["test results"])["positive test results"]),1, 0.001), "Incorrect number of positive test results"); + +assert (check_value ( + ((absrel.json["test results"])["tested"]),2, 0.001), "Incorrect number of total tests"); + + + +test.expected_positives = utility.MatrixToDict({{"Node2"}}); +test.lrts = 0; +test.delta = 0; + +function confirm_branch (branch, p, dict) { + if (p == None) { + p = 1; + } + if (p <= 0.05) { + if (dict/branch) { + dict - branch; + return TRUE; + } else { + assert (0, "Failed to correctly classify branch " + branch); + } + } + if (dict/branch) { + assert (0, "Incorrectly classified branch " + branch); + } + return false; +} + +utility.ForEachPair ((absrel.json["branch attributes"])["0"],"_key_", "_value_", + " + if (confirm_branch (_key_, _value_['Corrected P-value'], test.expected_positives)) { + test.lrts += _value_['LRT']; + test.delta += _value_[terms.parameters.multiple_hit_rate]; + } + "); + + +assert (check_value ( + test.lrts, 6.38 , 0.05), "More than 5% difference in cumulative LRT for positively selected branches"); + +assert (check_value ( + test.delta, 0.61 , 0.05), "More than 5% difference in cumulative delta for positively selected branches"); + diff --git a/tests/hbltests/libv3/data/ncov.fasta b/tests/hbltests/libv3/data/ncov.fasta new file mode 100644 index 000000000..467d02306 --- /dev/null +++ b/tests/hbltests/libv3/data/ncov.fasta @@ -0,0 +1,334 @@ +>AY274119.3 +------ATGTTTATTTTCTTATTATTT---CTTACTCTCACTAGTGGTAGTGACCTTGAC +CGGTGC---ACCACTTTTGATGATGTTCAAGCTCCTAATTACACTCAACATACTTCATCT +ATGAGGGGGGTTTACTATCCTGATGAAATTTTTAGATCAGACACTCTTTATTTAACTCAG +GATTTATTTCTTCCATTTTATTCTAATGTTACAGGGTTTCATACTATT------------ +---------AATCATACGTTTGGCAACCCTGTCATACCTTTTAAGGATGGTATTTATTTT +GCTGCCACAGAGAAATCAAATGTTGTCCGTGGTTGGGTTTTTGGTTCTACCATGAACAAC +AAGTCACAGTCGGTGATTATTATTAACAATTCTACTAATGTTGTTATACGAGCATGTAAC +TTTGAATTGTGTGACAACCCTTTCTTTGCTGTTTCTAAACCCATGGGTACACAGACACAT +ACTATG------------ATATTCGATAATGCATTTAATTGCACTTTCGAGTACATATCT +GATGCCTTTTCGCTTGATGTTTCAGAAAAGTCAGGTAATTTTAAACACTTACGAGAGTTT +GTGTTTAAAAATAAAGATGGGTTTCTCTATGTTTATAAGGGCTATCAACCTATAGATGTA +GTTCGTGATCTACCTTCTGGTTTTAACACTTTGAAACCTATTTTTAAGTTGCCTCTTGGT +ATTAACATTACAAATTTTAGAGCCATTCTTACAGCCTTTTCACCT--------------- +---------GCTCAAGACATTTGGGGCACGTCAGCTGCAGCCTATTTTGTTGGCTATTTA +AAGCCAACTACATTTATGCTCAAGTATGATGAAAATGGTACAATCACAGATGCTGTTGAT +TGTTCTCAAAATCCACTTGCTGAACTCAAATGCTCTGTTAAGAGCTTTGAGATTGACAAA +GGAATTTACCAGACCTCTAATTTCAGGGTTGTTCCCTCAGGAGATGTTGTGAGATTCCCT +AATATTACAAACTTGTGTCCTTTTGGAGAGGTTTTTAATGCTACTAAATTCCCTTCTGTC +TATGCATGGGAGAGAAAAAAAATTTCTAATTGTGTTGCTGATTACTCTGTGCTCTACAAC +TCAACATTTTTTTCAACCTTTAAGTGCTATGGCGTTTCTGCCACTAAGTTGAATGATCTT +TGCTTCTCCAATGTCTATGCAGATTCTTTTGTAGTCAAGGGAGATGATGTAAGACAAATA +GCGCCAGGACAAACTGGTGTTATTGCTGATTATAATTATAAATTGCCAGATGATTTCATG +GGTTGTGTCCTTGCTTGGAATACTAGGAACATTGATGCTACTTCAACTGGTAATTATAAT +TATAAATATAGGTATCTTAGACATGGCAAGCTTAGGCCCTTTGAGAGAGACATATCTAAT +GTGCCTTTCTCCCCTGATGGCAAACCTTGCACCCCACCTGCT---CTTAATTGTTATTGG +CCATTAAATGATTATGGTTTTTACACCACTACTGGCATTGGCTACCAACCTTACAGAGTT +GTAGTACTTTCTTTTGAACTTTTAAATGCACCGGCCACGGTTTGTGGACCAAAATTATCC +ACTGACCTTATTAAGAACCAGTGTGTCAATTTTAATTTTAATGGACTCACTGGTACTGGT +GTGTTAACTCCTTCTTCAAAGAGATTTCAACCATTTCAACAATTTGGCCGTGATGTTTCT +GATTTCACTGATTCCGTTCGAGATCCTAAAACATCTGAAATATTAGACATTTCACCTTGC +GCTTTTGGGGGTGTAAGTGTAATTACACCTGGAACAAATGCTTCATCTGAAGTTGCTGTT +CTATATCAAGATGTTAACTGCACTGATGTTTCTACAGCAATTCATGCAGATCAACTCACA +CCAGCTTGGCGCATATATTCTACTGGAAACAATGTATTCCAGACTCAAGCAGGCTGTCTT +ATAGGAGCTGAGCATGTCGACACTTCTTATGAGTGCGACATTCCTATTGGAGCTGGCATT +TGTGCTAGTTACCATACAGTTTCT------------TTATTACGTAGTACTAGCCAAAAA +TCTATTGTGGCTTATACTATGTCTTTAGGTGCTGATAGTTCAATTGCTTACTCTAATAAC +ACCATTGCTATACCTACTAACTTTTCAATTAGCATTACTACAGAAGTAATGCCTGTTTCT +ATGGCTAAAACCTCCGTAGATTGTAATATGTACATCTGCGGAGATTCTACTGAATGTGCT +AATTTGCTTCTCCAATATGGTAGCTTTTGCACACAACTAAATCGTGCACTCTCAGGTATT +GCTGCTGAACAGGATCGCAACACACGTGAAGTGTTCGCTCAAGTCAAACAAATGTACAAA +ACCCCAACTTTGAAATATTTTGGTGGTTTTAATTTTTCACAAATATTACCTGACCCTCTA +AAGCCAACTAAGAGGTCTTTTATTGAGGACTTGCTCTTTAATAAGGTGACACTCGCTGAT +GCTGGCTTCATGAAGCAATATGGCGAATGCCTAGGTGATATTAATGCTAGAGATCTCATT +TGTGCGCAGAAGTTCAATGGACTTACAGTGTTGCCACCTCTGCTCACTGATGATATGATT +GCTGCCTACACTGCTGCTCTAGTTAGTGGTACTGCCACTGCTGGATGGACATTTGGTGCT +GGCGCTGCTCTTCAAATACCTTTTGCTATGCAAATGGCATATAGGTTCAATGGCATTGGA +GTTACCCAAAATGTTCTCTATGAGAACCAAAAACAAATCGCCAACCAATTTAACAAGGCG +ATTAGTCAAATTCAAGAATCACTTACAACAACATCAACTGCATTGGGCAAGCTGCAAGAC +GTTGTTAACCAGAATGCTCAAGCATTAAACACACTTGTTAAACAACTTAGCTCTAATTTT +GGTGCAATTTCAAGTGTGCTAAATGATATCCTTTCGCGACTTGATAAAGTCGAGGCGGAG +GTACAAATTGACAGGTTAATTACAGGCAGACTTCAAAGCCTTCAAACCTATGTAACACAA +CAACTAATCAGGGCTGCTGAAATCAGGGCTTCTGCTAATCTTGCTGCTACTAAAATGTCT +GAGTGTGTTCTTGGACAATCAAAAAGAGTTGACTTTTGTGGAAAGGGCTACCACCTTATG +TCCTTCCCACAAGCAGCCCCGCATGGTGTTGTCTTCCTACATGTCACGTATGTGCCATCC +CAGGAGAGGAACTTCACCACAGCGCCAGCAATTTGTCATGAAGGCAAAGCATACTTCCCT +CGTGAAGGTGTTTTTGTGTTTAATGGCACTTCTTGGTTTATTACACAGAGGAACTTCTTT +TCTCCACAAATAATTACTACAGACAATACATTTGTCTCAGGAAATTGTGATGTCGTTATT +GGCATCATTAACAACACAGTTTATGATCCTCTGCAACCTGAGCTTGACTCATTCAAAGAA +GAGCTGGACAAGTACTTCAAAAATCATACATCACCAGATGTTGATCTTGGCGACATTTCA +GGCATTAACGCTTCTGTCGTCAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTCGCT +AAAAATTTAAATGAATCACTCATTGACCTTCAAGAATTGGGAAAATATGAGCAATATATT +AAATGGCCTTGGTATGTTTGGCTCGGCTTCATTGCTGGACTAATTGCCATCGTCATGGTT +ACAATCTTGCTTTGTTGCATGACTAGTTGTTGCAGTTGCCTCAAGGGTGCATGCTCTTGT +GGTTCTTGCTGCAAGTTTGATGAGGATGACTCTGAGCCAGTTCTCAAGGGTGTCAAATTA +CATTACACA +>DQ071615.1 +ATGAAAATTTTAATTCTTGCTTTCCTA---GCTAGTCTAGCTAAAGCACAAGAA------ +GGATGT---GGCATTATCAGTCGAAAACCTCAGCCAAAAATGGCACAAGTCTCTTCTTCT +CGTAGAGGTGTGTACTATAATGATGACATTTTTCGTTCTAATGTACTACACCTGACGCAG +GATTATTTCCTGCCATTTGATTCAAATTTAACACAGTACTTTTCTCTTAATGTTGATTCA +GATAGGTTTACC---TACTTTGACAATCCTATTTTAGACTTTGGTGACGGCGTCTACTTC +GCTGCTACTGAAAAGTCTAATGTAATTAGGGGCTGGATTTTTGGTTCCACTTTCGATAAC +ACAACCCAGTCAGCTGTTATAGTTAATAATTCCACACACATTATTATACGTGTGTGCAAC +TTCAACTTATGTAAAGAACCTATGTATACAGTGTCTCGTGGTGCACAACAATCATCTTGG +------------------GTTTATCAGAGTGCATTCAATTGCACATATGATAGAGTGGAA +AAAAGCTTTCAGCTCGACACTGCTCCTAAAACTGGAAATTTTAAAGACCTACGTGAGTAT +GTCTTTAAGAATCGGGATGGCTTTCTCAGTGTTTACCAAACTTATACAGCTGTTAATTTA +CCTAGAGGATTACCTATTGGCTTTTCAGTTTTGAGGCCAATTCTCAAACTGCCCTTTGGA +ATTAACATTACATCTTATAGAGTTGTTATGGCTATGTTTAGCCAA--------------- +---------ACTACTTCTAATTTCCTACCAGAAAGTGCTGCTTATTATGTTGGTAATTTA +AAATACACCACTTTCATGCTTAGTTTTAATGAAAATGGGACTATTACCAATGCTATTGAT +TGTGCTCAAAACCCACTTGCTGAACTAAAATGCACCATTAAAAATTTCAATGTCAGCAAG +GGAATCTACCAAACATCTAACTTCAGAGTTTCGCCAACTCAGGAAGTTATTAGATTCCCA +AACATTACAAATCGTTGTCCTTTTGACAAAGTTTTTAATGCTACACGCTTTCCTAATGTG +TATGCGTGGGAGAGAACTAAAATTTCTGATTGTGTTGCTGACTACACTGTTCTCTACAAC +TCAACTTCTTTCTCAACTTTTAAGTGCTATGGAGTTTCTCCTTCTAAGTTGATTGATTTA +TGCTTTACAAGTGTGTATGCTGACACATTCTTGATAAGATCTTCAGAAGTAAGACAAGTT +GCACCGGGTGAAACTGGTGTCATTGCTGACTATAATTACAAGCTGCCTGATGATTTTACT +GGTTGCGTAATAGCCTGGAATACTGCAAAGCAGGATCAAGGT---------------CAG +TATTACTACAGGTCTCACCGGAAGACTAAACTTAAACCTTTTGAGAGAGACCTTTCTTCT +GAT------------------------GAAAATGGTGTACGT------------------ +ACTCTTAGTACTTACGACTTCTACCCTAGTGTGCCGGTTGCTTATCAGGCTACTAGGGTG +GTTGTACTGTCATTTGAACTACTAAACGCACCTGCAACAGTTTGTGGACCTAAATTATCC +ACACAACTTGTTAAGAACCAGTGTGTCAATTTTAATTTTAATGGACTCAAAGGTACTGGT +GTTTTGACTGAATCATCAAAGAGATTTCAGTCATTTCAACAATTTGGTCGTGACACGTCT +GATTTTACTGACTCCGTGCGTGACCCACAAACATTAGAAATACTTGACATTTCACCATGC +TCTTTTGGTGGTGTTAGTGTAATTACACCAGGAACAAATGCTTCTTCTGAAGTGGCTGTT +CTTTATCAAGATGTTAACTGTACTGACGTGCCAGCAGCAATTCATGCAGATCAACTAACA +CCAGCTTGGCGTGTTTATTCAACTGGAACAAATGTTTTCCAAACACAGGCTGGCTGTCTT +ATAGGAGCTGAACATGTTAATGCTTCGTATGAGTGTGACATCCCTATTGGTGCTGGCATT +TGTGCTAGCTACCATACAGCTTCT------------ACTTTACGTAGTGTAGGTCAGAAA +TCCATTGTGGCTTACACTATGTCTTTGGGTGCAGAAAATTCTATTGCTTATGCTAATAAT +TCAATTGCCATACCTACAAATTTTTCAATCAGTGTCACTACTGAAGTGATGCCTGTTTCA +ATGGCTAAAACATCTGTAGATTGTACAATGTACATCTGCGGTGATTCTTTGGAGTGCAGC +AACCTACTCTTGCAGTATGGAAGTTTCTGCACACAACTAAATCGTGCACTCTCAGGTATT +GCTATTGAACAAGACAAGAACACTCAAGAAGTTTTTGCCCAAGTTAAACAAATGTATAAG +ACACCAGCCATAAAAGATTTTGGCGGTTTCAATTTTTCACAGATATTGCCTGACCCTTCA +AAGCCAACAAAGAGATCATTTATCGAAGATTTACTCTTCAACAAGGTGACTCTTGCTGAT +GCCGGCTTTATGAAACAATACGGCGAATGCCTAGGCGATATTAGTGCTAGAGACCTCATT +TGTGCTCAGAAGTTTAATGGACTTACTGTCCTACCACCACTGCTCACAGATGAAATGATT +GCTGCGTACACTGCTGCCCTTGTCAGTGGTACTGCTACTGCTGGCTGGACGTTCGGTGCA +GGATCTGCTCTTCAAATACCCTTTGCTATGCAAATGGCATATAGGTTTAATGGCATTGGA +GTTACCCAAAATGTTCTCTATGAGAACCAAAAACAGATTGCCAACCAATTCAACAAGGCA +ATCAGTCAAATTCAAGAATCACTTACGACAACATCAACTGCATTGGGCAAGCTGCAAGAC +GTTGTCAATCAGAATGCTCAAGCATTAAATACACTTGTTAAACAACTTAGCTCCAATTTT +GGTGCAATTTCAAGTGTGCTAAATGACATCCTGTCACGACTAGACAAAGTCGAGGCAGAG +GTACAAATTGACAGGTTGATCACAGGCAGATTACAAAGCCTTCAAACCTATGTAACACAA +CAACTAATCAGAGCTGCTGAAATAAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCT +GAGTGTGTTCTTGGACAATCAAAAAGAGTTGACTTCTGTGGGAAGGGCTATCATTTGATG +TCCTTCCCTCAAGCTGCTCCACATGGTGTCGTCTTCCTACATGTTACTTATGTTCCATCG +CAGGAAAGAAACTTCACTACTGCTCCAGCGATTTGTCATGAAGGCAAAGCATACTTTCCT +CGTGAAGGTGTCTTTGTATCTAATGGCACTTCTTGGTTTATCACACAGAGGAACTTCTAT +TCACCACAGATAATTACAACAGACAATACATTTGTCGCTGGAAGTTGTGATGTCGTAATT +GGCATCATCAACAATACAGTTTATGATCCTCTGCAACCTGAGCTTGACTCATTTAAGGAA +GAGCTGGACAAGTACTTTAAAAATCATACATCACCAGATGTTGATCTCGGCGACATTTCA +GGCATTAATGCTTCTGTCGTCAACATTCAGAAAGAAATTGACCGCCTCAATGAGGTTGCC +AAAAACCTAAATGAATCACTCATTGACCTCCAAGAACTTGGAAAATATGAGCAATACATC +AAGTGGCCTTGGTATGTTTGGCTCGGCTTTATTGCTGGCCTAATTGCCATCGTCATGGTT +ACAATCTTGCTTTGTTGCATGACCAGCTGTTGCAGTTGCCTCAAGGGCGCATGCTCTTGC +GGTTCTTGCTGCAAATTTGATGAGGACGACTCTGAGCCTGTGCTCAAAGGAGTAAAATTA +CACTACACA +>MG772933.1 +------ATGTTGTTTTTCTTGTTTCTTCAGTTCGCCTTAGTAAACTCCCAG--------- +---TGTGTTAACTTGACAGGCAGAACCCCACTCAATCCC---AATTATACTAATTCTTCA +CAAAGAGGTGTTTATTACCCTGACACAATTTATAGATCAGACACACTTGTGCTCAGCCAG +GGTTATTTTCTTCCATTTTATTCTAATGTTAGCTGGTATTACTCATTA---ACAACCAAC +AATGCTGCCACAAAGAGGACTGATAATCCTATATTAGATTTCAAGGACGGCATATACTTT +GCTGCCACTGAACACTCAAATATTATCAGGGGCTGGATCTTTGGAACAACTCTTGACAAC +ACTTCTCAATCTCTCTTGATAGTTAACAACGCAACGAATGTTATTATCAAGGTTTGTAAT +TTTGATTTTTGTTATGATCCCTACCTTAGTGGTTACTATCAT---AACAACAAAACATGG +AGCATCAGAGAATTTGCTGTCTATTCTTCTTATGCTAATTGTACTTTTGAGTATGTTTCG +AAATCCTTTATGTTGAACATTTCTGGTAATGGTGGTCTGTTCAACACTCTTAGAGAGTTT +GTTTTCAGAAATGTCGATGGGCATTTCAAGATTTACTCAAAGTTTACACCAGTAAATTTA +AATCGTGGCTTGCCTACTGGTCTCTCAGTGCTTCAGCCATTGGTTGAATTACCAGTTAGC +ATAAATATTACTAAATTCAGAACACTCCTCACTATTCATAGA---------------GGA +GACCCTATGCCTAATAACGGCTGGACTGCTTTTTCAGCTGCTTATTTCGTGGGCTATCTT +AAACCACGTACCTTTATGCTGAAATATAATGAGAATGGCACCATTACTGATGCTGTTGAT +TGTGCACTTGACCCTCTTTCGGAGACAAAGTGTACGTTAAAATCTCTTACTGTCCAAAAG +GGCATCTATCAGACTTCTAACTTCCGAGTGCAACCCACTCAGTCTGTAGTTAGATTTCCT +AATATTACCAATGTGTGTCCATTTCACAAGGTTTTTAATGCCACGAGGTTTCCTTCCGTC +TATGCGTGGGAAAGAACTAAAATTTCTGATTGCATTGCAGATTACACTGTTTTCTACAAT +TCAACTTCTTTTTCTACTTTTAAATGTTATGGTGTTTCACCTTCTAAATTGATTGATTTG +TGCTTTACGAGTGTGTATGCTGATACATTTCTCATAAGATTCTCAGAAGTCAGACAGGTG +GCACCAGGACAAACTGGTGTCATTGCTGACTATAATTATAAATTACCTGATGATTTTACA +GGTTGTGTCATAGCTTGGAACACTGCCAAACAGGATGTAGGT---------------AAT +TATTTCTACAGGTCTCATCGTTCTACCAAATTGAAACCATTTGAAAGAGATCTTTCCTCA +GAC------------------------GAGAATGGTGTCCGT------------------ +ACACTTAGTACTTATGACTTCAACCCTAATGTACCACTTGAATACCAAGCTACAAGGGTT +GTTGTTTTGTCATTTGAGCTTCTAAATGCACCAGCTACAGTTTGTGGACCAAAACTATCC +ACACAACTAGTAAAAAATCAGTGCGTTAATTTCAACTTTAACGGACTCAAGGGCACTGGT +GTCTTGACTGATTCTTCCAAGAGGTTTCAGTCATTCCAACAATTTGGTAAAGATGCGTCT +GACTTTATTGATTCAGTACGTGATCCTCAAACACTTGAGATACTTGACATTACACCTTGC +TCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAACACTTCTTTAGAGGTGGCTGTT +CTTTACCAAGATGTTAACTGCACTGATGTACCAACTACTATACATGCAGACCAACTAACA +CCTGCTTGGCGTATTTATGCTACTGGCACTAATGTGTTTCAAACTCAAGCAGGCTGTCTT +ATAGGAGCTGAACATGTCAATGCTTCTTATGAGTGTGACATCCCAATTGGTGCTGGTATT +TGTGCTAGCTACCATACGGCTTCT------------ATATTACGCAGTACAAGCCAGAAA +GCTATTGTGGCTTATACTATGTCCCTTGGTGCTGAGAACTCTATCGCTTATGCTAACAAT +TCTATAGCCATACCTACAAATTTTTCAATTAGTGTTACCACTGAAGTTATGCCTGTATCA +ATGGCTAAAACTTCTGTAGATTGTACTATGTATATCTGTGGTGACTCTATAGAGTGTAGC +AACTTGTTGTTACAATATGGCAGTTTTTGCACACAACTAAATCGTGCTTTAAGTGGGATT +GCTATTGAGCAAGACAAGAACACCCAAGAGGTTTTTGCTCAAGTTAAGCAAATCTATAAA +ACACCACCTATTAAGGATTTTGGTGGTTTTAATTTTTCACAGATACTACCTGACCCATCT +AAACCCAGCAAGAGGTCGTTTATTGAAGACTTACTCTTCAATAAAGTCACTCTTGCTGAT +GCCGGTTTTATCAAACAGTACGGTGATTGTTTGGGTGGTATTTCTGCTAGAGATTTGATT +TGTGCTCAAAAGTTCAATGGACTTACTGTCTTACCACCATTGCTCACAGATGAAATGATC +GCTGCTTATACAGCTGCATTAATTAGCGGCACTGCCACTGCTGGATGGACCTTTGGTGCT +GGTGCTGCTCTTCAAATACCATTTGCCATGCAAATGGCTTATAGGTTTAATGGAATTGGA +GTTACTCAGAATGTTCTCTATGAGAATCAGAAATTAATAGCCAATCAGTTTAATAGTGCT +ATTGGAAAAATCCAAGAGTCTTTGACATCTACAGCTAGTGCACTTGGAAAATTGCAGGAT +GTTGTTAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGTTCCAATTTT +GGTGCAATTTCAAGCGTGTTGAATGACATTCTTTCACGCCTTGACAAAGTCGAGGCTGAG +GTTCAGATTGATAGGTTGATCACAGGTAGACTTCAGAGTTTACAGACGTATGTGACTCAA +CAATTAATCAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCGACTAAAATGTCC +GAGTGTGTACTAGGACAATCTAAAAGAGTTGATTTTTGTGGAAAAGGTTATCACCTAATG +TCTTTTCCCCAGTCAGCGCCTCATGGTGTTGTCTTCTTACATGTGACTTACATTCCTTCG +CAAGAAAAGAACTTCACAACAGCTCCTGCCATTTGCCATGAAGGTAAAGCACACTTCCCA +CGTGAAGGTGTTTTCGTTTCGAATGGCACACACTGGTTTGTAACACAAAGGAACTTTTAT +GAACCTAAAATTATAACCACTGACAATACATTTGTCTCTGGTAACTGTGATGTTGTAATT +GGAATTATCAACAACACAGTTTATGATCCTTTACAACCAGAACTTGATTCATTTAAGGAG +GAGTTAGATAAATATTTTAAAAATCATACATCACCTGATATTGATCTTGGTGATATTTCT +GGCATTAATGCTTCTGTTGTCAATATTCAAAAGGAAATTGACCGCCTCAATGAGGTTGCC +AGAAATTTAAATGAATCACTCATTGATCTCCAAGAACTTGGAAAATATGAGCAATATATC +AAATGGCCATGGTATGTTTGGCTCGGCTTCATTGCTGGACTCATTGCTATAGTCATGGTT +ACAATCCTGCTTTGTTGCATGACAAGTTGTTGCAGTTGTCTCAAGGGCTGTTGTTCTTGC +GGATCTTGCTGTAAATTTGATGAAGACGACTCTGAGCCTGTGCTCAAAGGAGTCAAATTA +CATTACACA +>MG772934.1 +------ATGTTGTTTTTCTTGTTTCTTCAGTTCGCCTTAGTAAACTCCCAG--------- +---TGT---GATTTGACAGGTAGAACTCCACTCAATCCC---AATTATACTAATTCTTCA +CAAAGAGGTGTTTATTACCCTGACACAATTTATAGATCTGACACACTAGTGCTTAGTCAG +GGTTATTTTCTTCCATTTTATTCCAATGTTAGCTGGTATTATTCATTA---ACAACCAAC +AATGCTGCCACAAAGAGGACTGACAACCCTATATTAGATTTCAAGGACGGCATATATTTT +GCTGCCACTGAACACTCAAATATTGTCAGGGGCTGGATCTTTGGAACAACTCTTGACAAC +ACTTCTCAATCTCTCTTGATAGTTAACAATGCAACGAATGTTATTATCAAGGTTTGTAAT +TTTGACTTTTGTTACGATCCCTACCTTAGTGGTTACTATCAT---AACAACAAAACCTGG +AGCATCAGAGAATTTGCTGTCTATTCCTTTTATGCTAATTGTACTTTTGAGTATGTCTCA +AAATCCTTTATGTTGAACATTTCTGGTAATGGTGGTCTGTTCAACACTCTCAGAGAGTTT +GTTTTTAGAAATGTCGATGGGCATTTCAAGATTTACTCAAAATTTACACCAGTAAACTTA +AATCGTGGCTTACCTACTGGTCTTTCAGTACTTCAACCGTTGGTTGAATTACCAGTTAGC +ATAAATATTACTAAATTTAGAACACTCCTCACTATTCATAGA---------------GGA +GACCCTATGTCTAATAATGGCTGGACTGCTTTTTCAGCTGCTTATTTCGTGGGCTATCTT +AAACCACGTACCTTTATGCTGAAATATAATGAGAATGGCACCATTACTGATGCTGTTGAT +TGTGCACTTGACCCTCTTTCGGAGACAAAGTGTACGTTAAAATCTCTTAGTGTTCAAAAG +GGTATCTATCAGACTTCTAACTTTCGAGTGCAACCCACTCAGTCTATAGTTAGATTTCCT +AATATTACCAATGTGTGTCCATTTCACAAGGTTTTTAATGCCACAAGGTTTCCTTCTGTT +TACGCGTGGGAAAGAACTAAAATTTCTGATTGCATTGCAGACTACACTGTTTTCTACAAT +TCAACTTCTTTTTCTACTTTCAAATGTTATGGTGTTTCACCCTCTAAATTGATTGATTTG +TGTTTTACGAGTGTGTATGCTGATACATTTCTCATAAGATTTTCAGAGGTCAGACAAGTG +GCACCAGGACAGACTGGTGTTATTGCTGACTACAATTATAAATTACCTGATGATTTTACA +GGTTGTGTCATAGCCTGGAACACAGCAAAACAGGACACAGGT---------------CAT +TATTTCTATAGGTCTCATCGCTCTACCAAATTAAAACCATTTGAAAGAGACCTTTCTTCA +GAT------------------------GAGAATGGTGTCCGT------------------ +ACACTTAGTACTTATGACTTTAACCCTAATGTACCGCTTGAATATCAAGCTACAAGGGTT +GTTGTCTTGTCATTTGAGCTTCTAAATGCACCAGCTACAGTTTGTGGACCAAAATTATCC +ACACAACTAGTAAAAAATCAGTGCGTTAATTTCAATTTTAACGGACTCAAGGGCACTGGT +GTCTTGACTGATTCTTCTAAGAGGTTTCAGTCATTCCAACAATTTGGTAAAGATGCGTCT +GACTTTATTGATTCAGTACGTGATCCTCAAACACTTGAGATACTTGACATTACACCTTGC +TCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAACACTTCTTCAGAGGTGGCTGTT +CTTTACCAAGATGTTAACTGCACTGATGTACCAACTACTATACATGCAGACCAATTAACA +CCTGCTTGGCGCATTTATGCTATTGGCACTAGTGTGTTTCAAACTCAAGCAGGCTGTCTT +ATAGGAGCTGAACATGTCAATGCTTCTTATGAGTGTGACATCCCAATTGGTGCTGGTATT +TGTGCTAGCTACCATACGGCTTCT------------ATATTACGTAGTACAGGCCAGAAA +GCTATTGTGGCTTATACTATGTCCCTTGGTGCTGAGAACTCTATTGCTTATGCTAACAAT +TCTATAGCCATACCTACAAATTTTTCAATCAGTGTCACCACTGAAGTTATGCCTGTATCA +ATGGCTAAAACTTCTGTAGATTGCACTATGTATATCTGCGGTGACTCTATAGAGTGTAGC +AACTTGTTGTTACAATATGGCAGTTTTTGCACACAACTAAATCGTGCTTTAAGTGGAATT +GCTATTGAACAAGACAAGAACACTCAAGAGGTTTTTGCTCAAGTCAAGCAAATCTATAAA +ACACCACCTATTAAGGATTTTGGTGGTTTCAATTTTTCACAGATATTACCCGATCCTTCT +AAACCCAGCAAGAGGTCGTTTATTGAAGATTTACTCTTCAATAAAGTCACTCTTGCTGAT +GCTGGTTTTATAAAACAGTACGGTGATTGTTTGGGTGATATTTCTGCTAGAGATTTGATT +TGTGCTCAAAAGTTCAATGGACTCACTGTCTTACCACCATTGCTCACAGATGAAATGATC +GCTGCTTATACAGCTGCATTAATTAGCGGCACTGCCACTGCTGGATGGACCTTTGGTGCT +GGTGCTGCTCTTCAAATACCATTTGCCATGCAAATGGCTTATAGATTTAATGGAATTGGA +GTTACTCAGAATGTTCTCTATGAGAATCAGAAATTAATAGCCAATCAGTTTAATAGTGCT +ATTGGAAAAATCCAAGAGTCTTTGACATCTACAGCTAGTGCACTTGGAAAATTGCAGGAT +GTTGTTAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGTTCCAATTTT +GGTGCAATTTCAAGCGTGTTGAATGATATTCTTTCACGCCTTGACAAAGTCGAGGCTGAG +GTTCAGATTGATAGGTTGATCACAGGTAGACTTCAGAGTTTACAGACGTATGTGACTCAA +CAATTAATCAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCGACTAAAATGTCC +GAGTGTGTACTAGGACAATCTAAAAGAGTTGATTTTTGTGGAAAAGGTTATCACCTAATG +TCTTTTCCCCAGTCAGCGCCTCATGGTGTTGTTTTCTTACATGTGACTTACATTCCTTCG +CAAGAAAAGAACTTCACAACAGCTCCTGCCATTTGTCATGAAGGTAAAGCACACTTCCCA +CGTGAAGGTGTTTTCGTTTCGAATGGCACACACTGGTTTGTAACACAAAGGAACTTTTAC +GAACCCCAAATTATAACCACTGACAATACATTTGTCTCCGGTAACTGTGATGTTGTAATT +GGAATTATCAATAACACAGTTTATGATCCTTTACAACCAGAACTTGATTCATTTAAGGAG +GAGTTAGATAAATATTTTAAAAATCATACATCACCTGATATTGATCTTGGTGATATTTCT +GGCATTAATGCTTCTGTTGTCAATATTCAAAAGGAAATTGACCGCCTCAATGAGGTTGCC +AGAAATTTAAATGAATCACTCATTGATCTCCAAGAACTTGGAAAATATGAGCACTATATC +AAATGGCCATGGTATGTTTGGCTCGGCTTCATTGCTGGACTCATTGCTATAGTCATGGTT +ACAATCCTGCTTTGTTGCATGACAAGTTGTTGCAGTTGTCTCAAGGGCTGTTGTTCTTGC +GGATTTTGCTGTAAATTTGATGAAGATGACTCTGAGCCTGTGCTCAAAGGAGTCAAATTA +CATTACACG +>MN988668.1 +------ATGTTTGTTTTTCTTGTTTTA---TTGCCACTAGTCTCTAGTCAG--------- +---TGTGTTAATCTTACAACCAGAACTCAATTACCCCCT---GCATACACTAATTCTTTC +ACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAG +GACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGG +ACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTT +GCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCG +AAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAA +TTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGG +ATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCT +CAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTT +GTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTA +GTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGT +ATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGT +GAT------TCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTT +CAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGAC +TGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAA +GGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCT +AATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTT +TATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAAT +TCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTC +TGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATC +GCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACA +GGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAAT +TACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACT +GAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTT +CCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTA +GTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCT +ACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGT +GTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCT +GACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGT +TCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTT +CTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACT +CCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTA +ATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATA +TGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAA +TCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAAC +TCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCT +ATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGC +AATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATA +GCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAA +ACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCA +AAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGAT +GCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATT +TGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATT +GCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCA +GGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGA +GTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCT +ATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGAT +GTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTT +GGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAA +GTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAA +CAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCA +GAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATG +TCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCA +CAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCT +CGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTAT +GAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATA +GGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAG +GAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCT +GGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCC +AAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATA +AAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTG +ACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGT +GGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTA +CATTACACA + + +(MN988668.1:0.192870176,(MG772933.1:0.014425893,MG772934.1:0.018908860)1.000:0.125259425,(AY274119.3:0.178327368,DQ071615.1:0.133462746)1.000:0.101595714); + From 2ef403fb5e2f473faf0de98f08f191a7ec6095b4 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Tue, 21 Apr 2020 07:58:55 -0400 Subject: [PATCH 13/13] Adding absrel tests --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59f11e48b..df17c5fdc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -611,12 +611,12 @@ add_test (MEME HYPHYMP tests/hbltests/libv3/MEME.wbf) add_test (MEME-PARTITIONED HYPHYMP tests/hbltests/libv3/MEME.wbf) add_test (BUSTED HYPHYMP tests/hbltests/libv3/BUSTED.wbf) add_test (BUSTED-SRV HYPHYMP tests/hbltests/libv3/BUSTED-SRV.wbf) -add_test (ABSREL HYPHYMP tests/hbltests/libv3/aBSREL.wbf) -add_test (ABSREL-MH HYPHYMP tests/hbltests/libv3/aBSREL-MH.wbf) add_test (RELAX HYPHYMP tests/hbltests/libv3/RELAX.wbf) add_test (FUBAR HYPHYMP tests/hbltests/libv3/FUBAR.wbf) add_test (BGM HYPHYMP tests/hbltests/libv3/BGM.wbf) add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf) add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf) add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf) +add_test (ABSREL HYPHYMP tests/hbltests/libv3/aBSREL.wbf) +add_test (ABSREL-MH HYPHYMP tests/hbltests/libv3/aBSREL-MH.wbf)