From 96a737f72fa716a900324b5238ca4742f6cd0780 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Thu, 22 Oct 2020 06:27:51 -0400 Subject: [PATCH 1/5] Fixing rerooting issues --- CMakeLists.txt | 2 +- res/TemplateBatchFiles/ConvertDataFile.bf | 3 +- .../modules/shared-load-file.bf | 3 +- .../libv3/models/codon/MG_REV.bf | 8 ++-- .../libv3/tasks/estimators.bf | 16 ++++---- res/TemplateBatchFiles/libv3/tasks/trees.bf | 4 ++ src/core/dataset_filter.cpp | 16 +++++++- src/core/matrix.cpp | 16 ++++---- src/core/topology.cpp | 7 +++- src/core/tree_evaluator.cpp | 20 ++++++---- tests/hbltests/libv3/FMM.wbf | 40 +++++++++++++++++++ 11 files changed, 103 insertions(+), 32 deletions(-) create mode 100644 tests/hbltests/libv3/FMM.wbf diff --git a/CMakeLists.txt b/CMakeLists.txt index 64d6d4a07..53a7f0fa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -588,6 +588,6 @@ add_test (BGM HYPHYMP tests/hbltests/libv3/BGM.wbf) add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf) add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf) add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf) -add_test (NAME ABSREL COMMAND HYPHYMP tests/hbltests/libv3/ABSREL.wbf) +add_test (ABSREL HYPHYMP tests/hbltests/libv3/ABSREL.wbf) #add_test (NAME ABSREL-MH COMMAND HYPHYMP tests/hbltests/libv3/ABSREL-MH.wbf) diff --git a/res/TemplateBatchFiles/ConvertDataFile.bf b/res/TemplateBatchFiles/ConvertDataFile.bf index e022e5124..a7d2d6a73 100644 --- a/res/TemplateBatchFiles/ConvertDataFile.bf +++ b/res/TemplateBatchFiles/ConvertDataFile.bf @@ -32,7 +32,8 @@ ChoiceList (DATA_FILE_PRINT_FORMAT,"Output Format",1,SKIP_NONE, /* 8 */ "CSV", "Comma separated characters", /* 9 */ "FASTA sequential","FASTA Sequential Format.", /* 10 */ "FASTA interleaved","FASTA Interleaved Format.", - /* 11 */ "PAML compatible", "PAML Compatible PHYLIP-like format"); + /* 11 */ "PAML compatible", "PAML Compatible PHYLIP-like format", + /* 12 */ "STOCKHOLM", "STOCKHOLM format"); if (DATA_FILE_PRINT_FORMAT<0) { 
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index dcfc00749..f88b00ad7 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -297,7 +297,6 @@ function doGTR (prefix) { trees, gtr_results); - KeywordArgument ("kill-zero-lengths", "Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", "Yes"); kill0 = io.SelectAnOption ( @@ -414,6 +413,8 @@ function doPartitionedMG (prefix, keep_lf) { utility.getGlobalValue("terms.run_options.partitioned_omega"): selected_branches, utility.getGlobalValue("terms.run_options.retain_lf_object"): keep_lf }, gtr_results); + + io.ReportProgressMessageMD("`prefix`", "codon-fit", "* " + selection.io.report_fit (partitioned_mg_results, 0, (^"`prefix`.codon_data_info")[utility.getGlobalValue ("terms.data.sample_size")])); diff --git a/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf b/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf index f89ceb3ce..d1d77753d 100644 --- a/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf +++ b/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf @@ -119,6 +119,7 @@ function models.codon.MG_REV._DefineQ(mg_rev, namespace) { */ function models.codon.MG_REV.set_branch_length(model, value, parameter) { + if (model[terms.model.type] == terms.global) { // boost numeric branch length values by a factor of 3 @@ -162,17 +163,18 @@ function models.codon.MG_REV.set_branch_length(model, value, parameter) { } //fprintf (stdout, models.codon.MG_REV.set_branch_length.alpha.p, "\n"); + + parameters.SetConstraint(models.codon.MG_REV.set_branch_length.beta.p, "(" + 3 * value[terms.branch_length] + " - " + models.codon.MG_REV.set_branch_length.alpha.p + "*(" + model[terms.parameters.synonymous_rate] + "))/(" + 
model[terms.parameters.nonsynonymous_rate] + ")", ""); return 1; } else { - assert(0, "TBA in models.codon.MG_REV.set_branch_length"); + assert(0, "TBD in models.codon.MG_REV.set_branch_length"); } } else { models.codon.MG_REV.set_branch_length.lp = 0; - - + if (parameters.IsIndependent(models.codon.MG_REV.set_branch_length.alpha.p)) { Eval(models.codon.MG_REV.set_branch_length.alpha.p + ":=(" + value[terms.model.branch_length_scaler] + ")*" + value[terms.branch_length]); models.codon.MG_REV.set_branch_length.lp += 1; diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 636e5fd54..e3f9513d1 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -467,13 +467,15 @@ function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions } } } - + estimators.ApplyExistingEstimatesToTree.constraint_count += estimators.applyBranchLength(_tree_name, _branch_name, model_descriptions[estimators.ApplyExistingEstimatesToTree.map[_branch_name]], _set_branch_length_to); } else { if (Type(_existing_estimate) != "Unknown") { warning.log ("Incorrect type for the initial values object of for branch '" + _branch_name + "' : " + _existing_estimate); } } + } else { + //warning.log ("No initial branch length object of for branch '" + _branch_name); } } @@ -532,7 +534,7 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip if (Type (branch_length_conditions) == "AssociativeList") { if (Abs(branch_length_conditions) > estimators.ApplyExistingEstimates.i) { _application_type = branch_length_conditions[estimators.ApplyExistingEstimates.i]; - } + } } estimators.ApplyExistingEstimates.df_correction += estimators.ApplyExistingEstimatesToTree ((estimators.ApplyExistingEstimates.lfInfo[terms.fit.trees])[estimators.ApplyExistingEstimates.i], @@ -890,7 +892,6 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, 
model_template, init Optimize (mles, likelihoodFunction); } - if (Type(initial_values) == "AssociativeList") { utility.ToggleEnvVariable("USE_LAST_RESULTS", None); } @@ -1094,17 +1095,14 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op component_tree = lf_components[2 * i + 1]; ClearConstraints( * component_tree); branch_map = (option[utility.getGlobalValue("terms.run_options.partitioned_omega")])[i]; - - + component_branches = BranchName( * component_tree, -1); for (j = 0; j < Columns(component_branches) - 1; j += 1) { /** -1 in the upper bound because we don't want to count the root node */ - node_name = (component_branches[j]); - - + node_name = (component_branches[j]); ExecuteCommands(apply_constraint); } } @@ -1113,6 +1111,7 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op LikelihoodFunction likelihoodFunction = (lf_components); GetString (params, likelihoodFunction,-1); + utility.ToggleEnvVariable ("PARAMETER_GROUPING", {"0" : params["Global Independent"]}); if (utility.Has (option,utility.getGlobalValue("terms.run_options.apply_user_constraints"),"String")) { @@ -1130,6 +1129,7 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op parameters.SetRange (_value_,terms.range_clamp_locals); ');*/ + //io.SpoolLF ("likelihoodFunction", "/tmp/hyphy-spool", "cfit"); //Export (lfe, likelihoodFunction); //console.log (lfe); diff --git a/res/TemplateBatchFiles/libv3/tasks/trees.bf b/res/TemplateBatchFiles/libv3/tasks/trees.bf index affddf404..31f3deb5e 100644 --- a/res/TemplateBatchFiles/libv3/tasks/trees.bf +++ b/res/TemplateBatchFiles/libv3/tasks/trees.bf @@ -85,7 +85,11 @@ LoadFunctionLibrary("distances.bf"); lfunction trees.GetTreeString._sanitize(string) { if (utility.GetEnvVariable("_DO_TREE_REBALANCE_")) { + console.log ("BEFORE REBALANCE\n"); + console.log (string); string = RerootTree(string, 0); + console.log ("AFTER REBALANCE\n"); + console.log (string); } if 
(utility.GetEnvVariable("_KEEP_I_LABELS_")) { diff --git a/src/core/dataset_filter.cpp b/src/core/dataset_filter.cpp index 1bbc2a920..b2258413a 100644 --- a/src/core/dataset_filter.cpp +++ b/src/core/dataset_filter.cpp @@ -1919,7 +1919,8 @@ void _DataSetFilter::internalToStr (FILE * file ,_StringBuffer * string_buffe kFormatCharacterList = 8, kFormatFASTASequential = 9, kFormatFASTAInterleaved = 10, - kFormatPAML = 11 + kFormatPAML = 11, + kFormatSTOCKHOLM = 12 } datafile_format = kFormatMEGASequential; auto trim_to_10 = [] (const _String& seq_name) -> _String const { @@ -2242,6 +2243,19 @@ void _DataSetFilter::internalToStr (FILE * file ,_StringBuffer * string_buffe } break; } + + case kFormatSTOCKHOLM: { + write_here << "# STOCKHOLM 1.0" << kStringFileWrapperNewLine; + for (unsigned long i = 0UL; i< sequence_count; i++) { + write_here << GetSequenceName(i) << " "; + for (unsigned long j = 1UL; j':'#'; diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index 2692c7ac1..067063b2b 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -4815,18 +4815,18 @@ void _Matrix::RowAndColumnMax (hyFloat& r, hyFloat &c, hyFloat * cache) { if (theIndex) { if (compressedIndex) { long from = 0L; - for (long r = 0; r < hDim; r++) { - for (long c = from; c < compressedIndex[r]; c++) { - hyFloat temp = theData[c]; + for (long row = 0; row < hDim; row++) { + for (long col = from; col < compressedIndex[row]; col++) { + hyFloat temp = theData[col]; if (temp > 0.0) { - rowMax[r] += temp; - colMax[compressedIndex[c+hDim]] += temp; + rowMax[row] += temp; + colMax[compressedIndex[col+hDim]] += temp; } else { - rowMax[r] -= temp; - colMax[compressedIndex[c+hDim]] -= temp; + rowMax[row] -= temp; + colMax[compressedIndex[col+hDim]] -= temp; } } - from = compressedIndex[r]; + from = compressedIndex[row]; } } else { for (long i = 0; i* iterator, long orig create a new root with >=2 children nodes - this node, and one more containing all other children (>=2 of them) */ + long count = 
0L, root_children_count = theRoot->get_num_nodes(); @@ -2726,7 +2727,7 @@ void _TreeTopology::RerootTreeInternalTraverser (node* iterator, long orig if (root_children_count > 2) { res<<')'; } - + PasteBranchLength (stash_originator,res, branch_length_mode, variable_ref); } } @@ -2743,6 +2744,8 @@ void _TreeTopology::PasteBranchLength (node* node, _StringBuffe } }; + //printf ("_TreeTopology::PasteBranchLength %d %g %s\n", mode, GetNodeName(node).get_str(), GetBranchLength(node)); + switch (mode) { case kTopologyBranchLengthExpectedSubs: { result << ':' << _String(GetBranchLength (node)*factor); @@ -2805,7 +2808,7 @@ HBLObjectRef _TreeTopology::RerootTree (HBLObjectRef new_root, HBLObjectRef cach (*res)<<'('; // opening tree ( RerootTreeInternalTraverser (reroot_at, reroot_at->get_child_num(),false,*res,settings,kTopologyBranchLengthUserLengths,-1,true); (*res)<<','; - SubTreeString (reroot_at, *res,settings,kTopologyBranchLengthUserLengths); + SubTreeString (reroot_at, *res,settings,false, kTopologyBranchLengthUserLengths); (*res)<<')'; } } diff --git a/src/core/tree_evaluator.cpp b/src/core/tree_evaluator.cpp index 0b1d316d8..2b893dbb6 100644 --- a/src/core/tree_evaluator.cpp +++ b/src/core/tree_evaluator.cpp @@ -482,10 +482,16 @@ inline void __ll_product_sum_loop_generic (hyFloat const* _hprestrict_ tMatrix, } template inline void __ll_loop_handle_scaling (hyFloat& sum, hyFloat* _hprestrict_ parentConditionals, hyFloat* _hprestrict_ scalingAdjustments, long& didScale, long parentCode, long siteCount, long siteID, long& localScalerChange, long siteFrequency) { + + /*if (sum == 0.) 
{ + fprintf (stderr, "THE SUM IS EXACTLY ZERO parent code %ld\n", parentCode); + }*/ + if (__builtin_expect(sum < _lfScalingFactorThreshold && sum > 0.0,0)) { hyFloat scaler = _computeBoostScaler(scalingAdjustments [parentCode*siteCount + siteID] * _lfScalerUpwards, sum, didScale); + //fprintf (stderr, "UP %ld (%ld) %lg\n", didScale, parentCode, scaler); /*if (likeFuncEvalCallCount == 15098 && siteID == 91) { fprintf (stderr, "UP %ld (%ld) %lg\n", didScale, parentCode, scaler); @@ -495,9 +501,9 @@ template inline void __ll_loop_handle_scaling (hyFloat& sum #pragma GCC unroll 4 for (long c = 0; c < D; c++) { parentConditionals [c] *= scaler; - /*if (likeFuncEvalCallCount == 15098 && siteID == 91) { - fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]); - }*/ + //if (likeFuncEvalCallCount == 15098 && siteID == 91) { + //fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]); + // } } if (siteFrequency == 1L) { @@ -523,9 +529,9 @@ template inline void __ll_loop_handle_scaling (hyFloat& sum #pragma GCC unroll 4 for (long c = 0; c < D; c++) { parentConditionals [c] *= scaler; - /*if (likeFuncEvalCallCount == 15098 && siteID == 91) { - fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]); - }*/ + //if (likeFuncEvalCallCount == 15098 && siteID == 91) { + // fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]); + // } } if (siteFrequency == 1L) { @@ -1062,7 +1068,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList #else __ll_product_sum_loop<61L> (tMatrix, childVector, parentConditionals, sum); #endif - + __ll_loop_handle_scaling<61L, true> (sum, parentConditionals, scalingAdjustments, didScale, parentCode, siteCount, siteID, localScalerChange, theFilter->theFrequencies.get (siteOrdering.list_data[siteID])); childVector += 61; diff --git a/tests/hbltests/libv3/FMM.wbf b/tests/hbltests/libv3/FMM.wbf new file mode 100644 index 000000000..5b823dcb8 --- /dev/null +++ b/tests/hbltests/libv3/FMM.wbf @@ -0,0 +1,40 @@ +GetString (version, HYPHY_VERSION, 
0); + +PRODUCE_OPTIMIZATION_LOG = 1; + +if (+version >= 2.4) { + LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"--code" : "Universal", "--alignment" : PATH_TO_CURRENT_BF + "data/CD2.nex", "--branches" : "GROUP1", "--srv" : "No", "--starting-points" : "5"}); +} else { + LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"0" : "Universal", "1" : PATH_TO_CURRENT_BF + "data/CD2.nex", "2" : "GROUP1", "3" : "Yes", "4" : "0.1"}); + +} +LoadFunctionLibrary ("shared.bf"); +LoadFunctionLibrary ("libv3/IOFunctions.bf"); + +fprintf ("logger.hbl", CLEAR_FILE, ^((fitter.results[terms.likelihood_function])+".trace")); + + +assert (check_value ( + ((fitter.json["fits"])["Standard MG94"])["Log Likelihood"], -3405.53, 0.001), "Incorrect log-likelihood for the 1H model"); + +assert (check_value ( + ((fitter.json["fits"])["MG94 with double instantaneous substitutions"])["Log Likelihood"], -3403.0265, 0.01), "Incorrect log-likelihood for the 2H model"); + +assert (check_value ( + ((fitter.json["fits"])["MG94 with double and triple instantaneous substitutions"])["Log Likelihood"], -3403.026, 0.01), "Incorrect log-likelihood for the 2H model"); + +assert (check_value ( + (((((fitter.json["fits"])["MG94 with double and triple instantaneous substitutions"])["Rate Distributions"])["parameters"])["rate at which 2 nucleotides are changed instantly within a single codon"]), 0.157, 0.05), "Incorrect 2H rate estimate for the 3H model"); + +assert (check_value ( + (((fitter.json["test results"])["Double-hit vs single-hit"])["p-value"]),0.0251, 0.01), "p-value for the 2H:1H test is incorrect"); + +assert (check_value ( + (+(fitter.json["Evidence Ratios"])["Three-hit"]), 187.000, 0.05), "Incorrect sum of ER for 3H"); + + + + + + + From 1b4591881bee3b04f12321beb75e2768823fcc2a Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Wed, 28 Oct 2020 21:42:25 -0400 Subject: [PATCH 2/5] 2.5.21 RC --- CMakeLists.txt | 1 + .../SelectionAnalyses/BUSTED.bf | 57 +- 
.../SelectionAnalyses/FEL.bf | 5 +- .../SelectionAnalyses/MEME.bf | 58 +- .../SelectionAnalyses/RELAX-SRV.bf | 888 ------------------ .../SelectionAnalyses/RELAX.bf | 335 +++++-- .../modules/shared-load-file.bf | 2 +- res/TemplateBatchFiles/libv3/all-terms.bf | 2 +- .../libv3/models/codon/BS_REL.bf | 36 +- .../libv3/models/model_functions.bf | 3 +- .../libv3/models/parameters.bf | 28 +- .../libv3/models/rate_variation.bf | 2 +- .../libv3/tasks/estimators.bf | 41 +- src/core/global_things.cpp | 2 +- src/core/include/variable.h | 9 +- src/core/likefunc.cpp | 275 ++++-- src/core/matrix.cpp | 30 +- src/core/variable.cpp | 1 + src/core/variablecontainer.cpp | 6 + 19 files changed, 683 insertions(+), 1098 deletions(-) delete mode 100644 res/TemplateBatchFiles/SelectionAnalyses/RELAX-SRV.bf diff --git a/CMakeLists.txt b/CMakeLists.txt index 53a7f0fa8..4ac5ad531 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -589,5 +589,6 @@ add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf) add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf) add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf) add_test (ABSREL HYPHYMP tests/hbltests/libv3/ABSREL.wbf) +add_test (FMM HYPHYMP tests/hbltests/libv3/FMM.wbf) #add_test (NAME ABSREL-MH COMMAND HYPHYMP tests/hbltests/libv3/ABSREL-MH.wbf) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf index abbd6456d..ec913d752 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf @@ -1,4 +1,4 @@ -RequireVersion ("2.5.12"); +RequireVersion ("2.5.21"); LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4 @@ -214,9 +214,7 @@ busted.model_generator = "models.codon.BS_REL.ModelDescription"; if (busted.do_srv) { - if (busted.do_bs_srv) { - busted.model_generator = "busted.model.with.GDD"; busted.model_generator = "models.codon.BS_REL_SRV.ModelDescription"; busted.rate_class_arguments = 
{{busted.synonymous_rate_classes__,busted.rate_classes__}}; } else { @@ -262,15 +260,20 @@ busted.background.bsrel_model = model.generic.DefineMixtureModel(busted.model_g models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, terms.nucleotideRate("[ACGT]","[ACGT]")); -/*if (busted.do_srv) { +if (busted.do_srv) { if (busted.do_bs_srv) { - models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, "Mean scaler variable for"); + for (description,var_id; in; (busted.background.bsrel_model[terms.parameters])[terms.global]) { + if (regexp.Find (description, terms.parameters.synonymous_rate + " for category")) { + var_id_2 = utility.GetByKey ((busted.test.bsrel_model[terms.parameters])[terms.global], description, "String"); + if (None != var_id_2) { + parameters.SetConstraint (var_id, var_id_2, terms.global); + } + } + } + models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), "SRV [0-9]+")); - } else { - models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, "GDD rate category"); - models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, utility.getGlobalValue("terms.mixture.mixture_aux_weight") + " for GDD category "); - } -}*/ + } +} busted.distribution = models.codon.BS_REL.ExtractMixtureDistribution(busted.test.bsrel_model); @@ -408,8 +411,8 @@ busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.tree terms.search_grid : busted.initial_grid, terms.search_restarts : busted.N.initial_guesses }); - - + + busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, { "retain-lf-object": TRUE, terms.run_options.optimization_settings : @@ -426,11 +429,6 @@ 
io.SpoolLFToPath(busted.full_model[terms.likelihood_function], io.PromptUserForF io.ReportProgressMessageMD("BUSTED", "main", "* " + selection.io.report_fit (busted.full_model, 9, busted.codon_data_info[terms.data.sample_size])); -if (busted.do_srv_hmm) { - busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); - io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); - -} io.ReportProgressMessageMD("BUSTED", "main", "* For *test* branches, the following rate distribution for branch-site combinations was inferred"); @@ -448,9 +446,7 @@ busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.r terms.json.proportion : busted.inferred_test_distribution [_index_][1]}") }; -if (busted.do_srv_hmm) { - busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda; -} + if (busted.has_background) { io.ReportProgressMessageMD("BUSTED", "main", "* For *background* branches, the following rate distribution for branch-site combinations was inferred"); @@ -464,6 +460,19 @@ if (busted.has_background) { if (busted.do_srv) { + if (busted.do_srv_hmm) { + busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); + busted.hmm_lambda.CI = parameters.GetProfileCI(((busted.full_model[terms.global])[terms.rate_variation.hmm_lambda])[terms.id], + busted.full_model[terms.likelihood_function], 0.95); + io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); + + io.ReportProgressMessageMD ("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda,8,4) + + " (95% profile CI " + Format ((busted.hmm_lambda.CI )[terms.lower_bound],8,4) + "-" + Format ((busted.hmm_lambda.CI )[terms.upper_bound],8,4) + ")"); + + busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda.CI; + } + + if (busted.do_bs_srv) { 
busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0 } else { @@ -560,10 +569,16 @@ if (!busted.run_test) { if (busted.do_srv) { if (busted.do_bs_srv) { - busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0 + busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0; } else { busted.srv_info = Transpose((rate_variation.extract_category_information(busted.test.bsrel_model))["VALUEINDEXORDER"][0])%0; } + if (busted.do_srv_hmm) { + busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); + io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); + busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda; + } + io.ReportProgressMessageMD("BUSTED", "main", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred"); selection.io.report_distribution (busted.srv_info); diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index bf13802ce..e93df085e 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -162,7 +162,10 @@ io.ReportProgressMessageMD ("fel", "codon-refit", "Improving branch lengths, nuc fel.final_partitioned_mg_results = estimators.FitMGREV (fel.filter_names, fel.trees, fel.codon_data_info [terms.code], { terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: fel.selected_branches, - terms.run_options.retain_lf_object: TRUE + terms.run_options.retain_lf_object: TRUE, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } }, fel.partitioned_mg_results); diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index a56fd8e42..97428fd8f 100644 --- 
a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -163,7 +163,10 @@ meme.final_partitioned_mg_results_bl = estimators.FitMGREV (meme.filter_names, m terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: meme.selected_branches, terms.run_options.retain_lf_object: FALSE, - terms.run_options.optimization_settings: {"OPTIMIZATION_PRECISION" : Max(meme.partitioned_mg_results[terms.fit.log_likelihood] * 0.00001, 0.1)} + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise", + "OPTIMIZATION_PRECISION" : Max(meme.partitioned_mg_results[terms.fit.log_likelihood] * 0.00001, 0.1) + } }, meme.partitioned_mg_results); @@ -177,7 +180,10 @@ io.ReportProgressMessageMD ("MEME", "codon-refit", "Improving branch lengths, nu meme.final_partitioned_mg_results = estimators.FitMGREV (meme.filter_names, meme.trees, meme.codon_data_info [terms.code], { terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: meme.selected_branches, - terms.run_options.retain_lf_object: TRUE + terms.run_options.retain_lf_object: TRUE, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } }, meme.partitioned_mg_results); @@ -526,11 +532,8 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa //io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME"); - - Optimize (results, ^lf_bsrel, { - "OPTIMIZATION_METHOD" : "nedler-mead", - "OPTIMIZATION_START_GRID" : - { + + initial_guess_grid = { "0" : { "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": ^"meme.site_omega_minus", @@ -565,15 +568,54 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": 0, "meme.site_mixture_weight": 0.01 + }, + "7" : { + "meme.site_beta_plus": ^"meme.site_beta_plus", + "meme.site_omega_minus": 
^"meme.site_omega_minus", + "meme.site_mixture_weight": 1.0 } - } + }; + + //before_opt = {"alpha" : ^"meme.site_alpha", "other" : initial_guess_grid}; + + + + ^"meme.site_alpha" = ^"meme.site_alpha"; + // SLKP 20201028 : without this, there are occasional initialization issues with + // the likelihood function below + + Optimize (results, ^lf_bsrel, { + "OPTIMIZATION_METHOD" : "nedler-mead", + "OPTIMIZATION_START_GRID" : initial_guess_grid }); + /*after_opt = { + "alpha" : Eval("meme.site_alpha"), + "meme.site_beta_plus": Eval("meme.site_beta_plus"), + "meme.site_omega_minus": Eval("meme.site_omega_minus"), + "meme.site_mixture_weight": Eval("meme.site_mixture_weight") + };*/ + alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; + /*init_grid_best = ^"LF_INITIAL_GRID_MAXIMUM"; + if (Abs((results[1][0]-init_grid_best)/results[1][0]) > 0.05) { + console.log (before_opt); + console.log (^"LF_INITIAL_GRID_MAXIMUM_VALUE"); + console.log (after_opt); + console.log ("" + results[1][0] + " vs " + init_grid_best); + + //fprintf (stdout, ^lf_bsrel, "\n"); + //utility.SetEnvVariable ("VERBOSITY_LEVEL",10); + + } + + + console.log ("--------");*/ + ancestral_info = ancestral.build (lf_bsrel,0,FALSE); diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX-SRV.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX-SRV.bf deleted file mode 100644 index e575918e5..000000000 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX-SRV.bf +++ /dev/null @@ -1,888 +0,0 @@ -RequireVersion ("2.31"); - -console.log("ERROR: THIS BATCHFILE WILL NOT RUN PROPERLY WITH UPDATED TERMS BASE. 
QUITTING."); -exit(); - - -RELAX.debug.reload = FALSE; - -LoadFunctionLibrary("GrabBag"); -LoadFunctionLibrary("CF3x4"); -LoadFunctionLibrary("TreeTools"); - - -// namespace 'utility' for convenience functions -LoadFunctionLibrary("libv3/UtilityFunctions.bf"); - -// namespace 'io' for interactive/datamonkey i/o functions -LoadFunctionLibrary("libv3/IOFunctions.bf"); - -LoadFunctionLibrary ("libv3/models/codon/MG_REV.bf"); - -// namespace 'estimators' for various estimator related functions -LoadFunctionLibrary("libv3/tasks/estimators.bf"); - -// namespace 'alignments' for various alignments related functions -LoadFunctionLibrary("libv3/tasks/alignments.bf"); - -// namespace 'tree' for various tree related functions -LoadFunctionLibrary("libv3/tasks/trees.bf"); - -// namespace 'estimators' for various estimator related functions -LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf"); - - -LoadFunctionLibrary("BranchSiteTemplate"); -/*------------------------------------------------------------------------------ - BranchSiteTemplate Defines - - BuildCodonFrequencies (obsF); - PopulateModelMatrix (ModelMatrixName&, EFV, synrateP, globalP, nonsynRateP); - -------------------------------------------------------------------------------*/ - -RELAX.settings = {"GTR" : 1, - "LocalMG" : 1, - "Estimate GTR" : 1}; - -RELAX.timers = {6,1}; - - -relax.taskTimerStart (0); - -RELAX.json = {"fits" : {}, - "timers" : {}, - "relaxation-test" : None - }; - -RELAX.test = "RELAX.test"; -RELAX.reference = "RELAX.reference"; -RELAX.unclassified = "RELAX.unclassified"; -RELAX.relaxation_parameter = "RELAX.K"; - -term.RELAX.k = "relaxation coefficient"; - -/*------------------------------------------------------------------------------*/ - - -io.DisplayAnalysisBanner ({"info" : "RELAX (a random effects test of selection relaxation) - uses a random effects branch-site model framework - to test whether a set of 'Test' branches evolves under relaxed - selection relative to a set of 
'Reference' branches (R), as measured - by the relaxation parameter (K).", - "version" : "1.0", - "reference" : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832", - "authors" : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group", - "contact" : "spond@temple.edu", - "requirements" : "in-frame codon alignment and a phylogenetic tree, with at least two groups of branches defined using the {} notation (one group can be defined as all unlabeled branches)" - } ); - -relax.codon_data_info = alignments.PromptForGeneticCodeAndAlignment ("RELAX.codon_data", "RELAX.codon_filter"); -relax.sample_size = relax.codon_data_info["sites"] * relax.codon_data_info["sequences"]; - -relax.name_mapping = relax.codon_data_info[utility.getGlobalValue("terms.json.name_mapping")]; - /** - will contain "mapped" -> "original" associations with sequence names; or null if no mapping was necessary - */ - -if (None == relax.name_mapping) { /** create a 1-1 mapping if nothing was done */ - relax.name_mapping = {}; - utility.ForEach (alignments.GetSequenceNames ("RELAX.codon_data"), "_value_", "`&relax.name_mapping`[_value_] = _value_"); -} - -relax.codon_data_info["json"] = relax.codon_data_info["file"] + ".RELAX.json"; -io.ReportProgressMessage ("RELAX", "Loaded an MSA with " + relax.codon_data_info["sequences"] + " sequences and " + relax.codon_data_info["sites"] + " codons from '" + relax.codon_data_info["file"] + "'"); - -relax.codon_lists = models.codon.MapCode (relax.codon_data_info["code"]); -_Genetic_Code = relax.codon_data_info["code"]; - /* - - hack to make PopulateModelMatrix work - - */ -relax.codon_frequencies = frequencies._aux.CF3x4(frequencies._aux.empirical.collect_data ("RELAX.codon_filter",3,1,1), -models.DNA.alphabet, relax.codon_lists["sense"], relax.codon_lists["stop"]); - - -relax.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions 
(relax.codon_data_info[utility.getGlobalValue("terms.json.partitions")], relax.name_mapping); -io.CheckAssertion("utility.Array1D (relax.partitions_and_trees) == 1", "RELAX only works on a single partition dataset"); - -relax.filter_specification = alignments.DefineFiltersForPartitions (relax.partitions_and_trees, "RELAX.codon_data" , "RELAX.codon_filter.", relax.codon_data_info); - -relax.trees = utility.Map (relax.partitions_and_trees, "_partition_", '_partition_["tree"]'); -relax.filter_names = utility.Map (relax.filter_specification, "_partition_", '_partition_["name"]'); - -relax.tree = relax.trees[0]; - -utility.SetEnvVariable ("VERBOSITY_LEVEL", 1); - -relax.group_count = utility.Array1D(relax.tree["model_list"]); - -assert (relax.group_count >= 2, "This analysis expects a partitioned tree with at least two groups defined (one can be the default group without a model)"); - -RELAX.groups = {}; - -utility.ForEach (relax.tree ["model_list"], "_model_", " - if (_model_ == '') { - RELAX.groups [RELAX.unclassified] = 'Unclassified branches'; - } else { - RELAX.groups [_model_] = _model_ + ' branches'; - } -"); - -RELAX.reference_set = io.SelectAnOption (RELAX.groups, "Choose the reference set of branches"); -RELAX.reference_branches = utility.Filter (relax.tree["model_map"], "_value_", "_value_ == RELAX.reference_set"); - -io.ReportProgressMessage ("RELAX", "Selected " + Abs (RELAX.reference_branches) + " branches as the test set: " + Join (",", utility.Keys (RELAX.reference_branches))); - -RELAX.has_unclassified = RELAX.groups / RELAX.unclassified; -RELAX.branch_to_partiton = {}; -utility.ForEachPair (relax.tree ["model_map"], "_key_", "_value_", "if (Abs (_value_)) { RELAX.branch_to_partiton[_key_&&1] = _value_; } else { RELAX.branch_to_partiton[_key_&&1] = RELAX.unclassified;}"); - -RELAX.json ["partition"] = relax.selected_branches; -RELAX.json ["tree"] = relax.tree ["string"]; - -RELAX.runModel = io.SelectAnOption ({ - {"All", "[Default] Fit descriptive 
models AND run the relax test (4 models)"} - {"Minimal", "Run only the RELAX test (2 models)"} - }, "Analysis type"); - -relax.taskTimerStart (1); - -RELAX.srvType = io.SelectAnOption ({ - {"Site-only", "[Default] Synonymous rates vary from site to site only (all branches are scaled uniformly)"} - {"Branch-site", "Synonymous rates vary from site to site and branch to branch"} - }, "Analysis type"); - - - -if (RELAX.settings["GTR"]) { - io.ReportProgressMessage ("RELAX", "Obtaining branch lengths under the GTR model"); - relax.gtr_results = estimators.FitGTR ("RELAX.codon_filter", relax.tree, None); - io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.gtr_results["LogL"]); - estimators.fixSubsetOfEstimates (relax.gtr_results, relax.gtr_results["global"]); -} else { - relax.gtr_results = None; -} - -/*if (RELAX.settings["LocalMG"] && RELAX.runModel == "All") { - io.ReportProgressMessage ("RELAX", "Obtaining omega and branch length estimates under the local MG94xGTR model"); - relax.local_mg_results = estimators.FitMGREV (relax.filter_names, relax.trees, relax.codon_data_info ["code"], {"model-type" : terms.local}, relax.gtr_results); - io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.local_mg_results["LogL"]); - estimators.fixSubsetOfEstimates (relax.local_mg_results, relax.local_mg_results["global"]); -} else { - relax.local_mg_results = relax.gtr_results; -}*/ - -parameters.DeclareGlobal ("relax.codon_branch_scaler", None); - -utility.SetEnvVariable ("VERBOSITY_LEVEL", 1); - - -if (!RELAX.debug.reload) { - io.ReportProgressMessageMD ("RELAX", "mg-rev", "Obtaining omega and branch length estimates under the partitioned MG94xGTR model"); - relax.mg_results = estimators.FitMGREV (relax.filter_names, relax.trees, relax.codon_data_info ["code"], - {"model-type" : terms.local, "partitioned-omega" : {"0" : RELAX.branch_to_partiton}, "proportional-branch-length-scaler": {"0" : "relax.codon_branch_scaler"}}, - relax.local_mg_results); - relax.taskTimerStop (1); - 
- io.ReportProgressMessageMD("RELAX", "mg-rev", "* Log(L) = " + Format(relax.mg_results["LogL"],8,2)); - relax.global_dnds = selection.io.extract_global_MLE_re (relax.mg_results , "^" + terms.omega_ratio); - - relax.mg_results_rate = {}; - - utility.ForEach (relax.global_dnds, "_value_", - ' - io.ReportProgressMessageMD ("fel", "mg-rev", "* " + _value_["description"] + " = " + Format (_value_["MLE"],8,4)); - ' - ); - - - relax.json_store_lf (RELAX.json, "Partitioned MG94xREV", - relax.mg_results["LogL"], relax.mg_results["parameters"] + 5, - RELAX.timers[1], - relax._aux.extract_branch_info ((relax.mg_results["branch lengths"])[0], "relax.branch.length"), - relax._aux.extract_branch_info ((relax.mg_results["branch lengths"])[0], "relax.branch.omega"), - None, - None, - "ω" - ); - relax.json_spool (RELAX.json, relax.codon_data_info["json"]); - - relax.taskTimerStart (2); -} - -RELAX.model_mapping = {}; -RELAX.model_assignment = {}; -RELAX.model_specification = {}; -RELAX.unclassified_model_id = None; - - -for (g = 0; g < relax.group_count; g += 1) { - relax.group_name = (relax.tree ["model_list"])[g]; - relax.branch_set = utility.Filter (relax.tree ["model_map"], "_value_", "_value_ == relax.group_name"); - - if (Abs ( relax.group_name ) == 0) { - relax.group_name = RELAX.unclassified; - } - - - relax.omega_estimate = Eval(utility.Values (selection.io.extract_global_MLE_re (relax.mg_results , relax.group_name + "\*$"))[0]); - - RELAX.model = relax.io.define_a_bsrel_model ("RELAX.`g`", - relax.codon_frequencies, - relax.omega_estimate["MLE"], - 1, - RELAX.srvType != "Site-only"); - - - RELAX.model_assignment [RELAX.model["id"]] = RELAX.model; - RELAX.model_specification [relax.group_name] = relax.branch_set; - RELAX.model_mapping [relax.group_name] = RELAX.model["id"]; - - if ((relax.tree ["model_list"])[g] == RELAX.reference_set) { - RELAX.reference_model = RELAX.model["id"]; - } - - if (relax.group_name == RELAX.unclassified) { - RELAX.unclassified_model_id = 
RELAX.model["id"]; - } -} - -utility.ForEachPair ( RELAX.model_assignment, "_key_", "_value_", ' - if (_key_ != RELAX.reference_model) { - parameters.ConstrainSets ((RELAX.model_assignment[RELAX.reference_model]) ["omegas"], (RELAX.model_assignment[_key_]) ["omegas"]); - parameters.ConstrainSets ((RELAX.model_assignment[RELAX.reference_model]) ["f"], (RELAX.model_assignment[_key_]) ["f"]); - } - '); - - - -model.ApplyModelToTree ("RELAX.tree", relax.tree, RELAX.model_mapping, RELAX.model_specification); - -ASSUME_REVERSIBLE_MODELS = 1; -LikelihoodFunction relax.LF = (RELAX.codon_filter, RELAX.tree); - -global RELAX.branch_scaler = 4; -RELAX.proportional_constraint = "RELAX.branch_scaler"; - -if (RELAX.settings["Estimate GTR"] != TRUE) { - estimators.fixSubsetOfEstimates (relax.mg_results, relax.mg_results["global"]); -} - -estimators.ApplyExistingEstimates ("relax.LF", RELAX.model_assignment, relax.mg_results, None); - -utility.SetEnvVariable ("USE_LAST_RESULTS", 1); - -if (RELAX.runModel == "All") { - - io.ReportProgressMessageMD ("RELAX", "GDM", "## Two-stage fit of the general descriptive model (separate relaxation parameter for each branch)"); - - OPTIMIZATION_PRECISION = 0.1; - - Optimize (relax.MLE.general_descriptive, relax.LF); - - OPTIMIZATION_PRECISION = 0.001; - parameters.UnconstrainParameterSet ("relax.LF", {{terms.lf.local.constrained}}); - - Optimize (relax.MLE.general_descriptive, relax.LF); - io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.MLE.general_descriptive[1][0]); - - relax.general_descriptive = estimators.ExtractMLEs ("relax.LF", RELAX.model_specification); - relax.add_scores (relax.general_descriptive, relax.MLE.general_descriptive); - - relax.taskTimerStop (2); - - relax.json_store_lf (RELAX.json, "General Descriptive", - relax.general_descriptive["LogL"], relax.general_descriptive["parameters"], - RELAX.timers[2], - relax._aux.extract_branch_info ((relax.general_descriptive["branch lengths"])[0], "relax.branch.length"), - 
relax._aux.extract_branch_info ((relax.general_descriptive["branch lengths"])[0], "relax.branch.local_k"), - {"All" : relax.getRateDistribution (RELAX.reference.model, 1)}, - None, - "k" - ); - relax.json_spool (RELAX.json, relax.codon_data_info["json"]); - -} else { - parameters.UnconstrainParameterSet ("relax.LF", {{terms.lf.local.constrained}}); -} - -if (RELAX.has_unclassified) { - parameters.RemoveConstraint ((RELAX.model_assignment[RELAX.unclassified_model_id]) ["omegas"]); - parameters.RemoveConstraint ((RELAX.model_assignment[RELAX.unclassified_model_id]) ["f"]); -} - -relax.taskTimerStart (3); -io.ReportProgressMessage ("RELAX", "Fitting the RELAX null model"); - -RELAX.null = relax.define.null ("RELAX.tree", RELAX.model_assignment[RELAX.reference_model], RELAX.model_specification); - - -if (!RELAX.debug.reload) { - - utility.SetEnvVariable ("VERBOSITY_LEVEL", 1); - - Optimize (relax.MLE.null, relax.LF); - io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.MLE.null[1][0]); - relax.null = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment); - - LIKELIHOOD_FUNCTION_OUTPUT = 7; - fprintf (relax.codon_data_info["file"] + ".null.fit", CLEAR_FILE, relax.LF); - LIKELIHOOD_FUNCTION_OUTPUT = 2; -} else { - ExecuteAFile (relax.codon_data_info["file"] + ".null.fit"); - relax.null = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment); - relax.null["LogL"] = estimators.ComputeLF ("relax.LF"); -} - -relax.add_scores (relax.null, relax.MLE.null); - -relax.taskTimerStop (3); - -relax.omega_distributions = {}; -relax.omega_distributions["Joint"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.reference_model], 1); -relax.omega_distributions["SRV"] = relax.getRateDistributionSRV (RELAX.model_assignment[RELAX.reference_model]); - -if (RELAX.has_unclassified) { - relax.omega_distributions ["Unclassified"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.unclassified_model_id], 1); -} - - - -if (!RELAX.debug.reload) { - 
relax.json_store_lf (RELAX.json, "Null", - relax.null["LogL"], relax.null["parameters"], - RELAX.timers[3], - relax._aux.extract_branch_info ((relax.null["branch lengths"])[0], "relax.branch.length"), - relax._aux.extract_branch_info ((relax.null["branch lengths"])[0], "relax.branch.local_k"), - relax.omega_distributions, - 1, - "k" - ); - - relax.json_spool (RELAX.json, relax.codon_data_info["json"]); -} - -io.ReportProgressMessage ("RELAX", "Fitting the RELAX alternative model"); - -relax.taskTimerStart (4); - -utility.ForEach (estimators.GetGlobalMLE_RegExp (relax.null, "^Relaxation parameter"), "_parameter_", -' - parameters.RemoveConstraint (_parameter_["ID"]); -'); - - -if (!RELAX.debug.reload) { - - Optimize (relax.MLE.alt, relax.LF); - relax.alt = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment); - - io.ReportProgressMessageMD ("RELAX", "AltModel", "Log(L) = " + relax.MLE.alt[1][0]); -} else { - ExecuteAFile (relax.codon_data_info["file"] + ".alternative.fit"); - relax.alt = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment); - relax.alt["LogL"] = estimators.ComputeLF ("relax.LF"); -} - -utility.ForEachPair (estimators.GetGlobalMLE_RegExp (relax.alt, "^Relaxation parameter"), "_name_", "_parameter_", -' - io.ReportProgressMessageMD ("RELAX", "AltModel", "* " + _name_ + " = " + _parameter_ [terms.MLE]); - -'); - - -if (!RELAX.debug.reload) { - LIKELIHOOD_FUNCTION_OUTPUT = 7; - fprintf (relax.codon_data_info["file"] + ".alternative.fit", CLEAR_FILE, relax.LF); - LIKELIHOOD_FUNCTION_OUTPUT = 2; -} - -relax.add_scores (relax.alt, relax.MLE.alt); - -RELAX.json ["relaxation-test"] = relax.runLRT (relax.alt["LogL"], relax.null["LogL"], relax.group_count-2); - -io.ReportProgressMessageMD ("RELAX", "AltModel", "Likelihood ratio test for differences among branch classes. 
Null = " - + relax.null["LogL"] + ", Alt = " + relax.alt["LogL"] + ", p = " + (RELAX.json ["relaxation-test"])["p"]); - -relax.taskTimerStop (4); - -relax.omega_distributions = {}; -relax.omega_distributions["Joint"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.reference_model], 1); -relax.omega_distributions["SRV"] = relax.getRateDistributionSRV (RELAX.model_assignment[RELAX.reference_model]); - -if (RELAX.has_unclassified) { - relax.omega_distributions ["Unclassified"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.unclassified_model_id], 1); -} - - -relax.json_store_lf (RELAX.json, "Alternative", - relax.alt["LogL"], relax.alt["parameters"], - RELAX.timers[4], - relax._aux.extract_branch_info ((relax.alt["branch lengths"])[0], "relax.branch.length"), - relax._aux.extract_branch_info ((relax.alt["branch lengths"])[0], "relax.branch.local_k"), - relax.omega_distributions, - Eval (RELAX.relaxation_parameter), - "k" - ); - - -if (RELAX.runModel == "All") { - - relax.taskTimerStart (5); - - parameters.RemoveConstraint (RELAX.test.model ["omegas"]); - parameters.RemoveConstraint (RELAX.test.model ["f"]); - parameters.SetConstraint (RELAX.relaxation_parameter, Eval (RELAX.relaxation_parameter), ""); - - - io.ReportProgressMessage ("RELAX", "Fitting the RELAX partitioned exploratory model"); - Optimize (relax.MLE.part.expl, relax.LF); - io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.MLE.part.expl [1][0]); - - LIKELIHOOD_FUNCTION_OUTPUT = 7; - fprintf (relax.codon_data_info["file"] + ".partitioned_descriptive.fit", CLEAR_FILE, relax.LF); - LIKELIHOOD_FUNCTION_OUTPUT = 2; - - relax.part.expl = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment); - - relax.omega_distributions["Test"] = relax.getRateDistribution (RELAX.test.model,Eval (RELAX.relaxation_parameter)); - relax.omega_distributions["Reference"] = relax.getRateDistribution (RELAX.reference.model, 1); - - if (RELAX.has_unclassified) { - relax.omega_distributions 
["Unclassified"] = relax.getRateDistribution (RELAX.unclassified.model, 1); - } - - relax.add_scores (relax.part.expl, relax.MLE.part.expl); - - relax.taskTimerStop (5); - relax.json_store_lf (RELAX.json, "Partitioned Exploratory", - relax.part.expl["LogL"], relax.part.expl["parameters"], - RELAX.timers[5], - relax._aux.extract_branch_info ((relax.part.expl["branch lengths"])[0], "relax.branch.length"), - None, - relax.omega_distributions, - None, - "" - ); -} - - -relax.taskTimerStop (0); - - -(RELAX.json ["timers"])["Overall"] = RELAX.timers[0]; -(RELAX.json ["timers"])["Preliminaries"] = RELAX.timers[1]; -(RELAX.json ["timers"])["General Descriptive"] = RELAX.timers[2]; -(RELAX.json ["timers"])["Null"] = RELAX.timers[3]; -(RELAX.json ["timers"])["Alternative"] = RELAX.timers[4]; -(RELAX.json ["timers"])["Partitioned Descriptive"] = RELAX.timers[5]; - -relax.json_spool (RELAX.json, relax.codon_data_info["json"]); - -return RELAX.json; - -//------------------------------------------------------------------------------ -// HELPER FUNCTIONS FROM THIS POINT ON -//------------------------------------------------------------------------------ - -function relax.branch.length (branch_info) { - return branch_info["MLE"]; -} - -function relax.branch.omega (branch_info) { - return parameters.NormalizeRatio ((branch_info[terms.nonsynonymous_rate])["MLE"], (branch_info[terms.synonymous_rate])["MLE"]); -} - -function relax.branch.local_k (branch_info) { - return (branch_info[term.RELAX.k])["MLE"]; -} - -function relax._aux.extract_branch_info.callback (key, value) { - relax._aux.extract_branch_info_result [key] = utility.CallFunction (callback, {"0" : "value"}); -} - -function relax._aux.extract_branch_info (branch_spec, callback) { - relax._aux.extract_branch_info_result = {}; - branch_spec ["relax._aux.extract_branch_info.callback"][""]; - return relax._aux.extract_branch_info_result; -} - - -//------------------------------------------------------------------------------ 
-function relax.getRateDistribution (model_description, K) { - relax.getRateInformation.rate_classes = Abs (model_description["omegas"]); - relax.getRateInformation.omega_info = {relax.getRateInformation.rate_classes,2}; - - for (relax.getRateInformation.k = 0; relax.getRateInformation.k < relax.getRateInformation.rate_classes; relax.getRateInformation.k += 1) { - relax.getRateInformation.omega_info[relax.getRateInformation.k][0] = Eval ((model_description["omegas"])[relax.getRateInformation.k])^K; - relax.getRateInformation.omega_info[relax.getRateInformation.k][1] = Eval ((model_description["weights"])[relax.getRateInformation.k]); - } - return relax.getRateInformation.omega_info % 0; -} - -//------------------------------------------------------------------------------ -function relax.getRateDistributionSRV (model_description) { - relax.getRateInformation.rate_classes = Abs (model_description["srv_rates"]); - relax.getRateInformation.omega_info = {relax.getRateInformation.rate_classes,2}; - - for (relax.getRateInformation.k = 0; relax.getRateInformation.k < relax.getRateInformation.rate_classes; relax.getRateInformation.k += 1) { - relax.getRateInformation.omega_info[relax.getRateInformation.k][0] = Eval ((model_description["srv_rates"])[relax.getRateInformation.k]); - relax.getRateInformation.omega_info[relax.getRateInformation.k][1] = Eval ((model_description["srv_weights"])[relax.getRateInformation.k]); - } - return relax.getRateInformation.omega_info % 0; -} - -//------------------------------------------------------------------------------ -function relax.define.null._aux (key, value) { - //fprintf (stdout, "`tree_id`.`key`.`relax.define.null.local` := `relax.define.null.global`\n"); - ExecuteCommands ("`tree_id`.`key`.`relax.define.null.local` := `relax.define.null.global`"); -} - -//------------------------------------------------------------------------------ -function relax.define.null (tree_id, general_model, partition) { - - 
parameters.RemoveConstraint ((general_model["omegas"])[2]); - - relax.define.null.local = ((general_model["parameters"])["local"])[term.RELAX.k]; - - relax.define.null.index = 0; - - utility.ForEachPair (partition, "_part_name_","_branch_list_", ' - relax.define.null.index += 1; - if (_part_name_ == RELAX.reference_set || _part_name_ == RELAX.unclassified) { - relax.define.null.global = "1"; - _branch_list_["relax.define.null._aux"][""]; - } else { - relax.define.null.global = RELAX.relaxation_parameter + "_" + relax.define.null.index; - (((RELAX.model_assignment[ RELAX.model_mapping[_part_name_]])["parameters"])["global"])["Relaxation parameter for *" + _part_name_ + "*"] = relax.define.null.global; - parameters.SetConstraint (relax.define.null.global, 1, terms.global); - _branch_list_["relax.define.null._aux"][""]; - } - '); - - return general_model; -} - - - -//------------------------------------------------------------------------------ -function relax.add_scores (desc, mles) { - if (Type (mles) == "Matrix") { - desc ["LogL"] = mles[1][0]; - desc ["parameters"] = mles[1][1] + 9 + 5 * (RELAX.settings["Estimate GTR"] != 1); - } -} - -//------------------------------------------------------------------------------ -function relax.runLRT (ha, h0, df) { - return {"LR" : 2*(ha-h0), - "p" : 1-CChi2 (2*(ha-h0),df)}; -} - - -//------------------------------------------------------------------------------ -function relax._aux.define_bsrel_model (id,Q,weights,freqs) { - rate_count = Abs (Q); - components = {}; - length = ""; - Eval ("`id`_eqf = freqs"); - - for (k = 0; k < rate_count; k+=1) { - components[k] = "Exp(" + Q[k] + ")*" + weights[k]; - ExecuteCommands ("Model busted._aux.define_bsrel_model_bl = (" + Q[k] + ",`id`_eqf,0)"); - GetString (blExp, busted._aux.define_bsrel_model_bl, -1); - if (k) { - length += "+"; - } - length += "(`blExp`)*" + weights[k]; - } - - ExecuteCommands ("Model `id` =(\"" + Join("+",components) + 
"\",`id`_eqf,EXPLICIT_FORM_MATRIX_EXPONENTIAL);"); - - return length; -} - - - -//------------------------------------------------------------------------------ - -function relax.io.evaluator (key, value) { - fprintf (stdout, key, "->", Eval (value), "\n"); -} - -//------------------------------------------------------------------------------ -function relax._aux.define_srv_category (rates, weights, cat_name) { - - rate_count = Abs (weights); - expected_value = "(" + weights[0] + ")*(" + rates[0] + ")"; - - cat_freqs = "`cat_name`.weights"; - cat_rates = "`cat_name`.rates"; - cat_norm = "`cat_name`.normalizer"; - norm_weights = {}; - ExecuteCommands ("`cat_freqs` = {rate_count,1}"); - ExecuteCommands ("`cat_rates` = {rate_count,1}"); - ExecuteCommands ("`cat_freqs`[0] := " + weights[0]); - - for (k = 1; k < rate_count; k+=1) { - expected_value += "+(" + weights[k] + ")*(" + rates[k] + ")"; - ExecuteCommands ("`cat_freqs`[k] := " + weights[k]); - } - - ExecuteCommands ("global `cat_norm` := `expected_value`"); - - for (k = 0; k < rate_count; k+=1) { - ExecuteCommands ("`cat_rates`[k] := (" + rates[k] + ")/`cat_norm`"); - norm_weights + ("(" + rates[k] + ")/`cat_norm`"); - } - //category c = ("+resp+", categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25);\n\n" - ExecuteCommands ("category `cat_name` = (rate_count, `cat_freqs`, MEAN, , `cat_rates`, 0,1e25)"); - - return norm_weights; -} - -//------------------------------------------------------------------------------ -function relax.io.define_a_bsrel_model (id, frequencies, mean_omega, do_local, srv_branch_site) { - - model_parameters = {"parameters" : {"global" : {}, "local" : {}}, "omegas" : {}, "weights" : {}, "f" : {}, "Q" : {}, "length" : ""}; - - model_parameters["omegas"] = parameters.GenerateSequentialNames ("`id`.omega", 3, "_"); - model_parameters["f"] = parameters.GenerateSequentialNames ("`id`.aux_freq", 2, "_"); - - parameters.DeclareGlobal (model_parameters["f"], None); - parameters.SetRange 
(model_parameters["f"], terms.range01); - - parameters.DeclareGlobal (model_parameters["omegas"], None); - - model_parameters["weights"] = parameters.helper.stick_breaking (model_parameters["f"], {{0.7,0.25,0.05}}); - - relax.init_omegas = {{0.05,0.25,4}}; - relax.init_omegas = relax.init_omegas * (1/ parameters.Mean (relax.init_omegas, model_parameters["weights"], Abs (model_parameters["omegas"]))); - - parameters.SetRange ((model_parameters["omegas"])[0], terms.range_almost_01); - parameters.SetValue ((model_parameters["omegas"])[0], relax.init_omegas[0]); - - parameters.SetRange ((model_parameters["omegas"])[1], terms.range_almost_01); - parameters.SetValue ((model_parameters["omegas"])[1], relax.init_omegas[1]); - - parameters.SetRange ((model_parameters["omegas"])[2], terms.range_gte1); - parameters.SetValue ((model_parameters["omegas"])[2], relax.init_omegas[2]); - - model_parameters["srv_rates"] = parameters.GenerateSequentialNames ("relax.srv_rate", 3, "_"); - model_parameters["srv_f"] = parameters.GenerateSequentialNames ("relax.aux_freq_srv", 2, "_"); - - parameters.DeclareGlobal (model_parameters["srv_f"], None); - parameters.SetRange (model_parameters["srv_f"], terms.range01); - - parameters.DeclareGlobal (model_parameters["srv_rates"], None); - model_parameters["srv_weights"] = parameters.helper.stick_breaking (model_parameters["srv_f"], {{0.7,0.25,0.05}}); - - relax.init_srv = {{0.05,1,4}}; - - parameters.SetRange ((model_parameters["srv_rates"])[0], terms.range01); - parameters.SetValue ((model_parameters["srv_rates"])[0], relax.init_omegas[0]); - - parameters.SetConstraint ((model_parameters["srv_rates"])[1], 1,""); - - parameters.SetRange ((model_parameters["srv_rates"])[2], terms.range_gte1); - parameters.SetValue ((model_parameters["srv_rates"])[2], relax.init_omegas[2]); - - relax._srv_cat_name = "relax.srv.cat"; - relax.scaled_weights = relax._aux.define_srv_category (model_parameters["srv_rates"], model_parameters["srv_weights"], 
relax._srv_cat_name); - - - - if (do_local) { - parameters.SetConstraint ((model_parameters["omegas"])[2], " 1/" + ((model_parameters["omegas"])[0]) + "/" + ((model_parameters["omegas"])[1]) , ""); - relax.io.define_a_bsrel_model_r = {"LB" : 1e-4, "UB" : 1}; - parameters.SetRange ((model_parameters["omegas"])[1], relax.io.define_a_bsrel_model_r); - } - - - local_k :< 50; - - relax.nuc = {4,3}; - for (k = 0; k < 4; k+=1) { - for (k2 = 0; k2 < 3; k2 += 1) { - relax.nuc [k][k2] = ((frequencies["bases"])[models.DNA.alphabet[k]])[k2]; - } - } - - model_parameters["id"] = "`id`_model"; - - if (srv_branch_site) { - composite_weights = {}; - rc_counter = 1; - for (k = 1; k <= 3; k +=1) { - ((model_parameters["parameters"])["global"])[relax.define_omega_term (k)] = (model_parameters["omegas"])[k-1]; - ((model_parameters["parameters"])["global"])[relax.define_srv_term (k)] = (model_parameters["srv_rates"])[k-1]; - if (k < 3) { - ((model_parameters["parameters"])["global"])[relax.define_weight_term (k)] = (model_parameters["f"])[k-1]; - ((model_parameters["parameters"])["global"])[relax.define_srv_weight_term (k)] = (model_parameters["srv_f"])[k-1]; - } - - for (srv = 1; srv <= 3; srv += 1) { - model_parameters["Q"] + ("Q_`id`_" + rc_counter); - if (do_local) { - PopulateModelMatrix ((model_parameters["Q"])[rc_counter-1], relax.nuc, "t*" + relax.scaled_weights [srv-1] , "Min (1000," + (model_parameters["omegas"])[k-1] +"^local_k)", ""); - } else { - PopulateModelMatrix ((model_parameters["Q"])[rc_counter-1], relax.nuc, "t*" + relax.scaled_weights [srv-1] , (model_parameters["omegas"])[k-1], ""); - } - rc_counter += 1; - composite_weights + ("(" + (model_parameters["weights"])[k-1] + ")*(" + (model_parameters["srv_weights"])[srv-1] + ")"); - } - } - model_parameters["length-expression"] = relax._aux.define_bsrel_model ("`id`_model", model_parameters["Q"], composite_weights, frequencies["codons"]); - } else { - for (k = 1; k <= 3; k +=1) { - 
((model_parameters["parameters"])["global"])[relax.define_omega_term (k)] = (model_parameters["omegas"])[k-1]; - ((model_parameters["parameters"])["global"])[relax.define_srv_term (k)] = (model_parameters["srv_rates"])[k-1]; - if (k < 3) { - ((model_parameters["parameters"])["global"])[relax.define_weight_term (k)] = (model_parameters["f"])[k-1]; - ((model_parameters["parameters"])["global"])[relax.define_srv_weight_term (k)] = (model_parameters["srv_f"])[k-1]; - } - - model_parameters["Q"] + ("Q_`id`_" + k); - if (do_local) { - PopulateModelMatrix ((model_parameters["Q"])[k-1], relax.nuc, "t*`relax._srv_cat_name`", "Min (1000," + (model_parameters["omegas"])[k-1] +"^local_k)", ""); - } else { - PopulateModelMatrix ((model_parameters["Q"])[k-1], relax.nuc, "t*`relax._srv_cat_name`", (model_parameters["omegas"])[k-1], ""); - } - } - model_parameters["length-expression"] = relax._aux.define_bsrel_model ("`id`_model", model_parameters["Q"], model_parameters["weights"], frequencies["codons"]); - } - - - ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("A","C")] = "AC"; - ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("A","T")] = "AT"; - ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("C","G")] = "CG"; - ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("C","T")] = "CT"; - ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("G","T")] = "GT"; - - model_parameters["set-branch-length"] = "relax.aux.copy_branch_length"; - - model_parameters["length parameter"] = "t"; - ((model_parameters["parameters"])[terms.local])[terms.timeParameter ()] = "t"; - if (do_local) { - ((model_parameters["parameters"])[terms.local])[term.RELAX.k] = "local_k"; - } - model_parameters["get-branch-length"] = "relax.aux.retrieve_branch_length"; - - - return model_parameters; -} - -//------------------------------------------------------------------------------ - - -function relax.aux.retrieve_branch_length 
(model, tree, node) { - relax.aux.retrieve_branch_length.locals = Columns ((model_parameters["parameters"])[terms.local]); - for (relax.aux.retrieve_branch_length.i = 0; relax.aux.retrieve_branch_length.i < Columns (relax.aux.retrieve_branch_length.locals); relax.aux.retrieve_branch_length.i += 1) { - Eval (relax.aux.retrieve_branch_length.locals[relax.aux.retrieve_branch_length.i] + " = `tree`.`node`." + relax.aux.retrieve_branch_length.locals[relax.aux.retrieve_branch_length.i]); - } - return Eval (model["length-expression"]); -} - -//------------------------------------------------------------------------------ - -function relax.aux.copy_branch_length (model, value, parameter) { - - relax.aux.copy_branch_length.t = ((model["parameters"])["local"])[terms.timeParameter ()]; - relax.aux.copy_branch_length.k = ((model["parameters"])["local"])[term.RELAX.k]; - - if (Abs (RELAX.proportional_constraint)) { - Eval ("`parameter`.`relax.aux.copy_branch_length.t` := `RELAX.proportional_constraint` * " + value); - } else { - Eval ("`parameter`.`relax.aux.copy_branch_length.t` = " + value); - } - - if (Type (relax.aux.copy_branch_length.k) == "String") { - Eval ("`parameter`.`relax.aux.copy_branch_length.k` = 1"); - } -} - -function relax._aux.io.countBranchSets (key, value) { - available_models[value] += 1; - return None; -} - -function relax._aux.io.mapBranchSets (key, value) { - /*if (Abs (value) == 0) { - value = RELAX.unclassified; - }*/ - (relax.tree ["model_map"])[key] = branch_set[value]; - (return_set[branch_set[value]])[key] = 1; - return None; -} - -function relax.handle.unlabeled (label) { - if (label == "Unlabeled branches") { - return ""; - } else { - return label; - } -} - - - -//------------------------------------------------------------------------------------------------------------------------ - -function relax.taskTimerStart (index) { - RELAX.timers[index] = Time(1); -} - -function relax.taskTimerStop (index) { - RELAX.timers[index] = Time(1) - 
RELAX.timers[index]; -} - -//------------------------------------------------------------------------------------------------------------------------ - -function relax.getIC (logl,params,samples) { - return -2*logl + 2*samples/(samples-params-1)*params; -} - -//------------------------------------------------------------------------------------------------------------------------ - -lfunction relax.define_omega_term (cat) { - return "Omega for category " + cat; -} - -lfunction relax.define_weight_term (cat) { - return "Omega frequency parameter " + cat; -} - -lfunction relax.define_srv_term (cat) { - return "Synonymous rate variation for category " + cat; -} - -lfunction relax.define_srv_weight_term (cat) { - return "Synonymous frequency parameter " + cat; -} - -//------------------------------------------------------------------------------------------------------------------------ - -function relax.json_spool (json, file) { - USE_JSON_FOR_MATRIX = 1; - fprintf (file, CLEAR_FILE, json); - USE_JSON_FOR_MATRIX = 0; - -} - -//------------------------------------------------------------------------------------------------------------------------ - -function relax.json_store_lf (json, name, ll, df, time, branch_length, branch_annotation, omega_distribution, K, annotation_tag) { - - (json["fits"])[name] = {"log-likelihood" : ll, - "parameters" : df, - "AIC-c" : relax.getIC (ll, df, relax.sample_size), - "runtime" : time, - "branch-lengths" : branch_length, - "branch-annotations" : branch_annotation, - "rate-distributions" : omega_distribution, - "K" : K, - "annotation-tag" : annotation_tag, - "display-order" : Abs (json["fits"])}; - - } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 64b64c652..03b97ceab 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -1,5 +1,4 @@ - -RequireVersion("2.3.3"); +RequireVersion ("2.5.21"); 
LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4 @@ -25,14 +24,14 @@ LoadFunctionLibrary("modules/selection_lib.ibf"); LoadFunctionLibrary("libv3/models/codon/BS_REL.bf"); LoadFunctionLibrary("libv3/convenience/math.bf"); LoadFunctionLibrary("libv3/tasks/mpi.bf"); +LoadFunctionLibrary("libv3/models/rate_variation.bf"); utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE); utility.SetEnvVariable ("USE_MEMORY_SAVING_DATA_STRUCTURES", 1e8); -utility.SetEnvVariable ("LF_SMOOTHING_SCALER",0.05); - +u /*------------------------------------------------------------------------------*/ @@ -41,7 +40,8 @@ relax.analysis_description = { Version 2.1 adds a check for stability in K estimates to try to mitigate convergence problems. Version 3 provides support for >2 branch sets. Version 3.1 adds LHC + Nedler-Mead initial fit phase and keyword support. - Version 3.1.1 adds some bug fixes for better convergence.", + Version 3.1.1 adds some bug fixes for better convergence. + Version 4.0 adds support for synonymous rate variation.", terms.io.version : "3.1.1", terms.io.reference : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). 
Mol Biol Evol 32 (3): 820-832", terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group", @@ -58,10 +58,16 @@ relax.json = { terms.json.analysis: relax.analysis_description, relax.relaxation_parameter = "relax.K"; relax.rate_classes = 3; +relax.synonymous_rate_classes = 3; +relax.SRV = "Synonymous site-to-site rates"; +relax.json.srv_posteriors = "Synonymous site-posteriors"; +relax.json.srv_viterbi = "Viterbi synonymous rate path"; + relax.initial_ranges = {}; relax.initial_grid.N = 500; + relax.MG94_name = terms.json.mg94xrev_sep_rates; relax.general_descriptive_name = "General descriptive"; relax.alternative_name = "RELAX alternative"; @@ -135,6 +141,7 @@ KeywordArgument ("reference", "Branches to use as the reference set", null, "Ch + io.DisplayAnalysisBanner ( relax.analysis_description ); selection.io.startTimer (relax.json [terms.json.timers], "Overall", 0); @@ -148,6 +155,11 @@ namespace relax { KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'RELAX.json')", relax.codon_data_info [terms.json.json]); relax.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to"); +KeywordArgument ("grid-size", "The number of points in the initial distributional guess for likelihood fitting", 250); +relax.initial_grid.N = io.PromptUser ("The number of points in the initial distributional guess for likelihood fitting", 250, 1, 10000, TRUE); + +KeywordArgument ("starting-points", "The number of initial random guesses to seed rate values optimization", 1); +relax.N.initial_guesses = io.PromptUser ("The number of initial random guesses to 'seed' rate values optimization", 1, 1, relax.initial_grid.N$10, TRUE); io.ReportProgressMessageMD('RELAX', 'selector', 'Branch sets for RELAX analysis'); @@ -211,6 +223,43 @@ relax.model_set = io.SelectAnOption ({ {"All", "[Default] Fit descriptive models AND run the 
relax test (4 models)"} {"Minimal", "Run only the RELAX test (2 models)"} }, "RELAX analysis type"); + +KeywordArgument ("srv", "Include synonymous rate variation", "No"); + +relax.do_srv = io.SelectAnOption ( + { + "Yes" : "Allow synonymous substitution rates to vary from site to site (but not from branch to branch)", + "Branch-site" : "Allow synonymous substitution rates to vary using general branch site models", + "HMM" : "Allow synonymous substitution rates to vary from site to site (but not from branch to branch), and use a hidden markov model for spatial correlation on synonymous rates", + "No" : "Synonymous substitution rates are constant across sites. This is the 'classic' behavior, i.e. the original published test" + }, + "Synonymous rate variation" +); + +if (relax.do_srv == "Branch-site") { + relax.do_bs_srv = TRUE; + relax.do_srv = TRUE; + (relax.json[terms.json.analysis])[terms.settings] = "Branch-site synonymous rate variation"; +} else { + if (relax.do_srv == "Yes") { + relax.do_bs_srv = FALSE; + relax.do_srv = TRUE; + relax.do_srv_hmm = FALSE; + } else { + if (relax.do_srv == "HMM") { + relax.do_srv = TRUE; + relax.do_bs_srv = FALSE; + relax.do_srv_hmm = TRUE; + } else { + relax.do_srv = FALSE; + } + } +} + +if (relax.do_srv) { + KeywordArgument ("syn-rates", "The number alpha rate classes to include in the model [1-10, default 3]", relax.synonymous_rate_classes); + relax.synonymous_rate_classes = io.PromptUser ("The number omega rate classes to include in the model", relax.synonymous_rate_classes, 1, 10, TRUE); +} selection.io.startTimer (relax.json [terms.json.timers], "Preliminary model fitting", 1); @@ -245,8 +294,6 @@ utility.ForEach (relax.global_dnds, "_value_", ' '); - - //Store MG94 to JSON selection.io.json_store_lf_withEFV (relax.json, relax.MG94_name, @@ -264,10 +311,41 @@ utility.ForEachPair (relax.filter_specification, "_key_", "_value_", +relax.model_generator = "models.codon.BS_REL.ModelDescription"; + +if (relax.do_srv) { + if 
(relax.do_bs_srv) { + relax.model_generator = "relax.model.with.GDD"; + relax.model_generator = "models.codon.BS_REL_SRV.ModelDescription"; + relax.rate_class_arguments = {{relax.synonymous_rate_classes__,relax.rate_classes__}}; + } else { + + lfunction relax.model.with.GDD (type, code, rates) { + def = models.codon.BS_REL.ModelDescription (type, code, rates); + options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("relax.synonymous_rate_classes"), + utility.getGlobalValue("terms._namespace") : "relax._shared_srv"}; + + if (utility.getGlobalValue("relax.do_srv_hmm")) { + options [utility.getGlobalValue ("terms.rate_variation.HMM")] = "equal"; + } + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options); + return def; + } + + relax.model_generator = "relax.model.with.GDD"; + relax.rate_class_arguments = relax.rate_classes; + } +} else { + relax.rate_class_arguments = relax.rate_classes; +} selection.io.stopTimer (relax.json [terms.json.timers], "Preliminary model fitting"); parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50); +PARAMETER_GROUPING = {}; +PARAMETER_GROUPING + relax.distribution["rates"]; +PARAMETER_GROUPING + relax.distribution["weights"]; + if (relax.model_set == "All") { // run all the models relax.ge_guess = None; @@ -307,9 +385,6 @@ if (relax.model_set == "All") { // run all the models io.ReportProgressMessageMD ("RELAX", "gd", "Fitting the general descriptive (separate k per branch) model"); selection.io.startTimer (relax.json [terms.json.timers], "General descriptive model fitting", 2); - PARAMETER_GROUPING = {}; - PARAMETER_GROUPING + relax.distribution["rates"]; - PARAMETER_GROUPING + relax.distribution["weights"]; @@ -329,6 +404,7 @@ if (relax.model_set == "All") { // run all the models parameters.DeclareGlobalWithRanges ("relax.bl.scaler", 1, 0, 1000); + relax.grid_search.results = estimators.FitLF (relax.filter_names, relax.trees,{ "0" : 
{"DEFAULT" : "relax.ge"}}, relax.final_partitioned_mg_results, @@ -346,7 +422,8 @@ if (relax.model_set == "All") { // run all the models "OPTIMIZATION_PRECISION" : relax.nm.precision } , - terms.search_grid : relax.initial_grid + terms.search_grid : relax.initial_grid, + terms.search_restarts : relax.N.initial_guesses } ); @@ -406,7 +483,6 @@ if (relax.model_set == "All") { // run all the models relax.ge_guess[relax.i][0] = relax.inferred_ge_distribution[relax.i + relax.shift][0]; relax.ge_guess[relax.i][1] = relax.inferred_ge_distribution[relax.i + relax.shift][1]; } - //console.log (relax.ge_guess); continue; } } @@ -476,6 +552,7 @@ if (relax.numbers_of_tested_groups == 2) { relax.model_to_relax_parameter ["relax.test"] = relax.relaxation_parameter; relax.model_namespaces = {{"relax.reference","relax.test"}}; relax.model_to_group_name = {"relax.reference" : relax.reference_branches_name, "relax.test" : relax.test_branches_name}; + relax.model_for_srv = "relax.test"; } else { @@ -489,24 +566,24 @@ if (relax.numbers_of_tested_groups == 2) { relax.model_namespaces[_value_] = relax.k; "); relax.reference_model_namespace = relax.model_namespaces[0]; - //console.log (relax.model_namespaces); - //console.log (relax.reference_model_namespace); + relax.model_for_srv = relax.model_namespaces[0]; } + + //console.log (relax.model_namespaces); //explicit loop to avoid re-entrance errors relax.relax_parameter_terms = {}; - -relax.bound_weights = {}; +relax.bound_weights = {}; for (relax.k = 0; relax.k < relax.numbers_of_tested_groups; relax.k += 1) { relax.model_nmsp = relax.model_namespaces[relax.k ]; - relax.model_object_map[relax.model_nmsp] = model.generic.DefineMixtureModel("models.codon.BS_REL.ModelDescription", + relax.model_object_map[relax.model_nmsp] = model.generic.DefineMixtureModel(relax.model_generator, relax.model_nmsp, { "0": parameters.Quote(terms.global), "1": relax.codon_data_info[terms.code], - "2": parameters.Quote (relax.rate_classes) // the number of 
rate classes + "2": parameters.Quote (relax.rate_class_arguments) // the number of rate classes }, relax.filter_names, None); @@ -525,13 +602,35 @@ for (relax.k = 0; relax.k < relax.numbers_of_tested_groups; relax.k += 1) { } else { model.generic.AddGlobal (relax.model_object_map[relax.model_nmsp], relax.model_to_relax_parameter [relax.model_nmsp], terms.relax.k); } - relax.bound_weights * models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.model_object_map[relax.model_nmsp]}, terms.mixture.mixture_aux_weight + ".+"); + relax.bound_weights * models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.model_object_map[relax.model_nmsp]}, terms.mixture.mixture_aux_weight + " for category"); models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.model_object_map[relax.model_nmsp]}, terms.nucleotideRate("[ACGT]","[ACGT]")); for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) { parameters.SetConstraint (model.generic.GetGlobalParameter (relax.model_object_map[relax.model_nmsp] , terms.AddCategory (terms.parameters.omega_ratio,relax.i)), model.generic.GetGlobalParameter (relax.model_object_map[relax.reference_model_namespace] , terms.AddCategory (terms.parameters.omega_ratio,relax.i)) + "^" + relax.model_to_relax_parameter [relax.model_nmsp], terms.global); } + if (relax.do_bs_srv) { + for (description,var_id; in; ((relax.model_object_map[relax.reference_model_namespace])[terms.parameters])[terms.global]) { + + if (None != regexp.Find (description, terms.parameters.synonymous_rate + " for category")) { + var_id_2 = utility.GetByKey (((relax.model_object_map[relax.model_nmsp])[terms.parameters])[terms.global], description, "String"); + if (None != var_id_2) { + parameters.SetConstraint (var_id, var_id_2, terms.global); + } + } + + if (None != regexp.Find (description, terms.mixture.mixture_aux_weight + " for 
category SRV ")) { + var_id_2 = utility.GetByKey (((relax.model_object_map[relax.model_nmsp])[terms.parameters])[terms.global], description, "String"); + if (None != var_id_2) { + if (parameters.IsIndependent (var_id_2)) { + parameters.SetConstraint (var_id, var_id_2, terms.global); + } + } + } + } + + + } } } @@ -541,6 +640,43 @@ relax.model_map = utility.Map (relax.model_to_group_name, "_groupid_", 'utility. relax.do_lhc = FALSE; +if (relax.do_srv) { + + if (relax.do_bs_srv) { + relax.srv_rate_regex = "Mean scaler variable for"; + relax.srv_weight_regex = terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), "SRV [0-9]+"); + + relax.srv_rate_reporting = regexp.PartitionByRegularExpressions(utility.Keys (((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global]), + {"0" : "^" + utility.getGlobalValue('terms.parameters.synonymous_rate'), + "1" : relax.srv_weight_regex}); + + + relax.srv_rate_reporting = { + 'rates' : utility.UniqueValues (utility.Map ( relax.srv_rate_reporting ["^" + utility.getGlobalValue('terms.parameters.synonymous_rate') ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]')), + 'weights' : utility.UniqueValues (utility.Map (relax.srv_rate_reporting [relax.srv_weight_regex ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]')) + }; + + + + } else { + relax.srv_rate_regex = "GDD rate category [0-9]+"; + relax.srv_weight_regex = "Mixture auxiliary weight for GDD category [0-9]+"; + } + relax.srv_distribution = regexp.PartitionByRegularExpressions(utility.Keys (((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global]), {"0" : relax.srv_rate_regex, "1" : relax.srv_weight_regex}); + + + relax.srv_distribution = { + 'rates' : utility.UniqueValues (utility.Map (relax.srv_distribution [relax.srv_rate_regex ] , "_value_", 
'(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]')), + 'weights' : utility.UniqueValues (utility.Map (relax.srv_distribution [relax.srv_weight_regex ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]')) + }; + + + PARAMETER_GROUPING + relax.srv_distribution["rates"]; + PARAMETER_GROUPING + relax.srv_distribution["weights"]; + + relax.init_grid_setup (relax.srv_distribution); + +} if (relax.model_set != "All") { /* @@ -555,11 +691,11 @@ if (relax.model_set != "All") { } if (relax.has_unclassified) { - relax.unclassified.bsrel_model = model.generic.DefineMixtureModel("models.codon.BS_REL.ModelDescription", + relax.unclassified.bsrel_model = model.generic.DefineMixtureModel(relax.model_generator, "relax.unclassified", { "0": parameters.Quote(terms.global), "1": relax.codon_data_info[terms.code], - "2": parameters.Quote (relax.rate_classes) // the number of rate classes + "2": parameters.Quote (relax.rate_class_arguments) // the number of rate classes }, relax.filter_names, None); @@ -580,51 +716,115 @@ if (relax.has_unclassified) { models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.unclassified.bsrel_model}, terms.nucleotideRate("[ACGT]","[ACGT]")); } + + //------------------------------------ function relax.report_multi_class_rates (model_fit, distributions) { - io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **" + relax.model_to_group_name[relax.reference_model_namespace] + "** (reference) branches"); - relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.reference_model_namespace])) % 0; - selection.io.report_dnds (relax.inferred_distribution_ref); + io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **" + 
relax.model_to_group_name[relax.reference_model_namespace] + "** (reference) branches"); + relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.reference_model_namespace])) % 0; + selection.io.report_dnds (relax.inferred_distribution_ref); - relax.distribution_for_json = {}; - relax.distribution_for_json [relax.model_to_group_name[relax.reference_model_namespace]] = utility.Map (utility.Range (relax.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : relax.inferred_distribution_ref [_index_][0], - terms.json.proportion : relax.inferred_distribution_ref [_index_][1]}"); + relax.distribution_for_json = {}; + relax.distribution_for_json [relax.model_to_group_name[relax.reference_model_namespace]] = utility.Map (utility.Range (relax.rate_classes, 0, 1), + "_index_", + "{terms.json.omega_ratio : relax.inferred_distribution_ref [_index_][0], + terms.json.proportion : relax.inferred_distribution_ref [_index_][1]}"); - if (None != model_fit || distributions) { - - if (None != model_fit) { - relax.fitted.K = {}; - relax.fitted.K [relax.model_to_group_name[relax.reference_model_namespace]] = 1; - } - + if (None != model_fit || distributions) { - for (relax.k = 1; relax.k < relax.numbers_of_tested_groups; relax.k += 1) { - relax.model_nmsp = relax.model_namespaces[relax.k ]; - relax.branch_set = relax.model_to_group_name[relax.model_nmsp]; - if (None != model_fit) { - if (relax.k > 1) { - relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,relax.relax_parameter_terms[relax.k]); - } else { - relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,terms.relax.k); - } - relax.fitted.K [relax.model_to_group_name[relax.model_nmsp]] = relax.fitted.K_group ; - - io.ReportProgressMessageMD("RELAX", "alt", "* Relaxation/intensification parameter (K) for branch set `relax.branch_set` = " + Format(relax.fitted.K_group ,8,2)); - } - io.ReportProgressMessageMD("RELAX", 
"alt", "* The following rate distribution was inferred for **`relax.branch_set`** branches"); - relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.model_nmsp])) % 0; - selection.io.report_dnds (relax.inferred_distribution); - relax.distribution_for_json [relax.model_to_group_name[relax.model_nmsp]] = utility.Map (utility.Range (relax.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : relax.inferred_distribution [_index_][0], - terms.json.proportion : relax.inferred_distribution [_index_][1]}"); - } - } + if (None != model_fit) { + relax.fitted.K = {}; + relax.fitted.K [relax.model_to_group_name[relax.reference_model_namespace]] = 1; + } + + + for (relax.k = 1; relax.k < relax.numbers_of_tested_groups; relax.k += 1) { + relax.model_nmsp = relax.model_namespaces[relax.k ]; + relax.branch_set = relax.model_to_group_name[relax.model_nmsp]; + if (None != model_fit) { + if (relax.k > 1) { + relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,relax.relax_parameter_terms[relax.k]); + } else { + relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,terms.relax.k); + } + relax.fitted.K [relax.model_to_group_name[relax.model_nmsp]] = relax.fitted.K_group ; + + io.ReportProgressMessageMD("RELAX", "alt", "* Relaxation/intensification parameter (K) for branch set `relax.branch_set` = " + Format(relax.fitted.K_group ,8,2)); + } + io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **`relax.branch_set`** branches"); + relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.model_nmsp])) % 0; + selection.io.report_dnds (relax.inferred_distribution); + relax.distribution_for_json [relax.model_to_group_name[relax.model_nmsp]] = utility.Map (utility.Range (relax.rate_classes, 0, 1), + "_index_", + "{terms.json.omega_ratio : 
relax.inferred_distribution [_index_][0], + terms.json.proportion : relax.inferred_distribution [_index_][1]}"); + } + + + + } +} + +//------------------------------------ + +function relax._report_srv (relax_model_fit, is_null) { + + if (relax.do_srv_hmm) { + relax.hmm_lambda = selection.io.extract_global_MLE (relax_model_fit, terms.rate_variation.hmm_lambda); + if (is_null) { + io.ReportProgressMessageMD("relax", "main", "* HMM switching rate = " + Format (relax.hmm_lambda, 8, 3)); + relax.distribution_for_json [terms.rate_variation.hmm_lambda] = relax.hmm_lambda; + } else { + relax.hmm_lambda.CI = parameters.GetProfileCI(((relax_model_fit[terms.global])[terms.rate_variation.hmm_lambda])[terms.id], + relax_model_fit[terms.likelihood_function], 0.95); + + io.ReportProgressMessageMD ("relax", "main", "* HMM switching rate = " + Format (relax.hmm_lambda,8,4) + + " (95% profile CI " + Format ((relax.hmm_lambda.CI )[terms.lower_bound],8,4) + "-" + Format ((relax.hmm_lambda.CI )[terms.upper_bound],8,4) + ")"); + + relax.distribution_for_json [terms.rate_variation.hmm_lambda] = relax.hmm_lambda.CI; + + } + + } + + if (relax.do_srv) { + if (relax.do_bs_srv) { + relax.srv_info = parameters.GetStickBreakingDistribution ( relax.srv_rate_reporting) % 0; + } else { + relax.srv_info = Transpose((rate_variation.extract_category_information((relax.model_object_map[relax.model_for_srv])))["VALUEINDEXORDER"][0])%0; + } + io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred"); + selection.io.report_distribution (relax.srv_info); + + relax.distribution_for_json [relax.SRV] = (utility.Map (utility.Range (relax.synonymous_rate_classes, 0, 1), + "_index_", + "{terms.json.rate :relax.srv_info [_index_][0], + terms.json.proportion : relax.srv_info [_index_][1]}")); + + if (is_null == FALSE) { + ConstructCategoryMatrix (relax.cmx, ^(relax_model_fit[terms.likelihood_function])); + ConstructCategoryMatrix 
(relax.cmx_weights, ^(relax_model_fit[terms.likelihood_function]), WEIGHTS); + relax.cmx_weighted = (relax.cmx_weights[-1][0]) $ relax.cmx; // taking the 1st column fixes a bug with multiple partitions + relax.column_weights = {1, Rows (relax.cmx_weights)}["1"] * relax.cmx_weighted; + relax.column_weights = relax.column_weights["1/_MATRIX_ELEMENT_VALUE_"]; + (relax.json [relax.json.srv_posteriors]) = relax.cmx_weighted $ relax.column_weights; + } + + if (relax.do_srv_hmm && is_null == FALSE ) { + ConstructCategoryMatrix (relax.cmx_viterbi, ^(relax_model_fit[terms.likelihood_function]), SHORT); + (relax.json [relax.json.srv_viterbi]) = relax.cmx_viterbi; + io.ReportProgressMessageMD("relax", "main", "* The following switch points for synonymous rates were inferred"); + selection.io.report_viterbi_path (relax.cmx_viterbi); + + } + } + + if (relax.do_srv_hmm) { + relax.distribution_for_json [terms.rate_variation.hmm_lambda] = relax.hmm_lambda; + } } //------------------------------------ @@ -636,6 +836,7 @@ function relax.FitMainTestPair () { relax.nm.precision = -0.00025*relax.final_partitioned_mg_results[terms.fit.log_likelihood]; parameters.DeclareGlobalWithRanges ("relax.bl.scaler", 1, 0, 1000); + relax.general_descriptive.fit = estimators.FitLF (relax.filter_names, relax.trees,{ "0" : relax.model_map}, relax.general_descriptive.fit, relax.model_object_map, @@ -651,16 +852,15 @@ function relax.FitMainTestPair () { "OPTIMIZATION_PRECISION" : relax.nm.precision } , - terms.search_grid : relax.initial_grid + terms.search_grid : relax.initial_grid, + terms.search_restarts : relax.N.initial_guesses } ); } - - + relax.alternative_model.fit = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map}, relax.general_descriptive.fit, relax.model_object_map, {terms.run_options.retain_lf_object: TRUE}); - //io.SpoolLF(relax.alternative_model.fit["LF"], "/tmp/relax", "alt"); io.ReportProgressMessageMD("RELAX", "alt", "* " + selection.io.report_fit 
(relax.alternative_model.fit, 9, relax.codon_data_info[terms.data.sample_size])); @@ -753,7 +953,11 @@ function relax.FitMainTestPair () { relax.report_multi_class_rates (relax.alternative_model.fit, TRUE); } - + relax._report_srv (relax.alternative_model.fit, FALSE); + + KeywordArgument ("save-fit", "Save RELAX alternative model fit to this file (default is not to save)", "/dev/null"); + io.SpoolLFToPath(relax.alternative_model.fit[terms.likelihood_function], io.PromptUserForFilePath ("Save RELAX model fit to this file ['/dev/null' to skip]")); + selection.io.json_store_lf (relax.json, relax.alternative_name, relax.alternative_model.fit[terms.fit.log_likelihood], @@ -774,6 +978,8 @@ function relax.FitMainTestPair () { selection.io.startTimer (relax.json [terms.json.timers], "RELAX null model fitting", 4); io.ReportProgressMessageMD ("RELAX", "null", "Fitting the null (K := 1) model"); + + for (relax.k = 1; relax.k < relax.numbers_of_tested_groups; relax.k += 1) { relax.model_nmsp = relax.model_namespaces[relax.k ]; @@ -785,7 +991,6 @@ function relax.FitMainTestPair () { } - relax.null_model.fit = estimators.FitExistingLF (relax.alternative_model.fit[terms.likelihood_function], relax.model_object_map); io.ReportProgressMessageMD ("RELAX", "null", "* " + selection.io.report_fit (relax.null_model.fit, 9, relax.codon_data_info[terms.data.sample_size])); relax.LRT = math.DoLRT (relax.null_model.fit[terms.fit.log_likelihood], relax.alternative_model.fit[terms.fit.log_likelihood], relax.numbers_of_tested_groups-1); @@ -804,6 +1009,8 @@ function relax.FitMainTestPair () { } else { relax.report_multi_class_rates (None, FALSE); } + + relax._report_srv (relax.null_model.fit, TRUE); selection.io.json_store_lf (relax.json, relax.null_name , diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index f88b00ad7..9e0f34aed 100644 --- 
a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -389,7 +389,7 @@ function doGTR (prefix) { /** - * @name doPartitionMG + * @name doPartitionedMG * Can only be used after including shared-load-file * @return */ diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 3f7467db3..41c5ab733 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -375,7 +375,7 @@ namespace terms{ constrain_branch_length = "constrain-branch-length"; branch_length_string = "branch-length-string"; branch_length_string_conditional = "branch-length-string-conditional"; - branch_length_string_expr = = "branch-length-string-expression"; + branch_length_string_expr = "branch-length-string-expression"; branch_length_scaler = "branch length scaler"; post_definition = "post-definition"; length = "length"; diff --git a/res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf b/res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf index dbacc6a1c..55b5920b0 100644 --- a/res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf +++ b/res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf @@ -164,6 +164,11 @@ lfunction models.codon.BS_REL.ExtractMixtureDistribution (bs_rel) { lfunction models.codon.BS_REL.ExtractMixtureDistributionFromFit (bs_rel, fit) { count = bs_rel [utility.getGlobalValue ("terms.model.components")]; + + if (Type (count) == "Matrix") { + count = count[1]; + } + rates = {count, 1}; weights = {count-1, 1}; @@ -177,6 +182,34 @@ lfunction models.codon.BS_REL.ExtractMixtureDistributionFromFit (bs_rel, fit) { return {utility.getGlobalValue ("terms.parameters.rates") : rates, utility.getGlobalValue ("terms.parameters.weights") : weights }; } +/** + * @name models.codon.BS_REL.ExtractSynMixtureDistributionFromFit + * @param {Dict} bs_rel + * @returns {Dict} mixture distribution parameters + */ + +lfunction 
models.codon.BS_REL.ExtractSynMixtureDistributionFromFit (bs_rel, fit) { + count = bs_rel [utility.getGlobalValue ("terms.model.components")]; + + if (Type (count) == "Matrix") { + count = count[0]; + } else { + assert (0, "models.codon.BS_REL.ExtractSynMixtureDistributionFromFit called for a model without a bivariate component" ); + } + + rates = {count, 1}; + weights = {count-1, 1}; + + for (i = 1; i <= count; i+=1) { + rates [i-1] = estimators.GetGlobalMLE (fit, terms.AddCategory (utility.getGlobalValue ("terms.parameters.synonymous_rate"), i)); + if (i < count ) { + weights [i-1] = estimators.GetGlobalMLE (fit, terms.AddCategory (utility.getGlobalValue ("terms.mixture.mixture_aux_weight"), "SRV " + i)); + } + } + + return {utility.getGlobalValue ("terms.parameters.rates") : rates, utility.getGlobalValue ("terms.parameters.weights") : weights }; +} + /** * @name models.codon.BS_REL.BS_REL._DefineQ * @param {Dict} mg_rev @@ -359,6 +392,7 @@ function models.codon.BS_REL.post_definition(model) { */ function models.codon.BS_REL.get_branch_length(model, tree, node) { + parameters.SetLocalModelParameters (model, tree, node); parameters.SetCategoryVariables (model); bl = utility.GetEnvVariable ("BRANCH_LENGTH_STENCIL"); @@ -369,7 +403,7 @@ function models.codon.BS_REL.get_branch_length(model, tree, node) { bl = Eval ((model [utility.getGlobalValue("terms.model.branch_length_string_conditional")])[bl]); } else { if (model / terms.model.branch_length_string_expr) { - bl = Eval (model [terms.model.branch_length_string_exp]); + bl = Eval (model [terms.model.branch_length_string_expr]); //console.log (bl); //console.log (Eval (model [utility.getGlobalValue("terms.model.branch_length_string")])); } else { diff --git a/res/TemplateBatchFiles/libv3/models/model_functions.bf b/res/TemplateBatchFiles/libv3/models/model_functions.bf index 766f7942b..758eeca92 100644 --- a/res/TemplateBatchFiles/libv3/models/model_functions.bf +++ 
b/res/TemplateBatchFiles/libv3/models/model_functions.bf @@ -558,6 +558,7 @@ lfunction models.BindGlobalParameters (models, filter) { candidate_set = utility.UniqueValues(utility.Filter (utility.Keys (reference_set), "_key_", "regexp.Find (_key_,`&filter`)" )); + constraints_set = {}; @@ -569,7 +570,7 @@ lfunction models.BindGlobalParameters (models, filter) { if (parameters.IsIndependent (`¶meter_set`[_p_])) { parameters.SetConstraint (`¶meter_set`[_p_], `&reference_set`[_p_], ''); `&constraints_set` [`¶meter_set`[_p_]] = `&reference_set`[_p_]; - } + } }" ); } diff --git a/res/TemplateBatchFiles/libv3/models/parameters.bf b/res/TemplateBatchFiles/libv3/models/parameters.bf index 5c31d8366..95eb633a3 100644 --- a/res/TemplateBatchFiles/libv3/models/parameters.bf +++ b/res/TemplateBatchFiles/libv3/models/parameters.bf @@ -202,10 +202,12 @@ function parameters.SetLocalValue(tree, branch, id, value) { function parameters.SetValues(set) { if (Type (set) == "AssociativeList") { - utility.ForEachPair (set, "_key_", "_value_", - ' - parameters.SetValue (_value_[terms.id], _value_[terms.fit.MLE]); - '); + for (_key_, _value_; in; set) { + if (parameters.IsIndependent (_value_[terms.id])) { + parameters.SetValue (_value_[terms.id], _value_[terms.fit.MLE]); + } + } + } } @@ -231,10 +233,8 @@ lfunction parameters.ConstrainMeanOfSet (set, weights, mean, namespace) { return; } } - - - scaler_variables = {}; + scaler_variables = {}; utility.ForEach (unscaled, "_name_", 'parameters.DeclareGlobal (_name_, null)'); global_scaler = namespace + ".scaler_variable"; parameters.SetConstraint (global_scaler, Join ("+", constraint), "global"); @@ -492,18 +492,21 @@ function parameters.SetRange(id, ranges) { } + /** * Check if parameter is independent * @name parameters.IsIndependent * @param parameter - id of parameter to check * @returns {Bool} TRUE if independent, FALSE otherwise */ -lfunction parameters.IsIndependent(parameter) { - - //console.log(parameter); +function 
parameters.IsIndependent(parameter) { - GetString(info, ^ parameter, -1); - + SetParameter(HBL_EXECUTION_ERROR_HANDLING,1,0); + GetString(info, ^parameter, -1); + SetParameter(HBL_EXECUTION_ERROR_HANDLING,0,0); + if (Abs (LAST_HBL_EXECUTION_ERROR)) { // variable does not exist + return TRUE; + } if (Type(info) == "AssociativeList") { return (utility.CheckKey(info, "Local", "Matrix") && utility.CheckKey(info, "Global", "Matrix")) == FALSE; } @@ -639,7 +642,6 @@ lfunction parameters.SetStickBreakingDistribution (parameters, values) { left_over = 1; for (i = 0; i < rate_count; i += 1) { - parameters.SetValue ((parameters["rates"])[i], values[i][0]); if (i < rate_count - 1) { break_here = values[i][1] / left_over; diff --git a/res/TemplateBatchFiles/libv3/models/rate_variation.bf b/res/TemplateBatchFiles/libv3/models/rate_variation.bf index 05faab508..36db000a0 100644 --- a/res/TemplateBatchFiles/libv3/models/rate_variation.bf +++ b/res/TemplateBatchFiles/libv3/models/rate_variation.bf @@ -38,7 +38,7 @@ function rate_variation.modifier_everything (q_ij, from, to, namespace, cat_name lfunction rate_variation_define_HMM (categories, namespace, globals, definition) { lambda = parameters.ApplyNameSpace ("hmm_lambda", namespace); - parameters.DeclareGlobalWithRanges (lambda, .1/categories, 0, 1/(categories-1)); + parameters.DeclareGlobalWithRanges (lambda, .1/categories, 1e-6, 1/(categories)); globals[utility.getGlobalValue("terms.rate_variation.hmm_lambda")] = lambda; hmm_T = parameters.ApplyNameSpace ("hmm_transition_matrix", namespace); diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index e3f9513d1..a5b1f1ed6 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -123,6 +123,7 @@ lfunction estimators.GetGlobalMLE_RegExp(results, re) { * @returns nothing */ function estimators.copyGlobals2(key2, value2) { + if 
(Type((estimators.ExtractMLEs.results[terms.global])[key2]) == "AssociativeList") { key2 = "[`key`] `key2`"; // this parameter has already been defined, need to prefix with model name @@ -163,26 +164,38 @@ function estimators.CopyFrequencies(model_name, model_decription) { function estimators.SetGlobals2(key2, value) { + if (Type(estimators.ApplyExistingEstimates.set_globals[key2]) == "AssociativeList") { key3 = "[`key`] `key2`"; } else { key3 = key2; } + + + + estimators.ApplyExistingEstimates.set_globals[key3] = { terms.id: key3, terms.fit.MLE: value }; - __init_value = (initial_values[terms.global])[key2]; + __init_value = (initial_values[terms.global])[key3]; + + if (Type(__init_value) != "AssociativeList") { + __init_value = (initial_values[terms.global])[key2]; + } + + if (Type(__init_value) == "AssociativeList") { if (__init_value[terms.fix]) { estimators.ApplyExistingEstimates.df_correction += parameters.IsIndependent(value); ExecuteCommands("`value` := " + __init_value[terms.fit.MLE]); + //_did_set [value] = 1; } else { if (parameters.IsIndependent (value)) { - //fprintf (stdout, "Setting `value` to " + __init_value[terms.fit.MLE] + "\n", parameters.IsIndependent (value), "\n"); ExecuteCommands("`value` = " + __init_value[terms.fit.MLE]); + //_did_set [value] = 1; } else { messages.log (value + " was already constrained in estimators.SetGlobals2"); } @@ -197,7 +210,14 @@ function estimators.SetGlobals2(key2, value) { * @returns nothing */ function estimators.SetGlobals(key, value) { + /*_did_set = {}; + for (i,v; in; ((value[terms.parameters])[terms.global])) { + _did_set [v] = 0; + }*/ + ((value[terms.parameters])[terms.global])["estimators.SetGlobals2"][""]; + + //console.log (_did_set); } /** @@ -437,6 +457,7 @@ lfunction estimators.TraverseLocalParameters (likelihood_function_id, model_desc * @returns number of constrained parameters; */ function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions, initial_values, _application_type, 
keep_track_of_proportional_scalers) { + estimators.ApplyExistingEstimatesToTree.constraint_count = 0; @@ -712,15 +733,15 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o can_do_restarts = null; - /* + - Export (lfe, likelihoodFunction); - console.log (lfe); - GetString (lfe, likelihoodFunction, -1); - console.log (lfe); - fprintf ("/Users/sergei/Desktop/busted.txt", CLEAR_FILE, lfe); - utility.ToggleEnvVariable("VERBOSITY_LEVEL", 1); - */ + //Export (lfe, likelihoodFunction); + //console.log (lfe); + //GetString (lfe, likelihoodFunction, -1); + //console.log (lfe); + //fprintf ("/Users/sergei/Desktop/busted.txt", CLEAR_FILE, lfe); + //utility.ToggleEnvVariable("VERBOSITY_LEVEL", 10); + diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp index dfdf062be..976de1b7b 100644 --- a/src/core/global_things.cpp +++ b/src/core/global_things.cpp @@ -120,7 +120,7 @@ namespace hy_global { kErrorStringDatasetRefIndexError ("Dataset index reference out of range"), kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"), kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"), - kHyPhyVersion = _String ("2.5.20"), + kHyPhyVersion = _String ("2.5.21"), kNoneToken = "None", kNullToken = "null", diff --git a/src/core/include/variable.h b/src/core/include/variable.h index c498310d3..2a65cc84a 100644 --- a/src/core/include/variable.h +++ b/src/core/include/variable.h @@ -90,7 +90,11 @@ class _Variable : public _Constant { return varValue; // get the value of the variable } void SetFormula (_Formula&); // set the variable to a new formula - + + void ClearValue (void) { + if (varValue) { delete (varValue); varValue = nil;} + } + const _Formula * get_constraint (void) const { return varFormula; } @@ -139,7 +143,8 @@ class _Variable : public _Constant { virtual bool CheckFForDependence (_AVLList const&, bool = false); 
virtual bool HasBeenInitialized (void) const {return !(varFlags & HY_VARIABLE_NOTSET);} virtual void MarkModified (void) {varFlags = varFlags | HY_VARIABLE_CHANGED;} - + virtual void ClearModified (void) {if (varFlags & HY_VARIABLE_CHANGED) varFlags -= HY_VARIABLE_CHANGED;} + _String const ContextFreeName (void) const; _StringBuffer& ContextFreeName (_StringBuffer&) const; _String const ParentObjectName (void) const; diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index f846c0d52..e9821d041 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -3897,8 +3897,10 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) kMethodCoordinate ("coordinate-wise"), kMethodNedlerMead ("nedler-mead"), kMethodHybrid ("hybrid"), - kMethodGradientDescent ("gradient-descent"); - + kMethodGradientDescent ("gradient-descent"), + kInitialGridMaximum ("LF_INITIAL_GRID_MAXIMUM"), + kInitialGridMaximumValue ("LF_INITIAL_GRID_MAXIMUM_VALUE"); + // optimization setting to produce a detailed log of optimization runs auto get_optimization_setting = [options] (const _String& arg, const hyFloat def_value) -> hyFloat { @@ -4137,7 +4139,14 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) GetInitialValues(); } - + if (keepStartingPoint) { + indexInd.Each ([this] (long v, unsigned long i) -> void { + _Variable *iv = GetIthIndependentVar (i); + if (iv->HasBeenInitialized()) { + iv->ClearModified(); + } + }); + } if (!keepStartingPoint) { hyFloat global_starting_point = get_optimization_setting (kGlobalStartingPoint, 0.1); @@ -4277,6 +4286,12 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) //_String const * grid_point = key_value.get_key(); hyFloat this_point = set_and_compute((_AssociativeList*)key_value.get_object()); + if (verbosity_level > 100) { + char buffer[512]; + snprintf (buffer, 512, "[GRID INITIAL VALUE CALCULATION] At point %s, LF = %15.12g\n", key_value.get_key()->get_str(), this_point); 
+ BufferToConsole(buffer); + } + //printf ("%s %g\n", grid_point->get_str(), this_point); if (this_point > max_value) { @@ -4286,7 +4301,14 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) } if (best_values) { - set_and_compute (best_values); + maxSoFar = set_and_compute (best_values); + hy_env::EnvVariableSet(kInitialGridMaximum, new _Constant (maxSoFar), false); + hy_env::EnvVariableSet(kInitialGridMaximumValue, best_values, true); + if (verbosity_level > 100) { + char buffer[512]; + snprintf (buffer, 512, "[GRID INITIAL VALUE CALCULATION: BEST VALUE] LF = %15.12g\n", maxSoFar); + BufferToConsole(buffer); + } } } @@ -6193,10 +6215,10 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi /*if (currentValue < 0.) { printf ("Negative value stashed %15.12lg\n", currentValue); } - hyFloat check_vv = GetIthIndependent(index); - if (verbosity_level > 100) { - printf ("_LikelihoodFunction::ComputeGradient %d\t%s\t%20.18g\t%e\t%e\t%e\t%e\t%15.12g\t \n", index, GetIthIndependentName(index)->get_str(), funcValue, testStep, currentValue, check_vv, check_vv-currentValue, DerivativeCorrection (index, currentValue)); - }*/ + hyFloat check_vv = GetIthIndependent(index);*/ + if (verbosity_level > 150) { + printf ("_LikelihoodFunction::ComputeGradient %d\t%s\t%20.18g\t%e\t%e\t%e\t%e\t%15.12g\n", index, GetIthIndependentName(index)->get_str(), funcValue, testStep, currentValue, check_vv, check_vv-currentValue, DerivativeCorrection (index, currentValue)); + } SetIthIndependent(index,currentValue); } else { gradient[index]= 0.; @@ -6331,6 +6353,8 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision initial_value = maxSoFar, currentPrecision = localOnly?step_precision:.01; + //printf ("\n\n_LikelihoodFunction::ConjugateGradientDescent ==> %d (%lg)\n", usedCachedResults, maxSoFar); + if (check_value != -INFINITY) { if (!CheckEqual(check_value, maxSoFar)) { _String errorStr = _String("Internal error in 
_LikelihoodFunction::ConjugateGradientDescent. The function evaluated at current parameter values [") & maxSoFar & "] does not match the last recorded LF maximum [" & check_value & "]"; @@ -7114,6 +7138,7 @@ hyFloat _LikelihoodFunction::replaceAPoint (_Matrix&m, long row, _Matrix&p, } + //_______________________________________________________________________________________ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned long iterations, unsigned long max_evaluations) { @@ -7155,7 +7180,13 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l auto set_point_and_compute = [this,&lf_evaluations] (_Matrix& v) -> hyFloat { lf_evaluations++; SetAllIndependent (&v); - return -Compute(); + hyFloat ll = -Compute(); + /* + if (fabs (ll - 22.3935843505) < 0.001 ) { + _TerminateAndDump(_String ("Checkpoint ") & likeFuncEvalCallCount); + } + */ + return ll; }; auto resort_values = [&zero] (_Matrix& v) -> void { @@ -7163,12 +7194,34 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l v = *sorted; DeleteObject (sorted); }; + + auto echo_values = [=] (_Matrix &v, hyFloat lf)-> void { + char buffer [512]; + snprintf(buffer, 512, "\n\t%15.12g (LF)", lf); + BufferToConsole(buffer); + for (long i = 0; i < N; i++) { + snprintf(buffer, 512, "\n\t%15.12g (%s)", GetIthIndependent(i,false), GetIthIndependentName(i)->get_str()); + BufferToConsole(buffer); + } + }; + + auto echo_simplex_values = [=] (_Matrix &function_values)-> void { + char buffer [512]; + for (long i = 0; i <= N; i++) { + snprintf(buffer, 512, "\n\tSimple point %d => %15.12g (map to point %d)", i, function_values (i,0), (long)function_values(i,1)); + BufferToConsole(buffer); + } + }; GetAllIndependent(simplex[0]); // current FEASIBLE point function_values.Store (0,0, set_point_and_compute (simplex[0])); + if (verbosity_level > 100) { + BufferToConsole ("\n[SIMPLEX SETUP]"); + echo_values(simplex[0], function_values(0,0)); + } for (long i = 0L; 
i < N; i++) { simplex[i+1] = simplex[0]; @@ -7177,10 +7230,11 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l lb = GetIthIndependentBound(i, true), ub = GetIthIndependentBound(i, false); -#ifdef NEDLER_MEAD_DEBUG - printf ("\nCoordinate %ld, value %g, range [%g, %g]\n", i, ith_coordinate, lb, ub); -#endif - + if (verbosity_level > 100) { + char buffer[512]; + snprintf (buffer, 512, "\n\tVariable %s, coordinate %ld, value %g, range [%g, %g]", GetIthIndependentName(i)->get_str(), i, ith_coordinate, lb, ub); + BufferToConsole(buffer); + } if (CheckEqual(ith_coordinate, lb)) { simplex[i+1][i] += MIN(DEFAULT_STEP_OFF_BOUND, (ub-lb)*0.5); } @@ -7194,19 +7248,20 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l } } -#ifdef NEDLER_MEAD_DEBUG - ObjectToConsole(&simplex[i+1]); -#endif - //SetAllIndependent(&simplex[i+1]); function_values.Store(i+1,0, set_point_and_compute (simplex[i+1])); function_values.Store(i+1,1, i+1); + if (verbosity_level > 100) { + echo_values(simplex[i+1], function_values(i+1,0)); + } + } resort_values (function_values); -#ifdef NEDLER_MEAD_DEBUG - printf ("\n**********\nSimplex iteration in\n"); - ObjectToConsole(&function_values); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nInitial simplex"); + echo_simplex_values (function_values); + } + for (long it_count = 0L; it_count <= iterations && lf_evaluations <= max_evaluations; it_count ++) { /** compute the centroid of all EXCEPT the WORST point **/ @@ -7232,46 +7287,55 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l new_point += t; hyFloat trial_value = set_point_and_compute (new_point); -#ifdef NEDLER_MEAD_DEBUG - printf ("\tTrial point value %g\n", trial_value); -#endif + if (verbosity_level > 100) { + char buffer[512]; + snprintf (buffer, 512, "\n>Simplex iteration %d", (it_count+1)); + BufferToConsole(buffer); + echo_values(new_point, trial_value); + } + bool do_shrink = false; if 
(trial_value >= function_values(0,0) && trial_value < function_values (N-1,0)) { // accept the reflection -#ifdef NEDLER_MEAD_DEBUG - printf ("### ACCEPT REFLECTION replace %d\n", (long)function_values(N,1)); -#endif + if (verbosity_level > 100) { + char buffer[512]; + snprintf (buffer, 512, "\nACCEPT REFLECTION replace point %d\n", (long)function_values(N,1)); + BufferToConsole (buffer); + } replace_point (new_point, trial_value, (long)function_values(N,1), N); } else { if (trial_value < function_values(0,0)) { // try expansion // x[e] = ¯x +β(x[r] − ¯x) -#ifdef NEDLER_MEAD_DEBUG - printf ("--- Trying expansion\n"); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nTrying expansion"); + } _Matrix expansion (centroid), t (new_point); t -= centroid; t *= simplex_beta; expansion += t; hyFloat expansion_value = set_point_and_compute (expansion); + if (verbosity_level > 100) { + echo_values(expansion, expansion_value); + } if (expansion_value < trial_value) { -#ifdef NEDLER_MEAD_DEBUG - printf ("### ACCEPT EXPANSION\n"); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nACCEPT EXPANSION"); + } replace_point (expansion, expansion_value, (long)function_values(N,1), N); } else { -#ifdef NEDLER_MEAD_DEBUG - printf ("### REJECT EXPANSION, ACCEPT REFLECTION\n"); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nREJECT EXPANSION, ACCEPT REFLECTION"); + } replace_point (new_point, trial_value, (long)function_values(N,1), N); } } else { //if (trial_value >= function_values (N,0)) { long worst_index = function_values (N,1); -#ifdef NEDLER_MEAD_DEBUG - printf ("--- Trying CONTRACTION\n"); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nTRYING CONTRACTION"); + } // x[ic] = x ̄− γ (x ̄−x[n+1]) // x[oc] = ¯x ̄+γ alpha (x ̄−x[n+1]) _Matrix cv (centroid), @@ -7281,65 +7345,67 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l t *= simplex_gamma; bool inside = false; if (trial_value >= function_values (N,0)) { // 
INSIDE -#ifdef NEDLER_MEAD_DEBUG - printf ("--- INSIDE contraction\n"); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nINSIDE contraction"); + } inside = true; cv -= t; } else { -#ifdef NEDLER_MEAD_DEBUG - printf ("--- OUTSIDE contraction\n"); -#endif - t *= simplex_alpha; + if (verbosity_level > 100) { + BufferToConsole ("\nOUTSIDE contraction"); + } + t *= simplex_alpha; cv += t; } hyFloat contraction_value = set_point_and_compute (cv); - -#ifdef NEDLER_MEAD_DEBUG - printf ("--Contraction value = %g (relected = %g, worst = %g)\n", contraction_value, trial_value, function_values (N,0)); -#endif + if (verbosity_level > 100) { + echo_values(cv, contraction_value); + char buffer[512]; + snprintf (buffer, 512, "\n\tContraction value = %g (relected = %g, worst = %g)", contraction_value, trial_value, function_values (N,0)); + } if ((inside && contraction_value < function_values (N,0)) || (!inside && contraction_value < trial_value)) { -#ifdef NEDLER_MEAD_DEBUG - printf ("### ACCEPT contraction\n"); -#endif + + if (verbosity_level > 100) { + BufferToConsole ("\nACCEPT contraction"); + } replace_point (cv, contraction_value, worst_index, N); } else { -#ifdef NEDLER_MEAD_DEBUG - printf ("### REJECT contraction\n"); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\nREJECT contraction"); + } do_shrink = true; } } } if (do_shrink) { long best_idx = function_values(0,1); -#ifdef NEDLER_MEAD_DEBUG - printf ("### PEFRORM SHRINKAGE\n"); - BufferToConsole("\nBest point\n"); - ObjectToConsole(&simplex[best_idx]); -#endif + if (verbosity_level > 100) { + BufferToConsole ("\n\tPerform Shrinkage"); + echo_values(simplex[best_idx], function_values(0,0)); + } + //x[i] = x[1] + δ(x[i] − x[1]). 
for (long i = 1L; i <= N; i++) { long idx = function_values(i,1); _Matrix t (simplex[idx]); t -= simplex[best_idx]; t *= simplex_delta; -#ifdef NEDLER_MEAD_DEBUG - BufferToConsole("\nOld point\n"); - ObjectToConsole(&simplex[idx]); -#endif simplex[idx] = simplex[best_idx]; simplex[idx] += t; -#ifdef NEDLER_MEAD_DEBUG - BufferToConsole("\nMoved point\n"); - ObjectToConsole(&simplex[idx]); -#endif + function_values.Store (i, 0, set_point_and_compute(simplex[idx])); + if (verbosity_level > 100) { + echo_values(simplex[idx], function_values(i,0)); + } } } resort_values (function_values); - + if (verbosity_level > 100) { + BufferToConsole ("\nCurrent simplex"); + echo_simplex_values (function_values); + NLToConsole(); + } if (verbosity_level==1) { UpdateOptimizationStatus (-function_values (0,0),progress_tracker.PushValue (-function_values (0,0)),1,true,progressFileString); } else { @@ -7370,11 +7436,31 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l #endif } - + /* + BufferToConsole("\n$$$$$\nFunction values\n"); + ObjectToConsole(&function_values); + BufferToConsole("\nBest point\n"); + ObjectToConsole(&simplex[(long)function_values(0,1)]); + */ + long best_idx = function_values(0,1); + /*for (long i = 0; i < simplex[best_idx].GetHDim(); i++) { + fprintf (stdout, "%d %s %g\n", i, GetIthIndependentName(i)->get_str(), simplex[best_idx][i]); + }*/ + N_inv = -set_point_and_compute (simplex[best_idx]); + if (verbosity_level > 100) { + BufferToConsole ("\nFinal simplex point"); + echo_values (simplex[best_idx], N_inv); + NLToConsole(); + } + + if (!CheckEqual(N_inv , -function_values(0,0))) { + _TerminateAndDump(_String("Internal error in _SimplexMethod; final point log-likelihood does not match the best recorded log-L: ") & N_inv & " vs " & -function_values(0,0)); + } + delete [] simplex; return N_inv; } @@ -7932,22 +8018,47 @@ void _LikelihoodFunction::UpdateIndependent (long index, bool purgeResults, _ if (f!=-1) { theList->Delete(f); 
(*secondList)<ScanForVariables(al,true); - al.ReorderList(); - } + _AVLList al (&newVars); + LocateVar(index)->ScanForVariables(al,true); + al.ReorderList(); + + + /* SLKP 20201027 TODO + This involves linear searches of potentially large arrays; especially if there are many variables in `newVars` + */ + newVars.Each([theList] (long idx, unsigned long ) -> void { + if (LocateVar(idx)->IsIndependent() && theList->Find(idx)==-1) { + (*theList) << idx; + } + }); - for (f=0; fIsIndependent()&& theList->Find(newVars.list_data[f])==-1) { (*theList) << newVars.list_data[f]; } - } + }*/ + + if (theList!=whichList) { + /* SLKP 20201026 + if the variable is being set to a constant value, and some other variables depend on it, + this could create the situation where they don't get properly refreshed + */ + if (purgeResults) { + for (long i = 0; i < indexDep.lLength; i++) { + _Variable* ith_dep = GetIthDependentVar(i); + if (ith_dep->CheckFForDependence(index)) { + ith_dep->ClearValue (); + ith_dep->Compute(); + } + } + } + for (f = 0; f < indVarsByPartition.lLength; f++) { UpdateIndependent (index, false, (_SimpleList*)indVarsByPartition(f), (_SimpleList*)depVarsByPartition(f)); } @@ -8335,7 +8446,7 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c long snID = -1; if (nodeID == 1) { - if (matrices->lLength == 2) { + if (matrices->lLength > 1) { branches->Clear(false); matrices->Clear(false); @@ -8385,8 +8496,9 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c branches->Populate (t->GetINodeCount()+t->GetLeafCount()-1,0,1); } + #ifdef _UBER_VERBOSE_LF_DEBUG - fprintf (stderr, "%d matrices, %d branches marked for rate class %d\n", matrices->lLength, branches->lLength, catID); + fprintf (stderr, "\n%d matrices, %d branches marked for rate class %d, doCachedComp = %d\n", matrices->lLength, branches->lLength, catID, doCachedComp); if (matrices->lLength == 0) { fprintf (stderr, "Hmm\n"); } @@ -8591,6 +8703,7 @@ 
hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c printf ("AFTER CONSTRUCT CACHE overallScalingFactors = %ld\n", overallScalingFactors[0]); }*/ + if (sum > -INFINITY) { hyFloat checksum = t->ComputeLLWithBranchCache (*sl, doCachedComp, diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index 067063b2b..b6598e3f9 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -5451,7 +5451,7 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat tempS.vDim = vDim; tempS.lDim = lDim; tempS.theData = stash; - // zero out the stash 20200929 : is this necessary? + // zero out the stash TODO: 20200929 : is this necessary? memset (stash, 0, sizeof (hyFloat)*lDim); do { temp.MultbyS (*this,false, &tempS, nil); @@ -6901,12 +6901,34 @@ hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) { } } - //memcpy (theData, stash, lDim * sizeof (hyFloat)); + + long lDimmod4 = (lDim >> 2) << 2; + hyFloat diffs[4] = {0.0,0.0,0.0,0.0}; + + for (long s = 0; s < lDimmod4; s+=4) { + hyFloat d1 = fabs (theData[s ] - stash[s ]); + hyFloat d2 = fabs (theData[s+1] - stash[s+1]); + hyFloat d3 = fabs (theData[s+2] - stash[s+2]); + hyFloat d4 = fabs (theData[s+3] - stash[s+3]); + if (d1 > diffs[0]) diffs[0] = d1; + if (d2 > diffs[1]) diffs[1] = d2; + if (d3 > diffs[2]) diffs[2] = d3; + if (d4 > diffs[3]) diffs[3] = d4; + } + + for (long s = lDimmod4; s < lDim; s++) { + hyFloat d1 = fabs (theData[s] - stash[s]); + if (d1 > diffs[0]) diffs[0] = d1; + } + + diff = MAX (MAX (diffs[0], diffs[1]), MAX (diffs[2], diffs[3])); + + memcpy (theData, stash, lDim * sizeof (hyFloat)); - for (long s = 0; s < lDim; s++) { + /*for (long s = 0; s < lDim; s++) { StoreIfGreater(diff, fabs (theData[s] - stash[s])); theData[s] = stash[s]; - } + }*/ } return diff; } diff --git a/src/core/variable.cpp b/src/core/variable.cpp index 8d3a9ef77..5f333fef5 100644 --- a/src/core/variable.cpp +++ b/src/core/variable.cpp @@ -707,6 +707,7 @@ void _Variable::SetFormula 
(_Formula& theF) { for (AVLListXIteratorKeyValue variable_record : AVLListXIterator (&variableNames)) { _Variable * theV = LocateVar(variable_record.get_value()); //printf ("%s\n", theV->GetName()->get_str()); + // SLKP 20201028 TODO: this is slow for large trees, because the entire variable space is being traversed. if (theV->IsContainer()) { _VariableContainer* theVC = (_VariableContainer*)theV; if (theVC->SetDependance(theIndex) == -2) { diff --git a/src/core/variablecontainer.cpp b/src/core/variablecontainer.cpp index ebfcea8d4..eb2142ce5 100644 --- a/src/core/variablecontainer.cpp +++ b/src/core/variablecontainer.cpp @@ -456,6 +456,12 @@ bool _VariableContainer::NeedToExponentiate (bool ignoreCats) const { auto has_changed = [=] (long var_index, long ref_index, unsigned long) -> bool { if (ref_index >= 0L) { return LocateVar (var_index) -> HasChanged (ignoreCats); + /*bool haz = LocateVar (var_index) -> HasChanged (ignoreCats); + if (haz) { + _Variable *lv = LocateVar (var_index); + fprintf (stderr, "==> %s HAZ changed in the context of %s (%d, %x, %d)\n", lv->GetName()->get_str(), GetName()->get_str(), lv->varFlags, LocateVar (var_index)->varValue, lv->varValue->IsVariable()); + } + return haz;*/ } return false; }; From 9f2a32b8e20481480dc4beb71fd2628f917ab954 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Thu, 29 Oct 2020 06:44:17 -0400 Subject: [PATCH 3/5] LMGT fixes; relaxing SimplexCheck stringency for now --- src/core/likefunc.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index e9821d041..de0711b13 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -6217,7 +6217,7 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi } hyFloat check_vv = GetIthIndependent(index);*/ if (verbosity_level > 150) { - printf ("_LikelihoodFunction::ComputeGradient %d\t%s\t%20.18g\t%e\t%e\t%e\t%e\t%15.12g\n", index, 
GetIthIndependentName(index)->get_str(), funcValue, testStep, currentValue, check_vv, check_vv-currentValue, DerivativeCorrection (index, currentValue)); + printf ("_LikelihoodFunction::ComputeGradient %ld\t%s\t%20.18g\t%e\t%e\t%e\t%e\t%15.12g\n", index, GetIthIndependentName(index)->get_str(), funcValue, testStep, currentValue, check_vv, check_vv-currentValue, DerivativeCorrection (index, currentValue)); } SetIthIndependent(index,currentValue); } else { @@ -7208,7 +7208,7 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l auto echo_simplex_values = [=] (_Matrix &function_values)-> void { char buffer [512]; for (long i = 0; i <= N; i++) { - snprintf(buffer, 512, "\n\tSimple point %d => %15.12g (map to point %d)", i, function_values (i,0), (long)function_values(i,1)); + snprintf(buffer, 512, "\n\tSimple point %ld => %15.12g (map to point %ld)", i, function_values (i,0), (long)function_values(i,1)); BufferToConsole(buffer); } }; @@ -7289,7 +7289,7 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l if (verbosity_level > 100) { char buffer[512]; - snprintf (buffer, 512, "\n>Simplex iteration %d", (it_count+1)); + snprintf (buffer, 512, "\n>Simplex iteration %ld", (it_count+1)); BufferToConsole(buffer); echo_values(new_point, trial_value); } @@ -7300,7 +7300,7 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l // accept the reflection if (verbosity_level > 100) { char buffer[512]; - snprintf (buffer, 512, "\nACCEPT REFLECTION replace point %d\n", (long)function_values(N,1)); + snprintf (buffer, 512, "\nACCEPT REFLECTION replace point %ld\n", (long)function_values(N,1)); BufferToConsole (buffer); } replace_point (new_point, trial_value, (long)function_values(N,1), N); @@ -7457,7 +7457,7 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l NLToConsole(); } - if (!CheckEqual(N_inv , -function_values(0,0))) { + if (fabs(N_inv + function_values(0,0)) > .1) { 
_TerminateAndDump(_String("Internal error in _SimplexMethod; final point log-likelihood does not match the best recorded log-L: ") & N_inv & " vs " & -function_values(0,0)); } From 8c7d1255686f297f1fed2711202b2189a2bb6069 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Thu, 29 Oct 2020 07:11:08 -0400 Subject: [PATCH 4/5] Remove tracer info info FMM.wbf --- tests/hbltests/libv3/FMM.wbf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/hbltests/libv3/FMM.wbf b/tests/hbltests/libv3/FMM.wbf index 5b823dcb8..e2c765855 100644 --- a/tests/hbltests/libv3/FMM.wbf +++ b/tests/hbltests/libv3/FMM.wbf @@ -1,6 +1,6 @@ GetString (version, HYPHY_VERSION, 0); -PRODUCE_OPTIMIZATION_LOG = 1; +//PRODUCE_OPTIMIZATION_LOG = 1; if (+version >= 2.4) { LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"--code" : "Universal", "--alignment" : PATH_TO_CURRENT_BF + "data/CD2.nex", "--branches" : "GROUP1", "--srv" : "No", "--starting-points" : "5"}); @@ -11,7 +11,7 @@ if (+version >= 2.4) { LoadFunctionLibrary ("shared.bf"); LoadFunctionLibrary ("libv3/IOFunctions.bf"); -fprintf ("logger.hbl", CLEAR_FILE, ^((fitter.results[terms.likelihood_function])+".trace")); +//fprintf ("logger.hbl", CLEAR_FILE, ^((fitter.results[terms.likelihood_function])+".trace")); assert (check_value ( From 89c682051fa48bb74cf39bdfb2e0008d1d75f0ef Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Thu, 29 Oct 2020 08:32:42 -0400 Subject: [PATCH 5/5] Adding FMM --- .../SelectionAnalyses/FitMultiModel.bf | 565 ++++++++++++++++++ tests/hbltests/libv3/FMM.wbf | 6 +- 2 files changed, 568 insertions(+), 3 deletions(-) create mode 100644 res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf b/res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf new file mode 100644 index 000000000..431beb406 --- /dev/null +++ b/res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf @@ -0,0 +1,565 @@ 
+RequireVersion ("2.4.0"); + + +LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4 +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/IOFunctions.bf"); +LoadFunctionLibrary("libv3/tasks/estimators.bf"); +LoadFunctionLibrary("libv3/tasks/alignments.bf"); +LoadFunctionLibrary("libv3/tasks/ancestral.bf"); +LoadFunctionLibrary("libv3/models/codon.bf"); +LoadFunctionLibrary("libv3/tasks/trees.bf"); +LoadFunctionLibrary("libv3/tasks/genetic_code.bf"); +LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf"); +LoadFunctionLibrary("SelectionAnalyses/modules/selection_lib.ibf"); +LoadFunctionLibrary("libv3/models/codon/MG_REV.bf"); +LoadFunctionLibrary("libv3/models/codon/MG_REV_MH.bf"); +LoadFunctionLibrary("libv3/models/codon/MG_REV_TRIP.bf"); +LoadFunctionLibrary("libv3/convenience/math.bf"); +LoadFunctionLibrary("libv3/models/rate_variation.bf"); +LoadFunctionLibrary("libv3/models/codon/BS_REL.bf"); + + +utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); +//utility.SetEnvVariable ("VERBOSITY_LEVEL", 10); +fitter.rate_classes = 3; + +KeywordArgument ("code", "Which genetic code should be used", "Universal"); + /** + keyword, description (for inline documentation and help messages), default value + */ +KeywordArgument ("alignment", "An in-frame codon alignment in one of the formats supported by HyPhy"); + /** + keyword, description (for inline documentation and help messages), no default value, + meaning that it will be required + */ + +KeywordArgument ("tree", "A phylogenetic tree (optionally annotated with {})", null, "Please select a tree file for the data:"); + /** the use of null as the default argument means that the default expectation is for the + argument to be missing, i.e. 
the tree is expected to be in the file + the fourth, optional argument, can match this keyword with the dialog prompt / choice list title, + meaning that it can only be consumed when this dialog prompt / choice list is invoked + This allows handling some branching logic conditionals + */ + +KeywordArgument ("rates", "The number omega rate classes to include in the model [1-10, default 3, 1 to turn-off rate variation]", 3); + +KeywordArgument ("triple-islands", "Use a separate rate parameter for synonymous triple-hit substitutions", "No"); + + +fitter.analysis_description = {terms.io.info : "Examine whether or not a codon alignment is better fit by models which permit multiple instantaneous substitutions. v0.2 adds a separate rate for codon-island triple-hit rates", + terms.io.version : "1.0", + terms.io.authors : "Sergei L Kosakovsky Pond, Sadie Wisotsky and Alexander Lucaci", + terms.io.contact : "spond@temple.edu", + terms.io.reference: "Submitted; preprint at hyphy.org/resources/fmm.pdf", + terms.io.requirements : "in-frame codon alignment and a phylogenetic tree" + }; + +io.DisplayAnalysisBanner (fitter.analysis_description); + + +namespace fitter.terms { + MG94 = "Standard MG94"; + MG94x2 = "MG94 with double instantaneous substitutions"; + MG94x3 = "MG94 with double and triple instantaneous substitutions"; + MG94x3xS = "MG94 with double and triple instantaneous substitutions [only synonymous islands]"; + json.site_logl = "Site Log Likelihood"; + json.evidence_ratios = "Evidence Ratios"; + json.site_reports = "Site substitutions"; +} + +fitter.json = { terms.json.analysis: fitter.analysis_description, + terms.json.input: {}, + terms.json.fits : {}, + terms.json.timers : {}, + fitter.terms.json.site_logl : {}, + fitter.terms.json.evidence_ratios : {}, + fitter.terms.json.site_reports : {} + }; + +fitter.display_orders = {terms.original_name: -1, + terms.json.nucleotide_gtr: 0, + fitter.terms.MG94: 1, + fitter.terms.MG94x2: 2, + fitter.terms.MG94x3: 3 + }; + + + 
+selection.io.startTimer (fitter.json [terms.json.timers], "Overall", 0); + + +namespace fitter { + LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); + load_file ({utility.getGlobalValue("terms.prefix"): "fitter", utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "selection.io.SelectAllBranches"}}); +} + +fitter.rate_classes = io.PromptUser ("The number of omega rate classes to include in the model", 3, 1, 10, TRUE); + +fitter.do_islands = io.SelectAnOption ({"Yes" : "Use a separate rate parameter for synonymous triple-hit substitutions (e.g. serine islands)", + "No" : "All triple hits have the same rate multiplier"}, + "Synonymous triple-hit substitutions use a separate rate" + ) == "Yes"; + + +namespace fitter { + doGTR ("fitter"); +} + +KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'FITTER.json')", fitter.codon_data_info [terms.json.json]); +fitter.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to"); + +KeywordArgument ("save-fit", "Write model fit files (HyPhy NEXUS) to this file path with extensions .MODEL_NAME.bf; default is NOT to save, or 'dev/null'", "/dev/null"); +fitter.save_model_path = io.PromptUserForFilePath ("Save model fit files to"); + + +estimators.fixSubsetOfEstimates(fitter.gtr_results, fitter.gtr_results[terms.global]); + +namespace fitter { + scaler_prefix = "fitter.scaler"; + doPartitionedMG ("fitter", FALSE); +} + +//Export(lf_serialized, ^(fitter.partioned_mg_results)); +//fprintf(PROMPT_FOR_FILE,CLEAR_FILE, lf_serialized, "\n"); + +//*************** GENERIC FITTER HANDLER ***** + +function fitter.run_model_fit (model_name, model_generator, initial_values) { + io.ReportProgressMessageMD ("fitter", model_name, "Fitting `model_name`"); + selection.io.startTimer (fitter.json [terms.json.timers], model_name, fitter.display_order [model_name]); + + 
fitter.results = estimators.FitCodonModel (fitter.filter_names, fitter.trees, model_generator, fitter.codon_data_info [utility.getGlobalValue("terms.code")], + { + utility.getGlobalValue ("terms.run_options.model_type"): utility.getGlobalValue("terms.global"), + utility.getGlobalValue ("terms.run_options.retain_lf_object"): TRUE, + utility.getGlobalValue ("terms.run_options.retain_model_object"): TRUE + }, initial_values); + + + if (fitter.save_model_path != "/dev/null") { + //^(fitter.results[terms.likelihood_function]) + io.SpoolLF(fitter.results[terms.likelihood_function], fitter.save_model_path, model_name); + } + + fitter.models = fitter.results [terms.model]; + + if (^"fitter.rate_classes" > 1) { + fitter.cat_info = rate_variation.extract_category_information (fitter.models[fitter.models ["INDEXORDER"][0]]); + fitter.cat_info = Transpose (fitter.cat_info[fitter.cat_info["INDEXORDER"][0]])%0; + } else { + fitter.cat_info = {{1,1}}; + } + + + ConstructCategoryMatrix (fitter.run_model_fit.sl, ^(fitter.results[terms.likelihood_function]), SITE_LOG_LIKELIHOODS); + (fitter.json [fitter.terms.json.site_logl])[model_name] = fitter.run_model_fit.sl; + + io.ReportProgressMessageMD("fitter", model_name, "* " + selection.io.report_fit (fitter.results, 0, fitter.codon_data_info[terms.data.sample_size])); + fitter.global_dnds = selection.io.extract_global_MLE_re (fitter.results, "^(" + terms.parameters.omega_ratio + "|" + terms.parameters.multiple_hit_rate + "|" + terms.parameters.triple_hit_rate + ")"); + + + utility.ForEach (fitter.global_dnds, "_value_", 'io.ReportProgressMessageMD ("fitter", model_name, "* " + _value_[terms.description] + " = " + Format (_value_[terms.fit.MLE],8,4));'); + + if (^"fitter.rate_classes" > 1) { + io.ReportProgressMessageMD("fitter", model_name, "* The following relative rate distribution (mean 1) for site-to-site **non-synonymous** rate variation was inferred"); + selection.io.report_distribution (fitter.cat_info); + } + + fitter.cat_info 
= { + terms.rate_variation.distribution : fitter.cat_info, + terms.parameters: utility.Map (fitter.results[terms.global], "_value_", '_value_ [terms.fit.MLE]') + }; + + selection.io.json_store_lf (fitter.json, + model_name, + fitter.results[terms.fit.log_likelihood], + fitter.results[terms.parameters], + fitter.sample_size, + fitter.cat_info, + fitter.display_orders[model_name]); + + + utility.ForEachPair (fitter.filter_specification, "_key_", "_value_", + 'selection.io.json_store_branch_attribute(fitter.json, model_name, terms.branch_length, fitter.display_orders[model_name], + _key_, + selection.io.extract_branch_info((fitter.results[terms.branch_length])[_key_], "selection.io.branch.length"));'); + + selection.io.stopTimer (fitter.json [terms.json.timers], model_name); + return fitter.results; +} + +//*************** END GENERIC FITTER HANDLER ***** + +//****** ADDING RATE VARIATION **** + +function fitter.modifier_omega (q_ij, from, to, namespace, cat_name) { + if (Abs (q_ij[terms.model.rate_entry])) { + if (utility.Has (q_ij[terms.global], terms.parameters.omega_ratio, "String")) { + q_ij[terms.model.rate_entry] = "(" + q_ij[terms.model.rate_entry] + ")*" + cat_name; + //console.log (q_ij); + } + } + return q_ij; +} + + +lfunction MG_REV.model.with.GDD (type, code) { + def = models.codon.MG_REV.ModelDescription (type, code); + if (^"fitter.rate_classes" > 1) { + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("fitter.rate_classes")}); + (def [utility.getGlobalValue("terms.model.rate_variation")])[utility.getGlobalValue("terms.rate_variation.rate_modifier")] = "fitter.modifier_omega"; + } + return def; + } + +lfunction MG_REV_MH.model.with.GDD (type, code) { + def = models.codon.MG_REV_MH.ModelDescription (type, code); + if (^"fitter.rate_classes" > 1) { + def [utility.getGlobalValue("terms.model.rate_variation")] = 
rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("fitter.rate_classes")}); + (def [utility.getGlobalValue("terms.model.rate_variation")])[utility.getGlobalValue("terms.rate_variation.rate_modifier")] = "fitter.modifier_omega"; + } + return def; + } + +lfunction MG_REV_TRIP.model.with.GDD (type, code) { + def = models.codon.MG_REV_TRIP.ModelDescription (type, code); + if (^"fitter.rate_classes" > 1) { + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("fitter.rate_classes")}); + (def [utility.getGlobalValue("terms.model.rate_variation")])[utility.getGlobalValue("terms.rate_variation.rate_modifier")] = "fitter.modifier_omega"; + } + return def; + } + +//*************** END RATE VARIATION ***** + + +fitter.one_hit_results = fitter.run_model_fit (fitter.terms.MG94, "MG_REV.model.with.GDD", fitter.partitioned_mg_results); + +utility.Extend (fitter.one_hit_results[terms.global], + { + terms.parameters.multiple_hit_rate : { utility.getGlobalValue ("terms.fit.MLE") : 0.05, terms.fix : FALSE} + + }); + +fitter.two_hit_results = fitter.run_model_fit (fitter.terms.MG94x2, "MG_REV_MH.model.with.GDD", fitter.one_hit_results); + +utility.Extend (fitter.two_hit_results[terms.global], + { + terms.parameters.triple_hit_rate : { utility.getGlobalValue ("terms.fit.MLE") : 0.05, terms.fix : FALSE}, + terms.parameters.triple_hit_rate_syn : { utility.getGlobalValue ("terms.fit.MLE") : 0.05, terms.fix : FALSE} + + }); + + +lfunction fitter.MG_REV_TRIP.model.with.GDD (type, code) { + model = MG_REV_TRIP.model.with.GDD (type, code); + model[utility.getGlobalValue("terms.model.post_definition")] = "fitter.handle_triple_islands"; + return model; +} + +lfunction fitter.MG_REV_TRIP.model.with.GDD.islands (type, code) { + model = MG_REV_TRIP.model.with.GDD (type, code); + 
model[utility.getGlobalValue("terms.model.post_definition")] = "fitter.handle_only_triple_islands"; + return model; +} + +lfunction fitter.handle_triple_islands (model) { + if (utility.getGlobalValue("fitter.do_islands") == FALSE) { + parameters.SetConstraint (model.generic.GetGlobalParameter (model, utility.getGlobalValue("terms.parameters.triple_hit_rate_syn")), + model.generic.GetGlobalParameter (model, utility.getGlobalValue("terms.parameters.triple_hit_rate")), ""); + } + return models.generic.post.definition (model); +} + +lfunction fitter.handle_only_triple_islands (model) { + parameters.SetConstraint (model.generic.GetGlobalParameter (model, utility.getGlobalValue("terms.parameters.triple_hit_rate")), "0", ""); + return models.generic.post.definition (model); +} + +fitter.three_hit_results = fitter.run_model_fit (fitter.terms.MG94x3, "fitter.MG_REV_TRIP.model.with.GDD", fitter.one_hit_results); + +if (fitter.do_islands) { + fitter.three_hit_island_results = fitter.run_model_fit (fitter.terms.MG94x3xS, "fitter.MG_REV_TRIP.model.with.GDD.islands", fitter.three_hit_results); +} +// + +selection.io.stopTimer (fitter.json [terms.json.timers], "Overall"); + +// PERFORM HYPOTHESIS TESTING AND POPULATE TABLE REPORT + +fitter.LRT = { + "Double-hit vs single-hit" : math.DoLRT (fitter.one_hit_results[terms.fit.log_likelihood], fitter.two_hit_results[terms.fit.log_likelihood], 1), + "Triple-hit vs single-hit" : math.DoLRT (fitter.one_hit_results[terms.fit.log_likelihood], fitter.three_hit_results[terms.fit.log_likelihood], 2 + fitter.do_islands), + "Triple-hit vs double-hit" : math.DoLRT (fitter.two_hit_results[terms.fit.log_likelihood], fitter.three_hit_results[terms.fit.log_likelihood], 1 + fitter.do_islands) +}; + +if (fitter.do_islands) { + fitter.LRT ["Triple-hit-island vs double-hit"] = math.DoLRT (fitter.two_hit_results[terms.fit.log_likelihood], fitter.three_hit_island_results[terms.fit.log_likelihood],1); + fitter.LRT ["Triple-hit vs Triple-hit-island"] = 
math.DoLRT (fitter.three_hit_island_results[terms.fit.log_likelihood], fitter.three_hit_results[terms.fit.log_likelihood],1); +} + +fitter.json [terms.json.test_results] = fitter.LRT; + +fitter.table_output_options = { + utility.getGlobalValue("terms.table_options.header"): TRUE, + utility.getGlobalValue("terms.table_options.minimum_column_width"): 10, + utility.getGlobalValue("terms.table_options.align"): "center", + utility.getGlobalValue("terms.table_options.column_widths"): { + "0": 38, + "1": 12, + "2": 12, + "3": 12, + "4": 36, + "5": 12, + "6": 36 + } + }; + +fitter.report = { + { + "Model", "Log-L", "omega", " 2-hit rate", "p-value", "3-hit rate", "p-value" + } +}; + +io.ReportProgressMessageMD ("fitter", "testing", "Summary of rate estimates and significance testing"); + + +fprintf(stdout, io.FormatTableRow(fitter.report , fitter.table_output_options)); +fitter.table_output_options[utility.getGlobalValue("terms.table_options.header")] = FALSE; + +// single rate model +fprintf(stdout, io.FormatTableRow( + { + { + fitter.terms.MG94, + Format (fitter.one_hit_results[terms.fit.log_likelihood], 8, 2), + Format (((fitter.one_hit_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4), + "N/A", + "N/A", + "N/A", + "N/A" + } + }, + fitter.table_output_options) +); + +// double-hit rate model +fprintf(stdout, io.FormatTableRow( + { + { + fitter.terms.MG94 + " + 2 hits", + Format (fitter.two_hit_results[terms.fit.log_likelihood], 8, 2), + Format (((fitter.two_hit_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4), + Format (((fitter.two_hit_results[terms.global])[terms.parameters.multiple_hit_rate])[terms.fit.MLE],8,4) , + Format ((fitter.LRT["Double-hit vs single-hit"])[terms.p_value], 8, 4) + " (2-hit rate = 0)", + "N/A", + "N/A" + } + }, + fitter.table_output_options) +); + +if (fitter.do_islands) { + + + + fprintf(stdout, io.FormatTableRow( + { + { + fitter.terms.MG94 + " + 3 hits (islands)", + Format 
(fitter.three_hit_island_results[terms.fit.log_likelihood], 8, 2), + Format (((fitter.three_hit_island_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4), + Format (((fitter.three_hit_island_results[terms.global])[terms.parameters.triple_hit_rate_syn])[terms.fit.MLE],8,4) , + Format ((fitter.LRT["Triple-hit-island vs double-hit"])[terms.p_value], 8, 4) + " (3-hit island vs 2-hit)", + Format (((fitter.three_hit_results[terms.global])[terms.parameters.triple_hit_rate])[terms.fit.MLE],8,4), + Format ((fitter.LRT["Triple-hit vs Triple-hit-island"])[terms.p_value], 8, 4) + " (3-hit = 0)" + } + }, + fitter.table_output_options) + ); +} + +// triple-hit rate model +fprintf(stdout, io.FormatTableRow( + { + { + fitter.terms.MG94 + " + 2 or 3 hits", + Format (fitter.three_hit_results[terms.fit.log_likelihood], 8, 2), + Format (((fitter.three_hit_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4), + Format (((fitter.three_hit_results[terms.global])[terms.parameters.multiple_hit_rate])[terms.fit.MLE],8,4), + Format ((fitter.LRT["Triple-hit vs single-hit"])[terms.p_value], 8, 4) + " (2&3-hit rates = 0)", + Format (((fitter.three_hit_results[terms.global])[terms.parameters.triple_hit_rate])[terms.fit.MLE],8,4), + Format ((fitter.LRT["Triple-hit vs double-hit"])[terms.p_value], 8, 4) + " (3-hit rate(s) = 0)" + } + }, + fitter.table_output_options) +); + + + +(fitter.json [fitter.terms.json.evidence_ratios])["Two-hit"] = + fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x2], + (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94]); + +(fitter.json [fitter.terms.json.evidence_ratios])["Three-hit"] = + fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3], + (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x2]); + +if (fitter.do_islands) { + (fitter.json [fitter.terms.json.evidence_ratios])["Three-hit islands vs 2-hit"] = + fitter.EvidenceRatios 
((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3xS], + (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x2]); + + (fitter.json [fitter.terms.json.evidence_ratios])["Three-hit vs three-hit islands"] = + fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3], + (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3xS]); + +} + + +fitter.callout = {}; + +utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Two-hit"], "_index_", "_value_", +' + if (Log (_value_) > 2) { + fitter.callout [_index_[1]] = 1; + } +'); + +utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Three-hit"], "_index_", "_value_", +' + if (Log (_value_) > 2) { + fitter.callout [_index_[1]] = 1; + } +'); + +if (fitter.do_islands) { + utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Three-hit islands vs 2-hit"], "_index_", "_value_", + ' + if (Log (_value_) > 2) { + fitter.callout [_index_[1]] = 1; + } + '); + utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Three-hit vs three-hit islands"], "_index_", "_value_", + ' + if (Log (_value_) > 2) { + fitter.callout [_index_[1]] = 1; + } + '); +} + +if (utility.Array1D (fitter.callout)) { + // reconstruct ancestors here + + fitter.site_reports = {}; + fitter.ancestral_cache = ancestral.build (fitter.three_hit_results[terms.likelihood_function], 0, None); + + io.ReportProgressMessageMD ("fitter", "sites", "" + utility.Array1D (fitter.callout) + " individual " + io.SingularOrPlural (utility.Array1D (fitter.callout) , "site", "sites") + " which showed sufficiently strong preference for multiple-hit models"); + fitter.table_output_options = { + utility.getGlobalValue("terms.table_options.header"): TRUE, + utility.getGlobalValue("terms.table_options.minimum_column_width"): 10, + utility.getGlobalValue("terms.table_options.align"): "center", + utility.getGlobalValue("terms.table_options.column_widths"): { + "0": 
10, + "1": 25, + "2": 25, + "3": 60 + } + }; + + + if (fitter.do_islands) { + (fitter.table_output_options[utility.getGlobalValue("terms.table_options.column_widths")]) [3] = 25; + (fitter.table_output_options[utility.getGlobalValue("terms.table_options.column_widths")]) [4] = 25; + (fitter.table_output_options[utility.getGlobalValue("terms.table_options.column_widths")]) [5] = 60; + fitter.report = { + { + "Site", "ER (2 vs 1)", "ER (3 vs 2)", "ER (3-island vs 2)", "ER (3-island vs 3)", "Substitutions" + } + }; + } else { + fitter.report = { + { + "Site", "ER (2 vs 1)", "ER (3 vs 2)", "Substitutions" + } + }; + } + fprintf(stdout, "\n", io.FormatTableRow(fitter.report , fitter.table_output_options)); + + fitter.table_output_options[utility.getGlobalValue("terms.table_options.header")] = FALSE; + utility.ForEachPair (fitter.callout, "_site_", "_value_", " + + fitter.site_reports [_site_] = (ancestral.ComputeSubstitutionBySite (fitter.ancestral_cache, +_site_, None))[terms.substitutions]; + + if (fitter.do_islands) { + fprintf(stdout, io.FormatTableRow( + { + { + '' + (+_site_ + 1), + Format (((fitter.json [fitter.terms.json.evidence_ratios])['Two-hit'])[+_site_], 10, 4), + Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit'])[+_site_], 10, 4), + Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit islands vs 2-hit'])[+_site_], 10, 4), + Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit vs three-hit islands'])[+_site_], 10, 4), + fitter.SubstitutionHistory (fitter.site_reports [_site_]) + } + } + , fitter.table_output_options)); + + } else { + fprintf(stdout, io.FormatTableRow( + { + { + '' + (+_site_ + 1), + Format (((fitter.json [fitter.terms.json.evidence_ratios])['Two-hit'])[+_site_], 10, 4), + Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit'])[+_site_], 10, 4), + fitter.SubstitutionHistory (fitter.site_reports [_site_]) + } + } + , fitter.table_output_options)); + } + "); + + 
fitter.json [fitter.terms.json.site_reports] = fitter.site_reports; + + +} else { + io.ReportProgressMessageMD ("fitter", "sites", "No individual sites showed sufficiently strong preference for multiple-hit models"); + +} + +io.ReportProgressMessageMD ("fitter", "writing", "Writing detailed analysis report to \`" + fitter.codon_data_info [terms.json.json] + "\'"); + +io.SpoolJSON (fitter.json, fitter.codon_data_info [terms.json.json]); +return fitter.json; + +//------------------------------------------------------------------------------ +lfunction fitter.EvidenceRatios (ha, h0) { + return ha["Exp(_MATRIX_ELEMENT_VALUE_-h0[_MATRIX_ELEMENT_COLUMN_])"]; +} + + +//------------------------------------------------------------------------------ + +lfunction fitter.SubstitutionHistory (subs) { + result = ""; + result * 128; + keys = utility.sortStrings (utility.Keys (subs)); + + for (i = 0; i < Abs (subs); i+=1) { + source = keys[i]; + targets = subs[source]; + if (i > 0) { + result * ", "; + } + result * (source + "->"); + keys2 = utility.sortStrings (utility.Keys (targets)); + for (k = 0; k < Abs (targets); k+=1) { + result * (keys2[k] + "(" + Abs(targets[keys2[k]]) + ")"); + } + } + + result * 0; + return result; + +} diff --git a/tests/hbltests/libv3/FMM.wbf b/tests/hbltests/libv3/FMM.wbf index e2c765855..a317a5ce9 100644 --- a/tests/hbltests/libv3/FMM.wbf +++ b/tests/hbltests/libv3/FMM.wbf @@ -1,9 +1,9 @@ GetString (version, HYPHY_VERSION, 0); -//PRODUCE_OPTIMIZATION_LOG = 1; +/*PRODUCE_OPTIMIZATION_LOG = 1;*/ if (+version >= 2.4) { - LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"--code" : "Universal", "--alignment" : PATH_TO_CURRENT_BF + "data/CD2.nex", "--branches" : "GROUP1", "--srv" : "No", "--starting-points" : "5"}); + LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"--code" : "Universal", "--alignment" : PATH_TO_CURRENT_BF + "data/CD2.nex"}); } else { LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"0" : "Universal", "1" 
: PATH_TO_CURRENT_BF + "data/CD2.nex", "2" : "GROUP1", "3" : "Yes", "4" : "0.1"}); @@ -11,7 +11,7 @@ if (+version >= 2.4) { LoadFunctionLibrary ("shared.bf"); LoadFunctionLibrary ("libv3/IOFunctions.bf"); -//fprintf ("logger.hbl", CLEAR_FILE, ^((fitter.results[terms.likelihood_function])+".trace")); +/*fprintf ("logger.hbl", CLEAR_FILE, ^((fitter.results[terms.likelihood_function])+".trace"));*/ assert (check_value (