From 0eb6497392f87830d429e28e8e79a80f84fff842 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Fri, 30 Oct 2020 20:33:46 -0400 Subject: [PATCH 01/13] Tweaking optimization settings for large datasets --- CMakeLists.txt | 3 ++- .../libv3/tasks/estimators.bf | 16 +++++++++++++++ src/core/likefunc.cpp | 20 +++++++++++++++---- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ac5ad531..49c56da8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -567,7 +567,7 @@ makeLink(HYPHYMP hyphy hyphy) #------------------------------------------------------------------------------- # TESTS -#---------------------------------------------geany---------------------------------- +#------------------------------------------------------------------------------- add_test (NAME UNIT-TESTS COMMAND bash run_unit_tests.sh) add_test (CODON HYPHYMP tests/hbltests/SimpleOptimizations/SmallCodon.bf) @@ -579,6 +579,7 @@ add_test (NAME SLAC COMMAND HYPHYMP tests/hbltests/libv3/SLAC.wbf) add_test (NAME SLAC-PARTITIONED COMMAND HYPHYMP tests/hbltests/libv3/SLAC-partitioned.wbf) add_test (NAME FEL COMMAND HYPHYMP tests/hbltests/libv3/FEL.wbf) add_test (MEME HYPHYMP tests/hbltests/libv3/MEME.wbf) +add_test (MEME_MPI mpirun -np 4 HYPHYMPI tests/hbltests/libv3/MEME.wbf) add_test (MEME-PARTITIONED HYPHYMP tests/hbltests/libv3/MEME-partitioned.wbf) add_test (BUSTED HYPHYMP tests/hbltests/libv3/BUSTED.wbf) add_test (BUSTED-SRV HYPHYMP tests/hbltests/libv3/BUSTED-SRV.wbf) diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index a5b1f1ed6..919309252 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -906,6 +906,18 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, model_template, init this_namespace = this_namespace[0][Abs (this_namespace)-3]; df = estimators.CreateLFObject (this_namespace, data_filter, tree, model_template, initial_values, run_options, None); + + /* + partition parameters into groups + */ + + pg = utility.getEnvVariable ("PARAMETER_GROUPING"); + + if (Type (pg) != "AssociativeList") { + GetString (params, likelihoodFunction,-1); + pg = {"0" : params["Global Independent"]}; + utility.ToggleEnvVariable ("PARAMETER_GROUPING", pg); + } if (utility.Has (run_options,utility.getGlobalValue("terms.run_options.optimization_settings"),"AssociativeList")) { Optimize (mles, likelihoodFunction, run_options[utility.getGlobalValue("terms.run_options.optimization_settings")]); @@ -913,6 +925,10 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, model_template, init Optimize (mles, likelihoodFunction); } + if (Type (pg) == "AssociativeList") { + utility.ToggleEnvVariable ("PARAMETER_GROUPING", None); + } + if (Type(initial_values) == "AssociativeList") { utility.ToggleEnvVariable("USE_LAST_RESULTS", None); } diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 40e401cd6..521913bdb 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -3899,7 +3899,8 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) kMethodHybrid ("hybrid"), kMethodGradientDescent ("gradient-descent"), kInitialGridMaximum ("LF_INITIAL_GRID_MAXIMUM"), - kInitialGridMaximumValue ("LF_INITIAL_GRID_MAXIMUM_VALUE"); + kInitialGridMaximumValue ("LF_INITIAL_GRID_MAXIMUM_VALUE"), + kMaxGradientDimension ("MAXIMUM_GRADIENT_DIMENSION"); // optimization setting to produce a detailed log of optimization runs @@ -4026,6 +4027,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) TimeDifference timer; hyFloat hardLimitOnOptimizationValue = hy_env::EnvVariableGetNumber(kOptimizationHardLimit, (hyFloat) INFINITY); + long maxGradientBlockDimension = hy_env::EnvVariableGetNumber(kMaxGradientDimension, 1000.); hardLimitOnOptimizationValue = MAX (hardLimitOnOptimizationValue, 0.0); if (hardLimitOnOptimizationValue != INFINITY) { @@ -4334,7 +4336,13 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) //ConjugateGradientDescent (0.5, bestSoFar, true, 10); if (gradientBlocks.nonempty()) { for (long b = 0; b < gradientBlocks.lLength; b++) { - maxSoFar = ConjugateGradientDescent (currentPrecision, bestSoFar,true,10,(_SimpleList*)(gradientBlocks(b)),maxSoFar); + _SimpleList * this_block = (_SimpleList*)(gradientBlocks(b)); + if (this_block->countitems() <= maxGradientBlockDimension) { + //printf ("\n...Performing a gradient pass on block with %ld variables\n", this_block->countitems()); + maxSoFar = ConjugateGradientDescent (currentPrecision, bestSoFar,true,10,this_block,maxSoFar); + } else { + //printf ("\n...Skipping a large gradient block with %ld variables\n", this_block->countitems()); + } } } else { maxSoFar = ConjugateGradientDescent (currentPrecision, bestSoFar,true,10,nil,maxSoFar); @@ -4642,7 +4650,11 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) if (gradientBlocks.nonempty()) { for (long b = 0; b < gradientBlocks.lLength; b++) { - maxSoFar = ConjugateGradientDescent (prec, bestMSoFar,true,10,(_SimpleList*)(gradientBlocks(b)),maxSoFar); + _SimpleList * this_block = (_SimpleList*)(gradientBlocks(b)); + if (this_block->countitems() <= maxGradientBlockDimension) { + maxSoFar = ConjugateGradientDescent (prec, bestMSoFar,true,10,this_block,maxSoFar); + } + //maxSoFar = ConjugateGradientDescent (prec, bestMSoFar,true,10,(_SimpleList*)(gradientBlocks(b)),maxSoFar); } } else { maxSoFar = ConjugateGradientDescent (prec, bestMSoFar,true,10,nil,maxSoFar); @@ -6378,7 +6390,7 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision if (only_these_parameters) { only_these_parameters->Sort(); _SimpleList all (indexInd.lLength,0,1); - freeze.Intersect (all, *only_these_parameters); + freeze.Subtract (all, *only_these_parameters); } From 2e4f5c904feb02b1da2dd287904072e397f1caeb Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 31 Oct 2020 10:17:19 -0400 Subject: [PATCH 02/13] Adding missing nuc branch length step to CFEL --- res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf | 8 +++++++- src/core/likefunc.cpp | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf b/res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf index 00f09c5f7..b05041f83 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf @@ -267,6 +267,12 @@ fel.table_output_options = {terms.table_options.header : TRUE, terms.table_optio terms.table_options.column_widths : { "0" : 8, "1" : 16, "2" : 30, "3" : 30, "4" : 40, "5" : 10, "6" : 10}}; selection.io.startTimer (fel.json [terms.json.timers], "Model fitting",1); + +namespace fel { + doGTR ("fel"); +} + + estimators.fixSubsetOfEstimates(fel.gtr_results, fel.gtr_results[terms.global]); namespace fel { @@ -293,7 +299,6 @@ utility.ForEach (fel.global_dnds, "_value_", 'io.ReportProgressMessageMD ("fel", - //Store MG94 to JSON selection.io.json_store_lf_withEFV (fel.json, terms.json.global_mg94xrev, @@ -682,6 +687,7 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod } start.grid + values; } + Optimize (results, ^lf, { "OPTIMIZATION_METHOD" : "nedler-mead", diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 521913bdb..39d2642c2 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -4336,7 +4336,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options) //ConjugateGradientDescent (0.5, bestSoFar, true, 10); if (gradientBlocks.nonempty()) { for (long b = 0; b < gradientBlocks.lLength; b++) { - _SimpleList * this_block = (_SimpleList*)(gradientBlocks(b)); + _SimpleList * this_block = (_SimpleList*)gradientBlocks(b); if (this_block->countitems() <= maxGradientBlockDimension) { //printf ("\n...Performing a gradient pass on block with %ld variables\n", this_block->countitems()); maxSoFar = ConjugateGradientDescent (currentPrecision, bestSoFar,true,10,this_block,maxSoFar); From a416ffd682bf10e9b4709c220259ad57d343834f Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Sat, 31 Oct 2020 10:35:58 -0400 Subject: [PATCH 03/13] Remove MPI test --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49c56da8c..a6dd610d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -579,7 +579,7 @@ add_test (NAME SLAC COMMAND HYPHYMP tests/hbltests/libv3/SLAC.wbf) add_test (NAME SLAC-PARTITIONED COMMAND HYPHYMP tests/hbltests/libv3/SLAC-partitioned.wbf) add_test (NAME FEL COMMAND HYPHYMP tests/hbltests/libv3/FEL.wbf) add_test (MEME HYPHYMP tests/hbltests/libv3/MEME.wbf) -add_test (MEME_MPI mpirun -np 4 HYPHYMPI tests/hbltests/libv3/MEME.wbf) +#add_test (MEME_MPI mpirun -np 4 HYPHYMPI tests/hbltests/libv3/MEME.wbf) add_test (MEME-PARTITIONED HYPHYMP tests/hbltests/libv3/MEME-partitioned.wbf) add_test (BUSTED HYPHYMP tests/hbltests/libv3/BUSTED.wbf) add_test (BUSTED-SRV HYPHYMP tests/hbltests/libv3/BUSTED-SRV.wbf) From 6e9e25c5f27272fe484f3f010500b45b430660c2 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Mon, 2 Nov 2020 16:38:26 -0500 Subject: [PATCH 04/13] Improve under/overlow scaling; add batching of clear constraints; add intermediate caching for FEL/SLAC/MEME --- .../SelectionAnalyses/FEL.bf | 38 ++++++-- .../SelectionAnalyses/MEME.bf | 39 ++++++-- .../modules/shared-load-file.bf | 71 +++++++++++--- res/TemplateBatchFiles/libv3/IOFunctions.bf | 24 +++++ res/TemplateBatchFiles/libv3/all-terms.bf | 1 + .../libv3/tasks/estimators.bf | 11 ++- src/core/batchlan.cpp | 31 ++++--- src/core/batchlanruntime.cpp | 46 +++++++-- src/core/hbl_env.cpp | 3 + src/core/include/hbl_env.h | 1 + src/core/include/parser.h | 1 + src/core/likefunc.cpp | 2 + src/core/parser.cpp | 21 ++++- src/core/tree.cpp | 7 +- src/core/tree_evaluator.cpp | 93 ++++++++++++++++--- src/core/variable.cpp | 26 +++--- src/core/variablecontainer.cpp | 3 +- 17 files changed, 338 insertions(+), 80 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index e93df085e..adc32d0dc 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -158,16 +158,36 @@ namespace fel { io.ReportProgressMessageMD ("fel", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); - -fel.final_partitioned_mg_results = estimators.FitMGREV (fel.filter_names, fel.trees, fel.codon_data_info [terms.code], { - terms.run_options.model_type: terms.local, - terms.run_options.partitioned_omega: fel.selected_branches, - terms.run_options.retain_lf_object: TRUE, - terms.run_options.optimization_settings: { - "OPTIMIZATION_METHOD" : "coordinate-wise" +fel.run_full_mg94 = TRUE; + +if (Type (fel.save_intermediate_fits) == "AssociativeList") { + if (None != fel.save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (fel.save_intermediate_fits[^"terms.data.value"], "Full-MG94", "AssociativeList")) { + fel.final_partitioned_mg_results = (fel.save_intermediate_fits[^"terms.data.value"])["Full-MG94"]; + if (utility.Has (fel.save_intermediate_fits[^"terms.data.value"], "Full-MG94-LF", "String")) { + ExecuteCommands ((fel.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"]); + fel.run_full_mg94 = FALSE; + } + } } -}, fel.partitioned_mg_results); - +} + +if (fel.run_full_mg94) { + fel.final_partitioned_mg_results = estimators.FitMGREV (fel.filter_names, fel.trees, fel.codon_data_info [terms.code], { + terms.run_options.model_type: terms.local, + terms.run_options.partitioned_omega: fel.selected_branches, + terms.run_options.retain_lf_object: TRUE, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } + }, fel.partitioned_mg_results); + if (Type (fel.save_intermediate_fits) == "AssociativeList") { + (fel.save_intermediate_fits[^"terms.data.value"])["Full-MG94"] = fel.final_partitioned_mg_results; + Export (lfe, ^fel.final_partitioned_mg_results[^"terms.likelihood_function"]); + (fel.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"] = lfe; + io.SpoolJSON (fel.save_intermediate_fits[^"terms.data.value"],fel.save_intermediate_fits[^"terms.data.file"]); + } +} diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index 97428fd8f..b51f51801 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -159,6 +159,7 @@ namespace meme { estimators.fixSubsetOfEstimates(meme.partitioned_mg_results, meme.partitioned_mg_results[terms.global]); + meme.final_partitioned_mg_results_bl = estimators.FitMGREV (meme.filter_names, meme.trees, meme.codon_data_info [terms.code], { terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: meme.selected_branches, @@ -177,16 +178,40 @@ utility.ForEach (meme.global_dnds, "_value_", 'io.ReportProgressMessageMD ("MEME io.ReportProgressMessageMD ("MEME", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); -meme.final_partitioned_mg_results = estimators.FitMGREV (meme.filter_names, meme.trees, meme.codon_data_info [terms.code], { - terms.run_options.model_type: terms.local, - terms.run_options.partitioned_omega: meme.selected_branches, - terms.run_options.retain_lf_object: TRUE, - terms.run_options.optimization_settings: { - "OPTIMIZATION_METHOD" : "coordinate-wise" + +meme.run_full_mg94 = TRUE; + +if (Type (meme.save_intermediate_fits) == "AssociativeList") { + if (None != meme.save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (meme.save_intermediate_fits[^"terms.data.value"], "Full-MG94", "AssociativeList")) { + meme.final_partitioned_mg_results = (meme.save_intermediate_fits[^"terms.data.value"])["Full-MG94"]; + if (utility.Has (meme.save_intermediate_fits[^"terms.data.value"], "Full-MG94-LF", "String")) { + ExecuteCommands ((meme.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"]); + meme.run_full_mg94 = FALSE; + } + } } -}, meme.partitioned_mg_results); +} +if (meme.run_full_mg94) { + meme.final_partitioned_mg_results = estimators.FitMGREV (meme.filter_names, meme.trees, meme.codon_data_info [terms.code], { + terms.run_options.model_type: terms.local, + terms.run_options.partitioned_omega: meme.selected_branches, + terms.run_options.retain_lf_object: TRUE, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } + }, meme.partitioned_mg_results); + + if (Type (meme.save_intermediate_fits) == "AssociativeList") { + (meme.save_intermediate_fits[^"terms.data.value"])["Full-MG94"] = meme.final_partitioned_mg_results; + Export (lfe, ^meme.final_partitioned_mg_results[^"terms.likelihood_function"]); + (meme.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"] = lfe; + io.SpoolJSON (meme.save_intermediate_fits[^"terms.data.value"],meme.save_intermediate_fits[^"terms.data.file"]); + } +} + //meme.final_partitioned_mg_results = meme.partitioned_mg_results; diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index 9e0f34aed..c3e35c24d 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -292,10 +292,33 @@ function doGTR (prefix) { //utility.ToggleEnvVariable("VERBOSITY_LEVEL", 10); + + KeywordArgument ("intermediate-fits", "Use/save parameter estimates from 'initial-guess' model fits to a JSON file (default is not to save)", "/dev/null"); + save_intermediate_fits = io.ReadFromOrCreate ("Use/Save parameter estimates from 'initial-guess' model fits", {}); + + run_gtr = TRUE; + + if (None != save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (save_intermediate_fits[^"terms.data.value"], "GTR", "AssociativeList")) { + gtr_results = (save_intermediate_fits[^"terms.data.value"])["GTR"]; + run_gtr = FALSE; + } + } else { + save_intermediate_fits = None; + } + + - gtr_results = estimators.FitGTR(filter_names, - trees, - gtr_results); + if (run_gtr) { + gtr_results = estimators.FitGTR(filter_names, + trees, + gtr_results); + + if (Type (save_intermediate_fits) == "AssociativeList") { + (save_intermediate_fits[^"terms.data.value"])["GTR"] = gtr_results; + io.SpoolJSON (save_intermediate_fits[^"terms.data.value"],save_intermediate_fits[^"terms.data.file"]); + } + } KeywordArgument ("kill-zero-lengths", "Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", "Yes"); @@ -334,7 +357,6 @@ function doGTR (prefix) { selection.io.report_fit (gtr_results, 0, 3*(^"`prefix`.sample_size"))); - /* Store nucleotide fit */ gtr_rates = utility.Map( utility.Map (gtr_results[utility.getGlobalValue("terms.global")], "_value_", ' {terms.fit.MLE : _value_[terms.fit.MLE]}'), @@ -406,14 +428,41 @@ function doPartitionedMG (prefix, keep_lf) { scaler_variables = utility.PopulateDict (0, partition_count, "`prefix`.scaler_prefix + '_' + _k_", "_k_"); utility.ForEach (scaler_variables, "_value_", "parameters.DeclareGlobal(_value_, None);parameters.SetValue(_value_, 3);"); - - partitioned_mg_results = estimators.FitMGREV(filter_names, trees, codon_data_info [utility.getGlobalValue("terms.code")], { - utility.getGlobalValue("terms.run_options.model_type"): utility.getGlobalValue("terms.local"), - utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler"): scaler_variables, - utility.getGlobalValue("terms.run_options.partitioned_omega"): selected_branches, - utility.getGlobalValue("terms.run_options.retain_lf_object"): keep_lf - }, gtr_results); + run_mg94 = TRUE; + + if (Type (save_intermediate_fits) == "AssociativeList") { + if (None != save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (save_intermediate_fits[^"terms.data.value"], "MG94", "AssociativeList")) { + partitioned_mg_results = (save_intermediate_fits[^"terms.data.value"])["MG94"]; + if (keep_lf) { + if (utility.Has (save_intermediate_fits[^"terms.data.value"], "MG94-LF", "String")) { + ExecuteCommands ((save_intermediate_fits[^"terms.data.value"])["MG94-LF"]); + run_mg94 = FALSE; + } + } else { + run_mg94 = FALSE; + } + } + } + } + + if (run_mg94) { + partitioned_mg_results = estimators.FitMGREV(filter_names, trees, codon_data_info [utility.getGlobalValue("terms.code")], { + utility.getGlobalValue("terms.run_options.model_type"): utility.getGlobalValue("terms.local"), + utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler"): scaler_variables, + utility.getGlobalValue("terms.run_options.partitioned_omega"): selected_branches, + utility.getGlobalValue("terms.run_options.retain_lf_object"): keep_lf + }, gtr_results); + if (Type (save_intermediate_fits) == "AssociativeList") { + (save_intermediate_fits[^"terms.data.value"])["MG94"] = partitioned_mg_results; + if (keep_lf) { + Export (lfe, ^(partitioned_mg_results[^"terms.likelihood_function"])); + (save_intermediate_fits[^"terms.data.value"])["MG94-LF"] = lfe; + } + io.SpoolJSON (save_intermediate_fits[^"terms.data.value"],save_intermediate_fits[^"terms.data.file"]); + } + } diff --git a/res/TemplateBatchFiles/libv3/IOFunctions.bf b/res/TemplateBatchFiles/libv3/IOFunctions.bf index 7563f437a..c86962bf9 100644 --- a/res/TemplateBatchFiles/libv3/IOFunctions.bf +++ b/res/TemplateBatchFiles/libv3/IOFunctions.bf @@ -66,6 +66,30 @@ lfunction io.PromptUserForString(prompt) { return str_val; } +/** + * @name io.HandleCacheRequest + * @param prompt + */ +lfunction io.ReadFromOrCreate (prompt, default_value) { + SetDialogPrompt (prompt); + contents = None; + fscanf (PROMPT_FOR_FILE, CREATE_FILE, "Raw", contents); + + if (^"FILE_CREATED") { + contents = default_value; + } else { + if (None != contents) { + contents = Eval (contents); + } else { + contents = default_value; + } + } + return { + ^"terms.data.file" : ^"LAST_FILE_PATH", + ^"terms.data.value" : contents + }; +} + /** * @name io.PromptUserForFilePath * @param prompt diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 41c5ab733..bd34bbda9 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -171,6 +171,7 @@ namespace terms{ file = "file"; cache = "cache"; name = "name"; + value = "value"; name_mapping = "name-mapping"; partitions = "partitions"; tree = "tree"; diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 919309252..83c2638d6 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -458,6 +458,9 @@ lfunction estimators.TraverseLocalParameters (likelihood_function_id, model_desc */ function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions, initial_values, _application_type, keep_track_of_proportional_scalers) { + SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0); + + estimators.ApplyExistingEstimatesToTree.constraint_count = 0; @@ -500,6 +503,9 @@ function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions } } + SetParameter (DEFER_CONSTRAINT_APPLICATION, 0, 0); + + //fprintf (stdout, Format (^_tree_name, 1,1), "\n"); return estimators.ApplyExistingEstimatesToTree.constraint_count; @@ -1125,6 +1131,8 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op alpha = model.generic.GetLocalParameter(mg_rev, utility.getGlobalValue("terms.parameters.synonymous_rate")); beta = model.generic.GetLocalParameter(mg_rev, utility.getGlobalValue("terms.parameters.nonsynonymous_rate")); io.CheckAssertion("None!=`&alpha` && None!=`&beta`", "Could not find expected local synonymous and non-synonymous rate parameters in \`estimators.FitMGREV\`"); + + SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0); apply_constraint: = component_tree + "." + node_name + "." + beta + ":=" + component_tree + "." + node_name + "." + alpha + "*" + new_globals[branch_map[node_name]]; @@ -1143,7 +1151,8 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op ExecuteCommands(apply_constraint); } } - } else {} + SetParameter (DEFER_CONSTRAINT_APPLICATION, 0, 0); + } else {} LikelihoodFunction likelihoodFunction = (lf_components); diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index 8f19d0749..341fa5e17 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1801,8 +1801,9 @@ void _ExecutionList::BuildExecuteCommandInstruction (_List * pieces, long code) void _ExecutionList::BuildFscanf(_List * pieces, long code) { - static const _String kFscanfRewind ("REWIND"); - + static const _String kFscanfRewind ("REWIND"), + kScanfCreate ("CREATE_FILE"); + long names_vs_types_offset = 0L; _List local_object_manager; @@ -1812,26 +1813,26 @@ void _ExecutionList::BuildFscanf(_List * pieces, long code) { scanf->parameters << pieces->GetItem(0); bool has_rewind = *(_String*) pieces->GetItem (1) == kFscanfRewind; - + bool has_create = *(_String*) pieces->GetItem (1) == kScanfCreate; // process argument types local_object_manager < scanf; _List argument_types; - _String* argument_type_spec = (_String*)pieces->GetItem(has_rewind ? 2L : 1L); + _String* argument_type_spec = (_String*)pieces->GetItem(has_rewind || has_create ? 2L : 1L); argument_type_spec->StripQuotes(); _ElementaryCommand::ExtractConditions(*argument_type_spec, 0, argument_types, ','); argument_types.ForEach ([&] (BaseRefConst t, unsigned long) -> void { - long argument_type = _ElementaryCommand :: fscanf_allowed_formats.FindObject(t); - if (argument_type == kNotFound) { - throw (((_String*)t)->Enquote() & " is not one of the supported argument types: " & _ElementaryCommand :: fscanf_allowed_formats.Join (", ")); - } - scanf->simpleParameters << argument_type; - } - ); + long argument_type = _ElementaryCommand :: fscanf_allowed_formats.FindObject(t); + if (argument_type == kNotFound) { + throw (((_String*)t)->Enquote() & " is not one of the supported argument types: " & _ElementaryCommand :: fscanf_allowed_formats.Join (", ")); + } + scanf->simpleParameters << argument_type; + } + ); - for (unsigned long index = has_rewind ? 3L : 2L; index < pieces->countitems(); index ++) { + for (unsigned long index = has_rewind || has_create? 3L : 2L; index < pieces->countitems(); index ++) { scanf->parameters << pieces->GetItem(index); } @@ -1843,6 +1844,9 @@ void _ExecutionList::BuildFscanf(_List * pieces, long code) { if (has_rewind) { scanf->simpleParameters << -1L; } + if (has_create) { + scanf->simpleParameters << -2L; + } local_object_manager.Pop(); scanf->addAndClean (*this); @@ -5100,10 +5104,11 @@ void SerializeModel (_StringBuffer & rec, long theModel, _AVLList* alreadyDo _StringBuffer glVars (128L), locVars(128L); + ExportIndVariables (glVars,locVars, &ind); ExportDepVariables (glVars,locVars, &dep); - rec << glVars <get_str(); } + + if (!ProcessFileName(file_path, true,false,(hyPointer)current_program.nameSpacePrefix, false, ¤t_program)) { return false; } FILE * input_stream = doFileOpen (file_path.get_str(), "rb"); + hy_env::EnvVariableSet(hy_env::file_created, new HY_CONSTANT_FALSE, false); if (!input_stream) { + if (has_create) { + input_stream = doFileOpen (file_path.get_str(), "wb"); + if (input_stream){ + while (argument_index < upper_bound) { + _Variable * store_here = _ValidateStorageVariable (current_program, argument_index + 1UL); + store_here->SetValue(new _MathObject, false, true, nil); + argument_index++; + } + hy_env::EnvVariableSet(hy_env::end_of_file, new HY_CONSTANT_TRUE, false); + hy_env::EnvVariableSet(hy_env::file_created, new HY_CONSTANT_TRUE, false); + return true; + } else { + throw (file_path.Enquote() & " could not be opened for reading or writing (CREATE mode) by fscanf. Path stack:\n\t" & GetPathStack("\n\t")); + } + } + throw (file_path.Enquote() & " could not be opened for reading by fscanf. Path stack:\n\t" & GetPathStack("\n\t")); } if (hy_env::EnvVariableTrue(hy_env::end_of_file)) { // reset path hy_scanf_last_file_path = kEmptyString; } - if (source_name != hy_scanf_last_file_path || has_rewind) { // new stream, or rewind on the current stream + if (source_name != hy_scanf_last_file_path || has_rewind || has_create) { // new stream, or rewind on the current stream hy_scanf_last_file_path = source_name; last_call_stream_position = 0L; } @@ -3644,8 +3676,6 @@ bool _ElementaryCommand::HandleFscanf (_ExecutionList& current_program, boo } } - unsigned long argument_index = 0UL, - upper_bound = has_rewind ? simpleParameters.countitems() - 1L : simpleParameters.countitems(); while (argument_index < upper_bound && current_stream_position < input_data->length()) { _Variable * store_here = _ValidateStorageVariable (current_program, argument_index + 1UL); @@ -3771,7 +3801,7 @@ bool _ElementaryCommand::HandleFscanf (_ExecutionList& current_program, boo current_stream_position = lookahead + 1L; argument_index ++; } - if (argument_index + has_rewind countitems()) { + DoForEachVariable ([](_Variable* v, long v_idx) -> void { + if (v->IsContainer()) { + ((_VariableContainer*)v)->RemoveDependance (*deferClearConstraint); + } + }); - - + for (unsigned long i = 0UL; inonempty()) { + ((_LikelihoodFunction*)likeFuncList(i))->UpdateDependent(*deferClearConstraint); + } + } + + if (deferClearConstraint) { + delete deferClearConstraint->dataList; + DeleteAndZeroObject(deferClearConstraint); } DeleteObject (deferSetFormula); deferSetFormula = nil; diff --git a/src/core/tree.cpp b/src/core/tree.cpp index 35e8feadd..5c3fd83d3 100644 --- a/src/core/tree.cpp +++ b/src/core/tree.cpp @@ -96,11 +96,11 @@ extern long likeFuncEvalCallCount, matrix_exp_count; -hyFloat _lfScalerPower = 100., +hyFloat _lfScalerPower = 64, _lfScalerUpwards = pow(2.,_lfScalerPower), _lfScalingFactorThreshold = 1./_lfScalerUpwards, _logLFScaler = _lfScalerPower *log(2.), - _lfMaxScaler = DBL_MAX * 1.e-10, + _lfMaxScaler = sqrt(DBL_MAX * 1.e-10), _lfMinScaler = 1./_lfMaxScaler; @@ -2584,7 +2584,8 @@ long _TheTree::ComputeReleafingCost (_DataSetFilter const* dsf, long firstInd bool * marked_nodes = (bool*) alloca (sizeof(bool) * flatTree.lLength); - InitializeArray(marked_nodes, flatTree.lLength, false); + memset (marked_nodes, 0, sizeof(bool) * flatTree.lLength); + //InitializeArray(marked_nodes, flatTree.lLength, false); dsf->CompareTwoSitesCallback (firstIndex, secondIndex, [marked_nodes, this] (long idx, unsigned long di)->void{ diff --git a/src/core/tree_evaluator.cpp b/src/core/tree_evaluator.cpp index 2b893dbb6..25667d7ec 100644 --- a/src/core/tree_evaluator.cpp +++ b/src/core/tree_evaluator.cpp @@ -487,6 +487,18 @@ template inline void __ll_loop_handle_scaling (hyFloat& sum fprintf (stderr, "THE SUM IS EXACTLY ZERO parent code %ld\n", parentCode); }*/ + /* + if (isnan (sum)) { + HandleApplicationError(_String("Site ") & siteID & " evaluated to a NaN probability in ComputeTreeBlockByBranch at branch " & parentCode & "; this is not a recoverable error and indicates some serious COVFEFE taking place."); + }*/ + + /*if (sum == 0.) { + printf ("Exactly 0 in __ll_loop_handle_scaling: site = %ld, branch = %ld\n", siteID, parentCode); + } + if (isinf(sum)) { + printf ("Infinity at __ll_loop_handle_scaling: site = %ld, branch = %ld\n", siteID, parentCode); + }*/ + if (__builtin_expect(sum < _lfScalingFactorThreshold && sum > 0.0,0)) { hyFloat scaler = _computeBoostScaler(scalingAdjustments [parentCode*siteCount + siteID] * _lfScalerUpwards, sum, didScale); @@ -504,6 +516,9 @@ template inline void __ll_loop_handle_scaling (hyFloat& sum //if (likeFuncEvalCallCount == 15098 && siteID == 91) { //fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]); // } + //if (isnan (parentConditionals [c])) { + // HandleApplicationError(_String("Site ") & siteID & " evaluated to a NaN probability in ComputeTreeBlockByBranch at branch " & parentCode & "; this is not a recoverable error and indicates some serious COVFEFE taking place."); + //} } if (siteFrequency == 1L) { @@ -529,6 +544,9 @@ template inline void __ll_loop_handle_scaling (hyFloat& sum #pragma GCC unroll 4 for (long c = 0; c < D; c++) { parentConditionals [c] *= scaler; + //if (isnan (parentConditionals [c])) { + // HandleApplicationError(_String("Site ") & siteID & " evaluated to a NaN probability in ComputeTreeBlockByBranch at branch " & parentCode & "; this is not a recoverable error and indicates some serious COVFEFE taking place."); + //} //if (likeFuncEvalCallCount == 15098 && siteID == 91) { // fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]); // } @@ -1030,10 +1048,33 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList #ifdef _SLKP_USE_AVX_INTRINSICS + //Site 297 evaluated to a NaN probability in ComputeTreeBlockByBranch at branch 9698; this is not a recoverable error and indicates some serious COVFEFE taking place. + + /*if (siteID == 297 && parentCode == 9507) { + printf ("\nCONDITIONAL DUMP @ %s %ld (%ld parent)\n", currentTreeNode->GetName()->get_str(), nodeCode, parentCode); + for (int i = 0; i < 61; i++) { + printf ("%d %lg %lg\n", i, childVector[i], parentConditionals[i]); + } + }*/ + __m256d grandTotal = __ll_handle_block20_product_sum<61,0> (tMatrixT, childVector, parentConditionals, nil); + /*if (siteID == 297 && parentCode == 9698) { + double checkGT [4]; + _mm256_storeu_pd (checkGT, grandTotal); + printf ("\n%g %g %g %g", checkGT [0], checkGT [1], checkGT [2], checkGT [3]); + }*/ grandTotal = __ll_handle_block20_product_sum<61,20> (tMatrixT, childVector, parentConditionals, &grandTotal); + /*if (siteID == 297 && parentCode == 9698) { + double checkGT [4]; + _mm256_storeu_pd (checkGT, grandTotal); + printf ("\n%g %g %g %g", checkGT [0], checkGT [1], checkGT [2], checkGT [3]); + }*/ grandTotal = __ll_handle_block20_product_sum<61,40> (tMatrixT, childVector, parentConditionals, &grandTotal); - + /*if (siteID == 297 && parentCode == 9698) { + double checkGT [4]; + _mm256_storeu_pd (checkGT, grandTotal); + printf ("\n%g %g %g %g", checkGT [0], checkGT [1], checkGT [2], checkGT [3]); + }*/ hyFloat const * __restrict tT = tMatrix + 61*60; __m256d lastTotal; @@ -1044,6 +1085,22 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList hyFloat s60 = _avx_sum_4(lastTotal) + childVector[60] * tT[60]; parentConditionals[60] *= s60; sum += _avx_sum_4(grandTotal) + s60; + /*if (siteID == 297 && parentCode == 9698) { + printf ("\n%g => %g\n", s60, sum); + }*/ + + /*if (siteID == 297 && parentCode == 9507) { + printf ("\nPRE-SCALE @ %s %ld\n", currentTreeNode->GetName()->get_str(), nodeCode); + for (int i = 0; i < 61; i++) { + printf ("%d %lg %lg\n", i, childVector[i], parentConditionals[i]); + } + } + for (int i = 0; i < 61; i++) { + if (isnan (parentConditionals[i])) { + HandleApplicationError(_String("Site ") & siteID & " evaluated to a NaN probability in ComputeTreeBlockByBranch; this is not a recoverable error and indicates some serious COVFEFE taking place, node " & long(nodeCode) & "\n\n"); + } + }*/ + #elif defined _SLKP_USE_SSE_INTRINSICS __m128d grandTotal; @@ -1070,6 +1127,12 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList #endif __ll_loop_handle_scaling<61L, true> (sum, parentConditionals, scalingAdjustments, didScale, parentCode, siteCount, siteID, localScalerChange, theFilter->theFrequencies.get (siteOrdering.list_data[siteID])); + /*if (siteID == 297 && parentCode == 9507) { + printf ("\nPOST-SCALE @ %s %ld\n", currentTreeNode->GetName()->get_str(), nodeCode); + for (int i = 0; i < 61; i++) { + printf ("%d %lg %lg\n", i, childVector[i], parentConditionals[i]); + } + }*/ childVector += 61; __ll_loop_epilogue @@ -1305,20 +1368,24 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList break; } - hyFloat term; - long const site_frequency = theFilter->theFrequencies (siteOrdering.list_data[siteID]); - - if (site_frequency > 1L) { - term = log(accumulator) * site_frequency - correction; + if (!isnan (accumulator)) { + hyFloat term; + long const site_frequency = theFilter->theFrequencies (siteOrdering.list_data[siteID]); + + if (site_frequency > 1L) { + term = log(accumulator) * site_frequency - correction; + } else { + term = log(accumulator) - correction; + } + // Kahan sum + + + hyFloat temp_sum = result + term; + correction = (temp_sum - result) - term; + result = temp_sum; } else { - term = log(accumulator) - correction; + HandleApplicationError(_String("Site ") & (1L+siteOrdering.list_data[siteID]) & " evaluated to a NaN probability in ComputeTreeBlockByBranch; this is not a recoverable error and indicates some serious COVFEFE taking place."); } - // Kahan sum - - - hyFloat temp_sum = result + term; - correction = (temp_sum - result) - term; - result = temp_sum; } } diff --git a/src/core/variable.cpp b/src/core/variable.cpp index 5f333fef5..ad96c47ac 100644 --- a/src/core/variable.cpp +++ b/src/core/variable.cpp @@ -407,18 +407,22 @@ void _Variable::SetValue (HBLObjectRef theP, bool dup, bool do_checks, _AVLList // also update the fact that this variable is no longer dependent in all declared // variable containers which contain references to this variable - DoForEachVariable([this] (_Variable* v, long v_idx) -> void { - if (v->IsContainer()) { - if (!((_VariableContainer*)v)->RemoveDependance (this->theIndex)) { - ReportWarning ((_String("Can't make variable ")&GetName()->Enquote()&" independent in the context of "&*v->GetName()&" because its template variable is not independent.")); + if (deferClearConstraint) { + deferClearConstraint->InsertNumber(theIndex); + } else { + DoForEachVariable([this] (_Variable* v, long v_idx) -> void { + if (v->IsContainer()) { + if (!((_VariableContainer*)v)->RemoveDependance (this->theIndex)) { + ReportWarning ((_String("Can't make variable ")&GetName()->Enquote()&" independent in the context of "&*v->GetName()&" because its template variable is not independent.")); + } } - } - }); - - for (unsigned long i = 0UL; inonempty()) { - ((_LikelihoodFunction*)likeFuncList(i))->UpdateDependent(theIndex); - } + }); + + for (unsigned long i = 0UL; inonempty()) { + ((_LikelihoodFunction*)likeFuncList(i))->UpdateDependent(theIndex); + } + } } //_Formula::Clear(); diff --git a/src/core/variablecontainer.cpp b/src/core/variablecontainer.cpp index eb2142ce5..63a7c6bea 100644 --- a/src/core/variablecontainer.cpp +++ b/src/core/variablecontainer.cpp @@ -777,8 +777,7 @@ long _VariableContainer::SetDependance (long varIndex) { } //__________________________________________________________________________________ -bool _VariableContainer::SetMDependance (_SimpleList const & mDep) -{ +bool _VariableContainer::SetMDependance (_SimpleList const & mDep) { if (iVariables) { if (mDep.lLength*2 > iVariables->lLength) for (long k=iVariables->lLength-2; k>=0; k-=2) { From 70002e0ee0fae92a713f7d56db78197cd9fb31ff Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Tue, 3 Nov 2020 09:56:56 -0500 Subject: [PATCH 05/13] Adding _namespace_ global and option to selection analyses --- .../SelectionAnalyses/FEL.bf | 3 + .../SelectionAnalyses/FUBAR.bf | 2 +- .../SelectionAnalyses/MEME.bf | 3 + .../SelectionAnalyses/RELAX.bf | 2 +- .../modules/shared-load-file.bf | 66 ++++++++++++++++++- res/TemplateBatchFiles/libv3/all-terms.bf | 1 + .../libv3/tasks/estimators.bf | 4 +- src/core/batchlan.cpp | 25 ++++++- src/core/global_things.cpp | 6 +- src/core/include/global_things.h | 1 + 10 files changed, 101 insertions(+), 12 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index adc32d0dc..efa7f69bb 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -145,6 +145,7 @@ utility.ForEachPair (fel.selected_branches, "_partition_", "_selection_", selection.io.startTimer (fel.json [terms.json.timers], "Model fitting",1); +namespace_tag = "fel"; namespace fel { doGTR ("fel"); @@ -173,10 +174,12 @@ if (Type (fel.save_intermediate_fits) == "AssociativeList") { } if (fel.run_full_mg94) { + fel.final_partitioned_mg_results = estimators.FitMGREV (fel.filter_names, fel.trees, fel.codon_data_info [terms.code], { terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: fel.selected_branches, terms.run_options.retain_lf_object: TRUE, + terms.run_options.apply_user_constraints: fel.zero_branch_length_constrain, terms.run_options.optimization_settings: { "OPTIMIZATION_METHOD" : "coordinate-wise" } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf index e1de7f533..38984a8a1 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf @@ -769,7 +769,7 @@ lfunction fubar.scalers.SetBranchLength (model, value, parameter) { //------------------------------------------------------------------------------ -lfunction fubar.scalers.Constrain (tree_name, node_name, model_description) { +lfunction fubar.scalers.Constrain (tree_name, node_name, model_description, ignore) { parameters.SetProprtionalConstraint (tree_name + "." + node_name + "." + (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.parameters.synonymous_rate")], (model_description[utility.getGlobalValue ("terms.global")])[utility.getGlobalValue ("terms.fubar.branch_length_scaler.alpha")]); diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index b51f51801..a4f0e3f2e 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -143,6 +143,8 @@ meme.pairwise_counts = genetic_code.ComputePairwiseDifferencesAndExpectedSites(m selection.io.startTimer (meme.json [terms.json.timers], "Model fitting",1); +namespace_tag = "meme"; + namespace meme { doGTR ("meme"); } @@ -199,6 +201,7 @@ if (meme.run_full_mg94) { terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: meme.selected_branches, terms.run_options.retain_lf_object: TRUE, + terms.run_options.apply_user_constraints: meme.zero_branch_length_constrain, terms.run_options.optimization_settings: { "OPTIMIZATION_METHOD" : "coordinate-wise" } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 03b97ceab..5377d628a 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -1159,7 +1159,7 @@ lfunction relax.extract.k(branch_info) { //------------------------------------------------------------------------------ -lfunction relax.set.k (tree_name, node_name, model_description) { +lfunction relax.set.k (tree_name, node_name, model_description, ignore) { if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.relax.k"), "String")) { k = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.relax.k")]; t = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.parameters.synonymous_rate")]; diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index c3e35c24d..d2d82a09c 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -274,7 +274,7 @@ function store_tree_information () { - +//------------------------------------------------------------------------------ function doGTR (prefix) { @@ -325,12 +325,15 @@ function doGTR (prefix) { kill0 = io.SelectAnOption ( { "Yes":"Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", + "Constrain":"Keep zero-length branches, but constrain their values to 0", "No":"Keep all branches" }, - "The set of properties to use in the model") == "Yes"; + "Reduce zero-length branches"); + zero_branch_length_constrain = NULL; + deleted_by_tree = NULL; - if (kill0) { + if (kill0 == "Yes") { for (index, tree; in; trees) { deleted = {}; if (^(prefix + ".selected_branches") / index) { @@ -350,6 +353,32 @@ function doGTR (prefix) { (partitions_and_trees[i])[^"terms.data.tree"] = trees[i]; } store_tree_information (); + } else { + if (kill0 == "Constrain") { + zero_branch_length_constrain = ^"namespace_tag"+".constrain_zero_branches"; + deleted_by_tree = {}; + for (index, tree; in; trees) { + deleted = {}; + if (^(prefix + ".selected_branches") / index) { + trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], (^(prefix + ".selected_branches"))[index], deleted); + } else { + trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], null, deleted); + } + + if (utility.Array1D (deleted)) { + io.ReportProgressMessageMD(prefix, 'selector', 'Marked ' + Abs(deleted) + ' zero-length internal branches to be constrained: \`' + Join (', ',utility.Values(deleted)) + '\`'); + } + if (Abs (deleted)) { + deleted_dict = {}; + for (v; in; deleted) { + deleted_dict[v] = 1; + } + deleted_by_tree[index] = deleted_dict; + } else { + deleted_by_tree[index] = deleted; + } + } + } } @@ -477,3 +506,34 @@ function doPartitionedMG (prefix, keep_lf) { /** extract and report dN/dS estimates */ } + +//------------------------------------------------------------------------------ + + +function constrain_zero_branches (lf_id, components, data_filter, tree, model_map, initial_values, model_objects) { + SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0); + //console.log (">>>IN constrain_zero_branches"); + constrain_zero_branches.parameters = estimators.TraverseLocalParameters (lf_id, model_objects, ^"namespace_tag" + ".constrain_one_branch"); + constrain_zero_branches.parameters = +constrain_zero_branches.parameters; + SetParameter (DEFER_CONSTRAINT_APPLICATION, 0, 0); + return constrain_zero_branches.parameters; +} + +//------------------------------------------------------------------------------ + +function constrain_one_branch (tree_name, node_name, model_description,i) { + //console.log (">>>IN constrain_one_branch") + constrain_one_branch.tag = ^"namespace_tag" + ".deleted_by_tree"; + if (Type (^constrain_one_branch.tag) == "AssociativeList") { + if ((^constrain_one_branch.tag)[i] / node_name) { + if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.parameters.synonymous_rate"), "String")) { + constrain_one_branch.t = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.parameters.synonymous_rate")]; + //console.log (tree_name + "." + node_name + "." + constrain_one_branch.t + " => " + Eval (tree_name + "." + node_name + "." + constrain_one_branch.t)); + parameters.SetConstraint (tree_name + "." + node_name + "." + constrain_one_branch.t, "0", ""); + return 1; + } + } + } + return 0; + } + diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index bd34bbda9..071b651a4 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -8,6 +8,7 @@ terms.rate_variation.Gamma = "Gamma"; namespace terms{ + /* Generic terms which are used in a variety of contexts */ alphabet = "alphabet"; code = "code"; diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 83c2638d6..dc75b59b2 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -442,7 +442,7 @@ lfunction estimators.TraverseLocalParameters (likelihood_function_id, model_desc branch_names = Rows (map); for (b = 0; b < Abs(map); b += 1) { _branch_name = branch_names[b]; - result[_branch_name] = Call (callback, tree_name, _branch_name, (model_descriptions [map[_branch_name]])[utility.getGlobalValue ("terms.parameters")]); + result[_branch_name] = Call (callback, tree_name, _branch_name, (model_descriptions [map[_branch_name]])[utility.getGlobalValue ("terms.parameters")], i); } } return result; @@ -665,7 +665,7 @@ lfunction estimators.BuildLFObject (lf_id, data_filter, tree, model_map, initial if (Type(initial_values) == "AssociativeList") { utility.ToggleEnvVariable("USE_LAST_RESULTS", 1); - df = estimators.ApplyExistingEstimates(lf_id, model_objects, initial_values, run_options[utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler")]); + df = estimators.ApplyExistingEstimates(lf_id, model_objects, initial_values, run_options[utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler")]); } if (utility.Has (run_options,utility.getGlobalValue("terms.run_options.apply_user_constraints"),"String")) { diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index 341fa5e17..f7164a724 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -3715,7 +3715,17 @@ bool _ElementaryCommand::Execute (_ExecutionList& chain) { case HY_HBL_COMMAND_NESTED_LIST: chain.currentCommand++; { + _FString *stash_current = (_FString*)hy_env::EnvVariableGet(kNamespaceName, STRING); + if (stash_current) { + stash_current->AddAReference(); + } + hy_env::EnvVariableSet(kNamespaceName, new _FString ((_String*)GetIthParameter(1)->toStr(), false), false); ((_ExecutionList*)parameters.GetItem(0))->Execute(&chain); + if (stash_current) { + hy_env::EnvVariableSet(kNamespaceName, stash_current, false); + } else { + hy_env::EnvVariableSet(kNamespaceName, new _MathObject, false); + } } break; @@ -4787,7 +4797,6 @@ bool _ElementaryCommand::ConstructLF (_String&source, _ExecutionList&target) bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& chain) { // syntax: function (comma separated list of parameters) {body} - bool isFFunction = source.BeginsWith (blFFunction), isLFunction = ! isFFunction && source.BeginsWith (blLFunction), isCFunction = ! isFFunction && ! isLFunction && source.BeginsWith (blCFunction), @@ -4795,6 +4804,7 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c _hy_nested_check save_state = isInFunction; + if (!isNameSpace) { if (isInFunction == _HY_FUNCTION) { HandleApplicationError ("Nested function declarations are not allowed"); @@ -4804,6 +4814,8 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c long mark1, mark2; + _FString *save_nmspc = nil; + if (isNameSpace) { mark1 = source.FirstNonSpaceIndex(blNameSpace.length(), kStringEnd, kStringDirectionForward); mark2 = source.Find ('{', mark1, kStringEnd); @@ -4944,23 +4956,30 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c return false; } _String namespace_text (source, mark2+1,source.length()-2); + bool success = false; isInFunction = _HY_NAMESPACE; _ExecutionList * namespace_payload = new _ExecutionList (namespace_text, funcID, false, &success); - DeleteObject (funcID); + // 20180713 SLKP -- this was marked as deleted in one of the v2.3 branches if (success) { _ElementaryCommand * nested_list = new _ElementaryCommand (HY_HBL_COMMAND_NESTED_LIST); nested_list->parameters.AppendNewInstance(namespace_payload); + nested_list->parameters << funcID; chain.AppendNewInstance(nested_list); + DeleteObject (funcID); } else { DeleteObject (namespace_payload); + DeleteObject (funcID); + return false; } } - + if (isNameSpace) { + + } isInFunction = save_state; return true; diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp index 976de1b7b..5d386c08b 100644 --- a/src/core/global_things.cpp +++ b/src/core/global_things.cpp @@ -112,6 +112,7 @@ namespace hy_global { http://www.hyphy.org/docs/HyphyDocs.pdf may be a good starting point.\n"), kXVariableName ("_x_"), kNVariableName ("_n_"), + kNamespaceName ("_namespace_"), kErrorStringIncompatibleOperands ("Incompatible operands"), kErrorStringBadMatrixDefinition ("Invalid matrix definition "), kErrorStringInvalidMatrixIndex ("Invalid matrix index "), @@ -120,7 +121,7 @@ namespace hy_global { kErrorStringDatasetRefIndexError ("Dataset index reference out of range"), kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"), kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"), - kHyPhyVersion = _String ("2.5.21"), + kHyPhyVersion = _String ("2.5.22"), kNoneToken = "None", kNullToken = "null", @@ -266,7 +267,8 @@ namespace hy_global { &kNVariableName, &status_bar_update_string, &last_model_parameter_list, - &use_last_model + &use_last_model, + &kNamespaceName }; for (_String const * item : mark_as_globals) { diff --git a/src/core/include/global_things.h b/src/core/include/global_things.h index 826a82180..1410b2307 100644 --- a/src/core/include/global_things.h +++ b/src/core/include/global_things.h @@ -298,6 +298,7 @@ namespace hy_global { kHyphyCiteString, kXVariableName, kNVariableName, + kNamespaceName, kErrorStringIncompatibleOperands, // -101 kErrorStringBadMatrixDefinition, // -104 kErrorStringInvalidMatrixIndex, // -106 From 785261733722ba55d7c25fd4f1b4623c1c8f35f1 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Tue, 3 Nov 2020 11:23:51 -0500 Subject: [PATCH 06/13] Fixing some HBL bugs --- res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf | 2 +- res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf index 38984a8a1..ec315b094 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf @@ -780,7 +780,7 @@ lfunction fubar.scalers.Constrain (tree_name, node_name, model_description, igno //------------------------------------------------------------------------------ -lfunction fubar.scalers.Unconstrain (tree_name, node_name, model_description) { +lfunction fubar.scalers.Unconstrain (tree_name, node_name, model_description, ignore) { parameters.RemoveConstraint (tree_name + "." + node_name + "." + (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.parameters.synonymous_rate")]); parameters.RemoveConstraint (tree_name + "." + node_name + "." + (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.parameters.nonsynonymous_rate")]); } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 5377d628a..158be4eea 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -1172,7 +1172,7 @@ lfunction relax.set.k (tree_name, node_name, model_description, ignore) { //------------------------------------------------------------------------------ -lfunction relax.set.k2 (tree_name, node_name, model_description) { +lfunction relax.set.k2 (tree_name, node_name, model_description, ignore) { if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.relax.k"), "String")) { k = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.relax.k")]; parameters.RemoveConstraint (tree_name + "." + node_name + "." + k); From 279086a5e62f6437e5233b76c09a04b9ea551c2d Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Mon, 9 Nov 2020 20:15:12 -0500 Subject: [PATCH 07/13] Various small fixes --- .../SelectionAnalyses/BUSTED.bf | 41 ++++++- .../SelectionAnalyses/MEME.bf | 18 +++ .../SelectionAnalyses/PRIME.bf | 23 ++-- res/TemplateBatchFiles/libv3/all-terms.bf | 1 + .../libv3/models/codon/MG_REV_PROPERTIES.bf | 112 ++++++++++++++++++ .../libv3/models/rate_variation.bf | 27 +++-- .../libv3/tasks/alignments.bf | 7 +- src/core/batchlanruntime.cpp | 2 +- src/core/category.cpp | 2 +- 9 files changed, 209 insertions(+), 24 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf index ec913d752..947b50f56 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf @@ -179,12 +179,38 @@ namespace busted { } +busted.run_full_mg94 = TRUE; + +if (Type (busted.save_intermediate_fits) == "AssociativeList") { + if (None != busted.save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (busted.save_intermediate_fits[^"terms.data.value"], "Full-MG94", "AssociativeList")) { + busted.final_partitioned_mg_results = (busted.save_intermediate_fits[^"terms.data.value"])["Full-MG94"]; + busted.run_full_mg94 = FALSE; + } + } +} + io.ReportProgressMessageMD ("BUSTED", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); -busted.final_partitioned_mg_results = estimators.FitMGREV (busted.filter_names, busted.trees, busted.codon_data_info [terms.code], { - terms.run_options.model_type: terms.local, - terms.run_options.partitioned_omega: busted.selected_branches, -}, busted.partitioned_mg_results); +if (busted.run_full_mg94) { + busted.final_partitioned_mg_results = estimators.FitMGREV (busted.filter_names, busted.trees, busted.codon_data_info [terms.code], { + terms.run_options.model_type: terms.local, + terms.run_options.partitioned_omega: busted.selected_branches, + terms.run_options.apply_user_constraints: busted.zero_branch_length_constrain, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } + }, busted.partitioned_mg_results); + + if (Type (busted.save_intermediate_fits) == "AssociativeList") { + (busted.save_intermediate_fits[^"terms.data.value"])["Full-MG94"] = busted.final_partitioned_mg_results; + Export (lfe, ^busted.final_partitioned_mg_results[^"terms.likelihood_function"]); + (busted.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"] = lfe; + io.SpoolJSON (busted.save_intermediate_fits[^"terms.data.value"],busted.save_intermediate_fits[^"terms.data.file"]); + } +} + + io.ReportProgressMessageMD("BUSTED", "codon-refit", "* " + selection.io.report_fit (busted.final_partitioned_mg_results, 0, busted.codon_data_info[terms.data.sample_size])); @@ -317,6 +343,7 @@ if (busted.has_background) { busted.model_object_map = { "busted.test" : busted.test.bsrel_model }; } + if (busted.do_srv) { if (busted.do_bs_srv) { @@ -395,6 +422,12 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al busted.nm.precision = -0.00025*busted.final_partitioned_mg_results[terms.fit.log_likelihood]; +if (busted.do_srv_hmm) { + busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); + //parameters.SetConstraint (((busted.test.bsrel_model[terms.parameters])[terms.global])[terms.rate_variation.hmm_lambda],"1e-6", ""); +} + + parameters.DeclareGlobalWithRanges ("busted.bl.scaler", 1, 0, 1000); busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, { "retain-lf-object": TRUE, diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index a4f0e3f2e..1edaf5bc3 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -563,41 +563,59 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa initial_guess_grid = { "0" : { + + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": ^"meme.site_omega_minus", "meme.site_mixture_weight": ^"meme.site_mixture_weight" + }, "1" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus" * 2, "meme.site_omega_minus": 0.5, "meme.site_mixture_weight": 0.5 }, "2" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus" * 4, "meme.site_omega_minus": 0.25, "meme.site_mixture_weight": 0.25 }, "3" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": 0.5, "meme.site_mixture_weight": 0.5 }, "4" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": 0.75, "meme.site_mixture_weight": 0.8 }, "5" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus" * 8, "meme.site_omega_minus": 0.5, "meme.site_mixture_weight": 0.8 }, "6" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": 0, "meme.site_mixture_weight": 0.01 }, "7" : { + "meme.site_alpha" : ^"meme.site_alpha", + "meme.site_beta_nuisance" : ^"meme.site_beta_nuisance", "meme.site_beta_plus": ^"meme.site_beta_plus", "meme.site_omega_minus": ^"meme.site_omega_minus", "meme.site_mixture_weight": 1.0 diff --git a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf index fc105fdf7..9963f4d3d 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf @@ -560,17 +560,26 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa // fit the universal alternative ^"prime.site_beta" = Eval (^"prime.site_beta"); - LFCompute (^lf_prop,LF_START_COMPUTE); + /*LFCompute (^lf_prop,LF_START_COMPUTE); LFCompute (^lf_prop,results); - LFCompute (^lf_prop,LF_DONE_COMPUTE); + LFCompute (^lf_prop,LF_DONE_COMPUTE);*/ + parameters.SetConstraint ("prime.site_beta", Eval(^"prime.site_beta"),""); + console.log (^"prime.site_beta" + "\n"); + + start_grid = {}; + propN = utility.Array1D (^"prime.lambdas"); + + for (k = 0; k ) + for (l; in; ^"prime.lambdas") { + console.log (l); ^l = 0; } - /*Export (lfe, ^lf_prop); - fprintf ("/Users/sergei/Desktop/prime.bf",CLEAR_FILE,lfe);*/ + //Export (lfe, ^lf_prop); + //fprintf ("/Users/sergei/Desktop/prime." + ^"MPI_NODE_ID" + ".bf",CLEAR_FILE,lfe); Optimize (results, ^lf_prop, { @@ -579,13 +588,13 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa /*"OPTIMIZATION_START_GRID" : { "0" : { - "prime.site_beta": -0.75, + "prime.site_beta": -2, }, "1" : { - "prime.site_beta": -0.5, + "prime.site_beta": -1, }, "2" : { - "prime.site_beta": -0.25, + "prime.site_beta": -0, } diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 071b651a4..cea015b2d 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -145,6 +145,7 @@ namespace terms{ category_parameters = "category parameters"; bins = "bins"; weights = "weights"; + weight_vector = "weight_vector"; represent = "represent"; PDF = "PDF"; CDF = "CDF"; diff --git a/res/TemplateBatchFiles/libv3/models/codon/MG_REV_PROPERTIES.bf b/res/TemplateBatchFiles/libv3/models/codon/MG_REV_PROPERTIES.bf index bf69ee216..01d993274 100644 --- a/res/TemplateBatchFiles/libv3/models/codon/MG_REV_PROPERTIES.bf +++ b/res/TemplateBatchFiles/libv3/models/codon/MG_REV_PROPERTIES.bf @@ -251,6 +251,118 @@ terms.model.MG_REV_PROP.predefined = { {-1.14198047362342} {0.2306464200526206} } + }, + "Random" : { + "Random Factor 1":{ + "A":Random(-2,2), + "C":Random(-2,2), + "D":Random(-2,2), + "E":Random(-2,2), + "F":Random(-2,2), + "G":Random(-2,2), + "H":Random(-2,2), + "I":Random(-2,2), + "K":Random(-2,2), + "L":Random(-2,2), + "M":Random(-2,2), + "N":Random(-2,2), + "P":Random(-2,2), + "Q":Random(-2,2), + "R":Random(-2,2), + "S":Random(-2,2), + "T":Random(-2,2), + "V":Random(-2,2), + "W":Random(-2,2), + "Y":Random(-2,2) + }, + "Random Factor 2":{ + "A":Random(-2,2), + "C":Random(-2,2), + "D":Random(-2,2), + "E":Random(-2,2), + "F":Random(-2,2), + "G":Random(-2,2), + "H":Random(-2,2), + "I":Random(-2,2), + "K":Random(-2,2), + "L":Random(-2,2), + "M":Random(-2,2), + "N":Random(-2,2), + "P":Random(-2,2), + "Q":Random(-2,2), + "R":Random(-2,2), + "S":Random(-2,2), + "T":Random(-2,2), + "V":Random(-2,2), + "W":Random(-2,2), + "Y":Random(-2,2) + }, + "Random Factor 3":{ + "A":Random(-2,2), + "C":Random(-2,2), + "D":Random(-2,2), + "E":Random(-2,2), + "F":Random(-2,2), + "G":Random(-2,2), + "H":Random(-2,2), + "I":Random(-2,2), + "K":Random(-2,2), + "L":Random(-2,2), + "M":Random(-2,2), + "N":Random(-2,2), + "P":Random(-2,2), + "Q":Random(-2,2), + "R":Random(-2,2), + "S":Random(-2,2), + "T":Random(-2,2), + "V":Random(-2,2), + "W":Random(-2,2), + "Y":Random(-2,2) + }, + "Random Factor 4":{ + "A":Random(-2,2), + "C":Random(-2,2), + "D":Random(-2,2), + "E":Random(-2,2), + "F":Random(-2,2), + "G":Random(-2,2), + "H":Random(-2,2), + "I":Random(-2,2), + "K":Random(-2,2), + "L":Random(-2,2), + "M":Random(-2,2), + "N":Random(-2,2), + "P":Random(-2,2), + "Q":Random(-2,2), + "R":Random(-2,2), + "S":Random(-2,2), + "T":Random(-2,2), + "V":Random(-2,2), + "W":Random(-2,2), + "Y":Random(-2,2) + }, + "Random Factor 5":{ + "A":Random(-2,2), + "C":Random(-2,2), + "D":Random(-2,2), + "E":Random(-2,2), + "F":Random(-2,2), + "G":Random(-2,2), + "H":Random(-2,2), + "I":Random(-2,2), + "K":Random(-2,2), + "L":Random(-2,2), + "M":Random(-2,2), + "N":Random(-2,2), + "P":Random(-2,2), + "Q":Random(-2,2), + "R":Random(-2,2), + "S":Random(-2,2), + "T":Random(-2,2), + "V":Random(-2,2), + "W":Random(-2,2), + "Y":Random(-2,2) + } } }; diff --git a/res/TemplateBatchFiles/libv3/models/rate_variation.bf b/res/TemplateBatchFiles/libv3/models/rate_variation.bf index 36db000a0..acc6dcee4 100644 --- a/res/TemplateBatchFiles/libv3/models/rate_variation.bf +++ b/res/TemplateBatchFiles/libv3/models/rate_variation.bf @@ -37,8 +37,11 @@ function rate_variation.modifier_everything (q_ij, from, to, namespace, cat_name lfunction rate_variation_define_HMM (categories, namespace, globals, definition) { + + W = (definition[utility.getGlobalValue("terms.category.category_parameters")])[utility.getGlobalValue("terms.category.weight_vector")]; + lambda = parameters.ApplyNameSpace ("hmm_lambda", namespace); - parameters.DeclareGlobalWithRanges (lambda, .1/categories, 1e-6, 1/(categories)); + parameters.DeclareGlobalWithRanges (lambda, .1/categories, 1e-6, 0.9999999999); globals[utility.getGlobalValue("terms.rate_variation.hmm_lambda")] = lambda; hmm_T = parameters.ApplyNameSpace ("hmm_transition_matrix", namespace); @@ -50,18 +53,21 @@ lfunction rate_variation_define_HMM (categories, namespace, globals, definition) f_def = {categories,1}; - cm1 = categories-1; - for (k = 0; k < categories; k+=1) { - matrix_def * ('`hmm_T`[`k`][`k`] := 1-`cm1`*`lambda`;\n'); - matrix_def * ('`hmm_F`[`k`] = 1/`categories`;\n'); - //f_def[k] = "" + 1/categories; - for (k2 = k+1; k2 < categories; k2+=1) { - matrix_def * ('`hmm_T`[`k`][`k2`] := `lambda`;\n'); - matrix_def * ('`hmm_T`[`k2`][`k`] := `lambda`;\n'); + matrix_def * ('`hmm_F`[`k`] = ' + W[k] + ';\n'); + diag = {}; + for (k2 = 0; k2 < categories; k2+=1) { + w = W[k2]; + if (k != k2) { + matrix_def * ('`hmm_T`[`k`][`k2`] := `lambda`*(`w`);\n'); + diag + ('`lambda`*(`w`)'); + } } + diag = Join ("-", diag); + matrix_def * ('`hmm_T`[`k`][`k`] := 1-`diag`;\n'); } - + + matrix_def * "Model `hmm_M` = (`hmm_T`, `hmm_F`, 0);\n"; matrix_def * 0; utility.ExecuteInGlobalNamespace (matrix_def); @@ -158,6 +164,7 @@ lfunction rate_variation_define_gdd(options, namespace) { utility.getGlobalValue("terms.category.CDF"): "{{" + Join (",", utility.Map (rates, "_value_", "'('+_value_+')/`normalizer`'")) + "}}", utility.getGlobalValue("terms.lower_bound") : 0, utility.getGlobalValue("terms.upper_bound") : 1e25, + utility.getGlobalValue("terms.category.weight_vector") : weight_vector }, utility.getGlobalValue("terms.rate_variation.before") : None, utility.getGlobalValue("terms.rate_variation.after") : None, diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index d9f02f6ff..6b0e21383 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -355,11 +355,16 @@ function alignments.LoadGeneticCodeAndAlignment(dataset_name, datafilter_name, p * @returns {Dictionary} updated data_info that includes the number of sites, dataset, and datafilter name */ function alignments.LoadCodonDataFile(dataset_name, datafilter_name, data_info) { + + + DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , , data_info[terms.stop_codons]); + + if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { // generate a more diagnostic error here for (alignments.LoadCodonDataFile.i = 0; alignments.LoadCodonDataFile.i < ^"`dataset_name`.species"; alignments.LoadCodonDataFile.i += 1) { - DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]); + DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]); if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { alignments.LoadCodonDataFile.name = alignments.GetIthSequenceOriginalName (dataset_name, alignments.LoadCodonDataFile.i); diff --git a/src/core/batchlanruntime.cpp b/src/core/batchlanruntime.cpp index 0d1749140..09730a8be 100644 --- a/src/core/batchlanruntime.cpp +++ b/src/core/batchlanruntime.cpp @@ -1694,7 +1694,7 @@ bool _ElementaryCommand::HandleComputeLFFunction (_ExecutionList& current_p source_object->DoneComputing (true); } else { if (!source_object->HasBeenSetup()) { - throw (_String("Please call LFCompute (, ") & *GetIthParameter (0UL)& kLFStartCompute & ") before evaluating the likelihood function"); + throw (_String("Please call LFCompute (") & *GetIthParameter (0UL)& ", " & kLFStartCompute & ") before evaluating the likelihood function"); } else { if (op_kind == kLFTrackCache) { source_object->DetermineLocalUpdatePolicy(); diff --git a/src/core/category.cpp b/src/core/category.cpp index 30836bc24..741b2dd3d 100644 --- a/src/core/category.cpp +++ b/src/core/category.cpp @@ -872,7 +872,7 @@ _Matrix* _CategoryVariable::GetIntervalEnds (void) _Matrix* _CategoryVariable::ComputeHiddenMarkov (void) { _Matrix *hmmr = (_Matrix*)((_Matrix*)LocateVar (modelMatrixIndices.list_data[hiddenMarkovModel])->GetValue())->ComputeNumeric(); if (!hmmr->IsValidTransitionMatrix()) { - HandleApplicationError(_String ("Hidden Markov Model transition matrix variable did not compute to a valid transition matrix")); + HandleApplicationError(_String ("Hidden Markov Model transition matrix variable did not compute to a valid transition matrix: ") & _String ((_String*)hmmr->toStr())); } return hmmr; } From 11b541fcd78a6ef7249ced7eeeccb04a801f4c2b Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Tue, 10 Nov 2020 10:04:35 -0500 Subject: [PATCH 08/13] Fixing incorrect branch tagging --- res/TemplateBatchFiles/SelectionAnalyses/MEME.bf | 2 ++ .../SelectionAnalyses/modules/shared-load-file.bf | 8 +++----- src/core/likefunc.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index 1edaf5bc3..98226035a 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -145,11 +145,13 @@ selection.io.startTimer (meme.json [terms.json.timers], "Model fitting",1); namespace_tag = "meme"; + namespace meme { doGTR ("meme"); } + estimators.fixSubsetOfEstimates(meme.gtr_results, meme.gtr_results[terms.global]); // Step 1 - Fit to MG94xREV diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index d2d82a09c..bd4185fd1 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -359,11 +359,9 @@ function doGTR (prefix) { deleted_by_tree = {}; for (index, tree; in; trees) { deleted = {}; - if (^(prefix + ".selected_branches") / index) { - trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], (^(prefix + ".selected_branches"))[index], deleted); - } else { - trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], null, deleted); - } + //console.log (((^"meme.selected_branches")[0])["Node166"]); + trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], null, deleted); + //console.log (((^"meme.selected_branches")[0])["Node166"]); if (utility.Array1D (deleted)) { io.ReportProgressMessageMD(prefix, 'selector', 'Marked ' + Abs(deleted) + ' zero-length internal branches to be constrained: \`' + Join (', ',utility.Values(deleted)) + '\`'); diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 40b388b17..79735b74d 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -670,7 +670,7 @@ bool _LikelihoodFunction::MapTreeTipsToData (long f, _String *errorMessage, b tips.AppendNewInstance (new _String (iterator->ContextFreeName ())); } if (iterator->GetModelIndex () == HY_NO_MODEL) { - throw _String ("Model is not associated with the node:") & iterator->ContextFreeName(); + throw _String ("No model is not associated with node ") & iterator->ContextFreeName().Enquote(); } else if (iterator->GetModelDimension() != dfDim) { throw _String ("The dimension of the transition matrix at node ") & iterator->ContextFreeName ().Enquote() & " is not equal to the state count in the data filter associated with the tree."; } From f65210f9e3a8b0d7a6d423c55d99b9d41b8790ea Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Tue, 10 Nov 2020 11:27:07 -0500 Subject: [PATCH 09/13] Better error reporting --- res/TemplateBatchFiles/SelectionAnalyses/MEME.bf | 2 -- src/core/formula.cpp | 6 ++++++ src/core/global_things.cpp | 5 +++++ src/core/include/formula.h | 2 ++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index 98226035a..1edaf5bc3 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -145,13 +145,11 @@ selection.io.startTimer (meme.json [terms.json.timers], "Model fitting",1); namespace_tag = "meme"; - namespace meme { doGTR ("meme"); } - estimators.fixSubsetOfEstimates(meme.gtr_results, meme.gtr_results[terms.global]); // Step 1 - Fit to MG94xREV diff --git a/src/core/formula.cpp b/src/core/formula.cpp index 80cbd5947..5d3ad941a 100644 --- a/src/core/formula.cpp +++ b/src/core/formula.cpp @@ -57,6 +57,8 @@ using namespace hyphy_global_objects; hyFloat const sqrtPi = 1.77245385090551603, twoOverSqrtPi = 2./sqrtPi; +_Formula * current_formula_being_computed = nil; + //__________________________________________________________________________________ @@ -1914,6 +1916,7 @@ HBLObjectRef _Formula::Compute (long startAt, _VariableContainer const * nameSpa // TODO SLKP 20170925 Needs code review { _Stack * scrap_here; + current_formula_being_computed = this; if (theFormula.empty()) { theStack.theStack.Clear(); theStack.Push (new _MathObject, false); @@ -2021,6 +2024,8 @@ HBLObjectRef _Formula::Compute (long startAt, _VariableContainer const * nameSpa } errorText & "\n------------------\n"; } + + current_formula_being_computed = nil; if (errMsg) { *errMsg = *errMsg & errorText; @@ -2047,6 +2052,7 @@ HBLObjectRef _Formula::Compute (long startAt, _VariableContainer const * nameSpa recursion_calls = nil; } } + current_formula_being_computed = nil; return valid_type == HY_ANY_OBJECT ? return_value : ((return_value->ObjectClass() & valid_type) ? return_value : nil); } diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp index 5d386c08b..869ed8556 100644 --- a/src/core/global_things.cpp +++ b/src/core/global_things.cpp @@ -348,6 +348,7 @@ namespace hy_global { EnvVariableSet(random_seed, new _Constant (hy_random_seed), false); isInFunction = _HY_NO_FUNCTION; isDefiningATree = kTreeNotBeingDefined; + current_formula_being_computed = nil; #ifdef __HYPHYMPI__ MPI_Comm_size (MPI_COMM_WORLD, &hy_mpi_node_count); EnvVariableSet (mpi_node_count, new _Constant (hy_mpi_node_count), false); @@ -601,6 +602,10 @@ namespace hy_global { if (doDefault) { (*error_message) << "Error:\n" << theMessage; + if (current_formula_being_computed) { + (*error_message) << "\n\tWhile computing: " << (_String*)current_formula_being_computed->toStr(kFormulaStringConversionNormal); + } + if (calls.nonempty()) { (*error_message) << "\n\nFunction call stack\n"; for (unsigned long k = 0UL; k < calls.countitems(); k++) { diff --git a/src/core/include/formula.h b/src/core/include/formula.h index 80b82b5c4..7ee2d3ca1 100644 --- a/src/core/include/formula.h +++ b/src/core/include/formula.h @@ -278,4 +278,6 @@ class _Formula { }; +extern _Formula * current_formula_being_computed; + #endif From ef0331bfa80e83cc96ffe3b3b79333fd70f15c10 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Tue, 10 Nov 2020 13:50:46 -0500 Subject: [PATCH 10/13] Error in duplicate_tree with callback --- .../modules/shared-load-file.bf | 2 +- res/TemplateBatchFiles/libv3/tasks/trees.bf | 8 +++---- src/core/classes.cp | 22 ++++++++++++------- src/core/fstring.cpp | 5 ++++- src/core/include/classes.h | 2 +- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index bd4185fd1..568f868f6 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -332,7 +332,7 @@ function doGTR (prefix) { zero_branch_length_constrain = NULL; deleted_by_tree = NULL; - + if (kill0 == "Yes") { for (index, tree; in; trees) { deleted = {}; diff --git a/res/TemplateBatchFiles/libv3/tasks/trees.bf b/res/TemplateBatchFiles/libv3/tasks/trees.bf index 31f3deb5e..198df02c3 100644 --- a/res/TemplateBatchFiles/libv3/tasks/trees.bf +++ b/res/TemplateBatchFiles/libv3/tasks/trees.bf @@ -85,11 +85,11 @@ LoadFunctionLibrary("distances.bf"); lfunction trees.GetTreeString._sanitize(string) { if (utility.GetEnvVariable("_DO_TREE_REBALANCE_")) { - console.log ("BEFORE REBALANCE\n"); - console.log (string); + //console.log ("BEFORE REBALANCE\n"); + //console.log (string); string = RerootTree(string, 0); - console.log ("AFTER REBALANCE\n"); - console.log (string); + //console.log ("AFTER REBALANCE\n"); + //console.log (string); } if (utility.GetEnvVariable("_KEEP_I_LABELS_")) { diff --git a/src/core/classes.cp b/src/core/classes.cp index 39ecb94e1..325aca1fc 100644 --- a/src/core/classes.cp +++ b/src/core/classes.cp @@ -185,28 +185,34 @@ template void node::delete_tree(bool delSelf){ } //------------------------------------------------------------- -template node* node::duplicate_tree(void (callback) (node*)) { +template node* node::duplicate_tree(void (callback) (node*, node*)) { node* result = new node; for (int i=1; i<=get_num_nodes(); i++) { - result->add_node(*(go_down(i)->duplicate_tree())); + result->add_node(*(go_down(i)->duplicate_tree(callback))); } + if (callback) { - callback (result); + callback (this, result); + } else { + result->in_object = in_object; } - result->in_object = in_object; return result; } //------------------------------------------------------------- -template void node_count_descendants (node* n) { - n->in_object = 0L; - for (int i=1; i<=n->get_num_nodes(); i++) { - n->in_object += n->go_down(i)->in_object; +template void node_count_descendants (node* source, node* n) { + if (n->get_num_nodes() == 0) { + n->in_object = 1L; + } else { + n->in_object = 0L; + for (int i=1; i<=n->get_num_nodes(); i++) { + n->in_object += n->go_down(i)->in_object; + } } } diff --git a/src/core/fstring.cpp b/src/core/fstring.cpp index d3fe07140..8ff81609e 100644 --- a/src/core/fstring.cpp +++ b/src/core/fstring.cpp @@ -448,7 +448,7 @@ HBLObjectRef _FString::RerootTree (HBLObjectRef root, HBLObjectRef cache) { while (_CalcNode * iterator = ti.Next()) { node* counter_tree = ni.Next(); long nodeMin = totalNodeCount-counter_tree->in_object-1L; - hyFloat thisRatio = nodeMin/(1L+counter_tree->in_object); + hyFloat thisRatio = nodeMin/(1.+counter_tree->in_object); if (thisRatio>1.0) { thisRatio = 1.0/thisRatio; @@ -473,6 +473,8 @@ HBLObjectRef _FString::RerootTree (HBLObjectRef root, HBLObjectRef cache) { rerootAt = nil; } } + + //printf ("Node %s, nodeMin = %ld, bRatio = %lg\n", iterator->GetName()->get_str(), nodeMin, thisRatio); } counted_descendants->delete_tree(true); @@ -480,6 +482,7 @@ HBLObjectRef _FString::RerootTree (HBLObjectRef root, HBLObjectRef cache) { HBLObjectRef res; if (rerootAt) { _FString rAt (rerootAt->ContextFreeName()); + //BufferToConsole("\nREROOT AT:"); ObjectToConsole(&rAt); NLToConsole(); res = (_FString*)rTree.RerootTree (&rAt, cache); } else { res = _returnStringOrUseCache (get_str(), cache); diff --git a/src/core/include/classes.h b/src/core/include/classes.h index bc175186b..5af00fe91 100644 --- a/src/core/include/classes.h +++ b/src/core/include/classes.h @@ -238,7 +238,7 @@ template class node { int next (void); int up (void); void delete_tree (bool = false); - node* duplicate_tree (void (callback) (node*) = nil); + node* duplicate_tree (void (callback) (node*, node*) = nil); // traverse from this node down duplicating the tree and return // pointer to the root of the duplicate; bool compare_subtree (node*); From 903d5530e502f549b99d71143c2c29ebd9b80558 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Wed, 11 Nov 2020 14:27:49 -0500 Subject: [PATCH 11/13] Minor bug fixes --- src/core/likefunc.cpp | 2 +- src/core/matrix.cpp | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 79735b74d..7bd4f5dbd 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -5175,7 +5175,7 @@ bool CheckEqual (hyFloat a, hyFloat b, hyFloat tolerance) { a = (a>b)?(a-b)/a:(b-a)/a; return a>0.0 ? a<=tolerance : a>=-tolerance; } - return (b<=tolerance)&&(b>=-tolerance); + return fabs(b)<=tolerance; } //_______________________________________________________________________________________ diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index b6598e3f9..adc4d7079 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -2002,13 +2002,20 @@ bool _Matrix::AmISparseFast (_Matrix& whereTo) { bool _Matrix::IsValidTransitionMatrix() const { if (is_square() && is_numeric()) { long d = GetHDim(); - hyFloat * sums = new hyFloat [d] {0.0}; + hyFloat * sums = (hyFloat*)alloca (sizeof (hyFloat)*d); long idx = 0L; for (long r = 0L; r < d; r++) { for (long c = 0L; c < d; c++, idx++) { hyFloat term = theData[idx]; if (term < 0.0 || term > 1.0) { - delete [] sums; + if (CheckEqual(term, 0.0)) { + theData[idx] = 0.; + continue; + } + if (CheckEqual(term, 1.0)) { + theData[idx] = 1.; + continue; + } return false; } sums[r] += term; @@ -2016,11 +2023,9 @@ bool _Matrix::IsValidTransitionMatrix() const { } for (long r = 0L; r < d; r++) { if (!CheckEqual(1.0, sums[r])) { - delete [] sums; return false; } } - delete [] sums; return true; } return false; From 5a26a0677571dc89c0f165ba6f60a2e987a4c6b9 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Wed, 11 Nov 2020 20:14:29 -0500 Subject: [PATCH 12/13] HMM normalization --- src/core/matrix.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index adc4d7079..19e39b235 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -2014,6 +2014,7 @@ bool _Matrix::IsValidTransitionMatrix() const { } if (CheckEqual(term, 1.0)) { theData[idx] = 1.; + sums[r] += term; continue; } return false; From 13d9aa4493ebae9425421c6c4d52df3e625e7136 Mon Sep 17 00:00:00 2001 From: Segei L Kosakovsky Pond Date: Fri, 13 Nov 2020 11:36:36 -0500 Subject: [PATCH 13/13] 2.5.22 rc --- .../SelectionAnalyses/PRIME.bf | 89 +++++++++++++------ .../libv3/tasks/alignments.bf | 9 +- 2 files changed, 65 insertions(+), 33 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf index 9963f4d3d..e67cf3887 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf @@ -174,11 +174,53 @@ if (Type (debug.checkpoint) != "String") { io.ReportProgressMessageMD ("PRIME", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); - prime.final_partitioned_mg_results = estimators.FitMGREV (prime.filter_names, prime.trees, prime.codon_data_info [terms.code], { - terms.run_options.model_type: terms.local, - terms.run_options.partitioned_omega: prime.selected_branches, - terms.run_options.retain_lf_object: TRUE - }, prime.partitioned_mg_results); + prime.run_full_mg94 = TRUE; + + if (Type (prime.save_intermediate_fits) == "AssociativeList") { + if (None != prime.save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (prime.save_intermediate_fits[^"terms.data.value"], "Full-MG94", "AssociativeList")) { + prime.final_partitioned_mg_results = (prime.save_intermediate_fits[^"terms.data.value"])["Full-MG94"]; + if (utility.Has (prime.save_intermediate_fits[^"terms.data.value"], "Full-MG94-LF", "String")) { + ExecuteCommands ((prime.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"]); + prime.run_full_mg94 = FALSE; + } + } + } + } + + + if (prime.run_full_mg94) { + + prime.final_partitioned_mg_results = estimators.FitMGREV (prime.filter_names, prime.trees, prime.codon_data_info [terms.code], { + terms.run_options.model_type: terms.local, + terms.run_options.partitioned_omega: prime.selected_branches, + terms.run_options.retain_lf_object: TRUE, + terms.run_options.apply_user_constraints: prime.zero_branch_length_constrain, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } + }, prime.partitioned_mg_results); + + /*prime.final_partitioned_mg_results = estimators.FitMGREV (prime.filter_names, prime.trees, prime.codon_data_info [terms.code], { + terms.run_options.model_type: terms.local, + terms.run_options.partitioned_omega: prime.selected_branches, + terms.run_options.retain_lf_object: TRUE, + terms.run_options.apply_user_constraints: prime.zero_branch_length_constrain, + terms.run_options.optimization_settings: { + "OPTIMIZATION_METHOD" : "coordinate-wise" + } + }, prime.partitioned_mg_results);*/ + + if (Type (prime.save_intermediate_fits) == "AssociativeList") { + (prime.save_intermediate_fits[^"terms.data.value"])["Full-MG94"] = prime.final_partitioned_mg_results; + Export (lfe, ^prime.final_partitioned_mg_results[^"terms.likelihood_function"]); + (prime.save_intermediate_fits[^"terms.data.value"])["Full-MG94-LF"] = lfe; + io.SpoolJSON (prime.save_intermediate_fits[^"terms.data.value"],prime.save_intermediate_fits[^"terms.data.file"]); + } + } + + + debug.spool = utility.GetEnvVariable ("DEBUG_CHECKPOINT_STORE"); if (Type (debug.spool) == "String" ) { @@ -566,18 +608,25 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa parameters.SetConstraint ("prime.site_beta", Eval(^"prime.site_beta"),""); - console.log (^"prime.site_beta" + "\n"); start_grid = {}; propN = utility.Array1D (^"prime.lambdas"); - for (k = 0; k ) - - for (l; in; ^"prime.lambdas") { - console.log (l); - ^l = 0; + for (sp = 0; sp < 100; sp += 1) { + point = {}; + point ["prime.site_alpha"] = Random (0.75, 1.25); + point ["prime.site_beta"] = Random (0.5, 1.5); + point ["prime.site_beta_nuisance"] = Random (0.5, 1.5); + for (l; in; ^"prime.lambdas") { + point [l] = Random (-0.5,0.5); + } + start_grid + point; } + + //console.log (start_grid); + + //Export (lfe, ^lf_prop); //fprintf ("/Users/sergei/Desktop/prime." + ^"MPI_NODE_ID" + ".bf",CLEAR_FILE,lfe); @@ -585,22 +634,12 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa Optimize (results, ^lf_prop, { "OPTIMIZATION_METHOD" : "nedler-mead", //"OPTIMIZATION_METHOD" : "gradient-descent", - /*"OPTIMIZATION_START_GRID" : - { - "0" : { - "prime.site_beta": -2, - }, - "1" : { - "prime.site_beta": -1, - }, - "2" : { - "prime.site_beta": -0, - } - - - }*/ + "OPTIMIZATION_START_GRID" : start_grid }); + + //console.log ("\n" + ^"LF_INITIAL_GRID_MAXIMUM" + " : " + results[1][0] + "\n"); + character_map = None; if (^"prime.impute_states") { DataSet anc = ReconstructAncestors ( ^lf_prop, {{0}}, MARGINAL, DOLEAVES); diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 6b0e21383..8cd827d97 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -991,15 +991,8 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map * @param {String} file - write the result here * @param {Bool} isCodon - is the filter a codon filter? - * @returns {String} the mapped sequence - - * @example - GCAAAATCATTAGGGACTATGGAAAACAGA - -AKSLGTMEN-R - - maps to + * @returns None - ---GCAAAATCATTAGGGACTATGGAAAAC---AGA */