From 0e6f6a75d4c348747c7a0d7b952ba785378d226f Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Thu, 17 May 2018 21:06:15 -0400 Subject: [PATCH 01/53] Resolving a memory leak caused if AUTOMATICALLY_CONVERT_BRANCH_LENGTHS=1 is set --- src/core/calcnode.cpp | 6 +++--- src/core/include/avllistx.h | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/calcnode.cpp b/src/core/calcnode.cpp index 3e68fb47b..8052f8ab6 100644 --- a/src/core/calcnode.cpp +++ b/src/core/calcnode.cpp @@ -1143,7 +1143,7 @@ void _TheTree::PreTreeConstructor (bool) aCache = new _AVLListXL (new _SimpleList); - convertedMatrixExpressionsL.ClearFormulasInList(); + convertedMatrixExpressions.ClearFormulasInList(); convertedMatrixExpressions.Clear(); getINodePrefix(); @@ -1170,7 +1170,7 @@ void _TheTree::PostTreeConstructor (bool dupMe) DeleteObject (aCache); aCache = nil; - convertedMatrixExpressionsL.ClearFormulasInList(); + convertedMatrixExpressions.ClearFormulasInList(); convertedMatrixExpressions.Clear(); while (theRoot->get_num_nodes() == 1) { // dumb tree w/ an extra top level node @@ -1778,8 +1778,8 @@ bool _TheTree::FinalizeNode (node* nodie, long number , _String nodeNam for (unsigned long cc = 0; cc < cNt.categoryVariables.lLength; cc++) { _CategoryVariable * thisCC = (_CategoryVariable *)LocateVar(cNt.categoryVariables.lData[cc]); thisCC -> SetValue (new _Constant(thisCC->Mean()), false); - } + convertedMatrixExpressions.Insert ((BaseRef)nodeModelID, (long)expressionToSolveFor, false, false); } DeleteObject (result); } else { diff --git a/src/core/include/avllistx.h b/src/core/include/avllistx.h index af184d3ac..306fd71ba 100644 --- a/src/core/include/avllistx.h +++ b/src/core/include/avllistx.h @@ -74,6 +74,10 @@ class _AVLListX: public _AVLList { long GetXtra(long) const; long FindAndGetXtra (BaseRefConst, long not_found_value = -1); + + void ClearFormulasInList (void) { + xtraD.ClearFormulasInList(); + } }; From 
d6a3a03478f9cce941fcafa6d17febe17ac73b6c Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Fri, 18 May 2018 14:49:25 -0400 Subject: [PATCH 02/53] Another memory leak fix --- src/core/matrix.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index 8e825ae72..86a471d65 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -2075,8 +2075,7 @@ _Matrix::_Matrix (long theHDim, long theVDim, bool sparse, bool allocateStorage) //_____________________________________________________________________________________________ -void _Matrix::Convert2Formulas (void) -{ +void _Matrix::Convert2Formulas (void) { if (storageType == 1) { storageType = 2; _Formula** tempData = (_Formula**)MatrixMemAllocate (sizeof(void*)*lDim); @@ -5585,7 +5584,7 @@ void _Matrix::StoreFormula (long i, long j, _Formula& f, bool copyF, bool ((_Formula**)theData)[-lIndex-2]->SimplifyConstants(); } } else { - if (copyF && ((_Formula**)theData)[lIndex]!=(_Formula*)ZEROPOINTER) { + if (((_Formula**)theData)[lIndex]!=(_Formula*)ZEROPOINTER) { delete ((_Formula**)theData)[lIndex]; } ((_Formula**)theData)[lIndex] = copyF?(_Formula*)f.makeDynamic():&f; From 864a7f27a3438b08542a6c6dd1644db3e9fc835b Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 19 Jun 2018 11:35:52 -0400 Subject: [PATCH 03/53] Working on FADE simulator --- CMakeLists.txt | 2 +- .../SelectionAnalyses/Simulators/FADE.bf | 766 ++++++++++++++++++ .../modules/io_functions.ibf | 1 + res/TemplateBatchFiles/libv3/IOFunctions.bf | 2 +- res/TemplateBatchFiles/libv3/all-terms.bf | 1 + .../libv3/models/model_functions.bf | 39 +- .../libv3/models/protein/empirical.bf | 46 +- .../libv3/tasks/alignments.bf | 48 +- .../libv3/tasks/estimators.bf | 80 +- src/core/batchlan.cpp | 2 + 10 files changed, 915 insertions(+), 72 deletions(-) create mode 100644 res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf diff --git a/CMakeLists.txt b/CMakeLists.txt index 
1c49d968f..49e8cac2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if(${GCC_VERSION} VERSION_LESS 6.0) set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -std=gnu++14") else(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") endif(${GCC_VERSION} VERSION_LESS 6.0) if(NOAVX) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf new file mode 100644 index 000000000..e6f9aacd2 --- /dev/null +++ b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf @@ -0,0 +1,766 @@ +RequireVersion ("2.3.12"); + + +LoadFunctionLibrary ("libv3/all-terms.bf"); +LoadFunctionLibrary ("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary ("libv3/IOFunctions.bf"); +LoadFunctionLibrary ("libv3/tasks/estimators.bf"); +LoadFunctionLibrary ("libv3/tasks/alignments.bf"); +LoadFunctionLibrary ("libv3/tasks/ancestral.bf"); +LoadFunctionLibrary ("libv3/tasks/trees.bf"); +LoadFunctionLibrary ("../modules/io_functions.ibf"); +LoadFunctionLibrary ("../modules/selection_lib.ibf"); +LoadFunctionLibrary ("libv3/convenience/math.bf"); +LoadFunctionLibrary ("libv3/models/protein.bf"); +LoadFunctionLibrary ("libv3/models/protein/empirical.bf"); +LoadFunctionLibrary ("libv3/models/protein/REV.bf"); +LoadFunctionLibrary ("libv3/tasks/mpi.bf"); +LoadFunctionLibrary ("libv3/stats.bf"); +LoadFunctionLibrary ("libv3/convenience/random.bf"); + + + +utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); +utility.SetEnvVariable ("ACCEPT_ROOTED_TREES", TRUE); + +namespace terms.fade { + mode = "mode"; + regimes = "regimes"; + bias = "substitution bias"; + rate = "rate multiplier"; +}; + + +fade.parameter.bias = "FADE.bias"; +fade.parameter.rate = "FADE.rate"; +fade.tree.name = "FADE.simulated_tree"; + +fade.settings = {}; +fade.alphabet = "ACDEFGHIKLMNPQRSTVWY"; 
+fade.alphabet.matrix = {1,20}; +fade.simulation.matrix = {2,20}; +fade.evolutionary_modes = {"Null" : "Evolution under baseline model"}; + +for (r = 0; r < Abs (fade.alphabet); r += 1) { + fade.evolutionary_modes [fade.alphabet[r]] = "Directional evolution towards `fade.alphabet[r]`"; + fade.alphabet.matrix [r] = fade.alphabet[r]; + fade.simulation.matrix[0][r] = fade.alphabet[r]; +} + +fade.simulation.matrix[1][0] = "1"; + +fade.analysis_description = {terms.io.info : + "A companion data simulator for FADE", + terms.io.version : "0.1", + terms.io.reference : "TBD", + terms.io.authors : "Sergei L Kosakovsky Pond", + terms.io.contact : "spond@temple.edu", + terms.io.requirements : "A **rooted** phylogenetic tree with branch lengths (optionally annotated with {} to define a branch partition set)" + }; + +io.DisplayAnalysisBanner (fade.analysis_description); + + +// =========== LOAD DATA AND SET UP CACHES + +SetDialogPrompt ("Specify a rooted tree to use for data simulations"); +fade.baseline.tree = trees.LoadAnnotatedTopology (TRUE); +assert (trees.HasBranchLengths(fade.baseline.tree), "Input tree MUST have branch lengths"); +assert (fade.baseline.tree[terms.trees.rooted], "Input tree MUST be rooted"); + + +fade.replicates = io.PromptUser ("How many replicate datasets be simulated", 100, 1, 10000, true); +fade.sites_class_count = io.PromptUser ("How many types of sites will be simulated", 2, 1, 10000, true); +fade.site_classes = {}; + +for (k = 0; k < fade.sites_class_count; k += 1) { + this_class = { + terms.data.sites : io.PromptUser ("How many sites are in class " + (k+1), 100, 1, 10000, true), + terms.fade.mode : io.SelectAnOption (fade.evolutionary_modes, "Evolutionary regime for site class " + (k+1)), + fade.parameter.rate : io.PromptUser ("Relative overall rate for site class " + (k+1) + " (1 = average)", 1, 0, 1000, false) + }; + + if (this_class [terms.fade.mode] != "Null") { + this_class [fade.parameter.bias] = io.PromptUser ("Substitution bias for site 
class " + (k+1) + " (0 = no bias)", 1, 0, 1000, false); + } + fade.site_classes [k] = this_class; +} + + +fade.selected_branches = (selection.io.defineBranchSets ( {"0" : { terms.data.tree : fade.baseline.tree}} ))[0]; + +fade.settings [terms.data.tree] = fade.baseline.tree[terms.trees.newick_with_lengths]; +fade.settings [terms.json.tested] = fade.selected_branches; +fade.settings [terms.fade.regimes] = fade.site_classes; +fade.settings [terms.replicates] = fade.replicates; + +utility.Extend (models.protein.empirical_models, {"GTR" : "General time reversible model (189 estimated parameters)."}); +fade.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Baseline substitution model"); +fade.generator = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[fade.baseline_model ]; +fade.branch_lengths = parameters.helper.tree_lengths_to_initial_values ({"0" : fade.baseline.tree}, None); + +fade.settings [terms.model] = fade.baseline_model; +fade.settings [terms.fade.generator] = fade.generator; + + +lfunction fade.rate.modifier (fromChar, toChar, namespace, model_type, model) { + + baseline = Call (^"fade.baseline_model.rate", fromChar,toChar, namespace, model_type, model); + utility.EnsureKey (baseline, model_type); + selection.io.json_store_key_value_pair (baseline, model_type, utility.getGlobalValue("terms.fade.bias"), utility.getGlobalValue("fade.parameter.bias")); + selection.io.json_store_key_value_pair (baseline, model_type, utility.getGlobalValue("terms.fade.rate"), utility.getGlobalValue("fade.parameter.rate")); + baseline [utility.getGlobalValue("terms.model.rate_entry")] = parameters.AppendMultiplicativeTerm (baseline [utility.getGlobalValue("terms.model.rate_entry")], utility.getGlobalValue("fade.parameter.rate")); + if ( Type (model["fade.residue_bias"]) == "String") { + if (toChar == model["fade.residue_bias"]) { + baseline [utility.getGlobalValue("terms.model.rate_entry")] = + 
parameters.AppendMultiplicativeTerm ( baseline [utility.getGlobalValue("terms.model.rate_entry")], + "`utility.getGlobalValue("fade.parameter.bias")`/(1-Exp (-`utility.getGlobalValue("fade.parameter.bias")`))"); + } else { + if (fromChar == model["fade.residue_bias"]) { + parameters.AppendMultiplicativeTerm ( baseline [utility.getGlobalValue("terms.model.rate_entry")], + "`utility.getGlobalValue("fade.parameter.bias")`/(Exp (`utility.getGlobalValue("fade.parameter.bias")`-1))"); + } + } + } + return baseline; +} + + + +lfunction fade.biased.model.generator (type, residue) { + model = Call (^"fade.generator", type); + utility.setGlobalValue("fade.baseline_model.rate", model[utility.getGlobalValue ("terms.model.q_ij")]); + model[utility.getGlobalValue ("terms.model.q_ij")] = "fade.rate.modifier"; + model["fade.residue_bias"] = residue; + model[utility.getGlobalValue ("terms.alphabet")] = utility.getGlobalValue ("fade.alphabet.matrix"); + return model; +} + + +fade.bias.residue = "F"; + +fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", + "fade.biased_model", { + "0": "terms.global", + "1": parameters.Quote (fade.bias.residue) + }, + None, + "frequencies.equal"); + + +fade.model.baseline = model.generic.DefineModel("fade.biased.model.generator", + "fade.baseline_model", { + "0": "terms.global", + "1": None + }, + None, + "frequencies.equal"); + +fade.model_id_to_object = { + "fade.biased_model": fade.model.biased, + "fade.baseline_model": fade.model.baseline + }; + + +fade.model_assignment = { + "fade.baseline_model" : utility.Filter (fade.selected_branches, "_value_", "_value_ == terms.tree_attributes.background"), + "fade.biased_model" : utility.Filter (fade.selected_branches, "_value_", "_value_ == terms.tree_attributes.test"), +}; + + + +parameters.DeclareGlobalWithRanges (fade.parameter.rate, 1, 0, 100); +parameters.DeclareGlobalWithRanges (fade.parameter.bias, 1e-10, 1e-10, 100); + +model.ApplyModelToTree(fade.tree.name, 
fade.baseline.tree, None, fade.model_assignment); +//function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions, initial_values, _application_type, keep_track_of_proportional_scalers) { + + +parameters.SetValue (fade.parameter.bias, 1e-10); +parameters.SetValue (fade.parameter.rate, 1); + +estimators.ApplyExistingEstimatesToTree (fade.tree.name, fade.model_id_to_object, (fade.branch_lengths[terms.branch_length])[0], None, {}); + +fprintf (stdout, Format (^fade.tree.name, 1, 1), "\n"); + +parameters.SetValue (fade.parameter.bias, 10); +parameters.SetValue (fade.parameter.rate, 5); + +fprintf (stdout, Format (^fade.tree.name, 1, 1), "\n"); + + +fade.sim_frequencies = fade.model.biased[terms.efv_estimate]; + +DataSet simulated_block = Simulate (^fade.tree.name, fade.sim_frequencies , fade.simulation.matrix, 200); +DataSetFilter simulated_block_filter = CreateFilter (simulated_block, 1); + +fprintf (stdout, simulated_block_filter, "\n"); + +return 0; + + + +namespace fade { + + site.composition.string := fade.CompositionString (((cache [^"terms.fade.cache.composition"])[partition_index])[s]); + site.substitution.string := fade.SubstitutionHistory (((cache [^"terms.fade.cache.substitutions"])[partition_index])[s]); + + site_annotation_headers = { + "Composition" : "Aminoacid composition of site", + "Substitutions" : "Substitution history on selected branches" + }; + + if (run_settings["method"] == ^"terms.fade.methods.MH") { + table_headers = {{"rate", "Mean posterior relative rate at a site"} + {"bias", "Mean posterior bias parameter at a site"} + {"Prob[bias>0]", "Posterior probability of substitution bias towards `bias.residue`"} + {"BayesFactor[bias>0]", "Empiricial Bayes Factor for substitution bias towards `bias.residue`"} + {"PSRF", "Potential scale reduction factor - an MCMC mixing measure"} + {"Neff", "Estimated effective sample site for Prob [bias>0]"}}; + + table_screen_output = {{"Site", "Partition", "target", "rate", "bias", "N.eff", 
"Bayes Factor",site_annotation_headers["Composition"], site_annotation_headers["Substitutions"]}}; + report.biased_site = {{"" + (1+filter_info[s]), + partition_index + 1, + bias.residue, + Format(partition_results[s][0],8,2), + Format(partition_results[s][1],8,2), + Format(partition_results[s][5],8,2), + Format(partition_results[s][3],8,2), + site.composition.string, + site.substitution.string}}; + } else { + table_headers = {{"rate", "Mean posterior relative rate at a site"} + {"bias", "Mean posterior bias parameter at a site"} + {"Prob[bias>0]", "Posterior probability of substitution bias"} + {"BayesFactor[bias>0]", "Empiricial Bayes Factor for substitution bias"} + }; + + + + table_screen_output = {{"Site", "Partition", "target", "rate", "bias", "Bayes Factor", site_annotation_headers["Composition"], site_annotation_headers["Substitutions"]}}; + report.biased_site = {{"" + (1+filter_info[s]), + partition_index + 1, + bias.residue, + Format(partition_results[s][0],8,2), + Format(partition_results[s][1],8,2), + Format(partition_results[s][3],8,2), + site.composition.string, + site.substitution.string + }}; + } + + for (partition_index = 0; partition_index < partition_count; partition_index += 1) { + filter_info = (filter_specification [partition_index])[utility.getGlobalValue ("terms.data.coverage")]; + sites_in_partition = utility.Array1D (filter_info); + site_annotations_p = {sites_in_partition, 2}; + site_annotations_p [0] = ""; + + for (s = 0; s < sites_in_partition; s += 1) { + site_annotations_p [s][0] = site.composition.string; + site_annotations_p [s][1] = site.substitution.string; + } + + site_annotations [partition_index] = site_annotations_p; + + } + s = 0; partition_index = 0; +} + + + + +for (fade.residue = 0; fade.residue < 20; fade.residue += 1) { + + + fade.bias.residue = fade.alphabet[fade.residue]; + selection.io.startTimer (fade.json [terms.json.timers], "Residue `fade.bias.residue` analysis", 2 + fade.residue); + + if (utility.Has (fade.cache 
[terms.fade.cache.conditionals], fade.bias.residue, "AssociativeList")) { + fade.conditionals = (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue]; + io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded the phylogenetic likelihood function on the grid"); + } else { + io.ReportProgressBar ("fade", "[`fade.bias.residue`] Computing the phylogenetic likelihood function on the grid"); + fade.model.baseline = model.generic.DefineModel(fade.generator, + "fade.baseline_model", { + "0": "terms.global" + }, + fade.filter_names, + None); + + fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", + "fade.biased_model", { + "0": "terms.global", + "1": parameters.Quote (fade.bias.residue) + }, + fade.filter_names, + None); + + + fade.parameter.scalers = { + terms.fade.bias : fade.parameter.bias, + terms.fade.rate : fade.parameter.rate + }; + + utility.Extend ((fade.biased [terms.parameters])[terms.global], fade.parameter.scalers); + parameters.DeclareGlobalWithRanges (fade.parameter.rate, 1, 0, 100); + parameters.DeclareGlobalWithRanges (fade.parameter.bias, 1e-10, 1e-10, 100); + + fade.model_id_to_object = { + "fade.biased_model": fade.model.biased, + "fade.baseline_model": fade.model.baseline + }; + + fade.trees.names = utility.Map (utility.Range (fade.partition_count, 1, 1), "_index_", "'fade.grid_tree_' + _index_"); + fade.lf.components = {fade.partition_count * 2, 1}; + utility.ForEachPair (fade.filter_names, "_index_", "_filter_", + ' + fade.model_assignment = { + "fade.baseline_model" : utility.Filter (fade.selected_branches[_index_], "_value_", "_value_ == terms.tree_attributes.background"), + "fade.biased_model" : utility.Filter (fade.selected_branches[_index_], "_value_", "_value_ == terms.tree_attributes.test"), + }; + + + fade.lf.components [2*(0+_index_)] = _filter_; + fade.lf.components [2*(0+_index_) + 1] = fade.trees.names[_index_]; + model.ApplyModelToTree(fade.trees.names [_index_], fade.trees[_index_], None, 
fade.model_assignment); + ' + ); + + + + LikelihoodFunction fade.lf = (fade.lf.components); + estimators.ApplyExistingEstimates ("fade.lf", fade.model_id_to_object, fade.baseline_fit, None); + + fade.conditionals.raw = fade.ComputeOnGrid ("fade.lf", + fade.grid.MatrixToDict (fade.cache[terms.fade.cache.grid]), + "fade.pass2.evaluator", + "fade.pass1.result_handler"); + + + + (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue] = fade.ConvertToConditionals (fade.conditionals.raw); + io.WriteCacheToFile (fade.path.cache, fade.cache); + } + + + if (fade.run_settings["method"] == ^"terms.fade.methods.VB0") { + if (utility.Has (fade.cache [terms.fade.cache.posterior], fade.bias.residue, "Matrix")) { + io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior means for grid loadings"); + } else { + io.ReportProgressBar ("fade", "[`fade.bias.residue`] Estimating posterior means for grid loadings "); + (fade.cache[terms.fade.cache.posterior])[fade.bias.residue] = fade.RunVariationalBayes (fade.run_settings, + fade.cache[terms.fade.cache.grid], + (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue], + None + ); + } + } else { + if (fade.run_settings["method"] == terms.fade.methods.MH) { + if (utility.Has (fade.cache [terms.fade.cache.mcmc], fade.bias.residue, "AssociativeList")) { + io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior sample for grid loadings"); + } else { + (fade.cache[terms.fade.cache.mcmc])[fade.bias.residue] = fade.RunMCMC (fade.run_settings, + fade.cache[terms.fade.cache.grid], + (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue], + "fade.pass1.result_handler", + None); + io.WriteCacheToFile (fade.path.cache, fade.cache); + } + } else { + if (utility.Has (fade.cache [terms.fade.cache.mcmc], fade.bias.residue, "AssociativeList")) { + io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior sample for grid loadings"); + } else { + 
(fade.cache[terms.fade.cache.mcmc])[fade.bias.residue] = fade.RunCollapsedGibbs (fade.run_settings, + fade.cache[terms.fade.cache.grid], + (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue], + None + ); + io.WriteCacheToFile (fade.path.cache, fade.cache); + } + } + + } + + io.ClearProgressBar (); + + namespace fade { + sites = (json [utility.getGlobalValue ("terms.json.input")])[utility.getGlobalValue ("terms.json.sites")]; + grid_points = Rows (cache['grid']); + bias_present_stencil = {grid_points,sites} ["(cache['grid'])[_MATRIX_ELEMENT_ROW_][1]>0."]; + + rates = Transpose ((cache['grid'])[-1][0]); + biases = Transpose ((cache['grid'])[-1][1]); + + + if (run_settings["method"] != ^"terms.fade.methods.VB0") { + + samples = run_settings["samples"]; + chains = run_settings["chains"]; + + results.log_L = {1,samples}; + results.samples = {samples,grid_points}; + + per_chain = samples $ chains; + + + from = 0; + to = per_chain; + + + posterior_mean_rates = {}; + posterior_mean_biases = {}; + denominators = {}; + posteriors = {}; + biased_ks = {}; + + + for (chain_id = 0; chain_id < chains; chain_id += 1) { + io.ReportProgressBar ("PROCESSING", "Samples from chain " + (chain_id + 1)); + + + grid_samples = (((cache[utility.getGlobalValue("terms.fade.cache.mcmc")])[bias.residue])[chain_id])["weights"]; + grid_samples_T = Transpose (grid_samples); + P_ks = grid_samples * + ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]; + + denominators[chain_id] = P_ks; + + + posterior_mean_rates[chain_id] = (grid_samples $ rates * + ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; + + posterior_mean_biases[chain_id] = (grid_samples $ biases * + ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; + + + biased_ks[chain_id] = grid_samples * + (bias_present_stencil $ 
((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; + + if (run_settings["method"] == ^"terms.fade.methods.MH") { + logL_samples = (((cache[utility.getGlobalValue("terms.fade.cache.mcmc")])[bias.residue])[chain_id])["likelihoods"]; + draw_from_this_chain = Random ({1,samples}["_MATRIX_ELEMENT_COLUMN_"], 0); + + for (i = from; i < to; i += 1) { + draw_this_index = draw_from_this_chain[i]; + results.log_L [i] = logL_samples [draw_this_index]; + for (r = 0; r < grid_points; r += 1) { + results.samples [i][r] = grid_samples[draw_this_index][r]; + } + } + } else { + results.samples = grid_samples; + } + + from = to; + if (chain_id == chains - 2) { + to = samples; + } else { + to += per_chain; + } + } + io.ClearProgressBar (); + posterior_mean_over_grid = {grid_points,1}["(+results.samples[-1][_MATRIX_ELEMENT_ROW_])/samples"]; + + } else { + posterior_mean_over_grid = (cache[^"terms.fade.cache.posterior"])[bias.residue]; + posterior_mean_over_grid_T = Transpose (posterior_mean_over_grid); + cache[terms.fade.cache.posterior] = posterior_mean_over_grid; + + P_ks = posterior_mean_over_grid_T * ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]; + + + posterior_mean_rates = (posterior_mean_over_grid_T $ rates * + ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; + posterior_mean_biases = (posterior_mean_over_grid_T $ biases * + ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; + + biased_ks = posterior_mean_over_grid_T * + (bias_present_stencil $ ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; + + } + + prior_weight_bias = +posterior_mean_over_grid ["_MATRIX_ELEMENT_VALUE_*((cache['grid'])[_MATRIX_ELEMENT_ROW_][1]>0.)"]; + + headers.printed = FALSE; + + i = 0; + s = 0; // reset to ensure good re-entrant 
behavior + + report.sites_found = {}; + //report.posteriors[bias.residue] = {}; + site_results[bias.residue] = {}; + + chain_iterator = utility.Range (chains, 0, 1); + + for (partition_index = 0; partition_index < partition_count; partition_index += 1) { + filter_info = (filter_specification [partition_index])[utility.getGlobalValue ("terms.data.coverage")]; + sites_in_partition = utility.Array1D (filter_info); + + partition_posteriors = {}; + if (run_settings["method"] != utility.getGlobalValue ("terms.fade.methods.MH")) { + partition_results = {sites_in_partition, 4}; + } else { + partition_results = {sites_in_partition, 6}; + } + + + + for (s = 0; s < sites_in_partition; s += 1) { + + pp = posterior_mean_over_grid $ (((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"])[-1][i]; + partition_posteriors [s] = Transpose (pp * (1/(+pp))); + + if (run_settings["method"] != utility.getGlobalValue ("terms.fade.methods.VB0")) { + partition_results[s][0] = fade.ComputeRandNeff ( + utility.Map (chain_iterator, "_value_", "((`&posterior_mean_rates`)[_value_])[-1][`&i`]") + )[0]; + partition_results[s][1] = fade.ComputeRandNeff ( + utility.Map (chain_iterator, "_value_", "((`&posterior_mean_biases`)[_value_])[-1][`&i`]") + )[0]; + + biased_posterior = fade.ComputeRandNeff ( + utility.Map (chain_iterator, "_value_", "((`&biased_ks`)[_value_])[-1][`&i`]") + ); + + partition_results[s][2] = biased_posterior[0]; + partition_results[s][3] = stats.BayesFactor (prior_weight_bias, biased_posterior[0]) ; + + if (run_settings["method"] == utility.getGlobalValue ("terms.fade.methods.MH")) { + partition_results[s][4] = biased_posterior[1]; + partition_results[s][5] = biased_posterior[2]; + } + + } else { + + if (run_settings["method"] == utility.getGlobalValue ("terms.fade.methods.VB0")) { + partition_results [s][0] = posterior_mean_rates[i]; + partition_results [s][1] = posterior_mean_biases[i]; + partition_results [s][2] = biased_ks[i]; + 
partition_results [s][3] = stats.BayesFactor (prior_weight_bias, biased_ks[i]); + } + } + + + if (partition_results[s][3] >= run_settings["bayes factor"]) { + if (Abs(report.sites_found) == 0 && table_output_options[^"terms.table_options.header"]) { + fprintf (stdout, "\n", io.FormatTableRow (table_screen_output,table_output_options)); + table_output_options[^"terms.table_options.header"] = FALSE; + } + fprintf (stdout, io.FormatTableRow (report.biased_site,table_output_options)); + report.sites_found + (1-partition_results [s][3]); + } + + i+=1; + } + + (site_results[bias.residue]) [partition_index] = partition_results; + //(report.posteriors[bias.residue]) [partition_index] = partition_posteriors; + s = 0; // for re-entrancy + } + + partition_index = 0; + sites_found = Abs(report.sites_found); + sites_found_summary [bias.residue] = Abs(report.sites_found); + + } + selection.io.stopTimer (fade.json [terms.json.timers], "Residue `fade.bias.residue` analysis"); + +} + + + +// =========== ANALYSIS SUMMARY ======== + +fade.json [terms.fade.cache.settings] = fade.run_settings; +fade.json [terms.fade.json.site_annotations] = { + terms.fade.json.headers : fade.site_annotation_headers, + terms.fade.json.site_annotations : fade.site_annotations +}; +fade.json [terms.fit.MLE] = {terms.json.headers : fade.table_headers, + terms.json.content : fade.site_results }; + +//fade.json [terms.fade.posterior] = fade.report.posteriors; + +console.log ("----\n## FADE analysis summary. 
Evidence for directional selection evaluated using empirical Bayes factor threshold of " + fade.run_settings["bayes factor"]); + + +utility.ForEachPair (fade.sites_found_summary, "_residue_", "_count_", +' + if (_count_ == 0) { + console.log ("* No sites are evolving directionally towards " + _residue_); + } else { + console.log ("* " + _count_ + " " + io.SingularOrPlural (_count_, "site is", "sites are") + " evolving directionally towards " + _residue_); + } + +'); + + +selection.io.stopTimer (fade.json [terms.json.timers], "Overall"); + +io.SpoolJSON (fade.json, fade.alignment_info[terms.json.json]); + +// HELPER FUNCTIONS GO HERE +//---------------------------------------------------------------------------- + +function fade.RunPrompts (prompts) { + if (prompts["branches"]) { + fade.selected_branches = selection.io.defineBranchSets ( fade.partitions_and_trees ); + fade.cache [terms.fade.cache.branches] = fade.selected_branches; + prompts["branches"] = FALSE; + } + + if (prompts["grid"]) { + fade.run_settings["grid size"] = io.PromptUser ("> Number of grid points per dimension (total number is D^2)",fade.run_settings["grid size"],5,50,TRUE); + prompts["grid"] = FALSE; + } + + + + if (prompts["model"]) { + utility.Extend (models.protein.empirical_models, {"GTR" : "General time reversible model (189 estimated parameters)."}); + fade.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Baseline substitution model"); + fade.generator = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[fade.baseline_model ]; + fade.cache[terms.fade.cache.model] = fade.baseline_model; + fade.cache[terms.fade.cache.model_generator] = fade.generator; + prompts["model"] = FALSE; + } + + + if (prompts["method"]) { + fade.run_settings["method"] = io.SelectAnOption ({ + terms.fade.methods.MH : "Full Metropolis-Hastings MCMC algorithm (slowest, original 2013 paper implementation)", + terms.fade.methods.CG : 
"Collapsed Gibbs sampler (intermediate speed)", + terms.fade.methods.VB0 : "0-th order Variational Bayes approximations (fastest, recommended default)" + }, "Posterior estimation method"); + prompts["method"] = FALSE; + } + + if (prompts["chain"]) { + if (fade.run_settings["method"] == terms.fade.methods.MH) { + fade.run_settings["chains"] = io.PromptUser ("> Number of MCMC chains to run",fade.run_settings["chains"],2,20,TRUE); + } else { + fade.run_settings["chains"] = 1; + } + if (fade.run_settings["method"] != terms.fade.methods.VB0) { + fade.run_settings["chain-length"] = io.PromptUser ("> The length of each chain",fade.run_settings["chain-length"],5e3,5e7,TRUE); + fade.run_settings["burn-in"] = io.PromptUser ("> Use this many samples as burn-in",fade.run_settings["chain-length"]$2,fade.run_settings["chain-length"]$20,fade.run_settings["chain-length"]*95$100,TRUE); + fade.run_settings["samples"] = io.PromptUser ("> How many samples should be drawn from each chain",fade.run_settings["samples"],50,fade.run_settings["chain-length"]-fade.run_settings["burn-in"],TRUE); + } + fade.run_settings["concentration"] = io.PromptUser ("> The concentration parameter of the Dirichlet prior",fade.run_settings["concentration"],0.001,1,FALSE); + prompts["chain"] = FALSE; + } +} + +//------------------------------------------------------------------------------------------------// + +lfunction fade.DefineGrid (one_d_points) { + // only one point for rate = 0, because bias is not identifiable if rate = 0 + + one_d_points = Max (one_d_points, 5); + + alphaBetaGrid = {one_d_points^2,2}; // (alpha, beta) pair + + oneDGridRate = {one_d_points,1}; + oneDGridBias = {one_d_points,1}; + + below1_frac = 0.7; + below1 = ((one_d_points)*below1_frac+0.5)$1; + above1 = (one_d_points-1)*(1-below1_frac)$1; + + if (below1 + above1 != one_d_points) { + above1 = one_d_points - below1; + } + + _neg_step = 1/(below1); + _neg_stepP1 = 1/(below1+1); + + for (_k = 0; _k < below1; _k += 1) { + 
oneDGridBias [_k][0] = _neg_step * (_k); + oneDGridRate [_k][0] = _neg_stepP1 * (_k + 1); + } + + oneDGridRate [below1-1][0] = 1; + oneDGridBias [below1-1][0] = 1; + + _pos_step = 49^(1/3)/above1; + for (_k = 1; _k <= above1; _k += 1) { + oneDGridBias [below1+_k-1][0] = 1+(_pos_step*_k)^3; + oneDGridRate [below1+_k-1][0] = 1+(_pos_step*_k)^3; + } + + _p = 0; + + for (_r = 0; _r < one_d_points; _r += 1) { + for (_c = 0; _c < one_d_points; _c += 1) { + alphaBetaGrid[_p][0] = oneDGridRate[_r]; + alphaBetaGrid[_p][1] = oneDGridBias[_c]; + _p += 1; + } + } + alphaBetaGrid[0][0] = 0; alphaBetaGrid[0][1] = 0; + alphaBetaGrid[1][1] = 0; + + return alphaBetaGrid; +} + +//------------------------------------------------------------------------------------------------// + +lfunction fade.SubstitutionHistory (subs) { + result = ""; + result * 128; + keys = utility.sortStrings (utility.Keys (subs)); + + for (i = 0; i < Abs (subs); i+=1) { + source = keys[i]; + targets = subs[source]; + if (i > 0) { + result * ", "; + } + result * (source + "->"); + keys2 = utility.sortStrings (utility.Keys (targets)); + for (k = 0; k < Abs (targets); k+=1) { + result * (keys2[k] + "(" + Abs(targets[keys2[k]]) + ")"); + } + } + + result * 0; + return result; + +} + +//------------------------------------------------------------------------------------------------// + +lfunction fade.CompositionString (composition) { + result = ""; + result * 128; + keys = utility.sortStrings (utility.Keys (composition)); + + for (i = 0; i < Abs (composition); i+=1) { + residue = keys[i]; + if (i) { + result * ","; + } + result * (residue + composition [residue]); + } + + result * 0; + return result; + +} + + +//------------------------------------------------------------------------------------------------// + + +lfunction fade.grid.MatrixToDict (grid) { + return utility.Map (utility.MatrixToListOfRows (grid), "_value_", + '{ terms.fade.bias : { + terms.id : fade.parameter.scalers [ terms.fade.bias ], + 
terms.fit.MLE : _value_[1] + }, + terms.fade.rate : { + terms.id : fade.parameter.scalers [ terms.fade.rate ], + terms.fit.MLE : _value_[0] + } + }'); +} diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf b/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf index e5ac44596..db84c7220 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf @@ -21,6 +21,7 @@ lfunction selection.io.defineBranchSets(partition_info) { utility.ForEach (tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")], "_value_", "`&available_models`[_value_] += 1"); list_models = utility.sortStrings(utility.Keys (available_models)); // get keys option_count = Abs (available_models); + } else { option_count = 0; } diff --git a/res/TemplateBatchFiles/libv3/IOFunctions.bf b/res/TemplateBatchFiles/libv3/IOFunctions.bf index 73ebee3b1..fc2d9623f 100644 --- a/res/TemplateBatchFiles/libv3/IOFunctions.bf +++ b/res/TemplateBatchFiles/libv3/IOFunctions.bf @@ -551,7 +551,7 @@ lfunction debug.log (arg) { * @returns nothing */ lfunction messages.log (arg) { - fprintf (MESSAGE_LOG, arg, "\n"); + fprintf (MESSAGE_LOG, "\n", arg); } diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 9b8272edc..c7967e9a0 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -22,6 +22,7 @@ namespace terms{ synonymous_sub_count = "synonymous substitution count"; nonsynonymous_sub_count = "nonsynonymous substitution count"; original_name = "original name"; + replicates = "replicates"; category = "category"; mixture = "mixture"; diff --git a/res/TemplateBatchFiles/libv3/models/model_functions.bf b/res/TemplateBatchFiles/libv3/models/model_functions.bf index 24d8cfb0b..bf155c7f5 100644 --- a/res/TemplateBatchFiles/libv3/models/model_functions.bf +++ 
b/res/TemplateBatchFiles/libv3/models/model_functions.bf @@ -8,7 +8,7 @@ LoadFunctionLibrary ("../convenience/regexp.bf"); /** * @name model.GetParameters_RegExp * @param model {String} - model ID - * @param re {String} - regular expression + * @param re {String} - regular expression * @return a dictionary of global model parameters that match a regexp */ lfunction model.GetParameters_RegExp(model, re) { @@ -34,7 +34,7 @@ lfunction model.GetParameters_RegExp(model, re) { * @param rules */ function model.ApplyModelToTree (id, tree, model_list, rules) { - + if (Type (rules) == "AssociativeList") { // this has the form // model id : list of branches to apply the model (as a string COLUMN matrix with branch names, @@ -53,15 +53,16 @@ function model.ApplyModelToTree (id, tree, model_list, rules) { "); } - - /* - + + /* + debug.log (tree); _t = Eval ("Format (`id`,1,1)"); debug.log (_t); - + */ - + + model.ApplyModelToTree.ids = Rows (rules); for (model.ApplyModelToTree.k = 0; model.ApplyModelToTree.k < Abs (rules); model.ApplyModelToTree.k += 1) { model.ApplyModelToTree.name = model.ApplyModelToTree.ids[model.ApplyModelToTree.k]; @@ -219,20 +220,19 @@ function model.generic.DefineModel (model_spec, id, arguments, data_filter, esti // Add data filter information to model description - models.generic.AttachFilter (model.generic.DefineModel.model, data_filter); - - + if ( None != data_filter) { + models.generic.AttachFilter (model.generic.DefineModel.model, data_filter); + } // Set Q field model.generic.DefineModel.model = Call (model.generic.DefineModel.model [terms.model.defineQ], model.generic.DefineModel.model, id); // Define type of frequency estimator - if (estimator_type != None) { + if (None != estimator_type) { model.generic.DefineModel.model [terms.model.frequency_estimator] = estimator_type; } - // Set EFV field model.generic.DefineModel.model = Call (model.generic.DefineModel.model [terms.model.frequency_estimator], model.generic.DefineModel.model, id, @@ 
-247,13 +247,13 @@ function model.generic.DefineModel (model_spec, id, arguments, data_filter, esti parameters.StringMatrixToFormulas (model.generic.DefineModel.model [terms.model.matrix_id],model.generic.DefineModel.model[terms.model.rate_matrix]); - + if (Type ((model.generic.DefineModel.model[terms.efv_estimate])[0]) == "String") { parameters.StringMatrixToFormulas (model.generic.DefineModel.model [terms.model.efv_id],model.generic.DefineModel.model[terms.efv_estimate]); } else { utility.SetEnvVariable (model.generic.DefineModel.model [terms.model.efv_id], model.generic.DefineModel.model[terms.efv_estimate]); } - + model.define_from_components (id, model.generic.DefineModel.model [terms.model.matrix_id], model.generic.DefineModel.model [terms.model.efv_id], model.generic.DefineModel.model [terms.model.canonical]); @@ -261,7 +261,7 @@ function model.generic.DefineModel (model_spec, id, arguments, data_filter, esti Call (model.generic.DefineModel.model[terms.model.post_definition], model.generic.DefineModel.model); } - + return model.generic.DefineModel.model; } @@ -277,7 +277,9 @@ function model.generic.DefineModel (model_spec, id, arguments, data_filter, esti function model.generic.DefineMixtureModel (model_spec, id, arguments, data_filter, estimator_type) { model.generic.DefineModel.model = utility.CallFunction (model_spec, arguments); - models.generic.AttachFilter (model.generic.DefineModel.model, data_filter); + if (None != estimator_type) { + models.generic.AttachFilter (model.generic.DefineModel.model, data_filter); + } // for mixture models this will define the mixture components as well model.generic.DefineModel.model = Call (model.generic.DefineModel.model [terms.model.defineQ], model.generic.DefineModel.model, id); @@ -361,7 +363,7 @@ function models.generic.ConstrainBranchLength (model, value, parameter) { messages.log ("models.generic.ConstrainBranchLength: not exactly one local model parameter"); } } else { - messages.log 
("models.generic.ConstrainBranchLength: unsupported value type " + Type (value) + "\n" + value); + messages.log ("models.generic.ConstrainBranchLength: unsupported value type " + Type (value) + "\n" + value); } return 0; } @@ -446,8 +448,7 @@ function models.generic.SetBranchLength (model, value, parameter) { * @returns 0 */ lfunction models.generic.AttachFilter (model, filter) { - - + if (Type (filter) != "String") { utility.ForEach (filter, "_filter_", "models.generic.AttachFilter (`&model`, _filter_)"); model[utility.getGlobalValue("terms.model.data")] = filter; diff --git a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf index a5eac2370..138759ed7 100644 --- a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf +++ b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf @@ -61,7 +61,7 @@ LoadFunctionLibrary("matrices/HIV.ibf"); lfunction models.protein.empirical._GenerateRate(fromChar, toChar, namespace, model_type, model) { models.protein.empirical._GenerateRate.p = {}; models.protein.empirical._GenerateRate.p [model_type] = {}; - + if (fromChar < toChar) { models.protein.empirical._GenerateRate.p [utility.getGlobalValue("terms.model.rate_entry")] = "" + ((model[utility.getGlobalValue ("terms.model.empirical_rates")])[fromChar])[toChar]; } else { @@ -78,10 +78,12 @@ lfunction models.protein.empirical._GenerateRate(fromChar, toChar, namespace, mo lfunction models.protein.empirical._DefineQ(model_dict, namespace) { // Call frequencies here. Will be repeated in model.generic.DefineModel, but we are ok with that. 
- frequencies._aux.empirical.singlechar(model_dict, namespace, model_dict[utility.getGlobalValue("terms.model.data")]); - models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespace); - models.protein.empirical.DefineQMatrix (model_dict, namespace); + if (utility.Has (model_dict, utility.getGlobalValue("terms.model.data"), "String")) { + frequencies._aux.empirical.singlechar(model_dict, namespace, model_dict[utility.getGlobalValue("terms.model.data")]); + models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespace); + } + models.protein.empirical.DefineQMatrix (model_dict, namespace); return model_dict; } @@ -92,7 +94,7 @@ lfunction models.protein.empirical._DefineQ(model_dict, namespace) { */ lfunction models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespace) { - + alphabet = model_dict[utility.getGlobalValue("terms.alphabet")]; dim = utility.Array1D (alphabet); raw_rates = model_dict[utility.getGlobalValue("terms.model.empirical_rates")]; @@ -107,10 +109,10 @@ lfunction models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespac for (i = 0; i < dim; i +=1 ){ for (j = i+1; j < dim; j += 1){ if ( i!=j ){ - - + + rate = (raw_rates[alphabet[i]])[alphabet[j]]; - + Q[i][j] = rate * EFV[j]; Q[j][i] = rate * EFV[i]; @@ -132,22 +134,22 @@ lfunction models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespac norm += Q[i][i] * EFV[i]; } norm = -1*norm; - + // perform normalization for (i = 0; i < dim; i +=1 ){ for ( j = 0; j < dim; j += 1){ Q[i][j] = Q[i][j] / norm; } - } + } // Now convert it BACK TO hyphy dictionary with frequencies divided out. // // ************** This sets the new empirical rates. 
************** // - + new_empirical_rates = {}; for (l1 = 0; l1 < dim - 1; l1 += 1) { new_empirical_rates[alphabet[l1]] = {}; for (l2 = l1 + 1; l2 < dim; l2 += 1) { - + if (EFV[l2] == 0){ nof_rate = 0.; } @@ -195,9 +197,9 @@ function models.protein.empirical.DefineQMatrix (modelSpec, namespace) { // ADDED FOR EMPIRICAL MODELS __empirical_rates = modelSpec[terms.model.empirical_rates]; - - - + + + __global_cache = {}; if (None != __rate_variation) { @@ -205,13 +207,13 @@ function models.protein.empirical.DefineQMatrix (modelSpec, namespace) { __rp = Call (__rate_variation[terms.rate_variation.distribution], __rate_variation[terms.rate_variation.options], namespace); __rate_variation [terms.id] = (__rp[terms.category])[terms.id]; - + parameters.DeclareCategory (__rp[terms.category]); parameters.helper.copy_definitions (modelSpec[terms.parameters], __rp); - } + } for (_rowChar = 0; _rowChar < models.protein.dimensions; _rowChar +=1 ){ - for (_colChar = 0; _colChar < models.protein.dimensions; _colChar += 1) { + for (_colChar = 0; _colChar < models.protein.dimensions; _colChar += 1) { if (_rowChar == _colChar) { continue; } @@ -220,10 +222,10 @@ function models.protein.empirical.DefineQMatrix (modelSpec, namespace) { namespace, __modelType, modelSpec); - + if (None != __rate_variation) { - __rp = Call (__rate_variation[terms.rate_variation.rate_modifier], + __rp = Call (__rate_variation[terms.rate_variation.rate_modifier], __rp, __alphabet[_rowChar], __alphabet[_colChar], @@ -264,7 +266,7 @@ models.protein.empirical.default_generators = {"LG": "models.protein.LG.ModelDes "gcpREV": "models.protein.gcpREV.ModelDescription", "HIVBm": "models.protein.HIVBm.ModelDescription", "HIVWm": "models.protein.HIVWm.ModelDescription"}; - + models.protein.empirical.plusF_generators = {"LG": "models.protein.LGF.ModelDescription", "WAG": "models.protein.WAGF.ModelDescription", "JTT": "models.protein.JTTF.ModelDescription", @@ -274,7 +276,7 @@ models.protein.empirical.plusF_generators = 
{"LG": "models.protein.LGF.ModelDesc "gcpREV": "models.protein.gcpREVF.ModelDescription", "HIVBm": "models.protein.HIVBmF.ModelDescription", "HIVWm": "models.protein.HIVWmF.ModelDescription"}; - + models.protein.empirical.mleF_generators = {"LG": "models.protein.LGML.ModelDescription", "WAG": "models.protein.WAGML.ModelDescription", "JTT": "models.protein.JTTML.ModelDescription", diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 877524ead..7383470ee 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -187,19 +187,19 @@ lfunction alignments.ReadNucleotideDataSet(dataset_name, file_name) { */ lfunction alignments.ReadProteinDataSet(dataset_name, file_name) { result = alignments.ReadNucleotideDataSet (dataset_name, file_name); - + /* check that the alignment has protein data */ DataSetFilter throwaway = CreateFilter (^dataset_name, 1); GetDataInfo (alphabet, throwaway, "CHARACTERS"); DeleteObject (throwaway); - io.CheckAssertion("alignments.AlphabetType(`&alphabet`)==utility.getGlobalValue ('terms.amino_acid')", + io.CheckAssertion("alignments.AlphabetType(`&alphabet`)==utility.getGlobalValue ('terms.amino_acid')", "The input alignment must contain protein data"); - + return result; } /** - * Categorize an alphabet for an alignment + * Categorize an alphabet for an alignment * @name alignments.AlphabetType * @param alphabet - the alphabet vector, e.g. 
fetched by GetDataInfo (alphabet, ..., "CHARACTERS"); * @returns {String} one of standard alphabet types or None if unknown @@ -214,7 +214,7 @@ lfunction alignments.AlphabetType (alphabet) { } } return None; -} +} /** * Ensure that name mapping is not None by creating a f(x)=x map if needed @@ -224,14 +224,14 @@ lfunction alignments.AlphabetType (alphabet) { * @param file_name - path to file * @returns {Dictionary} r - metadata pertaining to the dataset */ - + lfunction alignments.EnsureMapping(dataset_name, data) { name_mapping = data[utility.getGlobalValue("terms.data.name_mapping")]; if (None == name_mapping) { /** create a 1-1 mapping if nothing was done */ name_mapping = {}; utility.ForEach (alignments.GetSequenceNames (dataset_name), "_value_", "`&name_mapping`[_value_] = _value_"); data[utility.getGlobalValue("terms.data.name_mapping")] = name_mapping; - } + } return data; } @@ -316,9 +316,9 @@ function alignments.ReadNucleotideAlignment(file_name, dataset_name, datafilter_ */ lfunction alignments.CompressDuplicateSequences (filter_in, filter_out, rename) { GetDataInfo (duplicate_info, ^filter_in, -2); - + DataSetFilter ^filter_out = CreateFilter (^filter_in, 1, "", Join (",", duplicate_info["UNIQUE_INDICES"])); - + if (rename) { utility.ForEachPair (duplicate_info["UNIQUE_INDICES"], "_idx_", @@ -328,11 +328,11 @@ lfunction alignments.CompressDuplicateSequences (filter_in, filter_out, rename) _seq_name_ += ":" + ((`&duplicate_info`)["UNIQUE_COUNTS"])[_idx_[1]]; SetParameter (^`&filter_in`,_seq_idx_,_seq_name_); '); - - } - - - + + } + + + return duplicate_info["UNIQUE_SEQUENCES"]; //DataSetFilter ^filter_out = CreateFilter (filter_in); } @@ -354,10 +354,10 @@ lfunction alignments.DefineFiltersForPartitions(partitions, source_data, prefix, this_filter[utility.getGlobalValue("terms.data.name")] = prefix + (partitions[i])[utility.getGlobalValue("terms.data.name")]; DataSetFilter ^ (this_filter[utility.getGlobalValue("terms.data.name")]) = CreateFilter( ^ 
source_data, 3, (partitions[i])[utility.getGlobalValue("terms.data.filter_string")], , data_info[utility.getGlobalValue("terms.stop_codons")]); diff = test.sites - 3 * ^ (this_filter[utility.getGlobalValue("terms.data.name")] + ".sites"); - + //TODO: BELOW, IS THE "names" CORRECT OR SHOULD IT BE "name"????? SJS can't locate another time when the plural is used through libv3. io.CheckAssertion("`&diff` == 0", "Partition " + (filters["names"])[i] + " is either has stop codons or is not in frame"); - + this_filter[utility.getGlobalValue("terms.data.coverage")] = utility.DictToArray(utility.Map(utility.Filter( ^ (this_filter[utility.getGlobalValue("terms.data.name")] + ".site_map"), "_value_", "_value_%3==0"), "_value_", "_value_$3")); filters + this_filter; } @@ -437,25 +437,25 @@ lfunction alignments.TranslateCodonsToAminoAcids (sequence, offset, code) { * @param {String} sequence - the string to translate * @param {Number} offset - start at this position (should be in {0,1,2}) * @param {Dictionary} code - genetic code description (e.g. 
returned by alignments.LoadGeneticCode) - * @param {lookup} code - resolution lookup dictionary + * @param {lookup} code - resolution lookup dictionary * @returns {Dict} list of possible amino-acids (as dicts) at this position */ lfunction alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, offset, code, lookup) { - console.log (sequence); - + //console.log (sequence); + l = Abs (sequence); translation = {}; - + DataSet single_seq = ReadFromString (">s\n" + sequence[offset][Abs (sequence)-1]); DataSetFilter single_seq_filter = CreateFilter (single_seq, 3, "", ""); - + GetDataInfo (patterns, single_seq_filter); GetDataInfo (alphabet, single_seq_filter, "CHARACTERS"); GetDataInfo (single_seq_data, single_seq_filter, 0); code_lookup = code [utility.getGlobalValue("terms.code.ordering")]; code_table = code [utility.getGlobalValue("terms.code")]; - + for (s = 0; s < single_seq_filter.sites; s += 1) { codon = single_seq_data[3*s][3*s+2]; if (lookup / codon) { @@ -481,8 +481,8 @@ lfunction alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, offse translation[s] = my_resolution; } } - - + + return translation; } diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index e341ee02b..b00104325 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -381,6 +381,58 @@ lfunction estimators.TraverseLocalParameters (likelihood_function_id, model_desc return result; } +/** + * @name + * @param {String} tree_name + * @param {Dictionary} model_descriptions + * @param {Matrix} initial_values + * @param branch_length_conditions + * @returns number of constrained parameters; + */ +function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions, initial_values, _application_type, keep_track_of_proportional_scalers) { + estimators.ApplyExistingEstimatesToTree.constraint_count = 0; + + + ExecuteCommands("GetInformation 
(estimators.ApplyExistingEstimatesToTree.map, `_tree_name`);"); + estimators.ApplyExistingEstimatesToTree.branch_names = Rows(estimators.ApplyExistingEstimatesToTree.map); + + for (estimators.ApplyExistingEstimatesToTree.b = 0; estimators.ApplyExistingEstimatesToTree.b < Abs(estimators.ApplyExistingEstimatesToTree.map); estimators.ApplyExistingEstimatesToTree.b += 1) { + _branch_name = estimators.ApplyExistingEstimatesToTree.branch_names[estimators.ApplyExistingEstimatesToTree.b]; + + if (initial_values / _branch_name) { // have an entry for this branch name + _existing_estimate = initial_values[_branch_name]; + + if (Type(_existing_estimate) == "AssociativeList") { + _set_branch_length_to = (initial_values[_branch_name])[terms.fit.MLE]; + if (None != branch_length_conditions) { + if (None != _application_type) { + + if (Type(_application_type) == "String") { + if (_application_type == terms.model.branch_length_constrain ) { + estimators.ApplyExistingEstimatesToTree.constraint_count += estimators.constrainBranchLength(_tree_name, _branch_name, model_descriptions[estimators.ApplyExistingEstimatesToTree.map[_branch_name]], _set_branch_length_to); + continue; + } + _set_branch_length_to = {}; + _set_branch_length_to[terms.branch_length] = _existing_estimate[terms.fit.MLE]; + _set_branch_length_to[terms.model.branch_length_scaler] = _application_type; + keep_track_of_proportional_scalers[_application_type] = 1; + } + } + } + + estimators.ApplyExistingEstimatesToTree.constraint_count += estimators.applyBranchLength(_tree_name, _branch_name, model_descriptions[estimators.ApplyExistingEstimatesToTree.map[_branch_name]], _set_branch_length_to); + } else { + if (Type(_existing_estimate) != "Unknown") { + warning.log ("Incorrect type for the initial values object of for branch '" + _branch_name + "' : " + _existing_estimate); + } + } + } + } + + //fprintf (stdout, Format (^_tree_name, 1,1), "\n"); + + return estimators.ApplyExistingEstimatesToTree.constraint_count; +} /** * 
@name @@ -426,8 +478,23 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip if (Type((initial_values[terms.branch_length])[estimators.ApplyExistingEstimates.i]) == "AssociativeList") { // have branch lengths for this partition - _tree_name = (estimators.ApplyExistingEstimates.lfInfo[terms.fit.trees])[estimators.ApplyExistingEstimates.i]; + _application_type = None; + if (Type (branch_length_conditions) == "AssociativeList") { + if (Abs(branch_length_conditions) > estimators.ApplyExistingEstimates.i) { + _application_type = branch_length_conditions[estimators.ApplyExistingEstimates.i]; + } + } + + estimators.ApplyExistingEstimates.df_correction += estimators.ApplyExistingEstimatesToTree ((estimators.ApplyExistingEstimates.lfInfo[terms.fit.trees])[estimators.ApplyExistingEstimates.i], + model_descriptions, + (initial_values[terms.branch_length])[estimators.ApplyExistingEstimates.i], + _application_type, + estimators.ApplyExistingEstimates.keep_track_of_proportional_scalers); + + + + /* ExecuteCommands("GetInformation (estimators.ApplyExistingEstimates.map, `_tree_name`);"); estimators.ApplyExistingEstimates.branch_names = Rows(estimators.ApplyExistingEstimates.map); @@ -463,7 +530,8 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip } } } - } + + }*/ } else { if (Type((initial_values[terms.branch_length])[estimators.ApplyExistingEstimates.i]) != "Unknown") { @@ -551,8 +619,8 @@ lfunction estimators.BuildLFObject (lf_id, data_filter, tree, model_map, initial utility.ExecuteInGlobalNamespace ("LikelihoodFunction `lf_id` = (`&lf_components`)"); - - + + df = 0; if (Type(initial_values) == "AssociativeList") { @@ -613,7 +681,7 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o lf_id = &likelihoodFunction; utility.ExecuteInGlobalNamespace ("LikelihoodFunction `lf_id` = (`&lf_components`)"); - + df = 0; if (Type(initial_values) == "AssociativeList") { @@ -629,8 +697,10 @@ 
lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o Optimize (mles, likelihoodFunction); + /* Export (lf,likelihoodFunction); console.log (lf); + */ if (Type(initial_values) == "AssociativeList") { utility.ToggleEnvVariable("USE_LAST_RESULTS", None); diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index d5b441004..650b643fb 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -5469,6 +5469,8 @@ void _ElementaryCommand::ExecuteCase52 (_ExecutionList& chain) { } if (baseSet.sLength == alphabetMatrix->GetVDim ()) { + + long unitSize = ((_FString*)alphabetMatrix->GetFormula(1,0)->Compute())->theString->toNum(); if (unitSize >= 1) { From 9a6777e213b0c1229060a48910825e307114ed90 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 19 Jun 2018 13:25:32 -0400 Subject: [PATCH 04/53] Fixing some memory leaks --- CMakeLists.txt | 2 +- src/core/batchlan.cpp | 572 +++++++++++++++++------------------ src/core/matrix.cpp | 662 ++++++++++++++++++++--------------------- src/core/operation.cpp | 1 + 4 files changed, 620 insertions(+), 617 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49e8cac2f..eb63c3e26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if(${GCC_VERSION} VERSION_LESS 6.0) set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -std=gnu++14") else(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g -fsanitize=address -fsanitize=leak") endif(${GCC_VERSION} VERSION_LESS 6.0) if(NOAVX) diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index 650b643fb..a5a827047 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1,21 +1,21 @@ /* - + HyPhy - Hypothesis Testing Using Phylogenies. 
- + Copyright (C) 1997-now Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) - + Significant contributions from: Spencer V Muse (muse@stat.ncsu.edu) Simon DW Frost (sdf22@cam.ac.uk) - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -23,10 +23,10 @@ distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -34,7 +34,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - + */ #include "likefunc.h" @@ -421,9 +421,9 @@ _String* MPIRecvString (long senderT, long& senderID) { } MPI_Status status; - + // nonagressive polling mode - + int message_received = 0; while (! 
message_received) { MPI_Iprobe (senderT, HYPHY_MPI_SIZE_TAG, MPI_COMM_WORLD, &message_received, MPI_STATUS_IGNORE); @@ -480,7 +480,7 @@ _String* MPIRecvString (long senderT, long& senderID) { //____________________________________________________________________________________ const _String GetStringFromFormula (_String* data,_VariableContainer* theP) { - + _Formula nameForm (*data,theP); _PMathObj formRes = nameForm.Compute(); @@ -515,7 +515,7 @@ bool numericalParameterSuccessFlag = true; _Parameter ProcessNumericArgument (_String* data, _VariableContainer const* theP, _ExecutionList* currentProgram) { _String errMsg; _Formula nameForm (*data,theP, currentProgram?&errMsg:nil); - + if (!errMsg.sLength) { _PMathObj formRes = nameForm.Compute(); numericalParameterSuccessFlag = true; @@ -529,13 +529,13 @@ _Parameter ProcessNumericArgument (_String* data, _VariableContainer const* the } } } - + if (currentProgram) { currentProgram->ReportAnExecutionError (errMsg); } else { WarnError(errMsg); } - + numericalParameterSuccessFlag = false; return 0.0; } @@ -546,9 +546,9 @@ _PMathObj ProcessAnArgumentByType (_String const* expression, _VariableContain { _String errMsg; - + _Formula expressionProcessor (*expression, theP, currentProgram?&errMsg:nil); - + if (errMsg.sLength && currentProgram) { currentProgram->ReportAnExecutionError (errMsg); } @@ -559,7 +559,7 @@ _PMathObj ProcessAnArgumentByType (_String const* expression, _VariableContain return expressionResult; } } - + return nil; } @@ -569,13 +569,13 @@ _PMathObj ProcessAnArgumentByType (_String const* expression, _VariableContain const _String ProcessLiteralArgument (_String const* data, _VariableContainer const* theP, _ExecutionList* currentProgram) { //NLToConsole(); BufferToConsole("ProcessLiteralArgument:"); StringToConsole(*data); NLToConsole(); _PMathObj getString = ProcessAnArgumentByType (data, theP, STRING, currentProgram); - + if (getString) { _String result (*((_FString*)getString)->theString); 
DeleteObject(getString); return result; } - + return emptyString; } @@ -664,14 +664,14 @@ _HY_BL_FUNCTION_TYPE GetBFFunctionType (long idx) { //____________________________________________________________________________________ _String const ExportBFFunction (long idx, bool recursive) { - - + + _String bf (8192UL, true); if (IsBFFunctionIndexValid(idx)) { - + _String hbf_name = GetBFFunctionNameByIndex (idx); _ExecutionList * body = &GetBFFunctionBody(idx); - + if (body->enclosingNamespace.sLength) { bf << "namespace " << body->enclosingNamespace << " {\n"; } @@ -686,19 +686,19 @@ _String const ExportBFFunction (long idx, bool recursive) { default: bf << blFunction; } - - + + bf << hbf_name; bf << '('; - + long argument_count = GetBFFunctionArgumentCount (idx); _List * argument_list = &GetBFFunctionArgumentList (idx); for (long argument_id = 0; argument_id < argument_count; argument_id ++) { if (argument_id) { bf << ','; } - - + + bf << body->TrimNameSpaceFromID(*(_String*)argument_list->Element (argument_id)); if (GetBFFunctionArgumentTypes (idx).GetElement(argument_id) == BL_FUNCTION_ARGUMENT_REFERENCE) { bf << '&'; @@ -707,7 +707,7 @@ _String const ExportBFFunction (long idx, bool recursive) { bf << ") {\n"; bf << body->sourceText; bf << "\n}"; - + if (body->enclosingNamespace.sLength) { bf << "\n}"; } @@ -715,11 +715,11 @@ _String const ExportBFFunction (long idx, bool recursive) { if (recursive) { _List hbl_functions; _AVLListX other_functions (&hbl_functions); - + other_functions.Insert (new _String (hbf_name), HY_BL_HBL_FUNCTION, false, false); - + body->BuildListOfDependancies (other_functions, true); - + for (long i = 0; i < hbl_functions.lLength; i++) { _String * a_name = (_String*)hbl_functions (i); if (! 
a_name -> Equal( &hbf_name)) { @@ -732,30 +732,30 @@ _String const ExportBFFunction (long idx, bool recursive) { } } } - + bf.Finalize(); return bf; - + } //____________________________________________________________________________________ void ClearBFFunctionLists (long start_here) { if (start_here > 0L && start_here < batchLanguageFunctionNames.countitems()) { - + _SimpleList delete_me (batchLanguageFunctionNames.countitems()-start_here, start_here, 1L); - + for (long k = 0; k < delete_me.countitems(); k++) { batchLanguageFunctionNamesIndexed.Delete (batchLanguageFunctionNames.GetItem (delete_me.Get (k))); } - + batchLanguageFunctionNames.DeleteList (delete_me); batchLanguageFunctions.DeleteList (delete_me); batchLanguageFunctionClassification.DeleteList (delete_me); batchLanguageFunctionParameterLists.DeleteList (delete_me); batchLanguageFunctionParameterTypes.DeleteList (delete_me); - - - + + + } /*else { batchLanguageFunctionNames.Clear(); batchLanguageFunctions.Clear(); @@ -802,7 +802,7 @@ long FindBFFunctionName (_String const&s, _VariableContainer const* theP) { }; } - + //ReportWarning (_String ("Looking for ") & s.Enquote() & " in global context"); return batchLanguageFunctionNamesIndexed.FindAndGetXtra(&s,-1); } @@ -837,8 +837,8 @@ long AddDataSetToList (_String& theName,_DataSet* theDS) { void KillLFRecord (long lfID, bool completeKill) { /* compile the list of variables which will no longer be referenced */ - - + + if (lfID>=0) { //printf ("\n****\nKillLFRecord\n%s\n****\n", (char const*) * (_String*)likeFuncNamesList.GetItem (lfID)); _LikelihoodFunction *me = (_LikelihoodFunction*)likeFuncList (lfID); @@ -855,21 +855,21 @@ void KillLFRecord (long lfID, bool completeKill) { myVars << me->GetIndependentVars(); myVars << me->GetDependentVars(); - - + + for (unsigned long k=0UL; ksLength) { _LikelihoodFunction *lf = (_LikelihoodFunction*)likeFuncList (k); otherVars << lf->GetIndependentVars(); otherVars << lf->GetDependentVars(); - + unsigned long 
component_count = lf->CountObjects(kLFCountPartitions); - + for (long tree_index = 0UL; tree_index < component_count; tree_index++) { lf->GetIthTree(tree_index)->CompileListOfModels(otherModels); } - + } } } @@ -877,7 +877,7 @@ void KillLFRecord (long lfID, bool completeKill) { myVars.Sort (); otherVars.Sort(); otherModels.Sort(); - + wastedVars.Subtract(myVars, otherVars); for (unsigned long k=0UL; kCountObjects(kLFCountPartitions); for (long tree_index = 0UL; tree_index < component_count; tree_index++) { @@ -914,9 +914,9 @@ void KillLFRecord (long lfID, bool completeKill) { } // unregister event listeners to release reference counts - + me->UnregisterListeners(); - + if (lfIDGetGlobalVars (l); - + for (unsigned long k=0UL; kGetName()); } l.Clear (); - + unsigned long partition_count = lf->CountObjects(kLFCountPartitions); for (unsigned long k=0UL; kerrorHandlingMode; errorState = currentExecutionList->errorState; @@ -1207,9 +1207,9 @@ void _ExecutionList::ReportAnExecutionError (_String errMsg, bool doCurrentCo } } setParameter(_hyLastExecutionError, new _FString (errMsg, false), nil, false); - + break; - default: + default: WarnError (errMsg); } } @@ -1260,8 +1260,8 @@ void _ExecutionList::BuildListOfDependancies (_AVLListX & collection, bool rec _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { //setParameter(_hyLastExecutionError, new _MathObject, nil, false); - - + + _ExecutionList* stashCEL = currentExecutionList; callPoints << currentCommand; executionStack << this; @@ -1272,7 +1272,7 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { } else { parent = nil; } - + _FString cfp (PeekFilePath() ? 
*PeekFilePath () :emptyString), * stashed = (_FString*)FetchObjectFromVariableByType (&pathToCurrentBF, STRING); @@ -1297,7 +1297,7 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { TimeDifference timer; (((_ElementaryCommand**)lData)[currentCommand])->Execute(*this); timeDiff = timer.TimeSinceStart(); - + if (profileCounter) { // a call to _hyphy_profile_dump can set this to NULL @@ -1351,7 +1351,7 @@ long _ExecutionList::ExecuteAndClean (long g, _String* fName) skipWarningMessages = false; ClearBFFunctionLists (g); - + return f; } @@ -1384,7 +1384,7 @@ bool _ExecutionList::TryToMakeSimple (void) long parseCode = Parse(f,*formulaString,fpc,f2); if (parseCode == HY_FORMULA_EXPRESSION || parseCode == HY_FORMULA_VARIABLE_VALUE_ASSIGNMENT || parseCode == HY_FORMULA_FORMULA_VALUE_ASSIGNMENT) { - + if (f->AmISimple(stackDepth,varList)) { try { if (parseCode == HY_FORMULA_FORMULA_VALUE_ASSIGNMENT) { @@ -1396,17 +1396,17 @@ bool _ExecutionList::TryToMakeSimple (void) f->GetIthTerm (0)->SetAVariable(mx->GetAVariable()); _Operation * last = f->GetIthTerm(assignment_length-1); if (! 
(last->TheCode() == HY_OP_CODE_MCOORD && last->GetNoTerms() == 2)) throw 0; - - + + f2->GetList() << f->GetList(); f->Clear(); - + _Formula *t = f2; f2 = f; f = t; - + } - + } catch (int e) { status = false; break; @@ -1414,13 +1414,13 @@ bool _ExecutionList::TryToMakeSimple (void) aStatement->simpleParameters<simpleParameters<<(long)f; aStatement->simpleParameters<<(long)f2; - - + + aStatement->simpleParameters<AppendNewInstance ((_String*)GetItem(i)->toStr()); } result->Finalize(); - + currentExecutionList = stash; return result; } @@ -1598,7 +1598,7 @@ _String* _ExecutionList::GetNameSpace () { _String _ExecutionList::AddNameSpaceToID (_String& theID, _String * extra) { _String name_space; - + if (extra && extra->sLength) { if (nameSpacePrefix) { name_space = (*nameSpacePrefix->GetName())&'.'& *extra; @@ -1607,10 +1607,10 @@ _String _ExecutionList::AddNameSpaceToID (_String& theID, _String * extra) { } } else { if (nameSpacePrefix) { - name_space = (*nameSpacePrefix->GetName()); + name_space = (*nameSpacePrefix->GetName()); } } - + return AppendContainerName (theID, &name_space); } @@ -1628,18 +1628,18 @@ _String _ExecutionList::TrimNameSpaceFromID (_String& theID) -/* - - holds all the expressions that require that spaces between them and the next expressions be +/* + + holds all the expressions that require that spaces between them and the next expressions be maintained, like - + return expr - DataSet expr = + DataSet expr = DateSetFilter expr = - - if (expr) is an identifier, then the spaces will be maintained, otherwise they will + + if (expr) is an identifier, then the spaces will be maintained, otherwise they will be squished, causing incorrect behavior (like DataSet(expr) will gets parsed as a formula) - + initialized in _HBL_Init_Const_Arrays */ @@ -1650,7 +1650,7 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce } char * savePointer = s.sData; - + _SimpleList triePath; while (s.Length()) { // repeat while there is stuff left in 
the buffer @@ -1663,13 +1663,13 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce if (!currentLine.Length()) { continue; } - + triePath.Clear(false); long prefixTreeCode = _HY_ValidHBLExpressions.FindKey (currentLine, &triePath, true); - + _List *pieces = nil; _HBLCommandExtras *commandExtraInfo = nil; - + if (prefixTreeCode != HY_TRIE_NOTFOUND) { prefixTreeCode = _HY_ValidHBLExpressions.GetValue(prefixTreeCode); long commandExtra = _HY_HBLCommandHelper.FindLong (prefixTreeCode); @@ -1696,7 +1696,7 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce acknError (parseFail); } DeleteObject (pieces); - return false; + return false; } } if (commandExtraInfo->do_trim) { @@ -1705,9 +1705,9 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce } } } - + bool handled = false; - + switch (prefixTreeCode) { case HY_HBL_COMMAND_FOR: _ElementaryCommand::BuildFor (currentLine, *this, pieces); @@ -1750,18 +1750,18 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce _ElementaryCommand::ExtractValidateAddHBLCommand (currentLine, prefixTreeCode, pieces, commandExtraInfo, *this); handled = true; break; - + } - + if (handled) { DeleteObject (pieces); if (currentLine.Length() > 1UL) { WarnError (currentLine.Enquote() & " contained syntax errors, possibly a missing semicolon. 
" & s.Enquote()); } } - - // 20111212: this horrendous switch statement should be replaced with a - // prefix tree lookup + + // 20111212: this horrendous switch statement should be replaced with a + // prefix tree lookup if (!handled) { if (currentLine.startswith (blFunction)||currentLine.startswith (blFFunction)||currentLine.startswith (blLFunction) || currentLine.startswith (blNameSpace)) { // function declaration @@ -1873,7 +1873,7 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce _ElementaryCommand::ConstructSCFG (currentLine, *this); } else if (currentLine.startswith (blBGM)) { // Bayesian Graphical Model definition _ElementaryCommand::ConstructBGM (currentLine, *this); - } + } // plain ol' formula - parse it as such! else { _String checker (currentLine); @@ -1896,7 +1896,7 @@ bool _ExecutionList::BuildList (_String& s, _SimpleList* bc, bool proce } } } - + /*if (currentLine.sLength > 1 || currentLine.sLength == 1 && currentLine.getChar(0) != ';'){ WarnError (_String ("Missing semicolon before ") & currentLine); return false; @@ -2621,13 +2621,13 @@ BaseRef _ElementaryCommand::toStr (unsigned long) result = _String ("Assert ") & "'" & *converted & "'"; break; } - + case HY_HBL_COMMAND_NESTED_LIST: { converted = (_String*)parameters(0)->toStr(); result = _String("Call a nested list (via namespace):\n ") & *converted; break; } - + } DeleteObject (converted); @@ -2639,9 +2639,9 @@ BaseRef _ElementaryCommand::toStr (unsigned long) void _ElementaryCommand::ExecuteCase0 (_ExecutionList& chain) { chain.currentCommand++; - + _String * errMsg = nil; - + try { if (chain.cli) { @@ -2659,7 +2659,7 @@ void _ElementaryCommand::ExecuteCase0 (_ExecutionList& chain) f2; _String* theFla = (_String*)parameters(0); - + _FormulaParsingContext fpc (nil, chain.nameSpacePrefix); long parseCode = Parse(&f,(*theFla),fpc,&f2); @@ -2672,7 +2672,7 @@ void _ElementaryCommand::ExecuteCase0 (_ExecutionList& chain) simpleParameters <sData); @@ -2780,7 +2780,7 @@ void 
_ElementaryCommand::ExecuteCase4 (_ExecutionList& chain) errMsg = new _String(_String(" did not evaluate to a number, a string, or a null (") & (_String*)result->toStr() & ")"); throw (0); } - + if (expression) { delete expression; } @@ -2821,8 +2821,8 @@ void _ElementaryCommand::ExecuteCase5 (_ExecutionList& chain) FILE* df; _String fName = *(_String*)parameters(1); _DataSet*ds; - - + + if (simpleParameters.lLength == 1) { fName = GetStringFromFormula ((_String*)parameters(1),chain.nameSpacePrefix); @@ -2945,13 +2945,13 @@ void _ElementaryCommand::ExecuteCase11 (_ExecutionList& chain) while (1) { _CalcNode *thisNode = ti.Next(); - + if ((theModelID = thisNode->GetModelIndex()) == HY_NO_MODEL) { // this node has no model done = false; break; } theFreqID = modelFrequenciesIndices.lData[theModelID]; - + while((thisNode = ti.Next()) && !ti.IsAtRoot()) { theModelID = thisNode->GetModelIndex(); if (theModelID == HY_NO_MODEL) { // no model @@ -3121,13 +3121,13 @@ void _ElementaryCommand::ExecuteCase12 (_ExecutionList& chain) _Variable* catValVar = nil, * catNameVar = nil; - - + + if (parameters.lLength>3) { // a matrix to store simulated category values _String matrixName (chain.AddNameSpaceToID(*(_String*)parameters(3))); - + if (!(catValVar = CheckReceptacle(&matrixName,blSimulateDataSet,true))) { return; } else { @@ -3228,22 +3228,22 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { _String *commands, theCommand, *namespc = nil; - + _AVLListXL * inArg = nil; _List * inArgAux = nil; bool pop_path = false; - + try { if (code == 39) { commands = ProcessCommandArgument((_String*)parameters(0)); } else { _String filePath = GetStringFromFormula((_String*)parameters(0),chain.nameSpacePrefix), originalPath = filePath; - + FILE * commandSource = nil; - + _Parameter reload = 0.; checkParameter(alwaysReloadLibraries, reload, 0.); @@ -3257,7 +3257,7 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { // printf ("%s\n", tryPath.sData); 
tryPath.ProcessFileName (false, false, (Ptr)chain.nameSpacePrefix); - + if (loadedLibraryPaths.Find(&tryPath) >= 0 && parameters.lLength == 2 && reload < 0.5) { ReportWarning (_String("Already loaded '") & originalPath & "' from " & tryPath); return; @@ -3273,16 +3273,16 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { } } - + if (commandSource == nil) { filePath.ProcessFileName (false,false,(Ptr)chain.nameSpacePrefix); - + if (code == 66 && loadedLibraryPaths.Find(&filePath) >= 0 && parameters.lLength == 2 && reload < 0.5) { ReportWarning (_String("Already loaded '") & originalPath & "' from " & filePath); return; } - + if ((commandSource = doFileOpen (filePath.getStr(), "rb")) == nil) { WarnError (_String("Could not read command file in ExecuteAFile.\nOriginal path: '") & originalPath & "'.\nExpanded path: '" & filePath & "'"); @@ -3381,7 +3381,7 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { } else { bool result = false; _ExecutionList exc (theCommand,namespc, false, &result); - + if (!result) { chain.ReportAnExecutionError("Encountered an error while parsing HBL", false, true); } else { @@ -3406,9 +3406,9 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { } } } catch (int e) { - + } - + DeleteObject (inArg); DeleteObject (inArgAux); @@ -3911,9 +3911,9 @@ void _ElementaryCommand::ExecuteCase25 (_ExecutionList& chain, bool issscan shifter = simpleParameters.lData[0] < 0; bool skipDataDelete = false; - + _Variable* iseof = CheckReceptacle (&hasEndBeenReached,emptyString,false); - + if (currentParameter==_String("stdin")) { // if (chain.stdinRedirect) { @@ -3923,7 +3923,7 @@ void _ElementaryCommand::ExecuteCase25 (_ExecutionList& chain, bool issscan if (! 
(redirect && redirect->theString->sLength)) { StringToConsole (*data); NLToConsole(); } - + } else { if (!CheckEqual(iseof->Compute()->Value(),0) && currentParameter.Equal (&scanfLastFilePath)) { WarnError ("Ran out of standard input\n"); @@ -4218,8 +4218,8 @@ void _ElementaryCommand::ExecuteCase31 (_ExecutionList& chain) f = modelMatrixIndices[f3]; usingLastDefMatrix = true; } - - + + if (doExpressionBased) { _String matrixExpression (ProcessLiteralArgument((_String*)parameters.lData[1],chain.nameSpacePrefix)), defErrMsg = _String ("The expression for the explicit matrix exponential passed to Model must be a valid matrix-valued HyPhy formula that is not an assignment") & ':' & matrixExpression; @@ -4233,9 +4233,9 @@ void _ElementaryCommand::ExecuteCase31 (_ExecutionList& chain) WarnError (defErrMsg & " parse code = " & parseCode & " " & (parseCode == HY_FORMULA_EXPRESSION ? (_String(", object type code ") & _String((long) isExpressionBased->ObjectClass())) : emptyString )); return; } - + //for (unsigned long k = 0; k < isExpressionBased - + checkMatrix = (_Matrix*)isExpressionBased->Compute(); @@ -4257,9 +4257,9 @@ void _ElementaryCommand::ExecuteCase31 (_ExecutionList& chain) } checkMatrix = (_Matrix*)checkVar->GetValue(); } - - + + // so far so good matrixDim = checkMatrix->GetHDim(); if ( matrixDim!=checkMatrix->GetVDim() || matrixDim<2 ) { @@ -4280,9 +4280,9 @@ void _ElementaryCommand::ExecuteCase31 (_ExecutionList& chain) WarnError (*parameterName & " must refer to a column/row vector in the call to Model = ..."); return; } - + checkMatrix = (_Matrix*)checkVar->GetValue(); - + if (checkMatrix->GetVDim()==1UL) { if (checkMatrix->GetHDim()!=matrixDim) { WarnError (*parameterName & " must be a column vector of the same dimension as the model matrix in the call to Model = ..."); @@ -4384,7 +4384,7 @@ void _ElementaryCommand::ExecuteCase32 (_ExecutionList& chain) long do_markdown; checkParameter (markdownOutput,do_markdown,0L); - + _Variable* holder; if 
(fixedLength<0) { @@ -4446,20 +4446,20 @@ void _ElementaryCommand::ExecuteCase32 (_ExecutionList& chain) f = FindDataFilter (nmspName); if (f>=0) { parameters.Delete(4); - + _DataSetFilter const *theFilter = GetDataFilter (f); _DataSet *linked_set = theFilter->GetData(); - + for (unsigned long species_index = 0; species_index < theFilter->NumberSpecies(); species_index ++) { if (exclusions.BinaryFind(species_index) >= 0) { continue; } - + choices < &((*new _List) << linked_set->GetSequenceName(species_index) < new _String (_String ("Taxon ") & (species_index + 1) & '(' & *linked_set->GetSequenceName(species_index) & ')')); } - + validChoices = true; parameters&& & choices; } else { @@ -4477,7 +4477,7 @@ void _ElementaryCommand::ExecuteCase32 (_ExecutionList& chain) < new _String (_String ("Taxon ") & (species_index + 1) & '(' & *linked_set->GetSequenceName(species_index) & ')')); } - + validChoices = true; parameters&& & choices; } else { @@ -4489,12 +4489,12 @@ void _ElementaryCommand::ExecuteCase32 (_ExecutionList& chain) if (f>=0) { parameters.Delete(4); - + _Variable *theSet = LocateVar (modelMatrixIndices.lData[f]); _SimpleList modelParms; - + choices << &((*new _List) < "All Parameters" < "All local model parameters are constrained"); - + _AVLList modelParmsA (&modelParms); theSet->ScanForVariables(modelParmsA,false); modelParmsA.ReorderList(); @@ -4506,7 +4506,7 @@ void _ElementaryCommand::ExecuteCase32 (_ExecutionList& chain) choices << &((*new _List) << LocateVar(modelParms.lData[f])->GetName() < new _String (_String ("Constrain parameter ") & *LocateVar(modelParms.lData[f])->GetName())); - + } validChoices = true; parameters&& & choices; @@ -4609,13 +4609,13 @@ void _ElementaryCommand::ExecuteCase32 (_ExecutionList& chain) WarnError ("Unhandled request for data from standard input in ChoiceList in headless HyPhy"); return; #else - - - + + + if (do_markdown) { printf ("\n\n####%s\n", dialog_title.getStr()); } else { - + printf ("\n\n\t\t\t+"); for (f = 1; 
fgetStr(),((_String*)(*(_List*)(*theChoices)(choice))(1))->getStr()); } } - + printf ("\n\n%sPlease choose an option (or press q to cancel selection):", do_markdown ? ">" : ""); _String buffer (StringFromConsole()); if (buffer.sData[0] == 'q' || buffer.sData[0] =='Q') { @@ -4857,13 +4857,13 @@ void _ElementaryCommand::ExecuteCase36 (_ExecutionList& chain) // GetInformation() void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { chain.currentCommand++; - + _String matrixName = chain.AddNameSpaceToID(*(_String*)parameters(0)), *objectName = (_String*)parameters(1); - - + + _Matrix *result = nil; - + // object is a non-emptyString string if (objectName->sLength > 2 && objectName->sData[0] == '"' && objectName->sData[objectName->sLength-1] == '"') { // regular expression @@ -4872,13 +4872,13 @@ void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { Ptr regex = PrepRegExp (®Exp, errNo, true); if (regex) { _List matches; - - - + + + _SimpleList tcache; long iv, k = variableNames.Traverser (tcache, iv, variableNames.GetRoot()); - + for (; k>=0; k = variableNames.Traverser (tcache, iv)) { _String* vName = (_String*)variableNames.Retrieve (k); _SimpleList mtch; @@ -4886,13 +4886,13 @@ void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { if (mtch.lLength) { matches << vName; } - + } - + if (matches.lLength) { result = new _Matrix (matches); } - + FlushRegExp (regex); } else { WarnError (GetRegExpError (errNo)); @@ -4910,13 +4910,13 @@ void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { if (theObject->IsCategory()) { _CategoryVariable * thisCV = (_CategoryVariable*)theObject; thisCV->Refresh(); - + _Matrix *values = thisCV->GetValues(), *weights = thisCV->GetWeights(!thisCV->IsUncorrelated()); - + f = values->GetHDim()*values->GetVDim(); result = new _Matrix (2,f,false,true); - + for (long k = 0; ktheData[k] = values->theData[k]; result->theData[f+k] = weights->theData[k]; @@ -4930,10 +4930,10 @@ void 
_ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { } } else { if (theObject->ObjectClass() == TOPOLOGY || theObject->ObjectClass() == TREE) { - + _List* map = ((_TreeTopology*)theObject)->MapNodesToModels (); _AssociativeList* return_this = new _AssociativeList(); - + for (unsigned long i = 0; i < map->lLength; i++) { _List * nodeInfo = (_List*) map->GetItem(i); return_this->MStore(*(_String*)nodeInfo->GetItem(0), *(_String*)nodeInfo->GetItem (1)); @@ -4942,7 +4942,7 @@ void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { DeleteObject (map); } } - + if ((!result)&& theObject->ObjectClass()==NUMBER) { checkPointer(result = new _Matrix (1,3,false,true)); result->theData[0]=theObject->Compute()->Value(); @@ -4959,14 +4959,14 @@ void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { if (f==0) { f++; } - + _List catVars; - + for (long k=0; kGetCategoryVars().lLength; k++) { _String varName = *LocateVar(lf->GetCategoryVars().lData[k])->GetName(); catVars && & varName; } - + result = (_Matrix*) checkPointer(new _Matrix (catVars)); } else { if ((f = FindDataFilter(objectNameID))>=0) @@ -4981,33 +4981,33 @@ void _ElementaryCommand::ExecuteCase37 (_ExecutionList& chain) { // for models, return the list of variables in the model _SimpleList modelParms; _AVLList modelParmsA (&modelParms); - - + + if (IsModelOfExplicitForm (f)) { ((_Formula*)modelMatrixIndices.lData[f])->ScanFForVariables(modelParmsA,false); } else { LocateVar (modelMatrixIndices.lData[f])->ScanForVariables(modelParmsA,false); - + } _List modelPNames; - + for (unsigned long vi=0; viGetName(); } - + result = new _Matrix (modelPNames); } } } } } - + if (!result) { result = new _Matrix (0,0,false,false); } - + CheckReceptacleAndStore (&matrixName, emptyString, true, result, false); - + } @@ -5037,7 +5037,7 @@ void _ElementaryCommand::ExecuteCase43 (_ExecutionList& chain) if (terminateExecution) { return; } - + _Formula * dF = (code==43)?theExpression.Differentiate 
(*(_String*)parameters(2),false):nil; @@ -5175,31 +5175,31 @@ void _ElementaryCommand::ExecuteCase45 (_ExecutionList& chain) void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { chain.currentCommand++; - + _String *arg1 = (_String*)parameters(1), *arg2 = (_String*)parameters(0), errMsg; - + const _String source_name = AppendContainerName(*arg1,chain.nameSpacePrefix); - + long object_type = HY_BL_DATASET|HY_BL_DATASET_FILTER; BaseRefConst source_object = _HYRetrieveBLObjectByName (source_name, object_type,nil,false); - + //_DataSetFilter const * dsf = GetDataFilter (filter_name); //_DataSet const * ds = - + if (source_object == nil) { errMsg = source_name.Enquote('\'') & " is not a defined data set / filter ID "; } else { - + const _String receptacle_name = AppendContainerName(*arg2,chain.nameSpacePrefix); _Variable * stVar = CheckReceptacle(&receptacle_name,"GetDataInfo"); - + if (stVar) { - + _DataSetFilter const * filter_source = object_type == HY_BL_DATASET_FILTER ? (_DataSetFilter const *)source_object : nil; _DataSet const * dataset_source = filter_source ? 
nil : (_DataSet const *)source_object; - + switch (parameters.lLength) { case 2UL: { // get site->pattern map if (filter_source) { @@ -5209,16 +5209,16 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } } break; - + case 3UL: { // data parameters, or sequence string _String argument = ProcessLiteralArgument ((_String*)parameters(2),chain.nameSpacePrefix); if (argument == _String("CHARACTERS")) { _List characters; if (filter_source) { - + unsigned long character_count = filter_source->GetDimension(true), fd = filter_source->GetUnitLength(); - + for (unsigned long idx = 0UL; idx < character_count; idx++) { characters < new _String (filter_source->ConvertCodeToLetters (filter_source->CorrectCode(idx), fd)); } @@ -5232,14 +5232,14 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } else if (argument == _String ("PARAMETERS")) { // argument == _String("CHARACTERS") if (filter_source) { _AssociativeList * parameterInfo = new _AssociativeList; - + (*parameterInfo) < (_associative_list_key_value){"ATOM_SIZE", new _Constant (filter_source->GetUnitLength())} < (_associative_list_key_value){"EXCLUSIONS", new _FString (filter_source->GetExclusions())} < (_associative_list_key_value){"SITES_STRING", new _FString ((_String*)filter_source->theOriginalOrder.ListToPartitionString())} < (_associative_list_key_value){"SEQUENCES_STRING", new _FString ((_String*)filter_source->theNodeMap.ListToPartitionString())}; - + stVar->SetValue (parameterInfo,false); - + } else { errMsg = argument.Enquote('\'') & " is a supported argument for the dataset source"; } @@ -5254,7 +5254,7 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } } else { // argument == _String("CONSENSUS") long seqID = ProcessNumericArgument ((_String*)parameters(2),chain.nameSpacePrefix); - + if (filter_source) { if (seqID>=0 && seqID < filter_source->NumberSpecies()) { stVar->SetValue (new _FString (filter_source->GetSequenceCharacters(seqID)),false); @@ -5276,23 
+5276,23 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } // else numeric cases } break; - + case 4UL : { if (filter_source) { long seq = ProcessNumericArgument ((_String*)parameters(2),chain.nameSpacePrefix), site = ProcessNumericArgument ((_String*)parameters(3),chain.nameSpacePrefix); - + if (site >=0 && siteGetPatternCount()) { if ( seq>=0 && seqNumberSpecies()) { _Matrix * res = new _Matrix (filter_source->GetDimension (true), 1, false, true); - + _Parameter onlyTheIndex = 0.0; checkParameter (getDataInfoReturnsOnlyTheIndex,onlyTheIndex,0.0); - - + + _String character (filter_source->RetrieveState(site, seq)); long theValue = filter_source->Translate2Frequencies (character, res->theData, true); - + if (onlyTheIndex > 0.5) { stVar->SetValue (new _Constant (theValue),false); DeleteObject (res); @@ -5302,15 +5302,15 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } else { _Parameter count_gaps = 0.0; checkParameter (hfCountGap,count_gaps,1.0); - - + + _Matrix * accumulator = new _Matrix (filter_source->GetDimension (true), 1, false, true), * storage = new _Matrix (filter_source->GetDimension (true), 1, false, true); - - - + + + _String *buffer = filter_source->MakeSiteBuffer(); - + for (long species_index = filter_source->NumberSpecies()-1; species_index >= 0; species_index --) { filter_source->RetrieveState(site,species_index,*buffer, false); filter_source->Translate2Frequencies (*buffer, storage->theData, count_gaps >= 0.5); @@ -5318,9 +5318,9 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } DeleteObject (storage); stVar -> SetValue (accumulator, false); - + DeleteObject (buffer); - + } } else { errMsg = _String (site) & " is an invalid site index"; @@ -5330,7 +5330,7 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } } break; - + case 5UL: { if (filter_source) { long seq1 = ProcessNumericArgument ((_String*)parameters(2),chain.nameSpacePrefix), @@ -5338,7 +5338,7 @@ void 
_ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { if ( seq1>=0 && seq2 >=0 && seq1< filter_source->NumberSpecies() && seq2 NumberSpecies()) { _String* resFlag = (_String*)parameters(4); _Matrix * res; - + if (pcAmbiguitiesAverage.Equal (resFlag)) { res = filter_source->ComputePairwiseDifferences (seq1,seq2,kAmbiguityHandlingAverageFrequencyAware); } else if (pcAmbiguitiesResolve.Equal (resFlag)) { @@ -5348,7 +5348,7 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } else { res = filter_source->ComputePairwiseDifferences (seq1,seq2,kAmbiguityHandlingResolveFrequencyAware); } - + stVar->SetValue (res,false); } else { errMsg = _String (seq1).Enquote() & "," & _String (seq2).Enquote() & " is an invalid sequence pair specification."; @@ -5362,8 +5362,8 @@ void _ElementaryCommand::ExecuteCase46 (_ExecutionList& chain) { } else { errMsg = receptacle_name.Enquote() & " is not a valid receptacle identifier"; } - - + + } if (errMsg.sLength) { errMsg = errMsg & " in call to GetDataInfo "; @@ -5469,14 +5469,14 @@ void _ElementaryCommand::ExecuteCase52 (_ExecutionList& chain) { } if (baseSet.sLength == alphabetMatrix->GetVDim ()) { - - + + long unitSize = ((_FString*)alphabetMatrix->GetFormula(1,0)->Compute())->theString->toNum(); if (unitSize >= 1) { _Formula* exclusionFormula = alphabetMatrix->GetFormula(1,1); _String* theExclusions = &emptyString; - + if (exclusionFormula) theExclusions = ((_FString*)exclusionFormula->Compute())->theString; @@ -5570,12 +5570,12 @@ void _ElementaryCommand::ExecuteCase52 (_ExecutionList& chain) { if (errMsg.sLength == 0) { long filterID = StoreDataFilter (simulationFilter, newFilter); - + spawningTree->SetUp(); spawningTree->InitializeTreeFrequencies((_Matrix*)freqVar->Compute(),true); - + _String filter_specification = *GetFilterName (filterID) & spawningTree->GetName()->Enquote(',') & *freqVar->GetName(); - + _LikelihoodFunction lf (filter_specification, nil); if (terminateExecution) { @@ -5702,7 +5702,7 @@ void 
_ElementaryCommand::ExecuteCase52 (_ExecutionList& chain) { //____________________________________________________________________________________ bool _ElementaryCommand::Execute (_ExecutionList& chain) { - + switch (code) { case 0: // formula reparser @@ -5778,7 +5778,7 @@ bool _ElementaryCommand::Execute (_ExecutionList& chain) { WarnError ("Illegal right hand side in call to Tree id = ...; it must be a string, a Newick tree spec or a topology"); return false; } - + if (leftOverVars.lLength) { // mod 02/03/2003 - the entire "if" block _SimpleList indep, dep, holder; { @@ -5844,22 +5844,22 @@ bool _ElementaryCommand::Execute (_ExecutionList& chain) { case 14: { // a return statement - + if (parameters.lLength) { - + _Formula * expression = nil; _String * errMsg = nil; try { - - + + if (simpleParameters.lLength < 2) { - + expression = new _Formula; //printf ("Namespace: %x\nCode: %s\n", chain.nameSpacePrefix, ((_String*)parameters(0))->sData); - + _FormulaParsingContext fpc (nil, chain.nameSpacePrefix); long status = Parse (expression, *(_String*)parameters(0), fpc, nil); - + if (status== HY_FORMULA_EXPRESSION) { if (fpc.isVolatile() == false) { simpleParameters<<(long)expression; @@ -5871,12 +5871,12 @@ bool _ElementaryCommand::Execute (_ExecutionList& chain) { throw 0; } } - + _PMathObj ret_val = nil; // important to store the return value in a local variable // because chain.result may be overwritten by recursive calls to // this function - + if (expression) { //printf ("Return interpreted\n"); ret_val = expression->Compute(); @@ -5885,14 +5885,14 @@ bool _ElementaryCommand::Execute (_ExecutionList& chain) { //printf ("Return compiled %d\n", ((_Formula*)simpleParameters(1))->GetList().lLength); ret_val = ((_Formula*)simpleParameters(1))->Compute(); } - + DeleteObject (chain.result); - + chain.result = ret_val; if (ret_val) { chain.result->AddAReference(); } - + if (expression) { delete (expression); } @@ -5905,14 +5905,14 @@ bool _ElementaryCommand::Execute 
(_ExecutionList& chain) { return false; } } - + chain.currentCommand = simpleParameters(0); if (chain.currentCommand<0) { chain.currentCommand = 0x7fffffff; } break; } - + case 16: { // data set merger operation chain.currentCommand++; @@ -6165,7 +6165,7 @@ bool _ElementaryCommand::Execute (_ExecutionList& chain) { ((_ExecutionList*)parameters.GetItem(0))->Execute(&chain); } break; - + default: chain.currentCommand++; } @@ -6180,11 +6180,11 @@ const _String _ElementaryCommand::FindNextCommand (_String& input, bool useSo { long index = input.Length(); - + if (index == 0L) { return emptyString; } - + bool isStringDouble = false, isStringSingle = false, skipping = false; @@ -6208,7 +6208,7 @@ const _String _ElementaryCommand::FindNextCommand (_String& input, bool useSo char lastChar = 0; - + // non printable characters at the end ? while (index>=0 && !isprint(input[--index])) ; input.Trim (0,index, useSoftTrim); @@ -6252,16 +6252,16 @@ const _String _ElementaryCommand::FindNextCommand (_String& input, bool useSo // skip spaces, except for special cases, like return and data set filters - + if (!(isStringDouble || isStringSingle) && isspace(c)) { - + // skip/compress spaces, unless we are in a higher level HBL statement // where spaces can't be compressed // examples include // DataSet|DataSetFilter|return|LikelihoodFunction (something) // need to maintain spaces for this to work appropriately - - + + /*if (index >= 6 && input.getChar(index-1) == 'n' && input.getChar(index-2) == 'r' && input.getChar(index-3) == 'u' @@ -6272,7 +6272,7 @@ const _String _ElementaryCommand::FindNextCommand (_String& input, bool useSo result<<' '; } }*/ - + if (!skipping && index > 0) { _String lookback = input.Cut (MAX (0, index - 20), index-1); long trie_match = _HY_HBL_KeywordsPreserveSpaces.FindKey(lookback.Flip(), nil, true); @@ -6449,7 +6449,7 @@ const _String _ElementaryCommand::FindNextCommand (_String& input, bool useSo } else { input.DuplicateErasing (&emptyString); } - + return 
result; @@ -6779,7 +6779,7 @@ bool _ElementaryCommand::ConstructDataSet (_String&source, _ExecutionList&tar long mark1 = source.FirstNonSpaceFollowingSpace(), mark2 = source.FindTerminator(mark1, '='); ; - + if (mark1==-1 || mark2==-1 || mark2 - 1 <= mark1 ) { WarnErrorWhileParsing ("DataSet declaration missing a valid identifier", source); return false; @@ -6974,8 +6974,8 @@ bool _ElementaryCommand::ConstructStateCounter (_String&source, _ExecutionLis //____________________________________________________________________________________ bool _ElementaryCommand::ConstructChoiceList(_String&source, _ExecutionList&target) { _List args; - - + + ExtractConditions (source,blChoiceList.sLength,args,','); if (args.lLength<5UL) { WarnError ("ChoiceList needs at least 5 arguments"); @@ -7006,7 +7006,7 @@ bool _ElementaryCommand::ConstructChoiceList(_String&source, _ExecutionList&t cv->simpleParameters<<1; } - + cv->addAndClean(target,nil,0); return true; } @@ -7018,14 +7018,14 @@ bool _ElementaryCommand::ConstructReplicateConstraint (_String&source, _Execu // this1 .. etc are all expected to be either trees of nodes of trees with wildcards. { _List args; - + ExtractConditions (source,blReplicate.sLength,args,','); if (args.lLength<2) { _String errMsg ("Expected: ReplicateConstraint (\"constraint to be replicated in terms of this1,...,thisn and wildcard *\", list of n variables to put in place of this1, this2, ... 
thisn);"); acknError (errMsg); return false; } - + _ElementaryCommand cv; cv.code = 26; cv.parameters << args; @@ -7042,8 +7042,8 @@ bool _ElementaryCommand::ConstructTree (_String&source, _ExecutionList&target if (mark1 > 0) { mark1 = source.FirstNonSpaceIndex (mark1 + 1, -1); } - - + + long mark2 = source.FindTerminator(mark1, "="); long mark3 = mark2; @@ -7054,10 +7054,10 @@ bool _ElementaryCommand::ConstructTree (_String&source, _ExecutionList&target _String dsID = source.Cut (mark1,mark2-1); // now look for the opening paren - + //(long& from, char open, char close, bool respectQuote, bool respectEscape) mark3 = source.ExtractEnclosedExpression (mark1, '(', ')', true, true); - + if (mark1 < 0 || mark3 < 0 || mark3 <= mark1) { if (source.Find(getDString)==-1) { @@ -7071,10 +7071,10 @@ bool _ElementaryCommand::ConstructTree (_String&source, _ExecutionList&target } _ElementaryCommand * dsc = new _ElementaryCommand(source.startswith(blTree)?7:54); - + dsc->parameters&&(&dsID); dsc->parameters.AppendNewInstance(new _String(source,mark1,mark3)); - + dsc->addAndClean(target,nil,0); return true; } @@ -7099,7 +7099,7 @@ bool _ElementaryCommand::ConstructDataSetFilter (_String&source, _ExecutionLi acknError ("DataSetFilter declaration missing a valid identifier"); return false; } - + // now look for the opening paren mark1 = source.Find ('(',mark2,-1); @@ -7600,21 +7600,21 @@ bool _ElementaryCommand::ConstructLF (_String&source, _ExecutionList&target) bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& chain) // syntax: function (comma separated list of parameters) {body} { - - + + bool isFFunction = source.beginswith (blFFunction), isLFunction = source.beginswith (blLFunction), isNameSpace = source.beginswith (blNameSpace); - + if (!isNameSpace) { if (isInFunction == _HY_FUNCTION) { WarnError ("Nested function declarations are not allowed"); return false; } - + } - + long mark1 = source.FirstNonSpaceIndex(isNameSpace ? 
blNameSpace.sLength: ((isFFunction||isLFunction)?blFFunction.sLength:blFunction.sLength),-1,1), mark2 = source.Find (isNameSpace ? '{' : '(', mark1, -1); @@ -7631,9 +7631,10 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c if (!funcID->IsValidIdentifier(true)) { WarnError (_String("Not a valid function/namespace identifier '") & *funcID & "'"); isInFunction = _HY_NO_FUNCTION; + DeleteObject (funcID); return false; } - + *funcID = chain.AddNameSpaceToID (*funcID); // now look for the opening paren @@ -7641,11 +7642,11 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c if (!isNameSpace) { isInFunction = _HY_FUNCTION; - + if ((mark1=FindBFFunctionName(*funcID)) >= 0L) { ReportWarning (_String("Overwritten previously defined function:'") & *funcID & '\''); } - + _List arguments; _SimpleList argument_types; @@ -7661,9 +7662,9 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c _String extraNamespace; if (isLFunction) extraNamespace = _HYGenerateANameSpace(); - + for (long k = 0UL; k < arguments.lLength; k++) { - + _String* namespaced = new _String(chain.AddNameSpaceToID (*(_String*)arguments(k), & extraNamespace)); if (namespaced->getChar(namespaced->sLength - 1L) == '&') { namespaced->Trim(0,namespaced->sLength-2); @@ -7673,11 +7674,11 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c } arguments.Replace (k,namespaced,false); } - + _String sfunctionBody (source, upto+1,source.Length()-2); _ExecutionList * functionBody; - + if (isLFunction) { _String * existing_namespace = chain.GetNameSpace(); if (existing_namespace) { @@ -7691,7 +7692,7 @@ bool _ElementaryCommand::ConstructFunction (_String&source, _ExecutionList& c else { functionBody = new _ExecutionList (sfunctionBody,chain.GetNameSpace(),true); } - + // take care of all the return statements while (returnlist.lLength) { @@ -7722,10 +7723,11 @@ bool _ElementaryCommand::ConstructFunction 
(_String&source, _ExecutionList& c } _String namespace_text (source, mark2+1,source.Length()-2); bool success = false; - - + _ExecutionList * namespace_payload = new _ExecutionList (namespace_text, funcID, false, &success); - + + DeleteObject (funcID); + if (success) { _ElementaryCommand * nested_list = new _ElementaryCommand (HY_HBL_COMMAND_NESTED_LIST); nested_list->parameters.AppendNewInstance(namespace_payload); @@ -7762,7 +7764,7 @@ bool _ElementaryCommand::ConstructReturn (_String&source, _ExecutionList&targ ret.parameters&&(&cut_s); } - + if (isInFunction) { returnlist< @@ -377,7 +377,7 @@ bool _Matrix::HasChanged(bool) } else if (storageType == 3) { if (cmd->has_volatile_entries) return true; - + for (unsigned long vid = 0; vid < cmd->varIndex.lLength; vid++) { if (((_Variable*)(((BaseRef*)(variablePtrs.lData))[cmd->varIndex.lData[vid]]))->HasChanged ()) return true; @@ -1294,7 +1294,7 @@ _PMathObj _Matrix::MultByFreqs (long freqID) } } } - + for (unsigned long row_start = 0UL, row = 0UL; row_start < lDim; row_start+=vDim, row++) { unsigned long diag = row_start + row; theMatrix [diag] = 0.; @@ -1321,15 +1321,15 @@ _PMathObj _Matrix::Compute (void) { return this; } } - + if (IsAStringMatrix()) { return this; } - + if (theValue) { DeleteObject (theValue); } - + if (storageType != _SIMPLE_FORMULA_TYPE) { theValue = Evaluate(false); } else { @@ -1393,7 +1393,7 @@ _PMathObj _Matrix::Sum (void) { _PMathObj _Matrix::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionContext* context) { - + switch (opCode) { // first check operations without arguments case HY_OP_CODE_ABS: // Abs return Abs(); @@ -1423,9 +1423,9 @@ _PMathObj _Matrix::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionC case HY_OP_CODE_TYPE: // Type return Type(); } - + _MathObject * arg0 = _extract_argument (arguments, 0UL, false); - + switch (opCode) { // next check operations without arguments or with one argument case HY_OP_CODE_ADD: // + if (arg0) { @@ -1442,7 +1442,7 @@ 
_PMathObj _Matrix::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionC } break; } - + if (arg0) { switch (opCode) { // operations that require exactly one argument case HY_OP_CODE_IDIV: // $ @@ -1489,18 +1489,18 @@ _PMathObj _Matrix::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionC return new _Constant (opCode == HY_OP_CODE_MAX?MaxElement (0):MinElement (0)); } _MathObject * arg1 = _extract_argument (arguments, 1UL, false); - + switch (opCode) { - + case HY_OP_CODE_MACCESS: // MAccess return MAccess (arg0,arg1); - + case HY_OP_CODE_MCOORD: // MCoord return MCoord (arg0, arg1); } - + } - + switch (opCode) { case HY_OP_CODE_ADD: // + case HY_OP_CODE_SUB: // - @@ -1679,7 +1679,7 @@ bool _Matrix::AmISparseFast (_Matrix& whereTo) long k = 0L, i, threshold = lDim*_Matrix::switchThreshold/100; - + for (i=0; iIsAConstant(); - - + + (*this)[vDim*hPos+vPos]; k = Hash (hPos,vPos); ((_Formula**)theData)[k]=theTerm; @@ -2392,7 +2392,7 @@ _Matrix::_Matrix (_List const& sl) if (sl.lLength) { CreateMatrix (this, 1, sl.lLength, false, true, false); Convert2Formulas(); - + for (unsigned long k=0UL; ktoStr(); - } + } } return sendMeBack; } @@ -3014,12 +3014,12 @@ _PMathObj _Matrix::EvaluateSimple (void) { _Matrix * result = new _Matrix (hDim, vDim, bool (theIndex), true); checkPointer (result); - + if (cmd->varIndex.lLength) { for (long i=0; ivarIndex.lLength; i++) { _Variable* curVar = LocateVar(cmd->varIndex.lData[i]); if (curVar->ObjectClass () != MATRIX) { - + if (curVar->IsIndependent()) { cmd->varValues[i].value = LocateVar (cmd->varIndex.lData[i])->Value(); #ifdef __REPORT_DETAILED_COMPS_FOR_SPECIFIC_CALL @@ -3040,8 +3040,8 @@ _PMathObj _Matrix::EvaluateSimple (void) } } } - - + + for (long f = 0; f < cmd->formulasToEval.lLength; f++) { cmd->formulaValues [f] = ((_Formula*)cmd->formulasToEval.lData[f])->ComputeSimple(cmd->theStack, cmd->varValues); #ifdef __REPORT_DETAILED_COMPS_FOR_SPECIFIC_CALL @@ -3058,9 +3058,9 @@ _PMathObj _Matrix::EvaluateSimple 
(void) return result; }*/ } - + long * fidx = cmd->formulaRefs; - + if (theIndex) { result->lDim = lDim; result->bufferPerRow = bufferPerRow; @@ -3068,22 +3068,22 @@ _PMathObj _Matrix::EvaluateSimple (void) result->allocationBlock = allocationBlock; result->theIndex = (long*)MemReallocate((Ptr)result->theIndex,sizeof(long)*lDim); result->theData = (_Parameter*)MemReallocate ((Ptr)result->theData,sizeof(_Parameter)*lDim); - + /*memcpy (result->theIndex,theIndex,sizeof(long)*lDim);*/ - - - - + + + + for (long i = 0; itheData[i] = cmd->formulaValues[fidx[i]]; } - + result->theIndex[i] = idx; } - + /*for (long i = 0; i= 0) { result->theData[i] = cmd->formulaValues[fidx[i]]; } } - + if (hDim==vDim) for (long i = 0; itheData[j]; } - + for (j = k*vDim+k+1; j<(k+1)*vDim; j++) { st-=result->theData[j]; } - + result->theData[i] = st; //} } @@ -3359,9 +3359,9 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract } else { _Parameter _hprestrict_ * argData = secondArg.theData; _Parameter _hprestrict_ * stData = storage.theData; - + long upto = secondArg.lDim - secondArg.lDim%4; - + if (subtract) for (long idx = 0; idx < upto; idx+=4) { stData[idx]-=argData[idx]; @@ -3376,7 +3376,7 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract stData[idx+2]+=argData[idx+2]; stData[idx+3]+=argData[idx+3]; } - + if (subtract) for (long idx = upto; idx < secondArg.lDim; idx++) { stData[idx]-=argData[idx]; @@ -3385,7 +3385,7 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract for (long idx = upto; idx < secondArg.lDim; idx++) { stData[idx]+=argData[idx]; } - + } } else @@ -3576,7 +3576,7 @@ void _Matrix::Multiply (_Matrix& storage, _Parameter c) if (storageType == 1) { // numbers _Parameter _hprestrict_ * destination = storage.theData; _Parameter _hprestrict_ * source = theData; - + if (theIndex) { for (long k = 0; k < lDim; k++) if (storage.theIndex[k] != -1) { @@ -3587,7 +3587,7 @@ void _Matrix::Multiply 
(_Matrix& storage, _Parameter c) destination[k] = source[k]*c; } } - + } else { _Constant * cc = new _Constant (c); checkPointer (cc); @@ -3665,7 +3665,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) /* two square dense matrices */ { unsigned long cumulativeIndex = 0UL; - + const unsigned long dimm4 = vDim - vDim%4, column_shift2 = secondArg.vDim * 2, @@ -3674,70 +3674,70 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) const _Parameter * row = theData; _Parameter * dest = storage.theData; - + #ifndef _SLKP_SSE_VECTORIZATION_ - - - + + + /*#ifdef _SLKP_USE_AVX_INTRINSICS __m256d buffer1, buffer2; __m128d two1, two2; - + double d[2] __attribute__ ((aligned (16))); for (unsigned long i=0UL; i= 256) { long nt = 1; @@ -3860,12 +3860,12 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) + __GNUC_PATCHLEVEL__) #ifdef __HYPHYMPI__ if (_hy_mpi_node_rank == 0) - + #endif nt = MIN(omp_get_max_threads(),secondArg.vDim / _HY_MATRIX_CACHE_BLOCK + 1); #endif for (long r = 0; r < hDim; r ++) { -#ifdef _OPENMP +#ifdef _OPENMP #if GCC_VERSION > 40400 #pragma omp parallel for default(none) shared(r,secondArg,storage) schedule(static) if (nt>1) num_threads (nt) #else @@ -3895,7 +3895,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) updater += pr1 + pr3; } storage.theData[r*secondArg.vDim + c + p] += updater; - } + } } else for (long p = 0; p < upto_p; p++) { _Parameter updater = 0.; @@ -3903,14 +3903,14 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) updater += theData[r*vDim + r2 + p2]*cacheBlockInMatrix2[p][p2]; } storage.theData[r*secondArg.vDim + c + p] += updater; - } + } } } } - + } else { - - + + if (vDim % 4) { long mod4 = vDim-vDim%4; for (long i=0; iTranspose(); } - + for (long s = 0; sObjectClass() == MATRIX) { if (p2 == nil) { _Matrix * nn = (_Matrix*)p; @@ -4916,26 +4916,26 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { if (nn->hDim == hDim && nn->vDim == vDim) { _SimpleList 
hL, vL; - + for (long r=0; r 0.0) { hL << r; vL << c; } - + return ExtractElementsByEnumeration (&hL,&vL); } else { if (nn->hDim > 0 && nn->vDim == 1) { // extract by row _SimpleList hL; - + for (unsigned long r=0UL; rhDim; r++) { long v = floor((*nn)(r,0L)); if (v>=0L && vvDim > 0 && nn->hDim == 1) { // extract by column _SimpleList hL; - + for (long r=0; rvDim; r++) { long v = (*nn)(0,r); if (v>=0 && vObjectClass() == MATRIX) { _Matrix * nn = (_Matrix*)((_Matrix*)p)->ComputeNumeric(); _Matrix * nn2 = (_Matrix*)((_Matrix*)p2)->ComputeNumeric(); - + if (nn->hDim == 1 && nn->vDim == 2 && nn->storageType == 1 && nn2->hDim == 1 && nn2->vDim == 2 && nn2->storageType == 1) { long left = (*nn)(0,0), top = (*nn)(0,1), bottom = (*nn2)(0,1), right = (*nn2)(0,0); - + if (left >= 0 && left < hDim && right >= 0 && right < hDim && left <=right && top >= 0 && top < vDim && bottom >=0 && bottom < vDim && top <= bottom) { _SimpleList hL, vL; - + for (long r=left; r<=right; r++) for (long c=top; c<=bottom; c++) { hL << r; vL << c; } - + _Matrix * subM = ExtractElementsByEnumeration (&hL,&vL); subM->hDim = right-left+1; subM->vDim = bottom-top+1; - + return subM; } } ReportWarning ("Incorrect dimensions or matrix type (must be numeric 2x1 matrices) for an rectangular extract in call to []"); } - + } return new _Constant (0.0); } else { if (p->ObjectClass() == STRING) { - - + + _String aFormulaString = *((_FString*)p)->theString; _Formula f (aFormulaString, currentExecutionList ? 
currentExecutionList->nameSpacePrefix : nil); - + if (!f.IsEmpty()) { /* check formula validity */ - + _String cell_value ("_MATRIX_ELEMENT_VALUE_"), cell_row ("_MATRIX_ELEMENT_ROW_"), cell_column("_MATRIX_ELEMENT_COLUMN_"); - + _Variable * cv = CheckReceptacle(&cell_value, emptyString, false), * cr = CheckReceptacle(&cell_row, emptyString, false), * cc = CheckReceptacle(&cell_column, emptyString, false); - + cv->CheckAndSet (0.0); cr->CheckAndSet (0.0); cc->CheckAndSet (0.0); - + f.Compute(); if (terminateExecution) { return new _Matrix (); } else { - + _Formula * conditionalCheck = nil; - + if (p2 && p2->ObjectClass() == STRING) { conditionalCheck = new _Formula (*((_FString*)p2)->theString, currentExecutionList ? currentExecutionList->nameSpacePrefix : nil); if (conditionalCheck->IsEmpty()) { delete conditionalCheck; conditionalCheck = nil; } - + conditionalCheck->Compute(); if (terminateExecution) { delete conditionalCheck; return new _Matrix (); } } - + _Matrix * retMatrix = new _Matrix (hDim,vDim,false,true); - + long stackDepth = 0; _SimpleList vIndex; - + if (f.AmISimple (stackDepth,vIndex) && (!conditionalCheck || conditionalCheck->AmISimple(stackDepth,vIndex))) { _SimpleFormulaDatum * stack = new _SimpleFormulaDatum [stackDepth+1], * varValues = new _SimpleFormulaDatum [vIndex.lLength]; - + bool constantValue = false; _Parameter constantV = f.Compute()->Value(); - + if (f.IsConstant()) { constantValue = true; constantV = f.Compute()->Value(); } else { f.ConvertToSimple (vIndex); } - - + + if (conditionalCheck) { conditionalCheck->ConvertToSimple(vIndex); } - + if (constantValue && !conditionalCheck) { for (long r=0; rStore (r,c,constantV); } } else { - + long rid []= {cr->GetAVariable(),cc->GetAVariable(),cv->GetAVariable()}; - + for (long k=0; k<3; k++) { rid[k] = vIndex.Find(rid[k]); } - + PopulateArraysForASimpleFormula(vIndex, varValues); - + for (long r=0; r=0) { varValues[rid[0]].value = r; } - + for (long c=0; c=0) { varValues[rid[1]].value = c; } 
- + if (rid[2]>=0) { varValues[rid[2]].value = (*this)(r,c); } - + if (conditionalCheck && CheckEqual(conditionalCheck->ComputeSimple(stack,varValues),0.0)) { if (rid[2]>=0) { retMatrix->Store (r,c,varValues[rid[2]].value); @@ -5114,23 +5114,23 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { } continue; } - + if (constantValue) { retMatrix->Store (r,c,constantV); } else { //printf ("Formula eval (stack depth= %d) (%d, %g, %g) %g\n", stackDepth, rid[2], varValues[rid[2]], f.ComputeSimple(stack,varValues)); - + retMatrix->Store (r,c,f.ComputeSimple(stack,varValues)); } } } - + f.ConvertFromSimple (vIndex); } if (conditionalCheck) { conditionalCheck->ConvertFromSimple(vIndex); } - + delete [] stack; delete [] varValues; } else { @@ -5140,7 +5140,7 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { cc->CheckAndSet (c); cv->CheckAndSet ((*this)(r,c)); _PMathObj fv; - + if (conditionalCheck) { fv = conditionalCheck->Compute(); if (fv->ObjectClass() == NUMBER) @@ -5149,7 +5149,7 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { continue; } } - + fv = f.Compute(); if (fv->ObjectClass()==NUMBER) { retMatrix->Store (r,c,fv->Value()); @@ -5168,48 +5168,48 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { return new _Matrix; } } - + long ind1 = p->Value(), ind2 = -1; - + if (p2) { ind2 = p2->Value(); // handle the row/column access operations here i.e. 
[R][-1] or [-1][R] - + if (ind1 == -1 && ind2 >=0 && ind2 =0 && ind1 =hDim || ind2>=vDim) { MatrixIndexError (ind1,ind2,hDim,vDim); return new _Constant (0.0); } - + if (ind2>=0) { // element access if (storageType == 2) { // formulas if (!theIndex) { @@ -5234,7 +5234,7 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { } else { return new _Constant (theData[ind1*vDim+ind2]); } - + } else { _MathObject* cell; if (!theIndex) { @@ -5251,7 +5251,7 @@ _PMathObj _Matrix::MAccess (_PMathObj p, _PMathObj p2) { } } } - + return new _Constant (0.0); } @@ -5405,7 +5405,7 @@ void _Matrix::MStore (long ind1, long ind2, _Formula& f, long opCode) StoreFormula (ind1,ind2,f_joint); return; } - } + } StoreFormula (ind1,ind2,f); } else { if (!f.IsAConstant()) { @@ -5800,9 +5800,9 @@ void _Matrix::AplusBx (_Matrix& B, _Parameter x) //_____________________________________________________________________________________________ _Parameter _Matrix::Sqr (_Parameter* _hprestrict_ stash) { - + _Parameter diff = 0.0; - + if (hDim!=vDim) { return diff; } @@ -5824,7 +5824,7 @@ _Parameter _Matrix::Sqr (_Parameter* _hprestrict_ stash) { _Parameter p2 = theData[i+1] * theData [j+4]; p1 += theData[i+2] * theData [j+8]; p2 += theData[i+3] * theData [j+12]; - + stash[k] = p1+p2; } } @@ -5834,7 +5834,7 @@ _Parameter _Matrix::Sqr (_Parameter* _hprestrict_ stash) { // loop interchange rocks! 
- + _Parameter _hprestrict_ * column = stash+lDim; _Parameter const _hprestrict_ * source = theData; @@ -5847,47 +5847,47 @@ _Parameter _Matrix::Sqr (_Parameter* _hprestrict_ stash) { if (vDim == 61UL) { for (unsigned long i = 0; i < lDim; i += 61) { _Parameter * row = theData + i; - - + + __m256d sum256 = _mm256_setzero_pd(); - + for (unsigned long k = 0; k < 60; k += 12) { __m256d term0 = _mm256_mul_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k)); __m256d term1 = _mm256_mul_pd (_mm256_loadu_pd (row+k+4), _mm256_loadu_pd (column+k+4)); __m256d term2 = _mm256_mul_pd (_mm256_loadu_pd (row+k+8), _mm256_loadu_pd (column+k+8)); - + __m256d sum01 = _mm256_add_pd(term0,term1); __m256d plus2 = _mm256_add_pd(term2, sum256); - + sum256 = _mm256_add_pd (sum01, plus2); - + } - + stash[i+j] = _avx_sum_4(sum256) + row[60] * column [60]; - + } - + } else { for (unsigned long i = 0; i < lDim; i += vDim) { _Parameter * row = theData + i; - - + + __m256d sum256 = _mm256_setzero_pd(); - + long k; - + for (k = 0; k < loopBound; k += 4) { sum256 = _mm256_add_pd (_mm256_mul_pd (_mm256_loadu_pd (row+k), _mm256_loadu_pd (column+k)), sum256); } - + _Parameter result = _avx_sum_4(sum256); - + for (; k < vDim; k++) { result += row[k] * column [k]; } - + stash[i+j] = result; - + } } @@ -5915,18 +5915,18 @@ _Parameter _Matrix::Sqr (_Parameter* _hprestrict_ stash) { #endif } } - - + + //memcpy (theData, stash, lDim * sizeof (_Parameter)); - - + + for (long s = 0; s < lDim; s++) { StoreIfGreater(diff, fabs (theData[s] - stash[s])); theData[s] = stash[s]; } } - + return diff; } //_____________________________________________________________________________________________ @@ -5939,7 +5939,7 @@ void _Matrix::AgreeObjects (_Matrix& m) Evaluate(true); } } - + if (m.storageType==2) { if (toPolyOrNot!=0.0) { m.ConvertFormulas2Poly (); @@ -5947,7 +5947,7 @@ void _Matrix::AgreeObjects (_Matrix& m) m.Evaluate(true); } } - + if (storageType!=m.storageType) { if (toPolyOrNot) { if (storageType == 
1) { @@ -6141,20 +6141,20 @@ void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) /* Use '+' to denote an element that is gretae than 'M' (the 'middle' element) and '-' to denote an element than is less than 'M' - + Initially we may have something like - + --++--+M--+++--++- and we want to end up with ---------M+++++++ - + Initially, we arrange the elements as - + ----+++M-----++++++, and then swap 'bottommove' pluses (of which there are 3 in this case) with 'topmove' minuses (of which there are 5) - + */ - + if (middle) while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) >= 0L) { @@ -6181,7 +6181,7 @@ void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) if (CompareRows(i,middle) <= 0L) { SwapRows (i, middle+topmove); index->Swap(i, middle+topmove); - + topmove++; while (middle+topmove<=to && CompareRows (middle+topmove,middle) <= 0L) { topmove++; @@ -6197,18 +6197,18 @@ void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) } else if (topmove>bottommove) { long shift = topmove-bottommove; // in the example above, shift = 2 - + for (long i=1; iSwap(middle-i, middle+i+shift); } - // at the end of this loop, the example above will look like + // at the end of this loop, the example above will look like // -------M--+++++++++, so now if we swap 'M' with the last '-', we'll arrive at the desired configuration - + SwapRows (middle, middle+shift); index->Swap (middle, middle+shift); middle+=shift; - + } else { long shift = bottommove-topmove; for (long i=1; iComputeNumeric(); long sortBy = sortOnM->GetHDim()*sortOnM->GetVDim(), maxColumnID = GetVDim(); - + for (long k=0; k= maxColumnID) { - WarnError (_String("Invalid column index to sort on in call to ") & __func__ & " : " & idx); - return new _MathObject(); + WarnError (_String("Invalid column index to sort on in call to ") & __func__ & " : " & idx); + return new _MathObject(); } sortOn << idx; } @@ -6272,7 +6272,7 @@ _PMathObj 
_Matrix::SortMatrixOnColumn (_PMathObj mp) // SLKP 20111109 -- replace with a generic sort function // the code below is BROKEN - + _SimpleList idx (hDim,0,1); _Matrix theColumn (hDim,sortOn.lLength,false,true); @@ -6649,7 +6649,7 @@ _PMathObj _Matrix::Random (_PMathObj kind) // Associative list should contain following arguments: // "PDF" - string corresponding to p.d.f. ("Gamma", "Normal") // "ARG0" ... "ARGn" - whatever parameter arguments (matrices) are required for the p.d.f. - + _AssociativeList * pdfArgs = (_AssociativeList *)kind; _List * keys = pdfArgs->GetKeys(); _String pdfkey ("PDF"), @@ -6658,9 +6658,9 @@ _PMathObj _Matrix::Random (_PMathObj kind) if (arg0->Equal(&pdfkey)) { _String pdf ((_String *) (pdfArgs->GetByKey(pdfkey,STRING))->toStr()), arg ("ARG0"); - + long pdfCode = _HY_MatrixRandomValidPDFs.GetValueFromString (pdf); - + switch (pdfCode) { case _HY_MATRIX_RANDOM_DIRICHLET: return (_Matrix *) DirichletDeviate(); @@ -7150,22 +7150,22 @@ bool _Matrix::Equal(_PMathObj mp) } _Matrix * m = (_Matrix*)mp; - + if (m->storageType == storageType && storageType == 1 && (bool) m->theIndex == (bool) theIndex && m->hDim == hDim && m->vDim == vDim) { if (theIndex) { - + _SimpleList nonZeroThis ((unsigned long)lDim), nonZeroOther((unsigned long)m->lDim), shared; - + NonZeroEntries (nonZeroThis); m->NonZeroEntries (nonZeroOther); - + shared.Intersect(nonZeroThis, nonZeroOther); for (long elementID = 0; elementID < lDim; elementID ++) { - + } - + } else { for (long elementID = 0; elementID < lDim; elementID ++) { if (!CheckEqual(theData[elementID], m->theData[elementID])) { @@ -7173,10 +7173,10 @@ bool _Matrix::Equal(_PMathObj mp) } } } - + return true; } - + return false; } @@ -7266,7 +7266,7 @@ void _Matrix::MultbyS (_Matrix& m, bool leftMultiply, _Matrix* externalSt //_____________________________________________________________________________________________ _PMathObj _Matrix::MultObj (_PMathObj mp) { - + if (mp->ObjectClass()!=ObjectClass()) { if 
(mp->ObjectClass()!=NUMBER) { warnError(-101); @@ -7276,43 +7276,43 @@ _PMathObj _Matrix::MultObj (_PMathObj mp) return (_PMathObj)((*this)*theV).makeDynamic(); } } - + _Matrix* m = (_Matrix*)mp; if (!CheckDimensions (*m)) return new _MathObject; AgreeObjects (*m); - + _Matrix* result = new _Matrix (hDim, m->vDim, false, storageType); checkPointer (result); - + Multiply (*result,*m); return result; - + } //_____________________________________________________________________________________________ _PMathObj _Matrix::MultElements (_PMathObj mp, bool elementWiseDivide) { - - - + + + if (mp->ObjectClass()!=ObjectClass()) { warnError(-101); return new _Matrix (1,1); } _Matrix* m = (_Matrix*)mp; - - + + bool by_column = false; // if the second argument has dimension 1xcolumns of the first matrix, then // result [i][j] is assigned this [i][j] * / argument [0][j] // in other words, divide or multiply each column - + bool by_row = false; // if the first argument has dimension rows of the second matrix x 1 then // result [i][j] is assigned argument [i][j] * / this [i][0] // in other words, divide or multiply each row - + if ( GetHDim()!=m->GetHDim() || GetVDim()!=m->GetVDim()) { if (GetVDim() == m->GetVDim() && m->GetHDim () == 1) { @@ -7333,10 +7333,10 @@ _PMathObj _Matrix::MultElements (_PMathObj mp, bool elementWiseDivide) { } _Matrix* result = new _Matrix (GetHDim(), m->GetVDim(), false, true); - + if (theIndex || m->theIndex) { auto operation = elementWiseDivide ? 
DivNumbers : MultNumbers; - + long index = 0L; if (by_row) { for (long row = 0; row < hDim; row++) { @@ -7439,11 +7439,11 @@ bool _Matrix::CheckDimensions (_Matrix& secondArg) //_____________________________________________________________________________________________ _Matrix _Matrix::operator * (_Matrix& m) { - if (!CheckDimensions (m)) { + if (!CheckDimensions (m)) { _Matrix d; return d; } - + AgreeObjects (m); _Matrix result (hDim, m.vDim, false, storageType); Multiply (result,m); @@ -7475,39 +7475,39 @@ _Matrix _Matrix::operator - (_Matrix& m) //_____________________________________________________________________________________________ BaseRef _Matrix::toStr(unsigned long padding) { - + _String *result = new _String (2048L,true), padder (" ", padding); - + checkParameter (printDigitsSpec,printDigits,0L); - + char number_buffer [256]; - + //if (vDim<500) { if (storageType == 1 || (storageType == 2 && IsAStringMatrix())) { bool printStrings = storageType != 1; - + _Parameter useJSON = 0.0; checkParameter (USE_JSON_FOR_MATRIX, useJSON, 0.0); - + bool doJSON = !CheckEqual(useJSON, 0.0); - + char openBracket = doJSON ? '[' : '{', closeBracket = doJSON ? 
']' : '}'; - + //(*result) << padder; (*result) << openBracket << '\n'; - + for (long i = 0L; iFinalize(); DeleteObject (result); return Compute()->toStr(padding); } - + for (long i = 0; i15L) { digs = 8L; } @@ -7671,7 +7671,7 @@ void _Matrix::toFileStr (FILE*dest, unsigned long padding){ if (j) { fprintf (dest,", "); } - + if (printStrings) { fprintf (dest,"\"");; _Formula * f = GetFormula (i,j); @@ -9355,23 +9355,23 @@ bool _AssociativeList::ParseStringRepresentation (_String& serializedForm, _Form bool doErrors = fpc.errMsg() == nil, compute_keys_values = fpc.buildComplexObjects(); _VariableContainer const* theP = fpc.formulaScope(); - + _ElementaryCommand::ExtractConditions (serializedForm, 0, splitKeys, ',' , false); - + for (unsigned long k = 0UL; k < splitKeys.lLength; k ++) { _List aPair; _ElementaryCommand::ExtractConditions (*(_String*)splitKeys(k), 0, aPair, ':' , false); if (aPair.lLength == 2UL) { _String key (compute_keys_values ? ProcessLiteralArgument((_String*)aPair(0),theP) : *(_String*)aPair(0)), errMsg; - + if (key.sLength == 0UL) { key = *(_String*)aPair(0); } - + _Formula value (*(_String*)aPair(1),theP, doErrors?nil :&errMsg); _PMathObj valueC = compute_keys_values ? value.Compute() : new _MathObject; - + if (valueC) { MStore (key, valueC, compute_keys_values); } else { @@ -9518,14 +9518,14 @@ _PMathObj _AssociativeList::MIterator (_PMathObj p, _PMathObj p2) if (s->Equal (&AVL_ITERATOR_ORDER) || s->Equal (&AVL_ITERATOR_ORDER_VALUE)) { long index = avl.GetByIndex(p2->Compute()->Value()); - + if (index >= 0) { result = s->Equal (&AVL_ITERATOR_ORDER)? 
(new _FString(*((_String**)avl.dataList->lData)[index],false)): ((_PMathObj)avl.GetXtra (index)->makeDynamic()); } else { WarnError ("Index out of bounds in call to AVL iterator (by index)"); } } - + DeleteObject (s); if (result) return result; @@ -9594,7 +9594,7 @@ void _AssociativeList::MStore (_PMathObj p, _PMathObj inObject, bool repl, long if (opCode == HY_OP_CODE_ADD) { _List arguments; arguments << inObject; - + _PMathObj newObject = ((_PMathObj)avl.GetXtra(f))->ExecuteSingleOp(HY_OP_CODE_ADD,&arguments); if (repl == false) { DeleteObject (inObject); @@ -9656,12 +9656,12 @@ void _AssociativeList::MStore (const _String& obj, const _String& info) { //_____________________________________________________________________________________________ _String* _AssociativeList::Serialize (unsigned long padding) { - + _String * outString = new _String (1024L,true), padder (" ", padding); - - - + + + (*outString) << "{"; bool doComma = false; _List * meKeys = GetKeys(); @@ -9671,11 +9671,11 @@ _String* _AssociativeList::Serialize (unsigned long padding) { if (doComma) { (*outString) << ','; } - + (*outString) << '\n'; (*outString) << padder; (*outString) << ' '; - + (*outString) << '"'; outString->EscapeAndAppend(*thisKey, false); (*outString) << '"'; @@ -9714,12 +9714,12 @@ _List* _AssociativeList::GetKeys (void) { //_____________________________________________________________________________________________ void _AssociativeList::FillInList (_List& fill_me) { - + unsigned long ll = fill_me.countitems(); try { // checkpoint the length of the list unsigned long my_length = avl.countitems(); - + for (long index = 0; index < my_length; index++) { _String key (index); if (_PMathObj value = GetByKey (key)) { @@ -9729,16 +9729,16 @@ void _AssociativeList::FillInList (_List& fill_me) { } } } - + catch (int e) { while (fill_me.countitems () > ll) { fill_me.Delete(fill_me.countitems ()-1); } - + _SimpleList hist; long ls, cn = avl.Traverser (hist,ls,avl.GetRoot()); - + while 
(cn >= 0) { _String* aKey = ((_String**)avl.dataList->lData)[cn]; if (aKey) { @@ -9753,7 +9753,7 @@ void _AssociativeList::FillInList (_List& fill_me) { //_____________________________________________________________________________________________ void _AssociativeList::Merge (_PMathObj p) { - //SW20111207: I don't think we should ever have to worry about avl traversing + //SW20111207: I don't think we should ever have to worry about avl traversing //here as long as the other methods are implemented properly @@ -9764,18 +9764,18 @@ void _AssociativeList::Merge (_PMathObj p) if (p && p->ObjectClass() == ASSOCIATIVE_LIST) { _AssociativeList *rhs = (_AssociativeList*) p; - + if (rhs->avl.countitems()) { - + _SimpleList hist; long ls, cn = rhs->avl.Traverser (hist,ls,rhs->avl.GetRoot()); - + /* SLKP20120111: we need to skip over "blanks" (e.g. resulting from previous delete operations) here; using the traversal of the second list is the easiest way to go. */ - + while (cn >= 0) { MStore(*(_String*)(*(_List*)rhs->avl.dataList)(cn),(_PMathObj)rhs->avl.GetXtra (cn),true); cn = rhs->avl.Traverser (hist,ls); @@ -9791,13 +9791,13 @@ void _AssociativeList::Merge (_PMathObj p) _PMathObj _AssociativeList::ExtremeValue (bool do_mimimum) const { _String const * best_key = nil; _Parameter best_value = do_mimimum ? INFINITY : -INFINITY; - + if (avl.countitems()) { - + _SimpleList hist; long ls = -1L, cn = avl.Traverser (hist,ls,avl.GetRoot()); - + while (cn >= 0) { _PMathObj value = (_PMathObj)avl.GetXtra (cn); switch (value->ObjectClass()){ @@ -9819,22 +9819,22 @@ _PMathObj _AssociativeList::ExtremeValue (bool do_mimimum) const { cn = avl.Traverser (hist,ls); } } - + _AssociativeList * result = new _AssociativeList; (*result) < _associative_list_key_value {"key", best_key ? 
new _FString (*best_key, false) : new _MathObject} < _associative_list_key_value {"value", new _Constant (best_value)}; return result; - + } //_____________________________________________________________________________________________ _PMathObj _AssociativeList::Sum (void) { _Parameter sum = 0.; - + _SimpleList hist; long ls, cn = avl.Traverser (hist,ls,avl.GetRoot()); - + while (cn >= 0) { _PMathObj value = (_PMathObj)avl.GetXtra (cn); switch (value->ObjectClass()){ @@ -9859,7 +9859,7 @@ _PMathObj _AssociativeList::Sum (void) { } cn = avl.Traverser (hist,ls); } - + return new _Constant (sum); } @@ -9867,19 +9867,19 @@ _PMathObj _AssociativeList::Sum (void) { _PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionContext* context) { - + switch (opCode) { case HY_OP_CODE_ABS: return new _Constant (Length()); - + case HY_OP_CODE_EVAL: return (_PMathObj) makeDynamic(); - + case HY_OP_CODE_COLUMNS: { // Columns -- get all unique values (as strings) _List unique_values_aux; _AVLList unique_values (&unique_values_aux); - + for (unsigned long k=0UL; klLength; k++) { BaseRef anItem = ((BaseRef*)avl.dataList->lData)[k]; if (anItem) { @@ -9892,7 +9892,7 @@ _PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyE unique_values.ReorderList(); return new _Matrix (*(_List*)unique_values.dataList); } - + case HY_OP_CODE_ROWS: { // Rows - get keys if (avl.emptySlots.lLength) { @@ -9907,22 +9907,22 @@ _PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyE } return new _Matrix (*(_List*)avl.dataList); } - + case HY_OP_CODE_TYPE: // Type return Type(); - + case HY_OP_CODE_MAX: // Max return ExtremeValue (false); - + case HY_OP_CODE_MIN: // Max return ExtremeValue (true); - - + + } - + _MathObject * arg0 = _extract_argument (arguments, 0UL, false); - + switch (opCode) { // next check operations without arguments or with one argument case HY_OP_CODE_ADD: // + if (arg0) { @@ -9931,23 +9931,23 @@ 
_PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyE } return Sum (); } - + if (arg0) { switch (opCode) { // operations that require exactly one argument case HY_OP_CODE_MCOORD: // MCoord return MCoord (arg0); - + case HY_OP_CODE_MUL: // merge Merge (arg0); return new _Constant (avl.countitems()); - + case HY_OP_CODE_SUB: DeleteByKey (arg0); return new _Constant (avl.countitems()); - + case HY_OP_CODE_DIV: - + if (arg0->ObjectClass () == STRING) { if (avl.Find (((_FString*)arg0)->theString) >= 0L) { return new _Constant (1.0); @@ -9959,10 +9959,10 @@ _PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyE } } return new _Constant (0.0); - + } _MathObject * arg1 = _extract_argument (arguments, 1UL, false); - + switch (opCode) { // check operations with 1 or 2 arguments case HY_OP_CODE_MACCESS: // MAccess if (arg1) { @@ -9972,8 +9972,8 @@ _PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyE } } } - - + + switch (opCode) { case HY_OP_CODE_TYPE: case HY_OP_CODE_ADD: @@ -9987,9 +9987,9 @@ _PMathObj _AssociativeList::ExecuteSingleOp (long opCode, _List* arguments, _hyE default: WarnNotDefined (this, opCode,context); } - + return new _MathObject; - + } /*--------------------------------------------------------------------------------------------------------------------------------*/ diff --git a/src/core/operation.cpp b/src/core/operation.cpp index 720a490cf..e000b9ca5 100644 --- a/src/core/operation.cpp +++ b/src/core/operation.cpp @@ -624,6 +624,7 @@ bool _Operation::ExecutePolynomial (_Stack& theScrap, _VariableContainer* theScrap.theStack.Place(temp); return true; } else { + DeleteObject (temp); return false; } From 451b9ff789a0073888c731bdf5c076b6df8d1051 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 19 Jun 2018 14:38:02 -0400 Subject: [PATCH 05/53] FADE simulator; removing -g from MP target --- CMakeLists.txt | 2 +- .../SelectionAnalyses/Simulators/FADE.bf | 643 
++---------------- .../libv3/tasks/alignments.bf | 21 + 3 files changed, 79 insertions(+), 587 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb63c3e26..1c49d968f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if(${GCC_VERSION} VERSION_LESS 6.0) set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -std=gnu++14") else(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g -fsanitize=address -fsanitize=leak") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3") endif(${GCC_VERSION} VERSION_LESS 6.0) if(NOAVX) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf index e6f9aacd2..6272dad45 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf @@ -28,6 +28,7 @@ namespace terms.fade { regimes = "regimes"; bias = "substitution bias"; rate = "rate multiplier"; + generator = "generator"; }; @@ -100,9 +101,7 @@ fade.generator = (utility.Extend (models.protein.empirical.plusF_ge fade.branch_lengths = parameters.helper.tree_lengths_to_initial_values ({"0" : fade.baseline.tree}, None); fade.settings [terms.model] = fade.baseline_model; -fade.settings [terms.fade.generator] = fade.generator; - - +fade.settings [terms.replicates] = fade.replicates; lfunction fade.rate.modifier (fromChar, toChar, namespace, model_type, model) { baseline = Call (^"fade.baseline_model.rate", fromChar,toChar, namespace, model_type, model); @@ -137,15 +136,6 @@ lfunction fade.biased.model.generator (type, residue) { } -fade.bias.residue = "F"; - -fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", - "fade.biased_model", { - "0": "terms.global", - "1": parameters.Quote (fade.bias.residue) - }, - None, - "frequencies.equal"); fade.model.baseline = model.generic.DefineModel("fade.biased.model.generator", @@ 
-156,611 +146,92 @@ fade.model.baseline = model.generic.DefineModel("fade.biased.model.generator", None, "frequencies.equal"); -fade.model_id_to_object = { - "fade.biased_model": fade.model.biased, - "fade.baseline_model": fade.model.baseline - }; +fade.settings [terms.fade.generator] = fade.model.baseline; -fade.model_assignment = { +fade.model_assignment_with_bias = { "fade.baseline_model" : utility.Filter (fade.selected_branches, "_value_", "_value_ == terms.tree_attributes.background"), "fade.biased_model" : utility.Filter (fade.selected_branches, "_value_", "_value_ == terms.tree_attributes.test"), }; +fade.model_assignment_without_bias = { + "fade.baseline_model" : utility.Filter (fade.selected_branches, "_value_", "TRUE"), +}; + parameters.DeclareGlobalWithRanges (fade.parameter.rate, 1, 0, 100); parameters.DeclareGlobalWithRanges (fade.parameter.bias, 1e-10, 1e-10, 100); -model.ApplyModelToTree(fade.tree.name, fade.baseline.tree, None, fade.model_assignment); -//function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions, initial_values, _application_type, keep_track_of_proportional_scalers) { - - -parameters.SetValue (fade.parameter.bias, 1e-10); -parameters.SetValue (fade.parameter.rate, 1); - -estimators.ApplyExistingEstimatesToTree (fade.tree.name, fade.model_id_to_object, (fade.branch_lengths[terms.branch_length])[0], None, {}); - -fprintf (stdout, Format (^fade.tree.name, 1, 1), "\n"); - -parameters.SetValue (fade.parameter.bias, 10); -parameters.SetValue (fade.parameter.rate, 5); - -fprintf (stdout, Format (^fade.tree.name, 1, 1), "\n"); - - -fade.sim_frequencies = fade.model.biased[terms.efv_estimate]; - -DataSet simulated_block = Simulate (^fade.tree.name, fade.sim_frequencies , fade.simulation.matrix, 200); -DataSetFilter simulated_block_filter = CreateFilter (simulated_block, 1); - -fprintf (stdout, simulated_block_filter, "\n"); - -return 0; - - - -namespace fade { - - site.composition.string := fade.CompositionString 
(((cache [^"terms.fade.cache.composition"])[partition_index])[s]); - site.substitution.string := fade.SubstitutionHistory (((cache [^"terms.fade.cache.substitutions"])[partition_index])[s]); - - site_annotation_headers = { - "Composition" : "Aminoacid composition of site", - "Substitutions" : "Substitution history on selected branches" - }; - - if (run_settings["method"] == ^"terms.fade.methods.MH") { - table_headers = {{"rate", "Mean posterior relative rate at a site"} - {"bias", "Mean posterior bias parameter at a site"} - {"Prob[bias>0]", "Posterior probability of substitution bias towards `bias.residue`"} - {"BayesFactor[bias>0]", "Empiricial Bayes Factor for substitution bias towards `bias.residue`"} - {"PSRF", "Potential scale reduction factor - an MCMC mixing measure"} - {"Neff", "Estimated effective sample site for Prob [bias>0]"}}; - - table_screen_output = {{"Site", "Partition", "target", "rate", "bias", "N.eff", "Bayes Factor",site_annotation_headers["Composition"], site_annotation_headers["Substitutions"]}}; - report.biased_site = {{"" + (1+filter_info[s]), - partition_index + 1, - bias.residue, - Format(partition_results[s][0],8,2), - Format(partition_results[s][1],8,2), - Format(partition_results[s][5],8,2), - Format(partition_results[s][3],8,2), - site.composition.string, - site.substitution.string}}; - } else { - table_headers = {{"rate", "Mean posterior relative rate at a site"} - {"bias", "Mean posterior bias parameter at a site"} - {"Prob[bias>0]", "Posterior probability of substitution bias"} - {"BayesFactor[bias>0]", "Empiricial Bayes Factor for substitution bias"} - }; - - - - table_screen_output = {{"Site", "Partition", "target", "rate", "bias", "Bayes Factor", site_annotation_headers["Composition"], site_annotation_headers["Substitutions"]}}; - report.biased_site = {{"" + (1+filter_info[s]), - partition_index + 1, - bias.residue, - Format(partition_results[s][0],8,2), - Format(partition_results[s][1],8,2), - 
Format(partition_results[s][3],8,2), - site.composition.string, - site.substitution.string - }}; - } - - for (partition_index = 0; partition_index < partition_count; partition_index += 1) { - filter_info = (filter_specification [partition_index])[utility.getGlobalValue ("terms.data.coverage")]; - sites_in_partition = utility.Array1D (filter_info); - site_annotations_p = {sites_in_partition, 2}; - site_annotations_p [0] = ""; - - for (s = 0; s < sites_in_partition; s += 1) { - site_annotations_p [s][0] = site.composition.string; - site_annotations_p [s][1] = site.substitution.string; - } - - site_annotations [partition_index] = site_annotations_p; - - } - s = 0; partition_index = 0; -} +fade.replicate_data = {}; +fade.simulation_path = io.PromptUserForString ("Save simulation settings to (all simulation files will be saved to the same location with .N.fas extensions)"); +fprintf (fade.simulation_path, CLEAR_FILE, fade.settings); +fade.sim_frequencies = fade.model.baseline[terms.efv_estimate]; - -for (fade.residue = 0; fade.residue < 20; fade.residue += 1) { - - - fade.bias.residue = fade.alphabet[fade.residue]; - selection.io.startTimer (fade.json [terms.json.timers], "Residue `fade.bias.residue` analysis", 2 + fade.residue); - - if (utility.Has (fade.cache [terms.fade.cache.conditionals], fade.bias.residue, "AssociativeList")) { - fade.conditionals = (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue]; - io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded the phylogenetic likelihood function on the grid"); - } else { - io.ReportProgressBar ("fade", "[`fade.bias.residue`] Computing the phylogenetic likelihood function on the grid"); - fade.model.baseline = model.generic.DefineModel(fade.generator, - "fade.baseline_model", { - "0": "terms.global" - }, - fade.filter_names, - None); +for (fade.block_id = 0; fade.block_id < Abs (fade.site_classes); fade.block_id += 1) { + io.ReportProgressBar ("simulation", "Generating data for selection regime " + 
(fade.block_id+1) ); + fade.this_block = fade.site_classes[fade.block_id]; + if (utility.Has ( fade.this_block, fade.parameter.bias, "Number")) { + fade.bias.residue = fade.parameter.bias; fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", - "fade.biased_model", { - "0": "terms.global", - "1": parameters.Quote (fade.bias.residue) - }, - fade.filter_names, - None); - - - fade.parameter.scalers = { - terms.fade.bias : fade.parameter.bias, - terms.fade.rate : fade.parameter.rate - }; - - utility.Extend ((fade.biased [terms.parameters])[terms.global], fade.parameter.scalers); - parameters.DeclareGlobalWithRanges (fade.parameter.rate, 1, 0, 100); - parameters.DeclareGlobalWithRanges (fade.parameter.bias, 1e-10, 1e-10, 100); + "fade.biased_model", { + "0": "terms.global", + "1": parameters.Quote (fade.bias.residue) + }, + None, + "frequencies.equal"); fade.model_id_to_object = { - "fade.biased_model": fade.model.biased, - "fade.baseline_model": fade.model.baseline - }; - - fade.trees.names = utility.Map (utility.Range (fade.partition_count, 1, 1), "_index_", "'fade.grid_tree_' + _index_"); - fade.lf.components = {fade.partition_count * 2, 1}; - utility.ForEachPair (fade.filter_names, "_index_", "_filter_", - ' - fade.model_assignment = { - "fade.baseline_model" : utility.Filter (fade.selected_branches[_index_], "_value_", "_value_ == terms.tree_attributes.background"), - "fade.biased_model" : utility.Filter (fade.selected_branches[_index_], "_value_", "_value_ == terms.tree_attributes.test"), + "fade.biased_model": fade.model.biased, + "fade.baseline_model": fade.model.baseline }; - - - fade.lf.components [2*(0+_index_)] = _filter_; - fade.lf.components [2*(0+_index_) + 1] = fade.trees.names[_index_]; - model.ApplyModelToTree(fade.trees.names [_index_], fade.trees[_index_], None, fade.model_assignment); - ' - ); - - - - LikelihoodFunction fade.lf = (fade.lf.components); - estimators.ApplyExistingEstimates ("fade.lf", fade.model_id_to_object, 
fade.baseline_fit, None); - - fade.conditionals.raw = fade.ComputeOnGrid ("fade.lf", - fade.grid.MatrixToDict (fade.cache[terms.fade.cache.grid]), - "fade.pass2.evaluator", - "fade.pass1.result_handler"); - - - - (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue] = fade.ConvertToConditionals (fade.conditionals.raw); - io.WriteCacheToFile (fade.path.cache, fade.cache); - } - - - if (fade.run_settings["method"] == ^"terms.fade.methods.VB0") { - if (utility.Has (fade.cache [terms.fade.cache.posterior], fade.bias.residue, "Matrix")) { - io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior means for grid loadings"); - } else { - io.ReportProgressBar ("fade", "[`fade.bias.residue`] Estimating posterior means for grid loadings "); - (fade.cache[terms.fade.cache.posterior])[fade.bias.residue] = fade.RunVariationalBayes (fade.run_settings, - fade.cache[terms.fade.cache.grid], - (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue], - None - ); - } - } else { - if (fade.run_settings["method"] == terms.fade.methods.MH) { - if (utility.Has (fade.cache [terms.fade.cache.mcmc], fade.bias.residue, "AssociativeList")) { - io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior sample for grid loadings"); - } else { - (fade.cache[terms.fade.cache.mcmc])[fade.bias.residue] = fade.RunMCMC (fade.run_settings, - fade.cache[terms.fade.cache.grid], - (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue], - "fade.pass1.result_handler", - None); - io.WriteCacheToFile (fade.path.cache, fade.cache); - } - } else { - if (utility.Has (fade.cache [terms.fade.cache.mcmc], fade.bias.residue, "AssociativeList")) { - io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior sample for grid loadings"); - } else { - (fade.cache[terms.fade.cache.mcmc])[fade.bias.residue] = fade.RunCollapsedGibbs (fade.run_settings, - fade.cache[terms.fade.cache.grid], - (fade.cache [terms.fade.cache.conditionals])[fade.bias.residue], - None - 
); - io.WriteCacheToFile (fade.path.cache, fade.cache); - } - } - - } - - io.ClearProgressBar (); - - namespace fade { - sites = (json [utility.getGlobalValue ("terms.json.input")])[utility.getGlobalValue ("terms.json.sites")]; - grid_points = Rows (cache['grid']); - bias_present_stencil = {grid_points,sites} ["(cache['grid'])[_MATRIX_ELEMENT_ROW_][1]>0."]; - - rates = Transpose ((cache['grid'])[-1][0]); - biases = Transpose ((cache['grid'])[-1][1]); - - - if (run_settings["method"] != ^"terms.fade.methods.VB0") { - - samples = run_settings["samples"]; - chains = run_settings["chains"]; - - results.log_L = {1,samples}; - results.samples = {samples,grid_points}; - - per_chain = samples $ chains; - - - from = 0; - to = per_chain; - - - posterior_mean_rates = {}; - posterior_mean_biases = {}; - denominators = {}; - posteriors = {}; - biased_ks = {}; - - - for (chain_id = 0; chain_id < chains; chain_id += 1) { - io.ReportProgressBar ("PROCESSING", "Samples from chain " + (chain_id + 1)); - - - grid_samples = (((cache[utility.getGlobalValue("terms.fade.cache.mcmc")])[bias.residue])[chain_id])["weights"]; - grid_samples_T = Transpose (grid_samples); - P_ks = grid_samples * - ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]; - - denominators[chain_id] = P_ks; - - - posterior_mean_rates[chain_id] = (grid_samples $ rates * - ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; - - posterior_mean_biases[chain_id] = (grid_samples $ biases * - ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; - - - biased_ks[chain_id] = grid_samples * - (bias_present_stencil $ ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; - - if (run_settings["method"] == ^"terms.fade.methods.MH") { - logL_samples = 
(((cache[utility.getGlobalValue("terms.fade.cache.mcmc")])[bias.residue])[chain_id])["likelihoods"]; - draw_from_this_chain = Random ({1,samples}["_MATRIX_ELEMENT_COLUMN_"], 0); - - for (i = from; i < to; i += 1) { - draw_this_index = draw_from_this_chain[i]; - results.log_L [i] = logL_samples [draw_this_index]; - for (r = 0; r < grid_points; r += 1) { - results.samples [i][r] = grid_samples[draw_this_index][r]; - } - } - } else { - results.samples = grid_samples; - } - - from = to; - if (chain_id == chains - 2) { - to = samples; - } else { - to += per_chain; - } - } - io.ClearProgressBar (); - posterior_mean_over_grid = {grid_points,1}["(+results.samples[-1][_MATRIX_ELEMENT_ROW_])/samples"]; - - } else { - posterior_mean_over_grid = (cache[^"terms.fade.cache.posterior"])[bias.residue]; - posterior_mean_over_grid_T = Transpose (posterior_mean_over_grid); - cache[terms.fade.cache.posterior] = posterior_mean_over_grid; - - P_ks = posterior_mean_over_grid_T * ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]; - - - posterior_mean_rates = (posterior_mean_over_grid_T $ rates * - ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; - posterior_mean_biases = (posterior_mean_over_grid_T $ biases * - ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; - - biased_ks = posterior_mean_over_grid_T * - (bias_present_stencil $ ((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"]) / P_ks; - - } - - prior_weight_bias = +posterior_mean_over_grid ["_MATRIX_ELEMENT_VALUE_*((cache['grid'])[_MATRIX_ELEMENT_ROW_][1]>0.)"]; - - headers.printed = FALSE; - - i = 0; - s = 0; // reset to ensure good re-entrant behavior - - report.sites_found = {}; - //report.posteriors[bias.residue] = {}; - site_results[bias.residue] = {}; - - chain_iterator = utility.Range (chains, 0, 1); - - for (partition_index 
= 0; partition_index < partition_count; partition_index += 1) { - filter_info = (filter_specification [partition_index])[utility.getGlobalValue ("terms.data.coverage")]; - sites_in_partition = utility.Array1D (filter_info); - - partition_posteriors = {}; - if (run_settings["method"] != utility.getGlobalValue ("terms.fade.methods.MH")) { - partition_results = {sites_in_partition, 4}; - } else { - partition_results = {sites_in_partition, 6}; - } - - - - for (s = 0; s < sites_in_partition; s += 1) { - - pp = posterior_mean_over_grid $ (((cache[utility.getGlobalValue("terms.fade.cache.conditionals")])[bias.residue])["conditionals"])[-1][i]; - partition_posteriors [s] = Transpose (pp * (1/(+pp))); - - if (run_settings["method"] != utility.getGlobalValue ("terms.fade.methods.VB0")) { - partition_results[s][0] = fade.ComputeRandNeff ( - utility.Map (chain_iterator, "_value_", "((`&posterior_mean_rates`)[_value_])[-1][`&i`]") - )[0]; - partition_results[s][1] = fade.ComputeRandNeff ( - utility.Map (chain_iterator, "_value_", "((`&posterior_mean_biases`)[_value_])[-1][`&i`]") - )[0]; - - biased_posterior = fade.ComputeRandNeff ( - utility.Map (chain_iterator, "_value_", "((`&biased_ks`)[_value_])[-1][`&i`]") - ); - - partition_results[s][2] = biased_posterior[0]; - partition_results[s][3] = stats.BayesFactor (prior_weight_bias, biased_posterior[0]) ; - - if (run_settings["method"] == utility.getGlobalValue ("terms.fade.methods.MH")) { - partition_results[s][4] = biased_posterior[1]; - partition_results[s][5] = biased_posterior[2]; - } - - } else { - - if (run_settings["method"] == utility.getGlobalValue ("terms.fade.methods.VB0")) { - partition_results [s][0] = posterior_mean_rates[i]; - partition_results [s][1] = posterior_mean_biases[i]; - partition_results [s][2] = biased_ks[i]; - partition_results [s][3] = stats.BayesFactor (prior_weight_bias, biased_ks[i]); - } - } - - - if (partition_results[s][3] >= run_settings["bayes factor"]) { - if (Abs(report.sites_found) == 0 
&& table_output_options[^"terms.table_options.header"]) { - fprintf (stdout, "\n", io.FormatTableRow (table_screen_output,table_output_options)); - table_output_options[^"terms.table_options.header"] = FALSE; - } - fprintf (stdout, io.FormatTableRow (report.biased_site,table_output_options)); - report.sites_found + (1-partition_results [s][3]); - } - - i+=1; - } - - (site_results[bias.residue]) [partition_index] = partition_results; - //(report.posteriors[bias.residue]) [partition_index] = partition_posteriors; - s = 0; // for re-entrancy - } - - partition_index = 0; - sites_found = Abs(report.sites_found); - sites_found_summary [bias.residue] = Abs(report.sites_found); - - } - selection.io.stopTimer (fade.json [terms.json.timers], "Residue `fade.bias.residue` analysis"); - -} - - - -// =========== ANALYSIS SUMMARY ======== - -fade.json [terms.fade.cache.settings] = fade.run_settings; -fade.json [terms.fade.json.site_annotations] = { - terms.fade.json.headers : fade.site_annotation_headers, - terms.fade.json.site_annotations : fade.site_annotations -}; -fade.json [terms.fit.MLE] = {terms.json.headers : fade.table_headers, - terms.json.content : fade.site_results }; - -//fade.json [terms.fade.posterior] = fade.report.posteriors; - -console.log ("----\n## FADE analysis summary. 
Evidence for directional selection evaluated using empirical Bayes factor threshold of " + fade.run_settings["bayes factor"]); - - -utility.ForEachPair (fade.sites_found_summary, "_residue_", "_count_", -' - if (_count_ == 0) { - console.log ("* No sites are evolving directionally towards " + _residue_); + parameters.SetValue (fade.parameter.bias, fade.this_block[fade.parameter.bias]); + fade.model_assignment = fade.model_assignment_with_bias; } else { - console.log ("* " + _count_ + " " + io.SingularOrPlural (_count_, "site is", "sites are") + " evolving directionally towards " + _residue_); - } - -'); - - -selection.io.stopTimer (fade.json [terms.json.timers], "Overall"); - -io.SpoolJSON (fade.json, fade.alignment_info[terms.json.json]); - -// HELPER FUNCTIONS GO HERE -//---------------------------------------------------------------------------- - -function fade.RunPrompts (prompts) { - if (prompts["branches"]) { - fade.selected_branches = selection.io.defineBranchSets ( fade.partitions_and_trees ); - fade.cache [terms.fade.cache.branches] = fade.selected_branches; - prompts["branches"] = FALSE; - } - - if (prompts["grid"]) { - fade.run_settings["grid size"] = io.PromptUser ("> Number of grid points per dimension (total number is D^2)",fade.run_settings["grid size"],5,50,TRUE); - prompts["grid"] = FALSE; - } - - - - if (prompts["model"]) { - utility.Extend (models.protein.empirical_models, {"GTR" : "General time reversible model (189 estimated parameters)."}); - fade.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Baseline substitution model"); - fade.generator = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[fade.baseline_model ]; - fade.cache[terms.fade.cache.model] = fade.baseline_model; - fade.cache[terms.fade.cache.model_generator] = fade.generator; - prompts["model"] = FALSE; + fade.model_assignment = fade.model_assignment_without_bias; + fade.model_id_to_object = { + 
"fade.baseline_model": fade.model.baseline + }; } + parameters.SetValue (fade.parameter.bias, 1e-10); + parameters.SetValue (fade.parameter.rate, 1); + model.ApplyModelToTree(fade.tree.name, fade.baseline.tree, None, fade.model_assignment); + estimators.ApplyExistingEstimatesToTree (fade.tree.name, fade.model_id_to_object, (fade.branch_lengths[terms.branch_length])[0], None, {}); + parameters.SetValue (fade.parameter.rate, fade.this_block[fade.parameter.rate]); + parameters.SetValue (fade.parameter.bias, fade.this_block[fade.parameter.bias]); - if (prompts["method"]) { - fade.run_settings["method"] = io.SelectAnOption ({ - terms.fade.methods.MH : "Full Metropolis-Hastings MCMC algorithm (slowest, original 2013 paper implementation)", - terms.fade.methods.CG : "Collapsed Gibbs sampler (intermediate speed)", - terms.fade.methods.VB0 : "0-th order Variational Bayes approximations (fastest, recommended default)" - }, "Posterior estimation method"); - prompts["method"] = FALSE; - } - if (prompts["chain"]) { - if (fade.run_settings["method"] == terms.fade.methods.MH) { - fade.run_settings["chains"] = io.PromptUser ("> Number of MCMC chains to run",fade.run_settings["chains"],2,20,TRUE); + for (fade.replicate_id = 0; fade.replicate_id < fade.replicates; fade.replicate_id += 1) { + DataSet simulated_block = Simulate (^fade.tree.name, fade.sim_frequencies , fade.simulation.matrix, fade.this_block[terms.data.sites]); + fade.data_block = alignments.GetAllSequences ("simulated_block"); + io.ReportProgressBar ("simulation", "Generating data for selection regime " + (fade.block_id+1) + " replicate " + fade.replicate_id + " / " + fade.replicates); + if (fade.block_id == 0) { + fade.replicate_data [fade.replicate_id] = fade.data_block; } else { - fade.run_settings["chains"] = 1; - } - if (fade.run_settings["method"] != terms.fade.methods.VB0) { - fade.run_settings["chain-length"] = io.PromptUser ("> The length of each chain",fade.run_settings["chain-length"],5e3,5e7,TRUE); - 
fade.run_settings["burn-in"] = io.PromptUser ("> Use this many samples as burn-in",fade.run_settings["chain-length"]$2,fade.run_settings["chain-length"]$20,fade.run_settings["chain-length"]*95$100,TRUE); - fade.run_settings["samples"] = io.PromptUser ("> How many samples should be drawn from each chain",fade.run_settings["samples"],50,fade.run_settings["chain-length"]-fade.run_settings["burn-in"],TRUE); - } - fade.run_settings["concentration"] = io.PromptUser ("> The concentration parameter of the Dirichlet prior",fade.run_settings["concentration"],0.001,1,FALSE); - prompts["chain"] = FALSE; - } -} - -//------------------------------------------------------------------------------------------------// - -lfunction fade.DefineGrid (one_d_points) { - // only one point for rate = 0, because bias is not identifiable if rate = 0 - - one_d_points = Max (one_d_points, 5); - - alphaBetaGrid = {one_d_points^2,2}; // (alpha, beta) pair - - oneDGridRate = {one_d_points,1}; - oneDGridBias = {one_d_points,1}; - - below1_frac = 0.7; - below1 = ((one_d_points)*below1_frac+0.5)$1; - above1 = (one_d_points-1)*(1-below1_frac)$1; - - if (below1 + above1 != one_d_points) { - above1 = one_d_points - below1; - } - - _neg_step = 1/(below1); - _neg_stepP1 = 1/(below1+1); - - for (_k = 0; _k < below1; _k += 1) { - oneDGridBias [_k][0] = _neg_step * (_k); - oneDGridRate [_k][0] = _neg_stepP1 * (_k + 1); - } - - oneDGridRate [below1-1][0] = 1; - oneDGridBias [below1-1][0] = 1; - - _pos_step = 49^(1/3)/above1; - for (_k = 1; _k <= above1; _k += 1) { - oneDGridBias [below1+_k-1][0] = 1+(_pos_step*_k)^3; - oneDGridRate [below1+_k-1][0] = 1+(_pos_step*_k)^3; - } - - _p = 0; - - for (_r = 0; _r < one_d_points; _r += 1) { - for (_c = 0; _c < one_d_points; _c += 1) { - alphaBetaGrid[_p][0] = oneDGridRate[_r]; - alphaBetaGrid[_p][1] = oneDGridBias[_c]; - _p += 1; - } - } - alphaBetaGrid[0][0] = 0; alphaBetaGrid[0][1] = 0; - alphaBetaGrid[1][1] = 0; - - return alphaBetaGrid; -} - 
-//------------------------------------------------------------------------------------------------// - -lfunction fade.SubstitutionHistory (subs) { - result = ""; - result * 128; - keys = utility.sortStrings (utility.Keys (subs)); - - for (i = 0; i < Abs (subs); i+=1) { - source = keys[i]; - targets = subs[source]; - if (i > 0) { - result * ", "; - } - result * (source + "->"); - keys2 = utility.sortStrings (utility.Keys (targets)); - for (k = 0; k < Abs (targets); k+=1) { - result * (keys2[k] + "(" + Abs(targets[keys2[k]]) + ")"); - } - } - - result * 0; - return result; - -} - -//------------------------------------------------------------------------------------------------// - -lfunction fade.CompositionString (composition) { - result = ""; - result * 128; - keys = utility.sortStrings (utility.Keys (composition)); - - for (i = 0; i < Abs (composition); i+=1) { - residue = keys[i]; - if (i) { - result * ","; + utility.ForEachPair (fade.data_block, "_id_", "_seq_", + ' + (fade.replicate_data[fade.replicate_id])[_id_] += _seq_; + '); } - result * (residue + composition [residue]); } - result * 0; - return result; } +io.ClearProgressBar (); +for (fade.replicate_id = 0; fade.replicate_id < fade.replicates; fade.replicate_id += 1) { + fade.current_file = fade.simulation_path + "." 
+ (fade.replicate_id+1) + ".fas"; + console.log (fade.current_file); + fprintf (fade.current_file, CLEAR_FILE, KEEP_OPEN); -//------------------------------------------------------------------------------------------------// - + utility.ForEachPair ((fade.replicate_data[fade.replicate_id]), "_id_", "_seq_", + ' + fprintf (fade.current_file, ">", _id_, "\n", _seq_, "\n"); + '); -lfunction fade.grid.MatrixToDict (grid) { - return utility.Map (utility.MatrixToListOfRows (grid), "_value_", - '{ terms.fade.bias : { - terms.id : fade.parameter.scalers [ terms.fade.bias ], - terms.fit.MLE : _value_[1] - }, - terms.fade.rate : { - terms.id : fade.parameter.scalers [ terms.fade.rate ], - terms.fit.MLE : _value_[0] - } - }'); + fprintf (fade.current_file, "\n", fade.settings [terms.data.tree], ";", CLOSE_FILE); } diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 7383470ee..a35c71f22 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -157,6 +157,27 @@ lfunction alignments.GetIthSequence (dataset_name, index) { return {utility.getGlobalValue("terms.id") : seq_id, utility.getGlobalValue("terms.data.sequence") : seq_string}; } +/** + * Get all sequences as "id" : "sequence" dictionary + * @name alignments.GetAllSequences + * @param {String} dataset_name - name of dataset to get sequence names from + * @returns {Dict} { id -> sequence} + */ + +lfunction alignments.GetAllSequences (dataset_name) { + + GetString (seq_id, ^dataset_name, -1); + result = {}; + + utility.ForEachPair (seq_id, "_index_", "_name_", + ' + GetDataInfo (`&seq_string`, ^`&dataset_name`, _index_[1]); + `&result`[_name_] = `&seq_string`; + '); + + return result; +} + /** * Read Nucleotide dataset from file_name * @name alignments.ReadNucleotideDataSet From 89ac86acdfc8373b3115d5cfa94589cf3b94851d Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 19 Jun 2018 14:51:35 -0400 
Subject: [PATCH 06/53] FADE simulator; removing -g from MP target --- res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf index 6272dad45..35af0f602 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf @@ -174,7 +174,7 @@ for (fade.block_id = 0; fade.block_id < Abs (fade.site_classes); fade.block_id + io.ReportProgressBar ("simulation", "Generating data for selection regime " + (fade.block_id+1) ); fade.this_block = fade.site_classes[fade.block_id]; if (utility.Has ( fade.this_block, fade.parameter.bias, "Number")) { - fade.bias.residue = fade.parameter.bias; + fade.bias.residue = fade.this_block[fade.parameter.bias]; fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", "fade.biased_model", { From 4d777dac44cc929ffa7f39ef7c53f3c4eceee022 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 19 Jun 2018 14:57:27 -0400 Subject: [PATCH 07/53] FADE simulator fixes --- res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf index 35af0f602..cd996f947 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf @@ -174,7 +174,9 @@ for (fade.block_id = 0; fade.block_id < Abs (fade.site_classes); fade.block_id + io.ReportProgressBar ("simulation", "Generating data for selection regime " + (fade.block_id+1) ); fade.this_block = fade.site_classes[fade.block_id]; if (utility.Has ( fade.this_block, fade.parameter.bias, "Number")) { - fade.bias.residue = fade.this_block[fade.parameter.bias]; + 
fade.bias.residue = fade.this_block[terms.fade.mode]; + + console.log (fade.bias.residue); fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", "fade.biased_model", { From def4e5a056f923b234dff56945bfd19572f03f34 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 19 Jun 2018 14:57:47 -0400 Subject: [PATCH 08/53] FADE simulator fixes --- res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf | 3 --- 1 file changed, 3 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf index cd996f947..db37d1ebb 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/Simulators/FADE.bf @@ -175,9 +175,6 @@ for (fade.block_id = 0; fade.block_id < Abs (fade.site_classes); fade.block_id + fade.this_block = fade.site_classes[fade.block_id]; if (utility.Has ( fade.this_block, fade.parameter.bias, "Number")) { fade.bias.residue = fade.this_block[terms.fade.mode]; - - console.log (fade.bias.residue); - fade.model.biased = model.generic.DefineModel("fade.biased.model.generator", "fade.biased_model", { "0": "terms.global", From c5d589b861ba1db8b226ea2446ea971ea75576a2 Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Tue, 19 Jun 2018 16:45:59 -0400 Subject: [PATCH 09/53] Slight 20x20 dense matrix multiplication tuning --- CMakeLists.txt | 2 +- .../SelectionAnalyses/FUBAR.bf | 3 +- src/core/matrix.cpp | 121 ++++++++---------- tests/hbltests/Ancestors/NucAncestors.bf | 7 +- 4 files changed, 60 insertions(+), 73 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c49d968f..49e8cac2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if(${GCC_VERSION} VERSION_LESS 6.0) set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -std=gnu++14") else(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3") 
+ set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") endif(${GCC_VERSION} VERSION_LESS 6.0) if(NOAVX) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf index 5c560ba0f..9029d52c1 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf @@ -253,8 +253,7 @@ if (utility.Has (fubar.cache, terms.fubar.cache.grid, "Matrix") && utility.Has ( estimators.ApplyExistingEstimates ("fubar.lf.codon", fubar.model_id_to_object, fubar.gtr_results, None); estimators.TraverseLocalParameters ("fubar.lf.codon", fubar.model_id_to_object, "fubar.scalers.Constrain"); - - + fubar.pass1 = Max (fubar.ComputeOnGrid ("fubar.lf.codon", fubar.grid.MatrixToDict (fubar.grid.matrix), "fubar.pass1.evaluator", diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index 86a471d65..f1f9b4436 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -3666,67 +3666,13 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) { unsigned long cumulativeIndex = 0UL; - const unsigned long - dimm4 = vDim - vDim%4, - column_shift2 = secondArg.vDim * 2, - column_shift3 = secondArg.vDim * 3, - column_shift4 = secondArg.vDim * 4; - - const _Parameter * row = theData; - _Parameter * dest = storage.theData; - + const unsigned long dimm4 = (vDim >> 2) << 2; -#ifndef _SLKP_SSE_VECTORIZATION_ - - - -/*#ifdef _SLKP_USE_AVX_INTRINSICS - __m256d buffer1, - buffer2; - __m128d two1, - two2; + const _Parameter * row = theData; + _Parameter * dest = storage.theData; - double d[2] __attribute__ ((aligned (16))); - for (unsigned long i=0UL; i Date: Wed, 20 Jun 2018 16:44:34 -0400 Subject: [PATCH 10/53] Bug fixes (memory errors); more informative error messaging when issues are encountered with codon alignments --- .../libv3/tasks/alignments.bf | 41 ++++++++++++++++++- src/core/batchlan.cpp | 28 +++++++++++-- src/core/operation.cpp | 21 +++++++--- src/core/site.cpp | 4 +- 4 files changed, 84 
insertions(+), 10 deletions(-) diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index a35c71f22..f53b06082 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -151,8 +151,24 @@ lfunction alignments.GetSequenceByName (dataset_name, sequence_name) { */ lfunction alignments.GetIthSequence (dataset_name, index) { + GetString (seq_id, ^dataset_name, index); + GetDataInfo (seq_string, ^dataset_name, index); + return {utility.getGlobalValue("terms.id") : seq_id, utility.getGlobalValue("terms.data.sequence") : seq_string}; +} + +/** + * Get i-th sequence name/value from an alignment; retrieve the original sequence name if available + * @name alignments.GetIthSequenceOriginalName + * @param {String} dataset_name - name of dataset to get sequence names from + * @param {String} index - the name of the sequence to extract or None to set up the initial mapping + * @returns {Dict} {"id" : sequence name, "sequence" : sequence data} + */ +lfunction alignments.GetIthSequenceOriginalName (dataset_name, index) { GetString (seq_id, ^dataset_name, index); + if (Type (^"`dataset_name`.mapping") == "AssociativeList") { + seq_id = (^"`dataset_name`.mapping")[seq_id]; + } GetDataInfo (seq_string, ^dataset_name, index); return {utility.getGlobalValue("terms.id") : seq_id, utility.getGlobalValue("terms.data.sequence") : seq_string}; } @@ -301,7 +317,30 @@ function alignments.LoadGeneticCodeAndAlignment(dataset_name, datafilter_name, p */ function alignments.LoadCodonDataFile(dataset_name, datafilter_name, data_info) { DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , , data_info[terms.stop_codons]); - io.CheckAssertion("`datafilter_name`.sites*3==`dataset_name`.sites", "The input alignment must not contain stop codons"); + if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { + // generate a more diagnostic error here + for 
(alignments.LoadCodonDataFile.i = 0; alignments.LoadCodonDataFile.i < ^"`dataset_name`.species"; alignments.LoadCodonDataFile.i += 1) { + DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]); + if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { + alignments.LoadCodonDataFile.name = alignments.GetIthSequenceOriginalName (dataset_name, alignments.LoadCodonDataFile.i); + alignments.LoadCodonDataFile.site_map = ^"`datafilter_name`.site_map"; + + alignments.LoadCodonDataFile.annotation_string = utility.PopulateDict (0, ^"`dataset_name`.sites", + '(alignments.LoadCodonDataFile.name[terms.data.sequence])[_idx]', + '_idx'); + + + utility.ForEach (alignments.LoadCodonDataFile.site_map, "_value_", + ' + `&alignments.LoadCodonDataFile.annotation_string`[_value_] = `&alignments.LoadCodonDataFile.annotation_string`[_value_] && 0; + '); + + console.log ("*** PROBLEM WITH SEQUENCE ' " + alignments.LoadCodonDataFile.name[terms.id] + "' (" + ^"`dataset_name`.sites" + " nt long, stop codons shown in capital letters)\n\n" + Join ("",alignments.LoadCodonDataFile.annotation_string)); + break; + } + } + io.CheckAssertion("`datafilter_name`.sites*3==`dataset_name`.sites", "The input alignment must have the number of sites that is divisible by 3 and must not contain stop codons"); + } data_info[terms.data.sites] = ^ "`datafilter_name`.sites"; data_info[terms.data.dataset] = dataset_name; data_info[terms.data.datafilter] = datafilter_name; diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index a5a827047..3d89464a8 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1267,8 +1267,10 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { executionStack << this; if (parent && stdinRedirect == nil) { - stdinRedirect = parent->stdinRedirect; + stdinRedirect = parent->stdinRedirect; stdinRedirectAux = parent->stdinRedirectAux; + parent->stdinRedirect->AddAReference(); 
+ parent->stdinRedirectAux->AddAReference(); } else { parent = nil; } @@ -1328,6 +1330,8 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { if (parent) { stdinRedirect = nil; stdinRedirectAux = nil; + parent->stdinRedirect->RemoveAReference(); + parent->stdinRedirectAux->RemoveAReference(); } return result; @@ -3386,8 +3390,20 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { chain.ReportAnExecutionError("Encountered an error while parsing HBL", false, true); } else { - exc.stdinRedirectAux = inArgAux?inArgAux:chain.stdinRedirectAux; - exc.stdinRedirect = inArg?inArg:chain.stdinRedirect; + bool references_added = false; + + if (inArg && inArgAux) { + exc.stdinRedirectAux = inArgAux; + exc.stdinRedirect = inArg; + } else { + if (chain.stdinRedirect) { + references_added = true; + chain.stdinRedirect->AddAReference(); + chain.stdinRedirectAux->AddAReference(); + } + exc.stdinRedirectAux = chain.stdinRedirectAux; + exc.stdinRedirect = chain.stdinRedirect; + } if (simpleParameters.lLength && exc.TryToMakeSimple()) { ReportWarning (_String ("Successfully compiled an execution list.\n") & _String ((_String*)exc.toStr()) ); @@ -3396,8 +3412,14 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { exc.Execute(); } + if (references_added) { + chain.stdinRedirect->RemoveAReference(); + chain.stdinRedirectAux->RemoveAReference(); + } + exc.stdinRedirectAux = nil; exc.stdinRedirect = nil; + if (exc.result) { DeleteObject (chain.result); chain.result = exc.result; diff --git a/src/core/operation.cpp b/src/core/operation.cpp index e000b9ca5..af4332e2a 100644 --- a/src/core/operation.cpp +++ b/src/core/operation.cpp @@ -473,16 +473,27 @@ bool _Operation::Execute (_Stack& theScrap, _VariableContainer const* nam function_body->ResetFormulae(); } + _PMathObj ret; + if (currentExecutionList && currentExecutionList->stdinRedirect) { + // 20180620: SLKP, need to split this off because if Execute fails + // then there will be a double free on 
stdinRedirect function_body -> stdinRedirect = currentExecutionList->stdinRedirect; function_body -> stdinRedirectAux = currentExecutionList->stdinRedirectAux; + + function_body -> stdinRedirect -> AddAReference(); + function_body -> stdinRedirectAux -> AddAReference(); + + ret = function_body->Execute(); + + function_body -> stdinRedirect -> RemoveAReference(); + function_body -> stdinRedirectAux -> RemoveAReference(); + function_body -> stdinRedirect = nil; + function_body -> stdinRedirectAux = nil; + } else { + ret = function_body->Execute(); } - _PMathObj ret = function_body->Execute(); - - function_body -> stdinRedirect = nil; - function_body -> stdinRedirectAux = nil; - if (terminateExecution) { theScrap.Push (new _Constant (0.0)); return true; diff --git a/src/core/site.cpp b/src/core/site.cpp index 6dd2aeccf..f82061fb1 100644 --- a/src/core/site.cpp +++ b/src/core/site.cpp @@ -2282,7 +2282,9 @@ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _Si _String state1 (unit,false), state2 (unit,false); - + + sites = sites / unit; + for (long sequenceIndex = 0; sequenceIndex < seqs; sequenceIndex++) { bool checkState = false; for (long idx=0; idx Date: Wed, 20 Jun 2018 17:02:31 -0400 Subject: [PATCH 11/53] Fix the previous bug fix not to segfault on recursion --- src/core/batchlan.cpp | 24 +++++++++++++++--------- src/core/operation.cpp | 20 +++++++++++++------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index 3d89464a8..cd621612a 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1266,9 +1266,13 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { callPoints << currentCommand; executionStack << this; + _AVLListXL * stash1 = nil; + _List* stash2 = nil; // recursion + + if (parent && stdinRedirect == nil) { - stdinRedirect = parent->stdinRedirect; - stdinRedirectAux = parent->stdinRedirectAux; + stash1 = stdinRedirect = parent->stdinRedirect; + 
stash2 = stdinRedirectAux = parent->stdinRedirectAux; parent->stdinRedirect->AddAReference(); parent->stdinRedirectAux->AddAReference(); } else { @@ -1330,8 +1334,8 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { if (parent) { stdinRedirect = nil; stdinRedirectAux = nil; - parent->stdinRedirect->RemoveAReference(); - parent->stdinRedirectAux->RemoveAReference(); + stash1->RemoveAReference(); + stash2->RemoveAReference(); } return result; @@ -3390,14 +3394,16 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { chain.ReportAnExecutionError("Encountered an error while parsing HBL", false, true); } else { - bool references_added = false; + _AVLListXL * stash1 = nil; + _List* stash2 = nil; if (inArg && inArgAux) { exc.stdinRedirectAux = inArgAux; exc.stdinRedirect = inArg; } else { if (chain.stdinRedirect) { - references_added = true; + stash1 = chain.stdinRedirect; + stash2 = chain.stdinRedirectAux; // for recursion calls chain.stdinRedirect->AddAReference(); chain.stdinRedirectAux->AddAReference(); } @@ -3412,9 +3418,9 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { exc.Execute(); } - if (references_added) { - chain.stdinRedirect->RemoveAReference(); - chain.stdinRedirectAux->RemoveAReference(); + if (stash1) { + stash1->RemoveAReference(); + stash2->RemoveAReference(); } exc.stdinRedirectAux = nil; diff --git a/src/core/operation.cpp b/src/core/operation.cpp index af4332e2a..236ae4412 100644 --- a/src/core/operation.cpp +++ b/src/core/operation.cpp @@ -478,21 +478,27 @@ bool _Operation::Execute (_Stack& theScrap, _VariableContainer const* nam if (currentExecutionList && currentExecutionList->stdinRedirect) { // 20180620: SLKP, need to split this off because if Execute fails // then there will be a double free on stdinRedirect + + auto stash1 = currentExecutionList->stdinRedirect; + auto stash2 = currentExecutionList->stdinRedirectAux;; + // for recursive calls, both function_body and currentExecutionList can be 
reset to null + function_body -> stdinRedirect = currentExecutionList->stdinRedirect; function_body -> stdinRedirectAux = currentExecutionList->stdinRedirectAux; - function_body -> stdinRedirect -> AddAReference(); - function_body -> stdinRedirectAux -> AddAReference(); + currentExecutionList -> stdinRedirect -> AddAReference(); + currentExecutionList -> stdinRedirectAux -> AddAReference(); ret = function_body->Execute(); - function_body -> stdinRedirect -> RemoveAReference(); - function_body -> stdinRedirectAux -> RemoveAReference(); - function_body -> stdinRedirect = nil; - function_body -> stdinRedirectAux = nil; - } else { + stash1 -> RemoveAReference(); + stash2-> RemoveAReference(); + } else { ret = function_body->Execute(); } + + function_body -> stdinRedirect = nil; + function_body -> stdinRedirectAux = nil; if (terminateExecution) { theScrap.Push (new _Constant (0.0)); From 54300b24c1826580925a140f475e8d74f3a1b69c Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Thu, 21 Jun 2018 08:17:47 -0400 Subject: [PATCH 12/53] Fixing the fix to the bug fix --- src/core/batchlan.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index cd621612a..a32775e8a 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1273,8 +1273,10 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { if (parent && stdinRedirect == nil) { stash1 = stdinRedirect = parent->stdinRedirect; stash2 = stdinRedirectAux = parent->stdinRedirectAux; - parent->stdinRedirect->AddAReference(); - parent->stdinRedirectAux->AddAReference(); + if (stash1) { + stash1->AddAReference(); + stash2->AddAReference(); + } } else { parent = nil; } @@ -1334,8 +1336,8 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { if (parent) { stdinRedirect = nil; stdinRedirectAux = nil; - stash1->RemoveAReference(); - stash2->RemoveAReference(); + DeleteObject (stash1); + DeleteObject (stash2); } return result; From 
8c3dfa2a5882f1e4271046de51d993829e12aac7 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 21 Jun 2018 09:59:38 -0400 Subject: [PATCH 13/53] FADE.bf fixes to correctly recreate baseline fit from cached values --- .../SelectionAnalyses/FADE.bf | 25 ++++++++++++++++--- .../libv3/tasks/estimators.bf | 9 +++++-- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf b/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf index a08bc988e..a28b02b73 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FADE.bf @@ -138,7 +138,7 @@ fade.path.base = (fade.json [terms.json.input])[terms.json.file]; fade.path.cache = fade.path.base + ".FADE.cache"; io.ReportProgressBar ("init", "Loading existing cache files"); fade.cache = io.LoadCacheFromFile (fade.path.cache); -io.ClearProgressBar (); +io.ClearProgressBar (); console.log ( "> FADE will write cache and result files to _`fade.path.base`.FADE.cache_ and _`fade.path.base`.FADE.json_, respectively \n\n"); @@ -227,9 +227,13 @@ if (utility.Has (fade.cache, terms.fade.cache.model, "String") && utility.Has (f io.WriteCacheToFile (fade.path.cache, fade.cache); } +fade.rebuild_lf = FALSE; + if (utility.Has (fade.cache, terms.fade.cache.baseline, "AssociativeList")) { io.ReportProgressMessageMD ("FADE", "baseline", "Loaded baseline model fit from cache"); fade.baseline_fit = fade.cache [terms.fade.cache.baseline]; + fade.rebuild_lf = TRUE; + } else { selection.io.startTimer (fade.json [terms.json.timers], "Baseline Fit", 1); io.ReportProgressMessageMD ("FADE", "baseline", "Fitting the baseline (`fade.baseline_model`) model to obtain branch lengths and rate matrix estimates"); @@ -294,6 +298,19 @@ utility.ForEachPair (fade.filter_specification, "_key_", "_value_", // TBD if (utility.Has (fade.cache, terms.fade.cache.substitutions, "AssociativeList") == FALSE) { + if (fade.rebuild_lf) { + estimators.CreateLFObject ( 
"fade.ancestral.rebuild", + fade.filter_names, + fade.trees, + "fade.generator.MLE" , + fade.baseline_fit, + {terms.run_options.retain_lf_object: TRUE}, + None + ) + + fade.baseline_fit[terms.likelihood_function] = "fade.ancestral.rebuild.likelihoodFunction"; + } + utility.EnsureKey (fade.cache, terms.fade.cache.substitutions); utility.EnsureKey (fade.cache, terms.fade.cache.composition); @@ -407,7 +424,7 @@ namespace fade { {"BayesFactor[bias>0]", "Empiricial Bayes Factor for substitution bias"} }; - + table_screen_output = {{"Site", "Partition", "target", "rate", "bias", "Bayes Factor", site_annotation_headers["Composition"], site_annotation_headers["Substitutions"]}}; report.biased_site = {{"" + (1+filter_info[s]), @@ -537,7 +554,7 @@ for (fade.residue = 0; fade.residue < 20; fade.residue += 1) { "fade.pass1.result_handler", None); io.WriteCacheToFile (fade.path.cache, fade.cache); - } + } } else { if (utility.Has (fade.cache [terms.fade.cache.mcmc], fade.bias.residue, "AssociativeList")) { io.ReportProgressBar ("fade", "[`fade.bias.residue`] Loaded posterior sample for grid loadings"); @@ -746,7 +763,7 @@ for (fade.residue = 0; fade.residue < 20; fade.residue += 1) { fade.json [terms.fade.cache.settings] = fade.run_settings; fade.json [terms.fade.json.site_annotations] = { terms.fade.json.headers : fade.site_annotation_headers, - terms.fade.json.site_annotations : fade.site_annotations + terms.fade.json.site_annotations : fade.site_annotations }; fade.json [terms.fit.MLE] = {terms.json.headers : fade.table_headers, terms.json.content : fade.site_results }; diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index b00104325..49329111d 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -732,7 +732,7 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o lfunction estimators.CreateLFObject (context, 
data_filter, tree, model_template, initial_values, run_options, model_objects) { if (Type(data_filter) == "String") { - return estimators.FitSingleModel_Ext ({ + return estimators.CreateLFObject (context, { { data_filter__ } @@ -759,12 +759,16 @@ lfunction estimators.CreateLFObject (context, data_filter, tree, model_template, DataSetFilter ^ (filters[i]) = CreateFilter( ^ (data_filter[i]), 1); } + user_model_id = context + ".user_model"; utility.ExecuteInGlobalNamespace ("`user_model_id` = 0"); + + ^(user_model_id) = model.generic.DefineModel(model_template, context + ".model", { "0": "terms.global" }, filters, None); + for (i = 0; i < components; i += 1) { lf_components[2 * i + 1] = "`context`.tree_" + i; model.ApplyModelToTree(lf_components[2 * i + 1], tree[i], { @@ -774,8 +778,9 @@ lfunction estimators.CreateLFObject (context, data_filter, tree, model_template, lfid = context + ".likelihoodFunction"; - utility.ExecuteInGlobalNamespace ("LikelihoodFunction `lfid` = (`&lf_components`)"); + + utility.ExecuteInGlobalNamespace ("LikelihoodFunction `lfid` = (`&lf_components`)"); df = 0; if (Type(initial_values) == "AssociativeList") { if (None == model_objects) { From 655018a4a80fc6dd077c3095e856de3dd5231b99 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 21 Jun 2018 11:22:14 -0400 Subject: [PATCH 14/53] Resolving the issue where statically created LF in Case52 (SimulateDataSet) was not properly handling lister release events, causing potential memory issues --- src/core/batchlan.cpp | 141 +++--- src/core/global_object_lists.cpp | 211 ++++---- src/core/likefunc.cpp | 802 +++++++++++++++---------------- 3 files changed, 568 insertions(+), 586 deletions(-) diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index a32775e8a..c1313dc8c 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1266,17 +1266,9 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { callPoints << currentCommand; executionStack << this; - _AVLListXL 
* stash1 = nil; - _List* stash2 = nil; // recursion - - if (parent && stdinRedirect == nil) { - stash1 = stdinRedirect = parent->stdinRedirect; - stash2 = stdinRedirectAux = parent->stdinRedirectAux; - if (stash1) { - stash1->AddAReference(); - stash2->AddAReference(); - } + stdinRedirect = parent->stdinRedirect; + stdinRedirectAux = parent->stdinRedirectAux; } else { parent = nil; } @@ -1336,8 +1328,6 @@ _PMathObj _ExecutionList::Execute (_ExecutionList* parent) { if (parent) { stdinRedirect = nil; stdinRedirectAux = nil; - DeleteObject (stash1); - DeleteObject (stash2); } return result; @@ -3396,22 +3386,8 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { chain.ReportAnExecutionError("Encountered an error while parsing HBL", false, true); } else { - _AVLListXL * stash1 = nil; - _List* stash2 = nil; - - if (inArg && inArgAux) { - exc.stdinRedirectAux = inArgAux; - exc.stdinRedirect = inArg; - } else { - if (chain.stdinRedirect) { - stash1 = chain.stdinRedirect; - stash2 = chain.stdinRedirectAux; // for recursion calls - chain.stdinRedirect->AddAReference(); - chain.stdinRedirectAux->AddAReference(); - } - exc.stdinRedirectAux = chain.stdinRedirectAux; - exc.stdinRedirect = chain.stdinRedirect; - } + exc.stdinRedirectAux = inArgAux?inArgAux:chain.stdinRedirectAux; + exc.stdinRedirect = inArg?inArg:chain.stdinRedirect; if (simpleParameters.lLength && exc.TryToMakeSimple()) { ReportWarning (_String ("Successfully compiled an execution list.\n") & _String ((_String*)exc.toStr()) ); @@ -3420,14 +3396,8 @@ void _ElementaryCommand::ExecuteCase39 (_ExecutionList& chain) { exc.Execute(); } - if (stash1) { - stash1->RemoveAReference(); - stash2->RemoveAReference(); - } - exc.stdinRedirectAux = nil; exc.stdinRedirect = nil; - if (exc.result) { DeleteObject (chain.result); chain.result = exc.result; @@ -5606,74 +5576,79 @@ void _ElementaryCommand::ExecuteCase52 (_ExecutionList& chain) { _String filter_specification = *GetFilterName (filterID) & 
spawningTree->GetName()->Enquote(',') & *freqVar->GetName(); - _LikelihoodFunction lf (filter_specification, nil); + { + // 20180621 SLKP ensure that lf is deleted before the filter to unregister event listeners - if (terminateExecution) { - return; - } + _LikelihoodFunction lf (filter_specification, nil); + + if (terminateExecution) { + return; + } - bool doInternals = false; + bool doInternals = false; - if (parameters.lLength>5) { - doInternals = (ProcessNumericArgument ((_String*)parameters (5),chain.nameSpacePrefix)>0.5); - } + if (parameters.lLength>5) { + doInternals = (ProcessNumericArgument ((_String*)parameters (5),chain.nameSpacePrefix)>0.5); + } - _String spoolFile; + _String spoolFile; - FILE* mainFile = nil; + FILE* mainFile = nil; - errMsg = emptyString; + errMsg = emptyString; - if (parameters.lLength > 6) { - spoolFile = ProcessLiteralArgument ((_String*)parameters (6),chain.nameSpacePrefix); - spoolFile.ProcessFileName(); - mainFile = doFileOpen (spoolFile.sData,"w"); - if (!mainFile) { - errMsg = _String("Failed to open ") & spoolFile & " for writing"; - } - if (doInternals) { - spoolFile = spoolFile & ".anc"; + if (parameters.lLength > 6) { + spoolFile = ProcessLiteralArgument ((_String*)parameters (6),chain.nameSpacePrefix); + spoolFile.ProcessFileName(); + mainFile = doFileOpen (spoolFile.sData,"w"); + if (!mainFile) { + errMsg = _String("Failed to open ") & spoolFile & " for writing"; + } + if (doInternals) { + spoolFile = spoolFile & ".anc"; + } } - } - if (errMsg.sLength == 0) { - _DataSet * simDataSet; + if (errMsg.sLength == 0) { + _DataSet * simDataSet; - if (mainFile) { - simDataSet = new _DataSet (mainFile); - } else { - simDataSet = new _DataSet (siteCount); - } + if (mainFile) { + simDataSet = new _DataSet (mainFile); + } else { + simDataSet = new _DataSet (siteCount); + } - checkPointer (simDataSet); + checkPointer (simDataSet); - _List exclusions; + _List exclusions; - _String *simName = new 
_String(AppendContainerName(*(_String*)parameters (0),chain.nameSpacePrefix)); - _String mxName = *simName & ".rates"; - setParameter (mxName, 0.0); - _Variable *catValVar = FetchVar (LocateVarByName (mxName)); - _Matrix* catValues = new _Matrix (1,1,false,true); - checkPointer (catValues); + _String *simName = new _String(AppendContainerName(*(_String*)parameters (0),chain.nameSpacePrefix)); + _String mxName = *simName & ".rates"; + setParameter (mxName, 0.0); + _Variable *catValVar = FetchVar (LocateVarByName (mxName)); + _Matrix* catValues = new _Matrix (1,1,false,true); + checkPointer (catValues); - mxName = *simName & ".rateVars"; - setParameter (mxName, 0.0); - _Variable * catNameVar = FetchVar (LocateVarByName (mxName)); - _Matrix* catNames = new _Matrix (1,1,false,true); + mxName = *simName & ".rateVars"; + setParameter (mxName, 0.0); + _Variable * catNameVar = FetchVar (LocateVarByName (mxName)); + _Matrix* catNames = new _Matrix (1,1,false,true); - SetStatusLine ("Simulating Data"); - lf.Simulate (*simDataSet, exclusions, catValues, catNames, rootStates, doInternals?(mainFile?&spoolFile:&emptyString):nil); - SetStatusLine ("Idle"); + SetStatusLine ("Simulating Data"); + lf.Simulate (*simDataSet, exclusions, catValues, catNames, rootStates, doInternals?(mainFile?&spoolFile:&emptyString):nil); + SetStatusLine ("Idle"); - catValVar->SetValue(catValues, false); - catNameVar->SetValue(catNames, false); + catValVar->SetValue(catValues, false); + catNameVar->SetValue(catNames, false); - StoreADataSet (simDataSet, simName); - DeleteObject (simName); - DeleteDataFilter (filterID); - errMsg = emptyString; + StoreADataSet (simDataSet, simName); + DeleteObject (simName); + errMsg = emptyString; + } } + + DeleteDataFilter (filterID); } DeleteObject (ds); if (rootStates) { diff --git a/src/core/global_object_lists.cpp b/src/core/global_object_lists.cpp index 79c130f66..6938e4568 100644 --- a/src/core/global_object_lists.cpp +++ b/src/core/global_object_lists.cpp @@ 
-1,20 +1,20 @@ /* HyPhy - Hypothesis Testing Using Phylogenies. - + Copyright (C) 1997-now Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) - + Significant contributions from: Spencer V Muse (muse@stat.ncsu.edu) Simon DW Frost (sdf22@cam.ac.uk) - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -22,10 +22,10 @@ distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
@@ -62,56 +62,56 @@ namespace hyphy_global_objects { /** notification types enum */ - + enum kNotificationType { kNotificationTypeChange, kNotificationTypeDelete }; - + /** data filter objects -- internal */ - - - + + + void _SetDataFilterParameters (_String const& name, _DataSetFilter const& filter) { setParameter (WrapInNamespace ("species", &name), filter.NumberSpecies()); setParameter (WrapInNamespace ("sites", &name), filter.GetSiteCountInUnits()); - + _Parameter size_cutoff; checkParameter (defaultLargeFileCutoff,size_cutoff, 100000.); - + if (filter.GetSiteCount() < size_cutoff) { setParameter(WrapInNamespace("site_map", &name), new _Matrix (filter.theOriginalOrder), nil, false); setParameter(WrapInNamespace("site_freqs", &name), new _Matrix (filter.theFrequencies), nil, false); } - + if (filter.NumberSpecies() < size_cutoff) { setParameter(WrapInNamespace("sequence_map", &name), new _Matrix (filter.theNodeMap), nil, false); } } - + void _KillDataFilterParameters (_String const & filter_name) { _List kill_these_arguments; kill_these_arguments < "species" < "sites" < "site_map" < "site_freqs" < "sequence_map"; - + for (unsigned long i = 0; i < kill_these_arguments.countitems(); i++) { DeleteVariable(WrapInNamespace(*(_String*)kill_these_arguments.GetItem(i), &filter_name)); } } - + void _NotifyDataFilterListeners (const long index, kNotificationType event_type) { _List * listeners = (_List*)_data_filter_listeners.GetDataByKey( (BaseRef) index); if (listeners) { - + _List buffered_updates; - + for (unsigned long k = 0UL; k < listeners->lLength; k++) { BaseRef this_listener = listeners->GetItem(k); - + if (_LikelihoodFunction* lf = dynamic_cast<_LikelihoodFunction*> (this_listener)) { //StringToConsole(_String("_NotifyDataFilterListeners ") & index & " " & (long)lf & "\n"); - + if (event_type == kNotificationTypeChange) { buffered_updates << lf; //lf->Rebuild(); @@ -121,86 +121,86 @@ namespace hyphy_global_objects { } } } - + for (unsigned long k = 0UL; k < 
buffered_updates.lLength; k++) { ((_LikelihoodFunction*)buffered_updates.GetItem (k)) -> Rebuild(); } } } - - + + /** generic hidden functions */ - + bool _IsObjectLocked (long index, long object_class) { - + _AVLList* source = nil; - + switch (object_class) { case HY_BL_DATASET_FILTER: source = &_data_filter_locks; break; } - + if (source) { return source->FindLong (index) >= 0L; } - + return false; } - + bool _AcquireObjectLock (long index, long object_class) { - + _AVLList* source = nil; - + switch (object_class) { case HY_BL_DATASET_FILTER: source = &_data_filter_locks; break; } - + if (source) { long added_as = source->Insert((BaseRef)index); return added_as >= 0; } - + return false; } - + bool _RemoveObjectLock (long index, long object_class) { - + _AVLList* source = nil; - + switch (object_class) { case HY_BL_DATASET_FILTER: source = &_data_filter_locks; break; } - + if (source) { source->Delete ((BaseRef)index); return true; } - + return false; } - + /** public facing functions for data filter objects */ - + const _DataSetFilter * GetDataFilter (long index) { if (_data_filters.IsValidIndex (index)) { return (const _DataSetFilter*)_data_filters.GetXtra (index); } return nil; } - + const _DataSetFilter * GetDataFilter (_String const& name ) { return GetDataFilter(FindDataFilter (name)); } - + _DataSetFilter * ExclusiveLockDataFilter (long index) { if (_data_filters.IsValidIndex (index)) { if (_AcquireObjectLock(index, HY_BL_DATASET_FILTER)) { @@ -209,32 +209,34 @@ namespace hyphy_global_objects { } return nil; } - + _DataSetFilter * ExclusiveLockDataFilter (_String const& name) { return ExclusiveLockDataFilter(FindDataFilter(name)); } - + bool ReleaseDataFilterLock (long index) { if (_data_filters.IsValidIndex (index)) { return _RemoveObjectLock(index, HY_BL_DATASET_FILTER); } return false; } - + bool ReleaseDataFilterLock (_String const& name) { return ReleaseDataFilterLock(FindDataFilter(name)); } - - + + bool UnregisterChangeListenerForDataFilter (long 
const index, BaseRef listener) { + //StringToConsole(_String("\nUnregisterChangeListenerForDataFilter ENTER ") & (long)_data_filters.IsValidIndex (index) &" \n"); if (_data_filters.IsValidIndex (index)) { - + // StringToConsole(_String("\nUnregisterChangeListenerForDataFilter: valid filter index\n")); + if (dynamic_cast <_LikelihoodFunction*> (listener)) { _List * current_listeners = (_List *)_data_filter_listeners.GetDataByKey ((BaseRef)index); if (current_listeners) { long listener_index = current_listeners->_SimpleList::Find((long)listener); if (listener_index >= 0) { - //StringToConsole(_String("UnregisterChangeListenerForDataFilter ") & index & " " & (long)listener & "\n"); + //StringToConsole(_String("\nUnregisterChangeListenerForDataFilter ") & index & " " & (long)listener & "\n"); current_listeners->Delete (listener_index); return true; } @@ -242,67 +244,68 @@ namespace hyphy_global_objects { } WarnError (_String("Not a supported listener type in call to ") & _String (__PRETTY_FUNCTION__)); } - + return false; } - - + + bool RegisterChangeListenerForDataFilter (long const index, BaseRef listener) { - + //StringToConsole(_String("RegisterChangeListenerForDataFilter ") & index & " " & (long)listener & "\n"); - + if (_data_filters.IsValidIndex (index)) { - + if (dynamic_cast <_LikelihoodFunction*> (listener)) { _List * current_listeners = (_List *)_data_filter_listeners.GetDataByKey ((BaseRef)index); if (!current_listeners) { current_listeners = new _List; + //StringToConsole(_String("\nRegisterChangeListenerForDataFilter ") & index & " " & (long)listener & "\n"); _data_filter_listeners.Insert ((BaseRef)index, (long)current_listeners, false, false); } - + if (current_listeners->_SimpleList::Find((long)listener) < 0L) { (*current_listeners) << listener; } return true; - + } - - + + WarnError (_String("Not a supported listener type in call to ") & _String (__PRETTY_FUNCTION__)); - - + + } - + return false; } - - + + bool RegisterChangeListenerForDataFilter 
(_String const& name, BaseRef listener) { return RegisterChangeListenerForDataFilter(FindDataFilter (name), listener); } - + bool UnregisterChangeListenerForDataFilter (_String const& name, BaseRef listener) { return UnregisterChangeListenerForDataFilter(FindDataFilter (name), listener); } - + long FindDataFilter (_String const& name) { return _data_filters.Find (&name); } - + long StoreDataFilter (_String const& name, _DataSetFilter* object, bool handle_errors) { - + if (name.IsValidIdentifier(true)) { long exists_already = FindDataFilter(name); - + /*printf ("[StoreDataFilter] %s %d\n", name.sData, exists_already); - + _SimpleList history; long locked_index = _data_filter_locks.Next (-1, history); while (locked_index >= 0) { printf ("\tLOCKED %s\n", GetFilterName((long)_data_filter_locks.Retrieve(locked_index))->sData); locked_index = _data_filter_locks.Next (locked_index, history); } */ - + if (exists_already >= 0L) { if (_IsObjectLocked(exists_already, HY_BL_DATASET_FILTER)) { if (handle_errors) { @@ -310,17 +313,17 @@ namespace hyphy_global_objects { } return -1; } - + //DeleteObject ((_DataSetFilter*)_data_filters.GetXtra (exists_already)); _data_filters.SetXtra(exists_already, object, false); // this will delete the existing object _NotifyDataFilterListeners (exists_already, kNotificationTypeChange); - + } else { exists_already = _data_filters.Insert (new _String(name), (long)object, false, false); } - - - + + + _SetDataFilterParameters (name, *object); return exists_already; } else { @@ -330,47 +333,51 @@ namespace hyphy_global_objects { } return -1; } - + bool DeleteDataFilter (long index) { + + if (_data_filters.IsValidIndex (index)) { if (_IsObjectLocked(index, HY_BL_DATASET_FILTER)) { return false; } + _NotifyDataFilterListeners (index, kNotificationTypeDelete); + _KillDataFilterParameters( *GetFilterName (index)); _data_filters.Delete ((BaseRef)GetFilterName(index), true); } return true; } - + bool DeleteDataFilter (_String const& name) { return 
DeleteDataFilter (FindDataFilter(name)); } - - + + _String const * GetFilterName (long index) { if (_data_filters.IsValidIndex(index)) { return (_String*)_data_filters.Retrieve(index); } return nil; } - + void ClearFilters (void) { // note that this ignores lock information _data_filters.Clear(true); _data_filter_locks.Clear (); - + } - + void ClearAllGlobals (void) { ClearFilters(); } - + //____________________________________________________________________________________ - + _String const GenerateUniqueObjectIDByType (_String const & base, const long type) { _AVLList * names = nil; _List* legacy_list = nil; - + switch (type) { case HY_BL_DATASET: legacy_list = &dataSetNamesList; @@ -382,23 +389,23 @@ namespace hyphy_global_objects { names = &variableNames; break; } - + _String try_name; - + if (names) { try_name = base; long suffix = 1L; - + while (names->Find (&try_name) >= 0) { try_name = base & "_" & suffix++; } } else if (legacy_list) { try_name = legacy_list->GenerateUniqueNameForList (base, false); } - + return try_name; } - + AVLListXLIterator ObjectIndexer (const long type) { switch (type) { case HY_BL_DATASET_FILTER: @@ -408,7 +415,7 @@ namespace hyphy_global_objects { WarnError (_String("Called ") & __PRETTY_FUNCTION__ & " with an unsupported type"); return AVLListXLIterator (nil); } - + unsigned long CountObjectsByType (const long type) { switch (type) { case HY_BL_DATASET_FILTER: @@ -418,16 +425,16 @@ namespace hyphy_global_objects { WarnError (_String("Called ") & __PRETTY_FUNCTION__ & " with an unsupported type"); return 0UL; } - - + + //____________________________________________________________________________________ - + _String const * GetObjectNameByType (const long type, const long index, bool correct_for_empties) { - + if (index < 0L) { return nil; } - + _List * theList = nil; switch (type) { case HY_BL_DATASET: @@ -436,7 +443,7 @@ namespace hyphy_global_objects { case HY_BL_DATASET_FILTER: theList = &_data_filter_aux; break; - + case 
HY_BL_LIKELIHOOD_FUNCTION: theList = &likeFuncNamesList; break; @@ -452,13 +459,13 @@ namespace hyphy_global_objects { case HY_BL_BGM: theList = &bgmNamesList; break; - + } if (theList) { // account for deleted objects if (!correct_for_empties) return (_String*)theList->GetItemRangeCheck (index); - + long counter = 0; for (unsigned long name_index = 0; name_index < theList->lLength; name_index++) { _String *thisName = (_String*)theList->GetItem(name_index); @@ -473,5 +480,5 @@ namespace hyphy_global_objects { } return nil; } - + } diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index aeff9f76b..414f95cc2 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -1,21 +1,21 @@ /* - + HyPhy - Hypothesis Testing Using Phylogenies. - + Copyright (C) 1997-now Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) - + Significant contributions from: Spencer V Muse (muse@stat.ncsu.edu) Simon DW Frost (sdf22@cam.ac.uk) - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -23,10 +23,10 @@ distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
@@ -34,7 +34,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - + */ //#define _UBER_VERBOSE_LF_DEBUG @@ -243,7 +243,7 @@ globalStartingPoint ("GLOBAL_STARTING_POINT"), addLFSmoothing ("LF_SMOOTHING_SCALER"), reduceLFSmoothing ("LF_SMOOTHING_REDUCTION"); - + extern _String useNexusFileData, VerbosityLevelString, acceptRootedTrees; @@ -316,7 +316,7 @@ void DecideOnDivideBy (_LikelihoodFunction* lf) } - + #ifdef _OPENMP lf->SetThreadCount (1); #endif @@ -327,7 +327,7 @@ void DecideOnDivideBy (_LikelihoodFunction* lf) _Parameter tdiff = timer.TimeSinceStart(); #ifdef _OPENMP -#ifdef __HYPHYMPI__ +#ifdef __HYPHYMPI__ if (systemCPUCount > 1 && _hy_mpi_node_rank == 0) { #else if (systemCPUCount > 1) { @@ -381,7 +381,7 @@ void UpdateOptimizationStatus (_Parameter max, long pdone, char init, boo _FString* t; static TimeDifference timer; - + if (init==0) { lCount = likeFuncEvalCallCount; timer.Start(); @@ -529,7 +529,7 @@ void _LikelihoodFunction::Init (void) branchCaches = nil; parameterValuesAndRanges = nil; optimizatonHistory = nil; - + #ifdef _OPENMP lfThreadCount = 1L; #ifdef __HYPHYMPI__ @@ -604,9 +604,9 @@ _String const * _LikelihoodFunction::GetIthFrequenciesName (long f) const { bool _LikelihoodFunction::MapTreeTipsToData (long f, _String *errorMessage, bool leafScan) // from triplets { _TheTree* t = GetIthTree(f); - + _TreeIterator ti (t, _HY_TREE_TRAVERSAL_POSTORDER | _HY_TREE_TRAVERSAL_SKIP_ROOT); - + _DataSetFilter * df = GetIthFilterMutable (f); long dfDim = df->GetDimension(true); @@ -627,7 +627,7 @@ bool _LikelihoodFunction::MapTreeTipsToData (long f, _String *errorMessage, b return false; } } - + // now that "tips" contains all the names of tree tips we can // scan thru the names in the datafilter and check whether there is a 1-1 match if ((t->IsDegenerate()?2:tips.lLength)!=df->NumberSpecies()) { @@ -650,11 
+650,11 @@ bool _LikelihoodFunction::MapTreeTipsToData (long f, _String *errorMessage, b _Parameter doNum = 0.0; checkParameter (tryNumericSequenceMatch, doNum, 0.0); if (doNum>0.5) { - + try { tipMatches.Clear(); - - + + for (j=0L; jsData); @@ -664,19 +664,19 @@ bool _LikelihoodFunction::MapTreeTipsToData (long f, _String *errorMessage, b throw (j); } } - + if (j==tips.lLength) { if (tipMatches.Find(0L) < 0) // map to indexing from 0 tipMatches.Offset (-1L); _SimpleList const *dfMap = (_SimpleList const*)df->GetMap(); - + if (dfMap) { for (unsigned long k = 0UL; k < tips.lLength; k++) { tipMatches.lData[k] = dfMap->lData[tipMatches.lData[k]]; } } - } + } } catch (unsigned long i) { j = -1L; } @@ -755,21 +755,21 @@ void _LikelihoodFunction::Rebuild (bool rescan_parameters) { void _LikelihoodFunction::UnregisterListeners (void) { unsigned long partition_count = CountObjects(kLFCountPartitions); for (unsigned long i = 0UL; i < partition_count; i++) { - UnregisterChangeListenerForDataFilter(theDataFilters.GetElement(i), this); + UnregisterChangeListenerForDataFilter(theDataFilters.GetElement(i), this); } } - + //_______________________________________________________________________________________ void _LikelihoodFunction::Clear (void) { DeleteCaches (); - - unsigned long partition_count = CountObjects(kLFCountPartitions); - - theTrees.Clear(); + + //unsigned long partition_count = CountObjects(kLFCountPartitions); + UnregisterListeners (); - + + theTrees.Clear(); theDataFilters.Clear(); theProbabilities.Clear(); indexInd.Clear(); @@ -795,7 +795,7 @@ void _LikelihoodFunction::Clear (void) delete (mstCache); mstCache = nil; } - + if (optimizatonHistory) { DeleteObject(optimizatonHistory); optimizatonHistory = nil; @@ -819,15 +819,15 @@ void _LikelihoodFunction::Clear (void) void _LikelihoodFunction::AllocateTemplateCaches (void) { partScalingCache.Clear(); DeleteObject(bySiteResults); - + if (templateKind < 0 || templateKind == _hyphyLFComputationalTemplateBySite) { - 
+ long max_filter_size = 0L; for (unsigned long f=0UL; fGetSiteCountInUnits()); } - + #ifdef __HYPHYMPI__ bySiteResults = new _Matrix (theTrees.lLength+3,max_filter_size,false,true); #else @@ -844,12 +844,12 @@ void _LikelihoodFunction::AllocateTemplateCaches (void) { bool _LikelihoodFunction::CheckIthPartition(unsigned long partition, _String * errorString, _String const * df, _String const * tree, _String const * efv) { _DataSetFilter const* filter = GetIthFilter (partition); - + long filter_dimension = filter->GetDimension(true), freq_dimension = GetIthFrequencies(partition)->GetHDim (); - + if (freq_dimension != filter_dimension) { - + if (df && efv) { WarnOrStoreError(errorString,_String("The dimension of the equilibrium frequencies vector ") & efv->Enquote() & " (" & freq_dimension & ") doesn't match the number of states in the dataset filter (" & filter_dimension & ") " & df->Enquote()); @@ -859,7 +859,7 @@ bool _LikelihoodFunction::CheckIthPartition(unsigned long partition, _String } return false; } - + if (filter->IsNormalFilter() == false) { // do checks for the numeric filter if (filter->NumberSpecies() != 3UL || filter_dimension != 4L) { WarnOrStoreError(errorString,_String ("Datafilters with numerical probability vectors must contain exactly three sequences and contain nucleotide data. 
Had ") & (long) filter->NumberSpecies() & " sequences on alphabet of dimension " & (long) filter_dimension & '.'); @@ -885,7 +885,7 @@ bool _LikelihoodFunction::Construct(_List& triplets, _VariableContainer* the Clear (); long i = 0L; - + for (; i< (long)triplets.lLength-2L; i+=3L) { _String object_name; long objectID; @@ -893,9 +893,9 @@ bool _LikelihoodFunction::Construct(_List& triplets, _VariableContainer* the // add datasetfilter object_name = AppendContainerName (*(_String*)triplets(i), theP); objectID = FindDataFilter (object_name); - + //printf ("[_LikelihoodFunction::Construct] %s / %s\n", object_name.sData, GetFilterName(objectID)->sData); - + if (objectID < 0) { WarnError (_String("Could not locate a datafilter ")& object_name.Enquote()); return false; @@ -927,11 +927,11 @@ bool _LikelihoodFunction::Construct(_List& triplets, _VariableContainer* the } else { theProbabilities<optimalOrders); leafSkips.Duplicate (&lf->leafSkips); templateKind = lf->templateKind; - + if (lf->optimizatonHistory) { optimizatonHistory = new _AssociativeList; optimizatonHistory->Duplicate (lf->optimizatonHistory); } else { optimizatonHistory = nil; } - + if (lf->computingTemplate) { computingTemplate = (_Formula*)lf->computingTemplate->makeDynamic(); } else { @@ -1192,7 +1192,7 @@ void _LikelihoodFunction::GetGlobalVars (_SimpleList& rec) const { //_______________________________________________________________________________________ _Parameter _LikelihoodFunction::GetIthIndependent (long index) const { _Parameter return_value; - + if (parameterValuesAndRanges) { return_value = (*parameterValuesAndRanges)(index,1); } else { @@ -1468,7 +1468,7 @@ _Matrix* _LikelihoodFunction::ConstructCategoryMatrix (const _SimpleList& whi } // compute the number of columns in the matrix - + if (templateKind < 0) { vDim = GetIthFilter(whichParts.lData[0])->GetSiteCountInUnits(); } else @@ -1744,10 +1744,10 @@ bool _LikelihoodFunction::SendOffToMPI (long) #else bool 
_LikelihoodFunction::SendOffToMPI (long index) { // dispatch an MPI task to node 'index+1' - + /* 20170404 SLKP Need to check if the decision to recompute a partition is made correctly. In particular, need to confirm that changes to category variables are handled correctly (e.g. HaveParametersChanged, vs has changed */ - + bool sendToSlave = (computationalResults.GetSize() < parallelOptimizerTasks.lLength); _SimpleList * slaveParams = (_SimpleList*)parallelOptimizerTasks(index); @@ -1959,7 +1959,7 @@ _Parameter _LikelihoodFunction::Compute (void) bool done = false; #ifdef _UBER_VERBOSE_LF_DEBUG - + if (likeFuncEvalCallCount >= 12731) { fprintf (stderr, "\n*** Likelihood function evaluation %ld ***\n", likeFuncEvalCallCount+1); for (unsigned long i=0; i= 0.) { if (result >= __DBL_EPSILON__ * 1.e4) { char buffer [2048]; @@ -2180,9 +2180,9 @@ _Parameter _LikelihoodFunction::Compute (void) result = 0.; } } - + ComputeParameterPenalty (); - + _Parameter regularized_value = result - smoothingPenalty; return regularized_value; } @@ -2208,7 +2208,7 @@ bool _LikelihoodFunction::HasBlockChanged(long index) const { void _LikelihoodFunction::RecurseConstantOnPartition (long blockIndex, long index, long dependance, long highestIndex, _Parameter weight, _Matrix& cache) { _CategoryVariable* thisC = (_CategoryVariable*)LocateVar(indexCat.lData[index]); - + if (indexIsHiddenMarkov()) { RecurseCategory (blockIndex, index+1, dependance,highestIndex,weight); @@ -2257,7 +2257,7 @@ void _LikelihoodFunction::RecurseConstantOnPartition (long blockIndex, long for (long kk = 0; kkGetFrequency(kk); } - + log_sum += myLog (category_weights->theData[category_index]*weight); cache.theData[categID] = log_sum; @@ -2477,11 +2477,11 @@ void _LikelihoodFunction::CheckFibonacci (_Parameter shrinkFactor) //_______________________________________________________________________________________ void _LikelihoodFunction::CheckDependentBounds (void) { -/* +/* this function makes sure that a constrained 
optimization starts within the domain of allowed parameter values */ - + if (!indexDep.lLength) { // nothing to do here return; } @@ -2713,24 +2713,24 @@ inline _Parameter sqr (_Parameter x) _TheTree * tree = GetIthTree (partition_index); _Matrix * eq_freqs = GetIthFrequencies(partition_index); unsigned long tip_count = filter->NumberSpecies(); - + if (filter->GetData()->GetTT()->IsStandardNucleotide() && filter->IsNormalFilter() && tip_count<150 && eq_freqs->IsIndependent()) { // if not - use distance estimates - + if (tree->IsDegenerate()) { continue; } - + unsigned long inode_count = 0UL; - + _TreeIterator ti (tree, _HY_TREE_TRAVERSAL_PREORDER | _HY_TREE_TRAVERSAL_SKIP_ROOT); - + _SimpleList node_to_index_support ; _AVLListX node_to_index (&node_to_index_support); _List root_paths; - + bool two = false; - + while (_CalcNode* iterator = ti.Next()) { if (ti.IsAtLeaf()) { _SimpleList * indexed_path = new _SimpleList; @@ -2747,24 +2747,24 @@ inline _Parameter sqr (_Parameter x) } else { node_to_index.Insert ((BaseRef)ti.GetNode(), inode_count++); } - + _SimpleList avl_storage; _AVLList node_variables (&avl_storage); iterator->ScanContainerForVariables(node_variables,node_variables); - + if (node_variables.countitems()>=2) { two = true; } } - + // first of all, construct the matrix of branch sums, // which will be topology dependent // the branches are associated with the node they terminate with - + _Matrix theSystem ((tip_count-1)*tip_count/2,inode_count+tip_count,true,true); - + unsigned long eq_index = 0UL; - + for (unsigned long row = 0UL; row < tip_count-1 ; row++) { for (unsigned long column = row + 1UL; column < tip_count; column++, eq_index++) { theSystem.Store(eq_index,row,1.0); @@ -2776,12 +2776,12 @@ inline _Parameter sqr (_Parameter x) } } } - + _Matrix transversions (theSystem.GetHDim(),1,false,true), transitions (theSystem.GetHDim(),1,false,true), jc (theSystem.GetHDim(),1,false,true), transpose(theSystem); - + transpose.Transpose(); _Matrix AAst 
(transpose); AAst*=theSystem; @@ -2799,9 +2799,9 @@ inline _Parameter sqr (_Parameter x) } else { freq = filter->HarvestFrequencies (1,1,false); } - + _Matrix diffs (1,7,false,true); - + // compute the universal frequency weights _Parameter tmp,t2, cR = (*freq)[0]+(*freq)[2], @@ -2810,9 +2810,9 @@ inline _Parameter sqr (_Parameter x) c2=2*(*freq)[1]*(*freq)[3]/cY, c3=2*((*freq)[0]*(*freq)[2]*cY/cR +(*freq)[1]*(*freq)[3]*cR/cY), comps, fP = 1.0-sqr((*freq)[0])-sqr((*freq)[1])-sqr((*freq)[2])-sqr((*freq)[3]); - + DeleteObject(freq); - + eq_index = 0UL; for (unsigned long row = 0UL; row 0.0) { t2 = -c1*log(tmp); } else { t2 = c1*100; } - + tmp = 1.0-1.0/c2*P2-.5/cY*Q; if (tmp>0.0) { t2 -= c2*log(tmp); } else { t2 += c2*100; } - + tmp = 1-.5/(cR*cY)*Q; if (tmp>0.0) { t2 += c3*log(tmp); } else { t2 += c3*100; } - + transitions[eq_index]= t2; } _Parameter P = (Q+P1+P2)/comps; @@ -2867,11 +2867,11 @@ inline _Parameter sqr (_Parameter x) jc[eq_index]=20.0; } } - + _Matrix *trstEst=nil, *trvrEst=nil, *jcEst=nil; - + theSystem=transpose; theSystem*=jc; jc.Clear(); @@ -2895,22 +2895,22 @@ inline _Parameter sqr (_Parameter x) // if two parameters present - set first one to transition and the 2nd one to transversion // for the third parameter and afterwards use the average of the two // produce a list of depthwise traversed nodes - + ti.Reset(); ti.Next(); // skip the root - + unsigned long tip_index = 0UL, inode_index = 0UL; - + while (_CalcNode* iterator = ti.Next()) { _SimpleList independent_vars_l, dependent_vars_l; - + _AVLList independent_vars (&independent_vars_l), dependent_vars (&dependent_vars_l); - + iterator->ScanContainerForVariables(independent_vars,dependent_vars); - + independent_vars.ReorderList(); dependent_vars.ReorderList(); @@ -2946,19 +2946,19 @@ inline _Parameter sqr (_Parameter x) } } } - + DeleteObject(trstEst); DeleteObject(trvrEst); DeleteObject(jcEst); } else { _Parameter initValue = 0.1; checkParameter (globalStartingPoint, initValue, 0.1); - + 
_SimpleList indeps; _AVLList iavl (&indeps); - + tree->ScanContainerForVariables (iavl, iavl); - + for (long vc = 0; vc < indeps.lLength; vc++) { //char buf[512]; _Variable * localVar = LocateVar(indeps.lData[vc]); @@ -2970,8 +2970,8 @@ inline _Parameter sqr (_Parameter x) // snprintf (buf, sizeof(buf),"[PRESET]%s = %g\n", localVar->GetName()->sData, localVar->Compute()->Value()); //BufferToConsole(buf); } - - + + } } } @@ -3284,7 +3284,7 @@ void _LikelihoodFunction::InitMPIOptimizer (void) MPISwitchNodesToMPIMode (slaveNodes); for (long i = 1L; iGetLeafCount(), iNodeCount = cT->GetINodeCount(), atomSize = theFilter->GetUnitLength(); - + long ambig_resolution_count = 1L; if (leafCount > 1UL) { @@ -3427,7 +3427,7 @@ void _LikelihoodFunction::SetupLFCaches (void) { } InitializeArray(siteScalingFactors[i] , patternCount*iNodeCount*cT->categoryCount, 1.); - + // now process filter characters by site / column _List foundCharactersAux; @@ -3483,10 +3483,10 @@ void _LikelihoodFunction::SetupLFCaches (void) { //extern long marginalLFEvals, marginalLFEvalsAmb; //_______________________________________________________________________________________ - + void _LikelihoodFunction::LoggerLogL (_Parameter logL) { if (optimizatonHistory) { - + #ifdef _COMPARATIVE_LF_DEBUG_CHECK if (_comparative_lf_debug_matrix && fabs ((*_comparative_lf_debug_matrix)[_comparative_lf_index] - logL) > 0.001) { char buffer [512]; @@ -3497,7 +3497,7 @@ void _LikelihoodFunction::LoggerLogL (_Parameter logL) { StringToConsole (_String (" = ") & GetIthIndependent(var_id)); NLToConsole(); } - + raise(SIGTRAP); } _comparative_lf_index++; @@ -3507,28 +3507,28 @@ void _LikelihoodFunction::LoggerLogL (_Parameter logL) { (*_comparative_lf_debug_matrix) << logL; } #endif - - + + *((_GrowingVector*) this->optimizatonHistory->GetByKey("LogL")) << logL << ((_AssociativeList*)this->optimizatonHistory->GetByKey("Phases"))->Length(); } } - + 
//_______________________________________________________________________________________ - + void _LikelihoodFunction::LoggerAddGradientPhase (_Parameter precision) { if (optimizatonHistory) { _AssociativeList* new_phase = new _AssociativeList; (*new_phase) < (_associative_list_key_value){"type", new _FString ("Gradient descent")} < (_associative_list_key_value){"precision", new _Constant (precision)}; - - + + *((_AssociativeList*) this->optimizatonHistory->GetByKey("Phases")) < (_associative_list_key_value){nil, new_phase}; } } - + //_______________________________________________________________________________________ - + void _LikelihoodFunction::LoggerAddCoordinatewisePhase (_Parameter shrinkage, char convergence_mode) { if (optimizatonHistory) { _String phase_kind; @@ -3546,19 +3546,19 @@ void _LikelihoodFunction::LoggerAddCoordinatewisePhase (_Parameter shrink phase_kind = "Very slow convergence"; break; } - + _AssociativeList* new_phase = new _AssociativeList; (*new_phase) < (_associative_list_key_value){"type", new _FString ("Directional pass")} < (_associative_list_key_value){"shrinkage", new _Constant (shrinkage)} < (_associative_list_key_value){"mode", new _FString(phase_kind)}; - - + + *((_AssociativeList*) this->optimizatonHistory->GetByKey("Phases")) < (_associative_list_key_value){nil, new_phase}; } } - + //_______________________________________________________________________________________ - + void _LikelihoodFunction::LoggerAllVariables (void) { if (optimizatonHistory) { _AssociativeList* variables = ((_AssociativeList*)this->optimizatonHistory->GetByKey("Parameters")); @@ -3567,11 +3567,11 @@ void _LikelihoodFunction::LoggerAllVariables (void) { } } } - + //_______________________________________________________________________________________ - + void _LikelihoodFunction::LoggerSingleVariable (unsigned long index, _Parameter logL, _Parameter bracket_precision, _Parameter brent_precision, _Parameter bracket_width, unsigned long bracket_evals, 
unsigned long brent_evals) { - + if (optimizatonHistory) { _AssociativeList* new_phase = new _AssociativeList; (*new_phase) < (_associative_list_key_value){"type", new _FString (*GetIthIndependentName(index))} @@ -3581,9 +3581,9 @@ void _LikelihoodFunction::LoggerSingleVariable (unsigned long inde < (_associative_list_key_value){"bracket evals", new _Constant (bracket_evals)} < (_associative_list_key_value){"brent evals", new _Constant (brent_evals)} < (_associative_list_key_value){"brent evals", new _Constant (brent_evals)}; - + *((_AssociativeList*) this->optimizatonHistory->GetByKey("Phases")) < (_associative_list_key_value){nil, new_phase}; - + LoggerLogL (logL); *((_GrowingVector*) (((_AssociativeList*)this->optimizatonHistory->GetByKey("Parameters")))->GetByKey(*GetIthIndependentName(index))) << GetIthIndependent(index); } @@ -3593,24 +3593,24 @@ void _LikelihoodFunction::LoggerSingleVariable (unsigned long inde _Matrix* _LikelihoodFunction::Optimize () { - + if (lockedLFID != -1) { WarnError ("Optimize() could not be executed, because another optimization is already in progress."); return new _Matrix (1,1,false,true); } char buffer [1024]; - + RescanAllVariables (); - + if (optimizatonHistory) { DeleteObject (optimizatonHistory); optimizatonHistory = nil; } - + bool keepOptimizationLog; checkParameter(produceOptimizationLog, keepOptimizationLog, false); - + if (keepOptimizationLog) { optimizatonHistory = new _AssociativeList; (*optimizatonHistory) < (_associative_list_key_value){"LogL", new _GrowingVector} @@ -3621,13 +3621,13 @@ _Matrix* _LikelihoodFunction::Optimize () { < (_associative_list_key_value){"Phases", new _AssociativeList}; /* 0 - N-1 indices - - + + */ } - - - + + + if (indexInd.empty()) { _Matrix * result = new _Matrix (2UL, Maximum(3UL,indexDep.lLength), false, true); @@ -3652,8 +3652,8 @@ _Matrix* _LikelihoodFunction::Optimize () { } } - - + + _Parameter intermediateP, wobble = 0., @@ -3735,7 +3735,7 @@ _Matrix* _LikelihoodFunction::Optimize 
() { computationalResults.Clear(); -#if !defined __UNIX__ || defined __HEADLESS__ || defined __HYPHYQT__ || defined __HYPHY_GTK__ +#if !defined __UNIX__ || defined __HEADLESS__ || defined __HYPHYQT__ || defined __HYPHY_GTK__ SetStatusBarValue (0,maxSoFar,0); #endif @@ -3750,7 +3750,7 @@ DecideOnDivideBy (this); #ifdef __HYPHYMPI__ } #endif - + int skipCG = 0; checkParameter (skipConjugateGradient,skipCG,0); @@ -3767,7 +3767,7 @@ DecideOnDivideBy (this); } } } - + if (!CheckEqual(precision, 0.0)) { GetInitialValues(); } @@ -3817,14 +3817,14 @@ DecideOnDivideBy (this); "\n\t" & optimizationPrecision & " = " & precision & "\n\t" & maximumIterationsPerVariable & " = " & maxItersPerVar &"\n\nInitial parameter values\n"); - + for (unsigned long i = 0UL; i < indexInd.lLength; i++) { - + ReportWarning (_String(LocateVar (indexInd.lData[i])->GetName()->sData) & " = " & GetIthIndependent (i)); } maxItersPerVar *= indexInd.lLength; - + #if !defined __UNIX__ || defined __HEADLESS__ #ifdef __HYPHYMPI__ if (_hy_mpi_node_rank == 0) @@ -3836,12 +3836,12 @@ DecideOnDivideBy (this); #endif int optMethod = optMethodP; - + SetupParameterMapping (); _Matrix variableValues; GetAllIndependent (variableValues); - - + + if (optMethod == 4L && indexInd.lLength == 1) { optMethod = 0L; } @@ -4024,7 +4024,7 @@ DecideOnDivideBy (this); _List *stepHistory = nil; _GrowingVector logLHistory; - + maxSoFar = lastMaxValue = Compute(); logLHistory.Store(maxSoFar); @@ -4200,7 +4200,7 @@ DecideOnDivideBy (this); if (smoothingTerm > 0.) 
{ smoothingTerm *= smoothingReduction; } - + _SimpleList nc2; long ncp = 0, @@ -4233,7 +4233,7 @@ DecideOnDivideBy (this); snprintf (buffer, sizeof(buffer),"\n[Unchanged variables = %ld]", noChange.lLength); BufferToConsole (buffer); } - + if (hardLimitOnOptimizationValue < INFINITY && timer.TimeSinceStart() > hardLimitOnOptimizationValue) { ReportWarning (_String("Optimization terminated before convergence because the hard time limit was exceeded.")); break; @@ -4249,7 +4249,7 @@ DecideOnDivideBy (this); _Parameter prec = Minimum (diffs[0], diffs[1]); prec = Minimum (Maximum (prec, precision), 1.); - + if (gradientBlocks.lLength) { for (long b = 0; b < gradientBlocks.lLength; b++) { maxSoFar = ConjugateGradientDescent (prec, bestMSoFar,true,10,(_SimpleList*)(gradientBlocks(b)),maxSoFar); @@ -4257,7 +4257,7 @@ DecideOnDivideBy (this); } else { maxSoFar = ConjugateGradientDescent (prec, bestMSoFar,true,10,nil,maxSoFar); } - + GetAllIndependent (bestMSoFar); for (unsigned long k = 0UL; k < indexInd.lLength; k++) { ((_GrowingVector*)(*stepHistory)(k))->Store (bestMSoFar.theData[k]); @@ -4267,16 +4267,16 @@ DecideOnDivideBy (this); logLHistory.Store(maxSoFar); } } - + LoggerAddCoordinatewisePhase (divFactor, convergenceMode); - + for (jjj=forward?0:indexInd.lLength-1; forward?(jjj=0; forward?jjj++:jjj--) { if (hardLimitOnOptimizationValue < INFINITY && timer.TimeSinceStart() > hardLimitOnOptimizationValue) { break; } - + unsigned long current_index = doShuffle > 0.1 ? 
shuffledOrder.lData[jjj] : jjj; - + bool amIGlobal = GetIthIndependentVar(current_index)->IsGlobal(); #ifdef __HYPHYMPI__ @@ -4370,7 +4370,7 @@ DecideOnDivideBy (this); if (precisionStep < 1e-6) { precisionStep = 1e-6; }*/ - + } else { if (amIGlobal) brackStep = pow(currentPrecision/**(bestVal>1.?pow(e,long(log(bestVal))):1.)*/, @@ -4382,7 +4382,7 @@ DecideOnDivideBy (this); long brackStepSave = bracketFCount, oneDStepSave = oneDFCount; - + _Parameter lastLogL = maxSoFar; if (useAdaptiveStep>0.5) { @@ -4593,7 +4593,7 @@ DecideOnDivideBy (this); //forceRecomputation = false; result.Store (1,1,indexInd.lLength); result.Store (1,2,CountObjects(kLFCountGlobalVariables)); - + _PMathObj pm; for (unsigned long i=0UL; iGetByKey("LogL"))->Trim(); _AssociativeList* variable_traces = ((_AssociativeList*)optimizatonHistory->GetByKey("Parameters")); - + for (unsigned long var_id = 0; var_id < indexInd.lLength; var_id++) { ((_GrowingVector*)variable_traces->GetByKey(*GetIthIndependentName(var_id)))->Trim(); } @@ -4618,7 +4618,7 @@ DecideOnDivideBy (this); #if !defined __UNIX__ || defined __HEADLESS__ SetStatusBarValue (-1,maxSoFar,(likeFuncEvalCallCount-evalsIn)/TimerDifferenceFunction(true)); #endif - + return (_Matrix*)result.makeDynamic(); } @@ -4870,9 +4870,9 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& SetIthIndependent(6L, GetIthIndependent(6L)); } */ - + while (1) { - + while (middle-leftStep < lowerBound) { if (verbosityLevel > 100) { char buf [512]; @@ -4918,7 +4918,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& } - + if (CheckEqual(middle,saveL)) { middleValue = saveLV; } else if (CheckEqual(middle,saveR)) { @@ -5036,7 +5036,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& _Parameter rc = Compute(); CheckAndSetIthIndependent(index,middle);*/ middleValue = Compute(); - + if (verbosityLevel > 100) { char buf [256]; snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket 
(index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue); @@ -5054,7 +5054,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET SUCCESSFUL: %15.12g <= %15.12g <= %15.12g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); BufferToConsole (buf); } - + bracketFCount+=likeFuncEvalCallCount-funcCounts; bracketCount++; @@ -5204,7 +5204,7 @@ _PMathObj _LikelihoodFunction::CovarianceMatrix (_SimpleList* parameterList) _String fString = _String("function _profileFit(_xxv_,_variableIndex){SetParameter(")&*myName&",_variableIndex,_xxv_);LFCompute(" // &*myName&(",_xxres); fprintf (stdout,\"\\n\",_xxv_,\" \",_xxres); return _xxres;}"); &*myName&(",_xxres);return _xxres;}"); - + /* ___________________________________ ! 
NEW CODE BY AFYP ____________________________________ */ @@ -5246,7 +5246,7 @@ _PMathObj _LikelihoodFunction::CovarianceMatrix (_SimpleList* parameterList) sigLevels.Store (i,5,h); } } - + snprintf (buffer, sizeof(buffer),"%.14g",sigLevels (i,0)); _String checkLFDIFF = _String("CChi2(2*(-_profileFit(") & buffer & "," & j & ")+(" & functionValue & ")),1)"; _PMathObj lf_diff = (_PMathObj) _FString (checkLFDIFF, false).Evaluate(_hyDefaultExecutionContext); @@ -5652,7 +5652,7 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, _Matrix&unit, } } - + if (testStep) { SetIthIndependent(index,currentValue+testStep); gradient[index]=(Compute()-funcValue)/testStep; @@ -5765,7 +5765,7 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio maxSoFar = Compute(), initial_value = maxSoFar, currentPrecision = localOnly?precision:.01; - + if (check_value != A_LARGE_NUMBER) { if (!CheckEqual(check_value, maxSoFar)) { _String errorStr = _String("Internal error in _LikelihoodFunction::ConjugateGradientDescent. 
The function evaluated at current parameter values [") & maxSoFar & "] does not match the last recorded LF maximum [" & check_value & "]"; @@ -5780,7 +5780,7 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio //return; } } - + _SimpleList freeze; @@ -5789,9 +5789,9 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio _SimpleList all (indexInd.lLength,0,1); freeze.Intersect (all, *only_these_parameters); } - - - + + + _Matrix unit (bestVal), gradient (bestVal); @@ -5833,11 +5833,11 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio S = gradient; S *= -1./gradient.AbsValue(); GradientLocateTheBump(localOnly?precision:currentPrecision, maxSoFar, bestVal, S); - + LoggerAddGradientPhase (localOnly?precision:currentPrecision); LoggerAllVariables (); LoggerLogL (maxSoFar); - + if (vl>1) { snprintf (buffer, sizeof(buffer),"Conjugate Gradient Pass %ld, precision %g, gradient step %g, max so far %15.12g\n",index+1,precision,gradientStep,maxSoFar); BufferToConsole (buffer); @@ -5889,14 +5889,14 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio if (maxSoFar < initial_value && CheckEqual(maxSoFar, initial_value) == false) { WarnError (_String("Internal optimization error in _LikelihoodFunction::ConjugateGradientDescent. 
Worsened likelihood score from ") & initial_value & " to " & maxSoFar); } - + if (vl>1) { BufferToConsole("\n"); } - + return maxSoFar; - + } //_______________________________________________________________________________________ @@ -6055,41 +6055,41 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b initialValue = maxSoFar, bp = gPrecision*0.1, lV = 0., rV = 0., ms = 0.; - + _Matrix left ; GetAllIndependent (left); _Matrix right (left), middle (left), newMiddle (left); - + // _GrowingVector brentHistory; - - + + middle = bestVal; - + int outcome = Bracket(-1, lV,ms,rV,leftValue, middleValue, rightValue,bp, &gradient); if (middleValue < initialValue) { SetAllIndependent (&bestVal); FlushLocalUpdatePolicy(); return; } - + if (outcome >=0 && (leftValue > middleValue || rightValue > middleValue)) { WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: bracket reported successful (") & (long)outcome & "), but likelihood values are inconsistent with it. 
" & leftValue & " / " & middleValue & " / " & rightValue & " initial value = " & maxSoFar); return; } - + //printf ("[LogL = %.20g GRADIENT BRACKET %g/%.20g, %g/%.20g, %g/%.20g; %d]\n",maxSoFar,lV,leftValue,ms,middleValue,rV,rightValue, outcome); - + left.AplusBx (gradient, lV); middle.AplusBx (gradient, ms); right.AplusBx (gradient, rV); - + bool reset = false; - + if (outcome!=-1) { // successfull bracket // set up left, right, middle - + if (outcome == -2) { if (middleValue>maxSoFar) { maxSoFar = middleValue; @@ -6101,9 +6101,9 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b FlushLocalUpdatePolicy(); return; } - - - + + + if (outcome == indexInd.lLength) { reset = true; } else { @@ -6120,14 +6120,14 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b // brentHistory.Store (-FX - initialValue); bool parabolic_step = false; XM = .5*(lV+rV); - + _Parameter tol1 = fabs (X) * MIN (gPrecision, 1e-4) + machineEps, tol2 = 2.*tol1; - + if (fabs(X-XM) <= tol2) { break; } - + if (fabs(E)>tol1) { R = (X-W)*(FX-FV); Q = (X-V)*(FX-FW); @@ -6147,21 +6147,21 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b D = (XM - X >= 0.) ? tol1 : -tol1; } } - + } - + if (!parabolic_step) { E = (X >= XM ? lV : rV) - X; D = GOLDEN_RATIO_C * E; } U = fabs (D) >= tol1 ? X + D : X + (D > 0. ? 
tol1 : -tol1); - + //for (index = 0; index < indexInd.lLength; index++) // SetIthIndependent (index,middle.theData[index]+U*gradient.theData[index]); FU = -SetParametersAndCompute (-1,U,&newMiddle,&gradient); //printf ("\n%g\n", FU); - + if (FU<=FX) { // accept the move currentBestPoint = newMiddle; currentBestPoint.AplusBx(gradient, U); @@ -6199,18 +6199,18 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b FV = FU; } } - + } outcome++; - + } - + middleValue = -FX; //brentHistory.Store (0.); if (middleValue <= maxSoFar || CheckEqual(maxSoFar, middleValue)) { //brentHistory.Store (-1.); //brentHistory.Store (middleValue-initialValue); - + SetAllIndependent (&bestVal); maxSoFar = middleValue; } else { @@ -6223,18 +6223,18 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b brentHistory.Store (maxSoFar-initialValue); brentHistory.Store (-FX-initialValue);*/ } - + if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue, 10. * machineEps)) { WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: in the Brent loop iteration ") & long(outcome) & ". 
" & _String (maxSoFar, "%15.12g") & " / " & _String (initialValue,"%15.12g") & ".\n");// & _String ((_String*)brentHistory.toStr())); return; } - + //bestVal = middle; //maxSoFar = middleValue; } //middle = X; } - + else { reset = true; if (verbosityLevel>1) { @@ -6248,17 +6248,17 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b middleValue = rightValue; middle = right; } - + if (middleValue>maxSoFar) { SetAllIndependent (&middle); maxSoFar = middleValue; reset = false; } } - + if (reset) SetAllIndependent (&bestVal); - + FlushLocalUpdatePolicy(); } @@ -6275,7 +6275,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P brentPrec = bracketSetting>0.?bracketSetting:gPrecision; DetermineLocalUpdatePolicy (); - + /*if (optimizatonHistory && ((_AssociativeList*)this->optimizatonHistory->GetByKey("Phases"))->Length() == 2171) { verbosityLevel = 1000; } else { @@ -6285,7 +6285,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P unsigned long inCount = likeFuncEvalCallCount; int outcome = Bracket (index,left,middle,right,leftValue, middleValue, rightValue,bp); unsigned long bracketCount = likeFuncEvalCallCount - inCount; - + if (outcome != -1) { // successfull bracket _Parameter U,V,W,X=middle,E=0.,FX,FW,FV,XM,R,Q,P,ETEMP,D=0.,FU; W = middle; @@ -6294,28 +6294,28 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P FV = FX; FW = FX; outcome = 0; - - + + while (outcome < 20) { XM = .5*(left+right); - + bool parabolic_step = false; - + if (verbosityLevel > 50) { char buf [256]; snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (index %ld) (current max = %15.12g) GOLDEN RATIO INTERVAL CHECK: %g <= %g (%g = %g) <= %g, span = %g]", index, bestVal, left, XM, X, fabs(X-XM), right, right-left); BufferToConsole (buf); } - + if (fabs(X-XM) <= brentPrec) { break; } - + _Parameter tol1 = fabs (X) * Minimum (brentPrec, 1e-7) + machineEps, tol2 = 2.*tol1; - 
- + + if (fabs(E)>tol1) { R = (X-W)*(FX-FV); Q = (X-V)*(FX-FW); @@ -6336,25 +6336,25 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P } } } - - + + if (!parabolic_step) { E = (X >= XM ? left : right) - X; D = GOLDEN_RATIO_C * E; } - + U = fabs (D) >= tol1 ? X + D : X + (D > 0. ? tol1 : -tol1); - + //U = X + D; SetIthIndependent (index,U); FU = -Compute(); - + if (verbosityLevel > 50) { char buf [256]; snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (index %ld) GOLDEN RATIO TRY: param %20.16g, log L %20.16g]", index, U, -FU); BufferToConsole (buf); } - + if (FU<=FX) { // value at U is the new minimum if (verbosityLevel > 50) { char buf [256]; @@ -6398,9 +6398,9 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P } } outcome++; - + } - + if (verbosityLevel > 50) { char buf [256]; snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (index %ld) GOLDEN RATIO SEARCH SUCCESSFUL: precision %g, parameter moved from %15.12g to %15.12g, Log L new/old = %15.12g/%15.12g ]\n\n", index, brentPrec, bestVal, X, -FX, maxSoFar); @@ -6408,7 +6408,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P } middleValue = -FX; middle = X; - + if (middleValue 50) { char buf [256]; @@ -6438,7 +6438,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P if (index >= 0) { LoggerSingleVariable (index, maxSoFar, bp, brentPrec, outcome != -1 ? 
right-left : -1., bracketCount, likeFuncEvalCallCount-inCount-bracketCount); } - + oneDFCount += likeFuncEvalCallCount-inCount-bracketCount; oneDCount ++; FlushLocalUpdatePolicy (); @@ -6866,9 +6866,9 @@ long _LikelihoodFunction::DependOnTree (_String const & treeName) const { //_______________________________________________________________________________________ long _LikelihoodFunction::DependOnDS (long ID) const { - + void * data_set_pointer = dataSetList.GetItem (ID); - + for (long k = 0L; k GetData() == data_set_pointer) { return k; @@ -6888,7 +6888,7 @@ long _LikelihoodFunction::DependOnModel (_String const& modelTitle) const { if (iterator->GetModelIndex() == modelIndex) { return k; } - + } } } @@ -6914,8 +6914,8 @@ void _LikelihoodFunction::ScanAllVariables (void) cpCat, treeSizes, rankVariablesSupp; - - + + _AVLListX rankVariables (&rankVariablesSupp); @@ -6971,7 +6971,7 @@ void _LikelihoodFunction::ScanAllVariables (void) ((_TheTree*)(LocateVar(theTrees(i))))->ScanAndAttachVariables (); ((_TheTree*)(LocateVar(theTrees(i))))->ScanForGVariables (iia, iid,&rankVariables, treeSizes.GetElement (i) << 16); } - + for (unsigned long i=0; iScanContainerForVariables (iia, iid, &rankVariables, 1 + treeSizes.GetElement (i)); @@ -7084,8 +7084,8 @@ void _LikelihoodFunction::ScanAllVariables (void) _Parameter l = DEFAULTLOWERBOUND*(1.0-machineEps), u = DEFAULTUPPERBOUND*(1.0-machineEps); - - + + for (unsigned long i=0; i %d\n", _cv->theName->sData, rankVariables.GetXtra(rankVariables.Find ((BaseRef)indexInd.lData[i]))); } - + for (unsigned long i=0; iGetLowerBound()<=l) { @@ -7306,7 +7306,7 @@ void _LikelihoodFunction::Cleanup (void) { Clear(); DeleteObject (parameterValuesAndRanges); - + #ifdef MDSOCL for (int i = 0; i < theTrees.lLength; i++) { @@ -7416,7 +7416,7 @@ void _LikelihoodFunction::Setup (bool check_reversibility) leafSkips.Clear(); treeTraversalMasks.Clear(); - + if (!check_reversibility) { if (canUseReversibleSpeedups.countitems() != theTrees.lLength) { 
check_reversibility = true; @@ -7431,7 +7431,7 @@ void _LikelihoodFunction::Setup (bool check_reversibility) _Parameter assumeRev = 0.; checkParameter (assumeReversible,assumeRev,0.0); - + for (unsigned long i=0UL; iHasChanged(); } ); - + } //#define _HY_GPU_EXAMPLE_CALCULATOR @@ -7618,9 +7618,9 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, #ifdef MDSOCL - return t->OCLLikelihoodEvaluator (changedBranches, - df, - conditionalInternalNodeLikelihoodCaches[index], + return t->OCLLikelihoodEvaluator (changedBranches, + df, + conditionalInternalNodeLikelihoodCaches[index], conditionalTerminalNodeStateFlag[index], (_GrowingVector*)conditionalTerminalNodeLikelihoodCaches(index), OCLEval[index]); @@ -7683,7 +7683,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, } else { snID = t->DetermineNodesForUpdate (*branches, matrices,catID,*cbid,canClear); } - + #ifdef _UBER_VERBOSE_LF_DEBUG fprintf (stderr, "\nCached %ld (%ld)/New %ld (%ld)\n", *cbid, nodeID, snID, matrices->lLength); #endif @@ -7709,7 +7709,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, } else { doCachedComp = nodeID; } - + } else { RestoreScalingFactors (index, *cbid, patternCnt, scc, sccb); @@ -7747,7 +7747,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, _Parameter sum = 0.; - + if (doCachedComp >= 3) { #ifdef _UBER_VERBOSE_LF_DEBUG fprintf (stderr, "CACHE compute branch %d\n",doCachedComp-3); @@ -7773,7 +7773,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, #ifdef _UBER_VERBOSE_LF_DEBUG fprintf (stderr, "NORMAL compute lf \n"); #endif - + _Parameter* thread_results = new _Parameter[np]; #pragma omp parallel for default(shared) schedule(static,1) private(blockID) num_threads (np) if (np>1) for (blockID = 0; blockID < np; blockID ++) { @@ -7794,9 +7794,9 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, 
branchIndex, branchIndex >= 0 ? branchValues->lData: nil); } - - + + if (np > 1) { _Parameter correction = 0.; for (blockID = 0; blockID < np; blockID ++) { @@ -7805,15 +7805,15 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, correction = (temp_sum - sum) - thread_results[blockID]; sum = temp_sum; } - + } else { sum = thread_results[0]; } - + /*#ifdef _UBER_VERBOSE_LF_DEBUG static _Parameter previous_results [4096]; static long previous_scalers [4096]; - + if (likeFuncEvalCallCount > 12050) { abort (); } @@ -7826,14 +7826,14 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, } if (likeFuncEvalCallCount >= 12000) { - + for (long i = 0L; i < sitesPerP - 1; i++) { previous_results [catID * sitesPerP + i] = siteRes[i]; previous_scalers [catID * sitesPerP + i] = scc [i]; } } #endif*/ - + delete [] thread_results; /* #pragma omp parallel for default(shared) schedule(static,1) private(blockID) num_threads (np) reduction(+:sum) if (np>1) @@ -7864,18 +7864,18 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, doCachedComp = -doCachedComp-1; //printf ("Set up %d\n", doCachedComp); *cbid = doCachedComp; - + overallScalingFactorsBackup.lData[index] = overallScalingFactors.lData[index]; if (sccb) for (long recoverIndex = 0; recoverIndex < patternCnt; recoverIndex++) { sccb[recoverIndex] = scc[recoverIndex]; } - + /*for (unsigned long p_id = 0; p_id < indexInd.lLength; p_id++) { printf ("%ld %s = %15.12g\n", p_id, GetIthIndependentVar(p_id)->GetName()->sData, (*parameterValuesAndRanges)(p_id,0)); }*/ - + #pragma omp parallel for default(shared) schedule(static,1) private(blockID) num_threads (np) if (np>1) for (blockID = 0; blockID < np; blockID ++) { t->ComputeBranchCache (*sl,doCachedComp, bc, inc, df, @@ -7888,7 +7888,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, (1+blockID) * sitesPerP, catID,tcc,siteRes); } - + // check results if (sum > 
-A_LARGE_NUMBER) { @@ -7901,7 +7901,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, catID, siteRes) - _logLFScaler * overallScalingFactors.lData[index]; - + if (fabs ((checksum-sum)/sum) > 0.00001) { /*_Parameter check2 = t->ComputeTreeBlockByBranch (*sl, *branches, @@ -7922,15 +7922,15 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, _String* node_name = GetIthTree (index)->GetNodeFromFlatIndex(doCachedComp)->GetName(); - + WarnError (_String("Internal error in ComputeBranchCache (branch ") & *node_name & - " ) reversible model cached likelihood = "& checksum & ", directly computed likelihood = " & sum & + " ) reversible model cached likelihood = "& checksum & ", directly computed likelihood = " & sum & ". This is most likely because a non-reversible model was incorrectly auto-detected (or specified by the model file in environment variables)."); WarnError ("Bailing"); return -A_LARGE_NUMBER; } } - + // need to update siteRes when computing cache and changing scaling factors! 
} return sum; @@ -8308,10 +8308,10 @@ void _LikelihoodFunction::OptimalOrder (long index, _SimpleList& sl) long level, nc = 0; - + node_iterator ni (spanningTreeRoot, _HY_TREE_TRAVERSAL_POSTORDER); - + while (node* iterator = ni.Next()) { if (iterator != spanningTreeRoot) { long maxLevel2 = 0L; @@ -8554,7 +8554,7 @@ unsigned long _LikelihoodFunction::CountObjects (_LikelihoodFunctionCountType case kLFCountCategoryVariables: return indexCat.lLength; } - + return theTrees.lLength; } @@ -8649,8 +8649,8 @@ void _LikelihoodFunction::SerializeLF(_String & rec, char opt, long tIdx = dataSetList._SimpleList::Find( (long)(GetDataFilter(redirector->Get(idx)) ->GetData())); - - + + tIdx = taggedDS.Insert((BaseRef) tIdx, taggedDS.countitems()); if (tIdx < 0L) { @@ -8948,16 +8948,16 @@ void _LikelihoodFunction::SerializeLF(_String & rec, char opt, << ',' << _String((long) theDF->GetUnitLength()) << ','; - + if (horPart) { rec << horPart->Enquote('"'); DeleteObject(horPart); } horPart = (_String *)theDF->theNodeMap.ListToPartitionString(); - + rec << ',' << horPart->Enquote('"'); - + DeleteObject(horPart); horPart = theDF->GetExclusions(); @@ -9100,7 +9100,7 @@ BaseRef _LikelihoodFunction::toStr (unsigned long) { } else { snprintf (str, sizeof(str), "\n%s=%.16g;", thisVar->GetName()->getStr(),(double)GetIthIndependent(i)); } - + res<GetLowerBound(),DEFAULTPARAMETERLBOUND)) { @@ -9115,9 +9115,9 @@ BaseRef _LikelihoodFunction::toStr (unsigned long) { if (indexDep.lLength>0) { for (long i=0; iGetName(), thisVar->GetFormulaString(), kAppendAnAssignmentToBufferFree | (thisVar->IsGlobal() ? 
kAppendAnAssignmentToBufferGlobal : 0)); - + if (!CheckEqual(thisVar->GetLowerBound(),DEFAULTPARAMETERLBOUND)) { snprintf (str, sizeof(str), "\n%s:>%.16g;", thisVar->GetName()->getStr(),(double)thisVar->GetLowerBound()); res<GetName(); l1 = currentTree->GetName()->Length(); res<<'='; - + _TreeIterator ti (currentTree, _HY_TREE_TRAVERSAL_POSTORDER); - + _CalcNode* currentNode=ti.Next(), * nextNode; - + level = ti.Depth(); nextNode=ti.Next(); - + _SimpleList iV, dV2; // decide if we can use expected substituion measure bool useExpectedSubstitutions = longOrShort<3; @@ -9213,7 +9213,7 @@ BaseRef _LikelihoodFunction::toStr (unsigned long) { res<<','; } res.AppendNCopies('(', level-lastLevel); - + } else if (level 1.5) { category_simulation_mode = kLFSimulateCategoriesContinuous; } else { @@ -9359,8 +9359,8 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { category_simulation_mode = kLFSimulateCategoriesDiscrete; } } - - + + if (category_simulation_mode == kLFSimulateCategoriesContinuous) { for (unsigned long cat_index = 0UL; cat_index < indexCat.lLength; cat_index ++) { _CategoryVariable* ith_category = GetIthCategoryVar(cat_index); @@ -9382,68 +9382,68 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { discrete_category_variables << cat_index; } } - + if (catNames && indexCat.lLength) { - + catNames->Clear(); CreateMatrix (catNames,indexCat.lLength,1,false,true,false); catNames->Convert2Formulas(); - + _SimpleList* all_arrays[3] = { & HMM_category_variables, & discrete_category_variables, & continuous_category_variables }; - + unsigned long through_index = 0UL; - + for (_SimpleList * array : all_arrays) { for (unsigned long i = 0UL; i < array->lLength; i++, through_index++) { catNames->StoreFormula (through_index,0L,*new _Formula (new _FString (*LocateVar (array->GetElement(i))->GetName())), false, false); } } - + } } - + _DataSetFilter const *first_filter = GetIthFilter(0); - + unsigned long species_count = 
first_filter->NumberSpecies(); - + for (unsigned long sequence_index = 0UL; sequence_index < species_count; sequence_index ++) { target.AddName(*first_filter->GetSequenceName(sequence_index)); } - + unsigned long internal_node_count = 0UL; - + if (storeIntermediates && storeIntermediates->sLength == 0) { GetIthTree (0L)->AddNodeNamesToDS (&target,false,true,0); // only add internal node names internal_node_count = target.GetNames().lLength - species_count; } target.SetTranslationTable (first_filter->GetData()); - + unsigned long sequences_to_simulate = target.GetNames().lLength, site_offset_raw = 0UL, // raw offset (e.g. in nucleotides for codon data site_offset = 0UL, total_sites = 0UL; - + for (unsigned long i = 0UL; iGetSiteCountInUnits(); } - + if (catValues && indexCat.lLength) { catValues->Clear(); CreateMatrix (catValues,indexCat.lLength,total_sites,false,true,false); } - + TimeDifference timer; - - + + bool column_wise = false; - - + + for (unsigned long partition_index = 0UL; partition_indexNumberSpecies()+internal_node_count != sequences_to_simulate) { ReportWarning (_String ("Ignoring partition ") & _String ((long) (partition_index + 1L)) & " of the likelihood function since it has a different number of sequences/tree than the first part."); @@ -9451,8 +9451,8 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { } _Parameter * this_freqs = ((_Matrix*)GetIthFrequencies(partition_index)->ComputeNumeric())->fastIndex(); _TheTree *this_tree = GetIthTree (partition_index); - - + + unsigned long this_site_count = this_filter->GetSiteCountInUnits(), leaf_count = 0L, good_sites = 0L, @@ -9460,14 +9460,14 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { sites_per_unit = this_filter->GetUnitLength(), this_raw_site_count = this_filter->GetSiteCount(); - + if (theExclusions.lLength>partition_index) { _List* user_exclusions = (_List*)theExclusions(partition_index); - - + + if (user_exclusions->countitems() > 0) { const 
_TranslationTable* this_translation_table = this_filter->GetTranslationTable(); - + for (unsigned long state = 0UL; state < user_exclusions->lLength; state++) { long resolved_state = this_translation_table->MultiTokenResolutions(*(_String*)user_exclusions->GetItem(state), NULL); if (resolved_state < 0L) { @@ -9476,102 +9476,102 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { user_exclusions_numeric << state; } } - + column_wise = true; } } - + if (category_simulation_mode != kLFSimulateCategoriesNone) { column_wise = true; } - + if (column_wise) { - + _TheTree * this_tree = GetIthTree (partition_index); this_tree->SetUpMatrices(1); while (good_sites < this_site_count) { - + if (category_simulation_mode != kLFSimulateCategoriesNone ) { - + for (unsigned long hmm_category_index =0UL; hmm_category_index < HMM_category_variables.lLength; hmm_category_index ++) { // deal with HMM - + _CategoryVariable* hmm_cat = GetIthCategoryVar(HMM_category_variables(hmm_category_index)); - + _Matrix* category_weight_matrix = hmm_cat->GetWeights(); _Parameter* category_weights; - + unsigned long category_count = hmm_cat->GetNumberOfIntervals(); - + if (good_sites == 0L) { category_weights = category_weight_matrix->fastIndex(); } else { _Matrix * hmm = hmm_cat->ComputeHiddenMarkov(); category_weights = hmm->theData+hmm->GetVDim()* HMM_state(hmm_category_index); } - + unsigned long root_state = DrawFromDiscrete(category_weights, category_count); - + hmm_cat->SetIntervalValue(root_state); if (good_sites > 0L) { HMM_state [hmm_category_index] = root_state; } - + if (catValues) { catValues->Store (hmm_category_index,site_offset+good_sites,hmm_cat->Compute()->Value()); } } - + for (unsigned long discrete_category_index = 0UL; discrete_category_index < discrete_category_variables.lLength; discrete_category_index++) { - - + + _CategoryVariable* discrete_cat = GetIthCategoryVar(discrete_category_variables(discrete_category_index)); - + unsigned long category_value = 
DrawFromDiscrete(discrete_cat->GetWeights()->fastIndex(), discrete_cat->GetNumberOfIntervals()); - + discrete_cat->SetIntervalValue(category_value); - + if (catValues) { catValues->Store (discrete_category_index+HMM_category_variables.lLength,site_offset+good_sites,discrete_cat->Compute()->Value()); } } - + for (unsigned long continuous_category_index = 0UL; continuous_category_index < continuous_category_variables.lLength; continuous_category_index++) { // use discrete values here - + _CategoryVariable* continuous_cat_var = GetIthCategoryVar(continuous_category_variables(continuous_category_index)); - + _Parameter category_value = continuous_cat_var->GetCumulative().Newton(continuous_cat_var->GetDensity(),MAX (genrand_real2(), 1e-30),continuous_cat_var->GetMinX(),continuous_cat_var->GetMaxX(),_x_); - + continuous_cat_var->SetValue(new _Constant (category_value), false); if (catValues) { catValues->Store (continuous_category_index+discrete_category_variables.lLength+HMM_category_variables.lLength,site_offset+good_sites,category_value); } } } // end category initialization block - + unsigned long root_state; - + if (spawnValues) { root_state = spawnValues->theData[site_offset+good_sites]; } else { root_state = DrawFromDiscrete(this_freqs, filter_dimension); } - - + + _SimpleList ancestral_values, leaf_values; - + if (SingleBuildLeafProbs (this_tree->GetRoot(), root_state, leaf_values, user_exclusions_numeric,this_tree, true, this_filter, storeIntermediates?&ancestral_values:nil)) { good_sites++; //add this site to the simulated dataset - - - + + + _String simulated_unit (this_filter->ConvertCodeToLetters(this_filter->CorrectCode(leaf_values(0)), sites_per_unit)); for (unsigned long character_index = 0UL; character_index < sites_per_unit; character_index ++) { @@ -9586,12 +9586,12 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { } } - + target.ResetIHelper(); for (unsigned long character_index = 0UL; character_index < sites_per_unit; 
character_index ++) { target.Compact(site_offset_raw + leaf_count - sites_per_unit + character_index); } - + if (storeIntermediates && storeIntermediates->sLength == 0UL) { for (unsigned long internal_node_index = 0UL; internal_node_index < internal_node_count; internal_node_index++) { simulated_unit = this_filter->ConvertCodeToLetters(this_filter->CorrectCode(ancestral_values(internal_node_index)), sites_per_unit); @@ -9602,13 +9602,13 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { for (unsigned long character_index = 0UL; character_index < sites_per_unit; character_index ++) { target.Compact(site_offset_raw + leaf_count - sites_per_unit + character_index); } - + } } } - + _Parameter time_elapsed = timer.TimeSinceStart(); - + if (time_elapsed > .25) { #if !defined __UNIX__ || defined __HEADLESS__ @@ -9620,12 +9620,12 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { this_tree->CleanUpMatrices(); } else {// end simulate column by column - - + + unsigned long * simulated_sequence = new unsigned long [this_site_count]; - + // generate a random "spawning vector" - + if (spawnValues) { // use supplied starting values for (unsigned long site_index = 0UL; site_index < this_site_count; site_index++) { simulated_sequence[site_index] = spawnValues->theData[site_index+site_offset]; @@ -9635,12 +9635,12 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { simulated_sequence[site_index] = DrawFromDiscrete(this_freqs, filter_dimension); } } - + // now proceed down the tree branches to get the values of the probabilities at the leaves // this is done recursively - + _DataSet * ancestral_sequences = nil; - + if (storeIntermediates) { if (storeIntermediates->sLength) { FILE * file_for_ancestral_sequences = doFileOpen (storeIntermediates->sData,"w"); @@ -9656,12 +9656,12 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { } else { ancestral_sequences = new _DataSet (this_site_count); } - + 
ancestral_sequences->SetTranslationTable (this_filter->GetData()); } - + BuildLeafProbs (this_tree->GetRoot(), simulated_sequence, this_site_count, target, this_tree, leaf_count, true, sites_per_unit, this_filter, site_offset_raw ,ancestral_sequences); - + if (ancestral_sequences) { ancestral_sequences->Finalize(); if (storeIntermediates->sLength == 0) { @@ -9673,15 +9673,15 @@ void _LikelihoodFunction::StateCounter (long functionCallback) { } DeleteObject (ancestral_sequences); } - + delete [] simulated_sequence; } // end over sequence-wise simulation - + site_offset_raw += this_raw_site_count; site_offset += this_site_count; } // end loop over partitions - + target.Finalize(); target.SetNoSpecies(target.GetNames().lLength); } @@ -9789,22 +9789,22 @@ void _LikelihoodFunction::BuildLeafProbs (node& curNode, long unsigned //_______________________________________________________________________________________ bool _LikelihoodFunction::SingleBuildLeafProbs (node& curNode, long parentState, _SimpleList& target, _SimpleList& theExc, _TheTree* curTree, bool isRoot, _DataSetFilter const* dsf, _SimpleList * iNodes) const { - + long myState = parentState; - + if (!isRoot) { - + _CalcNode* ccurNode = (_CalcNode*)LocateVar (curNode.get_data()); - + if (ccurNode->NeedNewCategoryExponential(-1)) { ccurNode->RecomputeMatrix(0,1); } - + unsigned long matrix_dimension = ccurNode->GetCompExp()->GetVDim(); - - + + _Parameter* fastI = ccurNode->GetCompExp()->fastIndex()+parentState*matrix_dimension; - + myState = DrawFromDiscrete(fastI, matrix_dimension); if (! 
curNode.is_leaf()) { @@ -9829,9 +9829,9 @@ void _LikelihoodFunction::BuildLeafProbs (node& curNode, long unsigned (*iNodes)<Find((BaseRef)indexInd.lData[k]); @@ -10333,37 +10333,37 @@ void _LikelihoodFunction::RankVariables(_AVLListX* tagger) } } } - + SortLists (&varRank,&indexInd); gradientBlocks.Clear(); - - // enforce user provided rankings - + + // enforce user provided rankings + _AssociativeList * variableGrouping = (_AssociativeList*)FetchObjectFromVariableByType(&userSuppliedVariableGrouping, ASSOCIATIVE_LIST); if (variableGrouping) { - + _SimpleList hist, supportList; - + _AVLListX existingRanking (&supportList); - + long ls, - cn = variableGrouping->avl.Traverser (hist,ls,variableGrouping->avl.GetRoot()); - + cn = variableGrouping->avl.Traverser (hist,ls,variableGrouping->avl.GetRoot()); + for (unsigned long vi = 0; vi < indexInd.lLength; vi ++ ) { existingRanking.Insert((BaseRef)indexInd.lData[vi], vi, true); - } + } - long offset = 1; + long offset = 1; bool re_sort = false; - + while (cn >= 0) { _PMathObj anEntry = (_PMathObj)variableGrouping->avl.GetXtra (cn); if (anEntry->ObjectClass() == MATRIX) { _Matrix *variableGroup = (_Matrix*) anEntry; if (variableGroup -> IsAStringMatrix()) { unsigned long dimension = variableGroup->GetHDim() * variableGroup->GetVDim (); - + _SimpleList thisBlock; for (unsigned long variable_id = 0; variable_id < dimension; variable_id ++) { _String variableID ((_String*)variableGroup->GetFormula (variable_id,-1)->Compute()->toStr()); @@ -10385,25 +10385,25 @@ void _LikelihoodFunction::RankVariables(_AVLListX* tagger) } if (re_sort) { _SimpleList new_ranks; - + for (unsigned long vi = 0; vi < indexInd.lLength; vi ++ ) { new_ranks << existingRanking.GetXtra(existingRanking.Find ((BaseRef)indexInd.lData[vi])); } SortLists (&new_ranks,&indexInd); - + if (gradientBlocks.lLength) { _SimpleList aux_list, included (indexInd.lLength, 0,0), not_listed; - + _AVLListX indexIndToGlobalID (&aux_list); - + for (unsigned long vi = 0; vi < 
indexInd.lLength; vi ++ ) { indexIndToGlobalID.Insert((BaseRef)indexInd.lData[vi], vi, true); //printf ("[%ld]\n",indexInd.lData[vi] ); } - + for (long b = 0; b < gradientBlocks.countitems(); b++) { _SimpleList *a_block = (_SimpleList*)(gradientBlocks(b)); for (long i = 0; i < a_block->countitems(); i++){ @@ -10422,7 +10422,7 @@ void _LikelihoodFunction::RankVariables(_AVLListX* tagger) gradientBlocks.Delete(b--); } } - + if (gradientBlocks.lLength) { for (long t = 0; t < included.lLength; t++) { if (included.lData[t]==0) { @@ -10433,19 +10433,19 @@ void _LikelihoodFunction::RankVariables(_AVLListX* tagger) gradientBlocks && & not_listed; } } - + /*for (long b = 0; b < gradientBlocks.lLength; b++) { _SimpleList *a_block = (_SimpleList*)(gradientBlocks(b)); for (long i = 0; i < a_block->lLength; i++){ printf ("Block %ld variable %s\n", b, LocateVar(indexInd.lData[a_block->lData[i]])->GetName()->sData); } }*/ - + } - + } } - + } //_______________________________________________________________________________________ @@ -10461,7 +10461,7 @@ _CustomFunction::_CustomFunction (_String* arg) { _AVLList al (&myVars); myBody.ScanFForVariables(al,true,false,false); al.ReorderList(); - + for (unsigned long k=0UL; kIsIndependent()) { indexInd << myVars.lData[k]; From d88e164589ec1493aab135e34ea7c923c4872c93 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Thu, 21 Jun 2018 13:24:41 -0400 Subject: [PATCH 15/53] Restroing an accidentally deleted else --- src/core/global_object_lists.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/global_object_lists.cpp b/src/core/global_object_lists.cpp index 6938e4568..182c75671 100644 --- a/src/core/global_object_lists.cpp +++ b/src/core/global_object_lists.cpp @@ -241,8 +241,9 @@ namespace hyphy_global_objects { return true; } } + } else { + WarnError (_String("Not a supported listener type in call to ") & _String (__PRETTY_FUNCTION__)); } - WarnError (_String("Not a supported listener type in call to ") & 
_String (__PRETTY_FUNCTION__)); } return false; From 952771adda83f7483b5416ca2fed169e0f7a85f4 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 21 Jun 2018 14:16:02 -0400 Subject: [PATCH 16/53] CreateLF fix --- res/TemplateBatchFiles/libv3/tasks/estimators.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 49329111d..75d812c33 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -738,7 +738,7 @@ lfunction estimators.CreateLFObject (context, data_filter, tree, model_template, } }, { "0": tree - }, model_template, initial_values, run_options) + }, model_template, initial_values, run_options, model_objects); } components = utility.Array1D(data_filter); From 699174056c45a87d1033c80ab8065b1cc98ade13 Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Fri, 22 Jun 2018 09:46:34 -0400 Subject: [PATCH 17/53] #809 : moving QSD to Deprecated folder; removing QSD from menu option list --- .../{ => Deprecated}/QuickSelectionDetection.bf | 0 res/TemplateBatchFiles/files.lst | 1 - 2 files changed, 1 deletion(-) rename res/TemplateBatchFiles/{ => Deprecated}/QuickSelectionDetection.bf (100%) diff --git a/res/TemplateBatchFiles/QuickSelectionDetection.bf b/res/TemplateBatchFiles/Deprecated/QuickSelectionDetection.bf similarity index 100% rename from res/TemplateBatchFiles/QuickSelectionDetection.bf rename to res/TemplateBatchFiles/Deprecated/QuickSelectionDetection.bf diff --git a/res/TemplateBatchFiles/files.lst b/res/TemplateBatchFiles/files.lst index 6141516eb..e699be5dc 100644 --- a/res/TemplateBatchFiles/files.lst +++ b/res/TemplateBatchFiles/files.lst @@ -99,7 +99,6 @@ "PSDI","Test whether sites in two populations are evolving under different selective pressures along internal tree branches.","CompareSelectivePressureIVL.bf"; "NY","Test for positive selection using the 
approach of Nielsen and Yabg, by sampling global dN/dS from an array of distributions, and using Bayesian posterior to identify the sites with dN/dS>1.","NielsenYang.bf"; "PSM","Test for positive selection using the approach of Nielsen and Yang, by sampling global dN/dS from an array of distributions, and using Bayesian posterior to identify the sites with dN/dS>1. Multiple subsets of one data set with shared dN/dS.","MFPositiveSelection.bf"; -"QSD","Quickly test for positive selection using several approaches.","QuickSelectionDetection.bf"; "RELAX","Test whether selected branches are under relaxed or intensified selection against reference branches","SelectionAnalyses/RELAX.bf"; "SSD","Use approximate likelihoods at a site to test for subtree specific selective pressure.","SubtreeSelectionComparison.bf"; "DST","Perform a random effects (D)irectional (E)volution on (P)rotein (S)equences test to identify sites which evolve directionally towards a given residue [MPI enabled].","DirectionalREL.bf"; From a133af22248cc711b82bcc3ef14a8ef98b4639b7 Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Mon, 2 Jul 2018 06:04:36 -0400 Subject: [PATCH 18/53] Some libv3 changes --- res/TemplateBatchFiles/CleanStopCodons.bf | 2 + .../SelectionAnalyses/MEME.bf | 5 +- .../libv3/tasks/alignments.bf | 153 ++++++++++++++++++ .../libv3/tasks/estimators.bf | 6 + 4 files changed, 164 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/CleanStopCodons.bf b/res/TemplateBatchFiles/CleanStopCodons.bf index eb3a16460..f52bbc896 100644 --- a/res/TemplateBatchFiles/CleanStopCodons.bf +++ b/res/TemplateBatchFiles/CleanStopCodons.bf @@ -153,6 +153,8 @@ for (sequenceIndex = 0; sequenceIndex < all64.species; sequenceIndex = sequenceI GetDataInfo (siteInfo, all64, sequenceIndex, siteIndex); siteInfo1 = stopCodonTemplate*siteInfo; siteInfo2 = nonStopCodonTemplate*siteInfo; + + if (siteInfo1[0]>0 && siteInfo2[0] == 0) { sitesWithDeletions[siteIndex] = 1; diff --git 
a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index df1dcb41a..a146910d9 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -467,7 +467,7 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa ^"meme.site_beta_nuisance" = 1; //console.log ("Optimizing FEL for pattern " + pattern_info); - io.SpoolLF (lf_fel, "/tmp/meme.debug", "FEL"); + //io.SpoolLF (lf_fel, "/tmp/meme.debug", "FEL"); Optimize (results, ^lf_fel); fel = estimators.ExtractMLEs (lf_fel, model_mapping); @@ -483,8 +483,9 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa } //console.log ("Optimizing MEME for pattern " + pattern_info); - io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME"); + //io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME"); Optimize (results, ^lf_bsrel); + //console.log (results[1][0]); alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index f53b06082..381014cb8 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -546,6 +546,57 @@ lfunction alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, offse return translation; } +/** + * @name alignments.TranslateCodonsToAminoAcidsWithAmbigsAllFrames + * Translate a codon sequence to amino-acids using the mapping provided by the + * genetic code in all 3 frames + * @param {String} sequence - the string to translate + * @param {Dictionary} code - genetic code description (e.g. 
returned by alignments.LoadGeneticCode) + * @param {Dictionary} lookup - resolution lookup dictionary + * @returns {Dict} for each reading frame F in {0, 1, 2} returns + + F -> { + terms.data.sequence: translated sequence (always choose X if available, otherwise first sense resolution) + terms.sense_codons : N, // number of sense A/A + terms.stop_codons : N // number of stop codons + } + */ + + lfunction alignments.TranslateCodonsToAminoAcidsWithAmbigsAllFrames (sequence, code, lookup) { + + result = {}; + + + for (frame = 0; frame < 3; frame += 1) { + try_run = alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, frame, code, lookup); + + translation = ""; translation * 128; + + frame_result = {utility.getGlobalValue ("terms.sense_codons") : 0, + utility.getGlobalValue ("terms.stop_codons") : 0 + }; + + upper_bound = Abs (try_run); + for (i = 0; i < upper_bound; i+=1) { + if (try_run[i] / "X") { // has_stop + translation * "X"; + frame_result [^"terms.stop_codons"] += 1; + } else { + translation * (try_run[i])["INDEXORDER"][0]; + frame_result [^"terms.sense_codons"] += 1; + } + } + + + translation * 0; + frame_result [utility.getGlobalValue ("terms.data.sequence")] = translation; + result[frame] = frame_result; + } + + return result; +} + + /** * @name alignments.MapAlignmentToReferenceCoordinates * Map a query sequence from the aligned coordinates @@ -705,3 +756,105 @@ lfunction alignments.Extract_site_patterns (data_filter) { return site_info; } + +/** + * @name alignments.StripGaps + * Remove gaps from a sequence + * @param {String} sequence - the input sequence + * @returns {String} the sequence with all gaps removed + */ + +lfunction alignments.StripGaps (sequence) { + return sequence ^ {{"\-",""}}; +} + +/** + * @name alignment.MapCodonsToAA + * Map in-frame nucleotides onto a protein alignment string + + * @param {String} codon_sequence - the codon sequence to map + * @param {String} aa_sequence - the matching aligned a.a. 
sequence + * @param {Number} this_many_mm - mismatch limit: mapping fails once the codon/a.a. mismatch count reaches this value (assertion when 1, otherwise returns None) + * @param {Dict} mapping - code ["terms.code.mapping"] + + * @returns {String} the mapped sequence + + * @example + GCAAAATCATTAGGGACTATGGAAAACAGA + -AKSLGTMEN-R + + maps to + + ---GCAAAATCATTAGGGACTATGGAAAAC---AGA + + */ + +lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, mapping) { + + seqLen = Abs(aa_sequence); + translString = ""; + translString * (seqLen); + seqLenN = Abs(codon_sequence); + + aaPos = 0; + seqPos = 0; + codon = codon_sequence[seqPos][seqPos + 2]; + currentAA = mapping[codon]; + + mismatch_count = 0; + + for (aaPos = 0; aaPos < seqLen && seqPos < seqLenN; aaPos += 1) { + advance = 1; + copy_codon = 1; + + if (currentAA != 0) { + if (aa_sequence[aaPos] == "-") { + //if (currentAA != "X") { + translString * "---"; + advance = 0; + //} + } else { + mismatch_count += (aa_sequence[aaPos] != currentAA); + if (this_many_mm == 1) { + if (mismatch_count == this_many_mm) { + translString * 0; + console.log (translString); + console.log (codon_sequence); + } + assert(mismatch_count < this_many_mm, "A mismatch between codon and protein sequences at position " + aaPos + " (codon `seqPos`) : codon '" + codon_sequence[seqPos][seqPos + 2] + "'(`currentAA`) a.a. 
'`aa_sequence[aaPos]`'"); + } else { + if (mismatch_count >= this_many_mm) { + translString * 0; + return None; + } + } + } + } else { + copy_codon = 0; + } + + if (advance) { + if (copy_codon) { + if (currentAA == "X") { + translString * "---"; + } else { + translString * codon; + } + } else { + //fprintf (stdout, "Skipping codon ", codon, "\n"); + aaPos = aaPos - 1; + } + seqPos += 3; + codon = codon_sequence[seqPos][seqPos + 2]; + currentAA = mapping[codon]; + } + } + + for (; aaPos < seqLen; aaPos += 1) { + translString * "---"; + } + + + translString * 0; + return translString; +} \ No newline at end of file diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 75d812c33..2391a2c0f 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -479,6 +479,7 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip if (Type((initial_values[terms.branch_length])[estimators.ApplyExistingEstimates.i]) == "AssociativeList") { // have branch lengths for this partition _application_type = None; + if (Type (branch_length_conditions) == "AssociativeList") { if (Abs(branch_length_conditions) > estimators.ApplyExistingEstimates.i) { _application_type = branch_length_conditions[estimators.ApplyExistingEstimates.i]; @@ -811,6 +812,11 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, model_template, init this_namespace = this_namespace[0][Abs (this_namespace)-3]; df = estimators.CreateLFObject (this_namespace, data_filter, tree, model_template, initial_values, run_options, None); + + /* + Export (lfe, likelihoodFunction); + console.log (lfe); + */ Optimize(mles, likelihoodFunction); From 64c6e3b14a71bf35e5ce335651d463093c6409ce Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Thu, 5 Jul 2018 13:11:05 -0400 Subject: [PATCH 19/53] Adding debugging code to deal with internal LF inconsistencies --- 
res/TemplateBatchFiles/qndhelper1.ibf | 328 +++++++++++++------------- src/core/likefunc.cpp | 33 ++- 2 files changed, 193 insertions(+), 168 deletions(-) diff --git a/res/TemplateBatchFiles/qndhelper1.ibf b/res/TemplateBatchFiles/qndhelper1.ibf index 560eb0ee1..f825e6b1d 100644 --- a/res/TemplateBatchFiles/qndhelper1.ibf +++ b/res/TemplateBatchFiles/qndhelper1.ibf @@ -23,11 +23,11 @@ function BuildCodonFrequencies (obsF) first = h$16; second = h%16$4; third = h%4; - if (_Genetic_Code[h]==10) + if (_Genetic_Code[h]==10) { hshift = hshift+1; PIStop = PIStop-obsF[first][0]*obsF[second][1]*obsF[third][2]; - continue; + continue; } result[h-hshift][0]=obsF[first][0]*obsF[second][1]*obsF[third][2]; } @@ -45,7 +45,7 @@ function ReportSite1 (siteI, siteM) fprintf (stdout, "Site ", Format(siteI+1,5,0)," dN/dS = ", Format(doneSites[siteM][0],12,5), " Full Log(L) = ", Format(doneSites[siteM][3],12,5), - " LRT = ",Format(doneSites[siteM][1],12,5), + " LRT = ",Format(doneSites[siteM][1],12,5), " p-value = ",Format(doneSites[siteM][2],12,5)); if (doneSites[siteM][2] 7) { - fullSites[siteI][7] = doneSites[siteM][7]; + fullSites[siteI][7] = doneSites[siteM][7]; } @@ -88,13 +88,13 @@ function ReportSite2 (siteI, siteM) "| dS(=dN): ", Format(fullSites[siteI][3],5,2), "| Log(L): ", Format(fullSites[siteI][6],10,2), "| LRT: ", Format(fullSites[siteI][4],5,2), - "| p: ", Format(fullSites[siteI][5],5,2)); + "| p: ", Format(fullSites[siteI][5],5,2)); if (Columns (fullSites) > 7) { fprintf (stdout, "| dN_other: ", Format(fullSites[siteI][7],5,2)); } - + if (fullSites[siteI][5]fullSites[siteI][1]) @@ -124,8 +124,8 @@ function ReportSite3 (siteI, siteM) fprintf (stdout, "Site ", Format(siteI+1,4,0), " dS = ", Format(fullSites[siteI][0],7,4), " dN = ", Format(fullSites[siteI][1],7,4), - " Log(L) ",Format(fullSites[siteI][2],7,4),"\n"); - + " Log(L) ",Format(fullSites[siteI][2],7,4),"\n"); + return 0; } @@ -134,36 +134,36 @@ function ReportSite3 (siteI, siteM) function ReceiveJobs1 
(sendOrNot) { MPIReceive (-1, fromNode, result_String); - + siteIndex = MPINodeState[fromNode-1][1]; - + if (sendOrNot) { MPISend (fromNode,siteLikelihood); - MPINodeState[fromNode-1][1] = siteCount; + MPINodeState[fromNode-1][1] = siteCount; } else { MPINodeState[fromNode-1][0] = 0; - MPINodeState[fromNode-1][1] = -1; + MPINodeState[fromNode-1][1] = -1; } - + ExecuteCommands (result_String); - + siteMap = dupInfo[siteIndex]; doneSites[siteMap][0] = dNdS; dNdS = 1; LFCompute (siteLikelihood,LF_START_COMPUTE); LFCompute (siteLikelihood,nullLF); LFCompute (siteLikelihood,LF_DONE_COMPUTE); - + doneSites[siteMap][1] = 2*(siteLikelihood_MLES[1][0]-nullLF); - doneSites[siteMap][2] = (1-CChi2(doneSites[siteMap][1],1))/2; - doneSites[siteMap][3] = siteLikelihood_MLES[1][0]; - + doneSites[siteMap][2] = (1-CChi2(doneSites[siteMap][1],1))/2; + doneSites[siteMap][3] = siteLikelihood_MLES[1][0]; + dummy = ReportSite1 (siteIndex, siteMap); - + return fromNode-1; } /*------------------------------------------------------------------------*/ @@ -179,14 +179,14 @@ function ReportSiteMEME (siteI, siteM, doQ) fullSites[siteI][5] = doneSites[siteM][3]; fullSites[siteI][6] = doneSites[siteM][4]; fullSites[siteI][8] = doneSites[siteM][5]; - + if (doQ >= 0) { fullSites[siteI][7] = doQ; qStr = "| q: " + Format(fullSites[siteI][7],5,2); } else { qStr = ""; } - + fprintf (stdout, "| Codon: ", Format(siteI+1,4,0), "| Beta1: ", Format(fullSites[siteI][0],10,2), @@ -195,9 +195,9 @@ function ReportSiteMEME (siteI, siteM, doQ) "| P(Beta2): ", Format(fullSites[siteI][3],5,2), "| alpha: ", Format(fullSites[siteI][4],10,2), "| LRT: ", Format(fullSites[siteI][5],6,2), - "| p: ", Format(fullSites[siteI][6],5,2), - qStr, - "| Log(L): ", Format(fullSites[siteI][8],5,2)); + "| p: ", Format(fullSites[siteI][6],5,2), + qStr, + "| Log(L): ", Format(fullSites[siteI][8],5,2)); if (fullSites[siteI][6]= 0) { fullSites[siteI][11] = doQ; qStr = "| q: " + Format(fullSites[siteI][11],5,2); } else { qStr = ""; } - 
+ fprintf (stdout, "| Codon: ", Format(siteI+1,4,0), "| Internal: ", Format(fullSites[siteI][0],8,2), "[", Format(fullSites[siteI][2],4,2), "], ", Format(fullSites[siteI][1],8,2), "[", Format(1-fullSites[siteI][2],4,2), "]", "| Leaves: ", Format(fullSites[siteI][4],8,2), "[", Format(fullSites[siteI][6],4,2), "], ", Format(fullSites[siteI][5],8,2), "[", Format(1-fullSites[siteI][6],4,2), "]", "| alpha: ", Format(fullSites[siteI][8],8,2), "| LRT: ", Format(fullSites[siteI][9],6,2), - "| p: ", Format(fullSites[siteI][10],5,2), - qStr, - "| Log(L): ", Format(fullSites[siteI][12],5,2)); + "| p: ", Format(fullSites[siteI][10],5,2), + qStr, + "| Log(L): ", Format(fullSites[siteI][12],5,2)); if (fullSites[siteI][10] 7) { noFValue = siteLikelihood_MLE_VALUES ["nFactorOther"]; } - + if (siteNA) { doneSites[siteMap][0] = nFValue; @@ -352,7 +352,7 @@ function ReceiveJobs2 (sendOrNot, nullAlt) { doneSites[siteMap][7] = noFValue; } - + /*if ((sFactor > 1e-10) || (nFactor > 1e-10)) { svpc = COVARIANCE_PRECISION; @@ -390,7 +390,7 @@ function ReceiveJobs2 (sendOrNot, nullAlt) } else { - doneSites[siteMap][6] = 1/covMx[0][2]; + doneSites[siteMap][6] = 1/covMx[0][2]; } } } @@ -400,14 +400,14 @@ function ReceiveJobs2 (sendOrNot, nullAlt) { doneSites[siteMap][6] = 0; }*/ - + doneSites[siteMap][6] = nFValue/sFValue; doneSites[siteMap][3] = doneSites[siteMap][3]+2*siteLikelihood_MLES[1][0]; doneSites[siteMap][5] = siteLikelihood_MLES[1][0]; } else { - doneSites[siteMap][3] = doneSites[siteMap][3]-2*siteLikelihood_MLES[1][0]; + doneSites[siteMap][3] = doneSites[siteMap][3]-2*siteLikelihood_MLES[1][0]; doneSites[siteMap][2] = sFValue; } @@ -419,11 +419,11 @@ function ReceiveJobs2 (sendOrNot, nullAlt) { if (doneSites[siteMap][4] == (-1)) { - doneSites[siteMap][4] = 1-CChi2(doneSites[siteMap][3],1); + doneSites[siteMap][4] = 1-CChi2(doneSites[siteMap][3],1); dummy = ReportSite2 (siteIndex, siteMap); } } - + return fromNode-1; } @@ -436,28 +436,28 @@ function MPISendJobMEMEInternal () { if 
(MPINodeState[mpiNode][0]==0) { - break; + break; } } - + if (mpiNode == MPI_NODE_COUNT-1) { mpiNode = ReceiveJobsMEMEInternal (); } - - + + lastKey = Abs(toDoList); - + if (lastKey > 0) { lastKey = toDoList ["INDEXORDER"][0]; theJob = toDoList [lastKey]; toDoList - lastKey; - MPINodeState[mpiNode][1] = theJob[0]; + MPINodeState[mpiNode][1] = theJob[0]; filterString = "" + (theJob[0]*3) + "-" + (theJob[0]*3+2); DataSetFilter siteFilter = CreateFilter (filteredData,3,filterString,"",GeneticCodeExclusions); - + if (theJob[1] == 2) { sFactor = 1; @@ -466,7 +466,7 @@ function MPISendJobMEMEInternal () nsFactor2 = dNdS; OPTIMIZATION_METHOD = 0; MPISend (mpiNode+1, felLikelihood); - + if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Sending FEL fit of site ", theJob[0], " to node ", mpiNode+1, "]\n"); @@ -479,10 +479,10 @@ function MPISendJobMEMEInternal () doneSites[siteMap][2] = mixingP; doneSites[siteMap][6] = nsFactor1leaves; doneSites[siteMap][7] = nsFactor2leaves; - doneSites[siteMap][8] = mixingPleaves; + doneSites[siteMap][8] = mixingPleaves; doneSites[siteMap][9] = sFactor; */ - + if (theJob[1] == 1) { sFactor = bySiteCache[theJob[0]][0]; @@ -506,7 +506,7 @@ function MPISendJobMEMEInternal () mixingP = 0.05; nsFactor2leaves = sFactor*0.5; mixingPleaves = 0.05; - + } else { @@ -527,10 +527,10 @@ function MPISendJobMEMEInternal () fprintf (stdout, "[DEBUG: Sending MEME fit of site ", theJob[0], " to node ", mpiNode+1, "]"); fprintf (stdout, "\n\talpha = ", sFactor, "\n\tbeta1 = ", nsFactor1*sFactor, - "\n\tbeta2 = ", nsFactor2, + "\n\tbeta2 = ", nsFactor2, "\n\tmixing = ", mixingP, "\n\tbeta1[leaves] = ", nsFactor1leaves*sFactor, - "\n\tbeta2[leaves] = ", nsFactor2leaves, + "\n\tbeta2[leaves] = ", nsFactor2leaves, "\n\tmixing[leaves] = ", mixingPleaves, "\n"); } @@ -540,11 +540,11 @@ function MPISendJobMEMEInternal () sFactor = bySiteCache[theJob[0]][0]; nsFactor1 = bySiteCache[theJob[0]][1]; mixingP = bySiteCache[theJob[0]][2]; - + nsFactor1leaves = 
bySiteCache[theJob[0]][3]; nsFactor2leaves = bySiteCache[theJob[0]][4]; mixingPleaves = bySiteCache[theJob[0]][5]; - + omega2 = 1; nsFactor2 := omega2 * sFactor; if (sFactor == 0) @@ -560,20 +560,20 @@ function MPISendJobMEMEInternal () fprintf (stdout, "[DEBUG: Sending MEME NULL fit of site ", theJob[0], " to node ", mpiNode+1, "]"); fprintf (stdout, "\n\talpha = ", sFactor, "\n\tbeta1 = ", nsFactor1*sFactor, - "\n\tbeta2 = ", nsFactor2, + "\n\tbeta2 = ", nsFactor2, "\n\tmixing = ", mixingP, "\n\tbeta1[leaves] = ", nsFactor1leaves*sFactor, - "\n\tbeta2[leaves] = ", nsFactor2leaves, + "\n\tbeta2[leaves] = ", nsFactor2leaves, "\n\tmixing[leaves] = ", mixingPleaves, "\n"); } } } MPINodeState[mpiNode][0] = 1; - MPINodeState[mpiNode][1] = theJob[0]; - MPINodeState[mpiNode][2] = theJob[1]; + MPINodeState[mpiNode][1] = theJob[0]; + MPINodeState[mpiNode][2] = theJob[1]; MPINodeState[mpiNode][3] += 1; - + if (debugVerboseFlag) { fileOut = "/tmp/" + theJob[0] + "." + theJob[1]; @@ -587,7 +587,7 @@ function MPISendJobMEMEInternal () ReceiveJobsMEMEInternal(); } } - + return +(MPINodeState [-1][0]) || Abs(toDoList); } @@ -599,28 +599,28 @@ function MPISendJobMEME () { if (MPINodeState[mpiNode][0]==0) { - break; + break; } } - + if (mpiNode == MPI_NODE_COUNT-1) { mpiNode = ReceiveJobsMEME (); } - - + + lastKey = Abs(toDoList); - + if (lastKey > 0) { lastKey = toDoList ["INDEXORDER"][0]; theJob = toDoList [lastKey]; toDoList - lastKey; - MPINodeState[mpiNode][1] = theJob[0]; + MPINodeState[mpiNode][1] = theJob[0]; filterString = "" + (theJob[0]*3) + "-" + (theJob[0]*3+2); DataSetFilter siteFilter = CreateFilter (filteredData,3,filterString,"",GeneticCodeExclusions); - + if (theJob[1] == 2) { sFactor = 1; @@ -629,7 +629,7 @@ function MPISendJobMEME () nsFactor2 = dNdS; OPTIMIZATION_METHOD = 0; MPISend (mpiNode+1, felLikelihood); - + if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Sending FEL fit of site ", theJob[0], " to node ", mpiNode+1, "]\n"); @@ -673,7 +673,7 @@ function 
MPISendJobMEME () fprintf (stdout, "[DEBUG: Sending MEME fit of site ", theJob[0], " to node ", mpiNode+1, "]"); fprintf (stdout, "\n\talpha = ", sFactor, "\n\tbeta1 = ", nsFactor1*sFactor, - "\n\tbeta2 = ", nsFactor2, + "\n\tbeta2 = ", nsFactor2, "\n\tmixing = ", mixingP, "\n"); } @@ -698,17 +698,17 @@ function MPISendJobMEME () fprintf (stdout, "[DEBUG: Sending MEME NULL fit of site ", theJob[0], " to node ", mpiNode+1, "]"); fprintf (stdout, "\n\talpha = ", sFactor, "\n\tbeta1 = ", nsFactor1*sFactor, - "\n\tbeta2 = ", nsFactor2, + "\n\tbeta2 = ", nsFactor2, "\n\tmixing = ", mixingP, "\n"); } } } MPINodeState[mpiNode][0] = 1; - MPINodeState[mpiNode][1] = theJob[0]; - MPINodeState[mpiNode][2] = theJob[1]; + MPINodeState[mpiNode][1] = theJob[0]; + MPINodeState[mpiNode][2] = theJob[1]; MPINodeState[mpiNode][3] += 1; - + if (debugVerboseFlag) { fileOut = "/tmp/" + theJob[0] + "." + theJob[1]; @@ -722,7 +722,7 @@ function MPISendJobMEME () ReceiveJobsMEME(); } } - + return +(MPINodeState [-1][0]) || Abs(toDoList); } @@ -731,19 +731,19 @@ function MPISendJobMEME () function ReceiveJobsMEME () { MPIReceive (-1, fromNode, result_String); - + //fprintf (stdout, result_String, "\n"); - + siteIndex = MPINodeState[fromNode-1][1]; siteNAF = MPINodeState[fromNode-1][2]; - + MPINodeState[fromNode-1][0] = 0; - MPINodeState[fromNode-1][1] = -1; - + MPINodeState[fromNode-1][1] = -1; + siteMap = dupInfo[siteIndex]; - + if (siteNAF == 1) - { + { res = Eval (result_String); siteLikelihood_MLES = res["MLES"]; siteLikelihood_MLE_VALUES = res["VALUES"]; @@ -754,7 +754,7 @@ function ReceiveJobsMEME () { ExecuteCommands (result_String); } - + if (siteNAF < 2) { nsf1V = siteLikelihood_MLE_VALUES ["nsFactor1"]; @@ -773,22 +773,22 @@ function ReceiveJobsMEME () nsf2V = felLikelihood_MLE_VALUES ["nsFactor2"]; sFValue = felLikelihood_MLE_VALUES ["sFactor"]; } - + if (siteNAF == 1) // alternative { doneSites[siteMap][0] = nsf1V*sFValue; doneSites[siteMap][1] = nsf2V; doneSites[siteMap][2] = 
mixingF; - doneSites[siteMap][6] = 1-mixingF; + doneSites[siteMap][6] = 1-mixingF; doneSites[siteMap][7] = sFValue; - + doneSites[siteMap][3] = doneSites[siteMap][3]+2*siteLikelihood_MLES[1][0]; doneSites[siteMap][5] = siteLikelihood_MLES[1][0]; - + bySiteCache[siteIndex][0] = sFValue; bySiteCache[siteIndex][1] = nsf1V; bySiteCache[siteIndex][2] = mixingF; - + if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Received MEME alternative fit of site ", siteIndex, " from node ", fromNode, "]"); @@ -806,7 +806,7 @@ function ReceiveJobsMEME () if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Added null model fit for site ", siteIndex, " to the queue]\n"); - } + } } else { @@ -814,7 +814,7 @@ function ReceiveJobsMEME () doneSites[siteMap][4] = -1; } } - else + else { if (siteNAF == 2) // FEL { @@ -828,11 +828,11 @@ function ReceiveJobsMEME () "\n\tbeta = ", nsf2V, "\n"); } - + } else // null { - doneSites[siteMap][3] = doneSites[siteMap][3]-2*siteLikelihood_MLES[1][0]; + doneSites[siteMap][3] = doneSites[siteMap][3]-2*siteLikelihood_MLES[1][0]; if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Received MEME NULL fit of site ", siteIndex, " from node ", fromNode, "]"); @@ -855,12 +855,12 @@ function ReceiveJobsMEME () { if (doneSites[siteMap][4] == (-1)) { - doneSites[siteMap][4] = 2/3-2/3*(0.45*CChi2(doneSites[siteMap][3],1)+0.55*CChi2(doneSites[siteMap][3],2)); + doneSites[siteMap][4] = 2/3-2/3*(0.45*CChi2(doneSites[siteMap][3],1)+0.55*CChi2(doneSites[siteMap][3],2)); ReportSiteMEME (siteIndex, siteMap, -1); } } } - + return fromNode-1; } @@ -869,18 +869,18 @@ function ReceiveJobsMEME () function ReceiveJobsMEMEInternal () { MPIReceive (-1, fromNode, result_String); - - + + siteIndex = MPINodeState[fromNode-1][1]; siteNAF = MPINodeState[fromNode-1][2]; - + MPINodeState[fromNode-1][0] = 0; - MPINodeState[fromNode-1][1] = -1; - + MPINodeState[fromNode-1][1] = -1; + siteMap = dupInfo[siteIndex]; - + if (siteNAF == 1) - { + { res = Eval (result_String); siteLikelihood_MLES 
= res["MLES"]; siteLikelihood_MLE_VALUES = res["VALUES"]; @@ -891,16 +891,16 @@ function ReceiveJobsMEMEInternal () { ExecuteCommands (result_String); } - + if (siteNAF < 2) { //fprintf (stdout, siteLikelihood_MLE_VALUES, "\n"); - + nsf1V = siteLikelihood_MLE_VALUES ["nsFactor1"]; nsf2V = siteLikelihood_MLE_VALUES ["nsFactor2"]; nsf1Vl = siteLikelihood_MLE_VALUES ["nsFactor1leaves"]; nsf2Vl = siteLikelihood_MLE_VALUES ["nsFactor2leaves"]; - + omega2F = siteLikelihood_MLE_VALUES ["omega2"]; mixingF = siteLikelihood_MLE_VALUES ["mixingP"]; @@ -912,28 +912,28 @@ function ReceiveJobsMEMEInternal () nsf2V = felLikelihood_MLE_VALUES ["nsFactor2"]; sFValue = felLikelihood_MLE_VALUES ["sFactor"]; } - + if (siteNAF == 1) // alternative { doneSites[siteMap][0] = nsf1V*sFValue; doneSites[siteMap][1] = nsf2V; doneSites[siteMap][2] = mixingF; - doneSites[siteMap][6] = nsf1Vl; + doneSites[siteMap][6] = nsf1Vl; doneSites[siteMap][7] = nsf2Vl; doneSites[siteMap][8] = mixingFl; doneSites[siteMap][9] = sFValue; - + doneSites[siteMap][3] = doneSites[siteMap][3]+2*siteLikelihood_MLES[1][0]; doneSites[siteMap][5] = siteLikelihood_MLES[1][0]; - + bySiteCache[siteIndex][0] = sFValue; bySiteCache[siteIndex][1] = nsf1V; bySiteCache[siteIndex][2] = mixingF; bySiteCache[siteIndex][3] = nsf1Vl; bySiteCache[siteIndex][4] = nsf2Vl; bySiteCache[siteIndex][5] = mixingFl; - + if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Received MEME alternative fit of site ", siteIndex, " from node ", fromNode, "]"); @@ -942,7 +942,7 @@ function ReceiveJobsMEMEInternal () "\n\tbeta2 = ", doneSites[siteMap][1], "\n\tmixing = ", doneSites[siteMap][2], "\n\tbeta1[leaves] = ", doneSites[siteMap][6], - "\n\tbeta2[leaves] = ", doneSites[siteMap][7], + "\n\tbeta2[leaves] = ", doneSites[siteMap][7], "\n\tmixing[leaves] = ", doneSites[siteMap][8], "\n"); } @@ -954,7 +954,7 @@ function ReceiveJobsMEMEInternal () if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Added null model fit for site ", siteIndex, " to the 
queue]\n"); - } + } } else { @@ -962,7 +962,7 @@ function ReceiveJobsMEMEInternal () doneSites[siteMap][4] = -1; } } - else + else { if (siteNAF == 2) // FEL { @@ -976,11 +976,11 @@ function ReceiveJobsMEMEInternal () "\n\tbeta = ", nsf2V, "\n"); } - + } else // null { - doneSites[siteMap][3] = doneSites[siteMap][3]-2*siteLikelihood_MLES[1][0]; + doneSites[siteMap][3] = doneSites[siteMap][3]-2*siteLikelihood_MLES[1][0]; if (debugVerboseFlag) { fprintf (stdout, "[DEBUG: Received MEME NULL fit of site ", siteIndex, " from node ", fromNode, "]"); @@ -989,7 +989,7 @@ function ReceiveJobsMEMEInternal () "\n\tomega2 = ", omega2F, "\n\tmixing = ", mixingF, "\n\tbeta1[leaves] = ", nsf1Vl, - "\n\tbeta2[leaves] = ", nsf2Vl, + "\n\tbeta2[leaves] = ", nsf2Vl, "\n\tmixing[leaves] = ", mixingFl, "\n"); } @@ -1006,12 +1006,12 @@ function ReceiveJobsMEMEInternal () { if (doneSites[siteMap][4] == (-1)) { - doneSites[siteMap][4] = 2/3-2/3*(0.45*CChi2(doneSites[siteMap][3],1)+0.55*CChi2(doneSites[siteMap][3],2)); + doneSites[siteMap][4] = 2/3-2/3*(0.45*CChi2(doneSites[siteMap][3],1)+0.55*CChi2(doneSites[siteMap][3],2)); ReportSiteMEMEInternal (siteIndex, siteMap, -1); } } } - + return fromNode-1; } @@ -1020,30 +1020,30 @@ function ReceiveJobsMEMEInternal () function ReceiveJobs3 (sendOrNot) { MPIReceive (-1, fromNode, result_String); - + siteIndex = MPINodeState[fromNode-1][1]; - + if (sendOrNot) { MPISend (fromNode,siteLikelihood); - MPINodeState[fromNode-1][1] = siteCount; + MPINodeState[fromNode-1][1] = siteCount; } else { MPINodeState[fromNode-1][0] = 0; - MPINodeState[fromNode-1][1] = -1; + MPINodeState[fromNode-1][1] = -1; } - + siteMap = dupInfo[siteIndex]; - + ExecuteCommands (result_String); - + doneSites[siteMap][0] = sFactor; doneSites[siteMap][1] = nFactor; doneSites[siteMap][2] = siteLikelihood_MLES[1][0]; ReportSite3 (siteIndex, siteMap); - + return fromNode-1; } @@ -1059,7 +1059,7 @@ if (pipeThroughFlag == 0) ChoiceList (nrChoice, "New/Restore",1,SKIP_NONE, "New 
Analysis","Perform a new analysis.", "Restore", "Restore an earlier nucleotide fit"); - + if (nrChoice < 0) { return; @@ -1071,7 +1071,7 @@ global AT = 1; global CG = 1; global CT = 1; global GT = 1; -global dNdS = 1; +global dNdS = 1; NucleotideMatrix = {{*,AC*t,t,AT*t}{AC*t,*,CG*t,CT*t}{t,CG*t,*,GT*t}{AT*t,CT*t,GT*t,*}}; @@ -1087,7 +1087,7 @@ if (nrChoice == 0) ChoiceList (modelChoice, "Model Options",1,SKIP_NONE, "Default","Use HKY85 and MG94xHKY85.", "Custom", "Use any reversible nucleotide model crossed with MG94."); - + if (modelChoice < 0) { return; @@ -1103,10 +1103,10 @@ if (nrChoice == 0) fprintf (stdout,"\nPlease enter a 6 character model designation (e.g:010010 defines HKY85):"); fscanf (stdin,"String", modelDesc); if (Abs(modelDesc)==6) - { + { done = 1; } - } + } } else { @@ -1116,9 +1116,9 @@ if (nrChoice == 0) DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); DataSetFilter nucData = CreateFilter (filteredData,1); - + ModelTitle = ""+modelDesc[0]; - + rateBiasTerms = {{"AC","1","AT","CG","CT","GT"}}; paramCount = 0; @@ -1130,23 +1130,23 @@ if (nrChoice == 0) { if (modelDesc[customLoopCounter2]==modelDesc[customLoopCounter]) { - ModelTitle = ModelTitle+modelDesc[customLoopCounter2]; + ModelTitle = ModelTitle+modelDesc[customLoopCounter2]; if (rateBiasTerms[customLoopCounter2] == "1") { modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter]+":="+rateBiasTerms[customLoopCounter2]+";"; } else { - modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter2]+":="+rateBiasTerms[customLoopCounter]+";"; + modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter2]+":="+rateBiasTerms[customLoopCounter]+";"; } break; } } if (customLoopCounter==customLoopCounter2) { - ModelTitle = ModelTitle+modelDesc[customLoopCounter2]; + ModelTitle = ModelTitle+modelDesc[customLoopCounter2]; } - } + } if (Abs(modelConstraintString)) { @@ -1156,13 +1156,13 @@ if (nrChoice == 0) 
HarvestFrequencies (overallFrequencies, nucData,1,1,0); HarvestFrequencies (rawPositionFrequencies, nucData,3,1,1); - + positionFrequencies = CF3x4 (rawPositionFrequencies, GeneticCodeExclusions); codonFrequencies = BuildCodonFrequencies(positionFrequencies); Model NucModel = (NucleotideMatrix, overallFrequencies, 1); - + if (pipeThroughFlag == 0) { #include "queryTree.bf"; @@ -1185,9 +1185,9 @@ else DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); HarvestFrequencies (positionFrequencies, nucData,3,1,1); codonFrequencies = BuildCodonFrequencies(positionFrequencies); - + treeString = ""+givenTree; - + pMatch = {6,1}; pCheck = {{AC__,AT__,CG__,CT__,GT__}}; if (pCheck[0] == 1) @@ -1201,11 +1201,11 @@ else pCount = 2; ModelTitle = "01"; pMatch[0] = pCheck[0]; - pMatch[1] = 1; + pMatch[1] = 1; } - + for (l=1; l<5; l=l+1) - { + { for (k=0; k0.5) { if (convergenceMode < 2) { - LocateTheBump (current_index,precisionStep, maxSoFar, bestVal); + LocateTheBump (current_index,precisionStep, maxSoFar, bestVal, precisionStep); } else { LocateTheBump (current_index,precisionStep, maxSoFar, bestVal, convergenceMode == 2? precisionStep*0.25: precisionStep*0.0625); } @@ -4777,7 +4777,8 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& _Variable* curVar = index >= 0 ? 
GetIthIndependentVar (index) : nil; bool movingLeft = false, - first = true; + first = true, + successful = false; _Parameter lowerBound = curVar?GetIthIndependentBound(index,true):0., @@ -4952,6 +4953,8 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& } else { rightValue = SetParametersAndCompute (index, right, &currentValues, gradient); } + + if (verbosityLevel > 100) { char buf [512]; @@ -4971,6 +4974,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (rightValue<=middleValue && leftValue<=middleValue) { //if (index < 0) printf ("\nMaximum found\n"); + successful = true; break; } @@ -5044,17 +5048,38 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& //exit (0); } } + } else { middleValue = SetParametersAndCompute (index, middle, &currentValues, gradient); } + if (successful && !(rightValue<=middleValue && leftValue<=middleValue)) { + this->DoneComputing(); + char buf[256], buf2[512]; + snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue); + snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %d: %15.12g <= %15.12g <= %15.12g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? 
"SUCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); + + _String sLF (8192L, true); + SerializeLF (sLF,_hyphyLFSerializeModeVanilla); + sLF.Finalize (); + FILE * out = doFileOpen ("/tmp/hyphy.dump", "w"); + fwrite ((void*)sLF.getStr(), 1, sLF.Length(), out); + fclose (out); + + WarnError (_String("Internal error in _LikelihoodFunction::Bracket, likelihood function calculation on the same parameter value returned different scores, dumping the offending likelihood function to /tmp/hyphy.dump") & buf & "\n" & buf2 & "\nParameter name " & *GetIthIndependentName(index)); + + } + if (verbosityLevel > 100) { char buf [256]; - snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET SUCCESSFUL: %15.12g <= %15.12g <= %15.12g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); + snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %d: %15.12g <= %15.12g <= %15.12g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? 
"SUCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); BufferToConsole (buf); } - + + // SLKP 20180705 : error checking + + bracketFCount+=likeFuncEvalCallCount-funcCounts; bracketCount++; From f151bcc62f67dcf2fb15f55f33d92b279dd62da5 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Thu, 5 Jul 2018 13:24:15 -0400 Subject: [PATCH 20/53] Set number of threads in LFCompute --- src/core/batchlanruntime.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/batchlanruntime.cpp b/src/core/batchlanruntime.cpp index 2c5f26836..a1cdd019b 100644 --- a/src/core/batchlanruntime.cpp +++ b/src/core/batchlanruntime.cpp @@ -240,7 +240,10 @@ bool _ElementaryCommand::HandleComputeLFFunction (_ExecutionList& currentPr _LikelihoodFunction *lf = (_LikelihoodFunction*)_HYRetrieveBLObjectByName (name2Find, objectType,nil, true, true); if (*arg2 == lfStartCompute) { - lf->PrepareToCompute(true); + lf->PrepareToCompute (true); +#ifdef _OPENMP + lf->SetThreadCount (systemCPUCount); +#endif } else if (*arg2 == lfDoneCompute) { lf->DoneComputing (true); } else { From 849d8aa3b59b1d5a030862bbb1ea228c27a7e3a7 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 6 Jul 2018 17:33:18 -0400 Subject: [PATCH 21/53] Cleaning up cache defining code; stricter checks for model reversibility; updating SmallCodon.bf to use higher precision constants --- res/TemplateBatchFiles/AnalyzeCodonData.bf | 4 +- src/core/calcnode2.cpp | 378 +++--- src/core/include/calcnode.h | 18 +- src/core/include/likefunc.h | 2 +- src/core/likefunc.cpp | 153 ++- src/core/matrix.cpp | 6 +- src/core/polynoml.cpp | 2 +- .../SimpleOptimizations/SmallCodon.bf | 1132 +++++++++-------- 8 files changed, 922 insertions(+), 773 deletions(-) diff --git a/res/TemplateBatchFiles/AnalyzeCodonData.bf b/res/TemplateBatchFiles/AnalyzeCodonData.bf index 7c8093478..e8a1679de 100644 --- a/res/TemplateBatchFiles/AnalyzeCodonData.bf +++ 
b/res/TemplateBatchFiles/AnalyzeCodonData.bf @@ -23,12 +23,12 @@ if (modelType) ChoiceList (branchLengths, "Branch Lengths", 1, SKIP_NONE, "Estimate", "Estimate branch lengths by ML", "Proportional to input tree", "Branch lengths are proportional to those in input tree"); - + if (branchLengths < 0) { return; } - + if (branchLengths == 1) { global treeScaler = 1; diff --git a/src/core/calcnode2.cpp b/src/core/calcnode2.cpp index e0b29e11b..ef52aa3bb 100644 --- a/src/core/calcnode2.cpp +++ b/src/core/calcnode2.cpp @@ -48,6 +48,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "dmalloc.h" #endif +//#define _UBER_VERBOSE_LF_DEBUG //#define _UBER_VERBOSE_DUMP_MATRICES //#define _UBER_VERBOSE_DUMP 27 @@ -71,6 +72,19 @@ int launchmdsocl(long siteCount, _GrowingVector* lNodeResolutions); #endif + +#ifdef _UBER_VERBOSE_LF_DEBUG + +/*----------------------------------------------------------------------------------------------------------*/ +void echoNodeList (_SimpleList& theNodes, _SimpleList& leaves, _SimpleList& iNodes) +{ + for (long n = 0; n < theNodes.lLength; n++) { + node* nd = (node*)(theNodes(n)in_object)->GetName()->sData); + } +} + +#endif #ifdef _SLKP_LFENGINE_REWRITE_ @@ -318,12 +332,12 @@ long _TheTree::DetermineNodesForUpdate (_SimpleList& updateNodes, _List if (addOne >= 0) { - nodesToUpdate.lData[addOne] = 1; + nodesToUpdate.lData[addOne] = 2; } if (forceRecalculationOnTheseBranches.lLength) { for (unsigned long markedNode = 0; markedNode < forceRecalculationOnTheseBranches.lLength; markedNode++) { - nodesToUpdate.lData[forceRecalculationOnTheseBranches.lData[markedNode]] = 1; + nodesToUpdate.lData[forceRecalculationOnTheseBranches.lData[markedNode]] = 2; } if (canClear) { @@ -346,19 +360,20 @@ long _TheTree::DetermineNodesForUpdate (_SimpleList& updateNodes, _List currentTreeNode->RecomputeMatrix (catID, categoryCount, nil); } - nodesToUpdate.lData[nodeID] = 1; + nodesToUpdate.lData[nodeID] = 2; } if (nodesToUpdate.lData[nodeID]) { - 
nodesToUpdate.lData[flatParents.lData[nodeID]+flatLeaves.lLength] = 1; + nodesToUpdate.lData[flatParents.lData[nodeID]+flatLeaves.lLength] = 2; } } // one more pass to pick up all descendants of changed internal nodes + // SLKP 20180705 : is it not enough to simply look at the children of changed internal nodes? for (unsigned long nodeID = 0; nodeID < nodesToUpdate.lLength; nodeID++) - if (nodesToUpdate.lData[flatLeaves.lLength+flatParents.lData[nodeID]] && nodesToUpdate.lData[nodeID] == 0) { + if (nodesToUpdate.lData[flatLeaves.lLength+flatParents.lData[nodeID]] == 2 && nodesToUpdate.lData[nodeID] == 0) { nodesToUpdate.lData[nodeID] = 1; } @@ -612,7 +627,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL // process the leaves first _SimpleList taggedInternals (flatNodes.lLength, 0, 0); - long alphabetDimension = theFilter->GetDimension(), + long const alphabetDimension = theFilter->GetDimension(), siteCount = theFilter->GetPatternCount(), alphabetDimensionmod4 = alphabetDimension-alphabetDimension%4; @@ -622,7 +637,14 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL if (siteTo > siteCount) { siteTo = siteCount; } - + +/* + #ifdef _UBER_VERBOSE_LF_DEBUG + printf ("\n\n_TheTree::ComputeTreeBlockByBranch\n"); + echoNodeList (updateNodes,flatLeaves,flatNodes ); + printf ("\n"); +#endif +*/ for (unsigned long nodeID = 0; nodeID < updateNodes.lLength; nodeID++) { long nodeCode = updateNodes.lData [nodeID], parentCode = flatParents.lData [nodeCode]; @@ -761,23 +783,24 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL if (siteState >= 0) // a single character state; sweep down the appropriate column { - tMatrix += siteState; - if (alphabetDimension == 4) { - parentConditionals[0] *= tMatrix[0]; - parentConditionals[1] *= tMatrix[4]; - parentConditionals[2] *= tMatrix[8]; - parentConditionals[3] *= tMatrix[12]; + if (alphabetDimension == 4UL) { + parentConditionals[0] *= tMatrix[siteState]; + parentConditionals[1] *= tMatrix[siteState+4UL]; 
+ parentConditionals[2] *= tMatrix[siteState+8UL]; + parentConditionals[3] *= tMatrix[siteState+12UL]; } else { - long k = 0; - for (; k < alphabetDimensionmod4; k+=4, tMatrix += alphabetDimension+alphabetDimension+alphabetDimension+alphabetDimension) { - parentConditionals[k] *= tMatrix[0]; - parentConditionals[k+1] *= tMatrix[alphabetDimension]; - parentConditionals[k+2] *= tMatrix[alphabetDimension+alphabetDimension]; - parentConditionals[k+3] *= tMatrix[alphabetDimension+alphabetDimension+alphabetDimension]; - } - for (; k < alphabetDimension; k++, tMatrix += alphabetDimension) { - parentConditionals[k] *= *tMatrix; - } + unsigned long k = 0UL; + unsigned long target_index = siteState; + unsigned long shifter = alphabetDimension << 2; + for (; k < alphabetDimensionmod4; k+=4UL, target_index += shifter) { + parentConditionals[k] *= tMatrix[target_index]; + parentConditionals[k+1] *= tMatrix[target_index + alphabetDimension]; + parentConditionals[k+2] *= tMatrix[target_index + alphabetDimension + alphabetDimension]; + parentConditionals[k+3] *= tMatrix[target_index + alphabetDimension + alphabetDimension + alphabetDimension]; + } + for (; k < alphabetDimension; k++, target_index += alphabetDimension) { + parentConditionals[k] *= tMatrix[target_index]; + } } continue; } else { @@ -836,14 +859,15 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL childVector += 4; } else { _Parameter sum = 0.0; - if (alphabetDimension > alphabetDimensionmod4){ for (long p = 0L; p < alphabetDimension; p++) { _Parameter accumulator = 0.0; - + #ifdef _SLKP_USE_SSE_INTRINSICS + + __m128d buffer1, buffer2, @@ -855,7 +879,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL load4; - if (((long int)tMatrix & 0x1111b) == 0 && ((long int)childVector & 0x1111b) == 0){ + if (((long int)tMatrix & 0b1111) == 0 && ((long int)childVector & 0b1111) == 0){ for (long c = 0; c < alphabetDimensionmod4; c+=4) { load1 = _mm_load_pd (tMatrix+c); load2 = _mm_load_pd (tMatrix+c+2); @@ 
-889,11 +913,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL __m256d sum256 = _mm256_setzero_pd(); - /*if (((long int)tMatrix & 0x11111b) == 0 && ((long int)childVector & 0x11111b) == 0){ - for (long c = 0L; c < alphabetDimensionmod4; c+=4L) { - sum = _mm256_add_pd (sum,_mm256_mul_pd (_mm256_load_pd (tMatrix+c), _mm256_load_pd (childVector+c))); - } - } else {*/ + for (long c = 0; c < alphabetDimensionmod4; c+=4L) { __m256d matrix_quad = _mm256_loadu_pd (tMatrix+c), child_quad = _mm256_loadu_pd (childVector+c), @@ -902,18 +922,15 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL sum256 = _mm256_add_pd (sum256,prod); } - //} - //double buffer[4] __attribute__ ((aligned (32))); - //_mm256_store_pd(buffer, sum256); - - //accumulator = (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]); accumulator = _avx_sum_4(sum256); //NOT sure why copy to doubles and add is faster - // that AVX istructions + // than AVX istructions #else // _SLKP_USE_AVX_INTRINSICS - for (long c = 0; c < alphabetDimensionmod4; c+=4) { - // 4 - unroll the loop + + + for (unsigned long c = 0UL; c < alphabetDimensionmod4; c+=4UL) { // 4 - unroll the loop + // 4 - unroll the loop _Parameter pr1 = tMatrix[c] * childVector[c], pr2 = tMatrix[c+1] * childVector[c+1], pr3 = tMatrix[c+2] * childVector[c+2], @@ -964,6 +981,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL } else #endif // _SLKP_USE_AVX_INTRINSICS + for (long p = 0; p < alphabetDimension; p++) { _Parameter accumulator = 0.0; @@ -1088,24 +1106,28 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL break; } - _Parameter term = log(accumulator), - temp_sum; - + _Parameter term; long site_frequency = theFilter->theFrequencies (siteOrdering.lData[siteID]); if (site_frequency > 1L) { - term *= site_frequency; + term = log(accumulator) * site_frequency - correction; + } else { + term = log(accumulator) - correction; } // Kahan sum - term -= correction; - temp_sum = result + term; + _Parameter temp_sum = 
result + term; correction = (temp_sum - result) - term; result = temp_sum; - +/*#ifdef _UBER_VERBOSE_LF_DEBUG + if (likeFuncEvalCallCount == 340) { + fprintf (stderr, "%s %ld\t%20.16g\t%20.16g\t%20.16g\n", siteID == siteTo - 1 ? "FINAL " : "", siteID, accumulator, term, result); + } +#endif*/ + } } - if (!storageVec && localScalerChange) { + if (!storageVec && localScalerChange) { #pragma omp atomic overallScaler += localScalerChange; } @@ -1113,18 +1135,6 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL return result; } -#ifdef _SLKP_DEBUG_ - -/*----------------------------------------------------------------------------------------------------------*/ -void echoNodeList (_SimpleList& theNodes, _SimpleList& leaves, _SimpleList& iNodes) -{ - for (long n = 0; n < theNodes.lLength; n++) { - node* nd = (node*)(theNodes(n)in_object)->GetName()->sData); - } -} - -#endif /*----------------------------------------------------------------------------------------------------------*/ @@ -1137,16 +1147,24 @@ void _TheTree::ComputeBranchCache ( long * lNodeFlags, _Parameter* scalingAdjustments, long * siteCorrectionCounts, - _GrowingVector* lNodeResolutions, + _GrowingVector const* lNodeResolutions, long& overallScaler, - long siteFrom, - long siteTo, - long catID, - _SimpleList* tcc, + long const siteFrom, + long siteTo, + long const catID, + _SimpleList const* tcc, _Parameter* siteRes ) { + /* + the cache matrix (linearized into a vector) will have TWO rows with siteCount blocks of alphabetDimension doubles, storing the conditional likelihoods of individual sites at a given branch + in the virtually rerooted tree + + cache -> + Row 0 [brID node -- the branch that is being rerooted on] + Row 1 [conditional likelihoods for the new root] + */ //printf ("ComputeBranchCache\n"); @@ -1154,10 +1172,11 @@ void _TheTree::ComputeBranchCache ( nodesToProcess, rootPath; - long myParent = brID -flatLeaves.lLength, - alphabetDimension = theFilter->GetDimension(), - 
alphabetDimensionmod4 = alphabetDimension - alphabetDimension % 4, - siteCount = theFilter->GetPatternCount(); + long myParent = brID -flatLeaves.lLength; + + const long alphabetDimension = theFilter->GetDimension(), + alphabetDimensionmod4 = alphabetDimension - alphabetDimension % 4, + siteCount = theFilter->GetPatternCount(); if (siteTo > siteCount) { siteTo = siteCount; @@ -1181,15 +1200,16 @@ void _TheTree::ComputeBranchCache ( } } - /*printf ("\n\nComputeBranchCache at branch %ld; siteOdering %s\n", +#ifdef _UBER_VERBOSE_LF_DEBUG + printf ("\n\nComputeBranchCache at branch %ld; siteOdering %s\n", brID, _String((_String*)siteOrdering.toStr()).sData); echoNodeList (rootPath,flatLeaves,flatNodes ); printf ("\n"); echoNodeList (nodesToProcess,flatLeaves,flatNodes); - */ +#endif - _Parameter * state = cache + alphabetDimension * siteFrom, + _Parameter * state = cache + alphabetDimension * siteFrom, * childVector; long localScalerChange = 0; @@ -1213,8 +1233,8 @@ void _TheTree::ComputeBranchCache ( } } } else { // an internal branch - long nodeCode = brID - flatLeaves.lLength; - _Parameter *lastUpdated = iNodeCache + (nodeCode * siteCount + siteFrom) * alphabetDimension; + const long nodeCode = brID - flatLeaves.lLength; + _Parameter * lastUpdated = iNodeCache + (nodeCode * siteCount + siteFrom) * alphabetDimension; long currentTCCIndex , currentTCCBit ; @@ -1232,7 +1252,7 @@ void _TheTree::ComputeBranchCache ( } } - for (long s = 0; s < alphabetDimension; s++) { + for (long s = 0L; s < alphabetDimension; s++) { state[s] = lastUpdated[s]; } @@ -1249,8 +1269,10 @@ void _TheTree::ComputeBranchCache ( taggedNodes.Populate (flatTree.lLength, 0, 0); rootPath.Flip (); + + long const node_count = nodesToProcess.lLength + rootPath.lLength - 2L; - for (long nodeID = 0; nodeID < nodesToProcess.lLength + rootPath.lLength - 2; nodeID++) { + for (long nodeID = 0; nodeID < node_count; nodeID++) { bool notPassedRoot = nodeIDGetName()->sData); + taggedNodes.lData[parentCode] = 1L; 
_Parameter const *localScalingFactor = scalingAdjustments + parentCode*siteCount; if (alphabetDimension == 4L) { - long k3 = 0; - for (long k = siteFrom; k < siteTo; k++, k3+=4) { + unsigned long k3 = 0UL; + for (unsigned long k = siteFrom; k < siteTo; k++, k3+=4UL) { _Parameter scaler = localScalingFactor[k]; parentConditionals [k3] = scaler; - parentConditionals [k3+1] = scaler; - parentConditionals [k3+2] = scaler; - parentConditionals [k3+3] = scaler; + parentConditionals [k3+1L] = scaler; + parentConditionals [k3+2L] = scaler; + parentConditionals [k3+3L] = scaler; } } else { unsigned long k3 = 0UL; - for (long k = siteFrom; k < siteTo; k++) { + for (unsigned long k = siteFrom; k < siteTo; k++) { _Parameter scaler = localScalingFactor[k]; for (unsigned long k2 = 0UL; k2 < alphabetDimension; k2++, k3++) { parentConditionals [k3] = scaler; @@ -1289,12 +1310,12 @@ void _TheTree::ComputeBranchCache ( } } } - + _CalcNode * currentTreeNode = (_CalcNode*) (isLeaf? flatCLeaves (nodeCode): flatTree (notPassedRoot?nodeCode:parentCode)); - - //printf ("isLeaf = %d, nodeCode = %ld, parentCode = %ld, matrix from %s, parent name %s\n", isLeaf, nodeCode, parentCode, currentTreeNode->GetName()->sData, ((_CalcNode *)flatTree(parentCode))->GetName()->sData); - +#ifdef _UBER_VERBOSE_LF_DEBUG + printf ("isLeaf = %d, not passedRoot = %d, nodeCode = %ld, parentCode = %ld, matrix from %s, parent name %s\n", isLeaf, notPassedRoot, nodeCode, parentCode, currentTreeNode->GetName()->sData, ((_CalcNode *)flatTree(parentCode))->GetName()->sData); +#endif _Parameter const * _hprestrict_ transitionMatrix = currentTreeNode->GetCompExp(catID)->theData; #ifdef _SLKP_USE_AVX_INTRINSICS @@ -1310,7 +1331,10 @@ void _TheTree::ComputeBranchCache ( * lastUpdatedSite; if (!isLeaf) { - lastUpdatedSite = childVector = iNodeCache + (siteFrom + nodeCode * siteCount) * alphabetDimension; +#ifdef _UBER_VERBOSE_LF_DEBUG + printf ("childvector from %d %s\n", nodeCode, ((_CalcNode 
*)flatTree(nodeCode))->GetName()->sData); +#endif + lastUpdatedSite = childVector = iNodeCache + (siteFrom + nodeCode * siteCount) * alphabetDimension; } @@ -1339,17 +1363,24 @@ void _TheTree::ComputeBranchCache ( long siteState = lNodeFlags[nodeCode*siteCount + siteOrdering.lData[siteID]] ; if (siteState >= 0) { // a single character state; sweep down the appropriate column - if (alphabetDimension == 4) { // special case for nuc data + if (alphabetDimension == 4UL) { // special case for nuc data parentConditionals[0] *= tMatrix[siteState]; - parentConditionals[1] *= tMatrix[siteState+4]; - parentConditionals[2] *= tMatrix[siteState+8]; - parentConditionals[3] *= tMatrix[siteState+12]; + parentConditionals[1] *= tMatrix[siteState+4UL]; + parentConditionals[2] *= tMatrix[siteState+8UL]; + parentConditionals[3] *= tMatrix[siteState+12UL]; } else { - tMatrix += siteState; - for (long k = 0; k < alphabetDimension; k++, tMatrix += alphabetDimension) { - parentConditionals[k] *= *tMatrix; - //printf ("Leaf %ld %g %g\n", k, parentConditionals[k], *tMatrix); - } + unsigned long k = 0UL; + unsigned long target_index = siteState; + unsigned long shifter = alphabetDimension << 2; + for (; k < alphabetDimensionmod4; k+=4UL, target_index += shifter) { + parentConditionals[k] *= tMatrix[target_index]; + parentConditionals[k+1] *= tMatrix[target_index + alphabetDimension]; + parentConditionals[k+2] *= tMatrix[target_index + alphabetDimension + alphabetDimension]; + parentConditionals[k+3] *= tMatrix[target_index + alphabetDimension + alphabetDimension + alphabetDimension]; + } + for (; k < alphabetDimension; k++, target_index += alphabetDimension) { + parentConditionals[k] *= tMatrix[target_index]; + } } continue; } else { @@ -1361,7 +1392,7 @@ void _TheTree::ComputeBranchCache ( if ((tcc->lData[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom) // the value of this conditional vector needs to be copied from a previously stored site // subtree 
duplication - for (long k = 0; k < alphabetDimension; k++) { + for (long k = 0UL; k < alphabetDimension; k++) { childVector[k] = lastUpdatedSite[k]; } else { @@ -1382,7 +1413,7 @@ void _TheTree::ComputeBranchCache ( _Parameter sum = .0; char didScale = 0; - if (alphabetDimension == 4) { // special case for nuc data + if (alphabetDimension == 4L) { // special case for nuc data #ifdef _SLKP_USE_AVX_INTRINSICS _handle4x4_pruning_case (childVector, tMatrix, parentConditionals, tmatrix_transpose); #else @@ -1414,53 +1445,84 @@ void _TheTree::ComputeBranchCache ( } } } - childVector += 4; + childVector += 4L; } else { for (long p = 0; p < alphabetDimension; p++) { + _Parameter accumulator = 0.0; + +#ifdef _SLKP_USE_SSE_INTRINSICS -#ifdef _SLKP_USE_AVX_INTRINSICS - if (alphabetDimension == 20UL) { - - __m256d t_matrix[5] = {_mm256_loadu_pd(tMatrix), - _mm256_loadu_pd(tMatrix+4UL), - _mm256_loadu_pd(tMatrix+8UL), - _mm256_loadu_pd(tMatrix+12UL), - _mm256_loadu_pd(tMatrix+16UL)}, - - c_vector[5] = {_mm256_loadu_pd(childVector), - _mm256_loadu_pd(childVector+4UL), - _mm256_loadu_pd(childVector+8UL), - _mm256_loadu_pd(childVector+12UL), - _mm256_loadu_pd(childVector+16UL)}; - - t_matrix[0] = _mm256_mul_pd(t_matrix[0], c_vector[0]); - t_matrix[1] = _mm256_mul_pd(t_matrix[1], c_vector[1]); - t_matrix[2] = _mm256_mul_pd(t_matrix[2], c_vector[2]); - t_matrix[3] = _mm256_mul_pd(t_matrix[3], c_vector[3]); - t_matrix[4] = _mm256_mul_pd(t_matrix[4], c_vector[4]); - - t_matrix[0] = _mm256_add_pd (t_matrix[0],t_matrix[1]); - t_matrix[2] = _mm256_add_pd (t_matrix[2],t_matrix[3]); - t_matrix[0] = _mm256_add_pd (t_matrix[0],t_matrix[2]); + __m128d buffer1, + buffer2, + buffer3 = _mm_setzero_pd(), + buffer4 = _mm_setzero_pd(), + load1, + load2, + load3, + load4; + + + if (((long int)tMatrix & 0b1111) == 0 && ((long int)childVector & 0b1111) == 0){ + for (long c = 0; c < alphabetDimensionmod4; c+=4) { + load1 = _mm_load_pd (tMatrix+c); + load2 = _mm_load_pd (tMatrix+c+2); + load3 = 
_mm_load_pd (childVector+c); + load4 = _mm_load_pd (childVector+c+2); + buffer1 = _mm_mul_pd (load1, load3); + buffer2 = _mm_mul_pd (load2, load4); + buffer3 = _mm_add_pd (buffer1,buffer3); + buffer4 = _mm_add_pd (buffer2,buffer4); + } + } else { + for (long c = 0; c < alphabetDimensionmod4; c+=4) { + load1 = _mm_loadu_pd (tMatrix+c); + load2 = _mm_loadu_pd (tMatrix+c+2); + load3 = _mm_loadu_pd (childVector+c); + load4 = _mm_loadu_pd (childVector+c+2); + buffer1 = _mm_mul_pd (load1, load3); + buffer2 = _mm_mul_pd (load2, load4); + buffer3 = _mm_add_pd (buffer1,buffer3); + buffer4 = _mm_add_pd (buffer2,buffer4); + } - tMatrix += 20UL; - sum += (parentConditionals[p] *= _avx_sum_4(_mm256_add_pd (t_matrix[0],t_matrix[4]))); - continue; + } + + buffer3 = _mm_add_pd (buffer3, buffer4); + double buffer[2] __attribute__ ((aligned (16))); + _mm_store_pd (buffer, buffer3); + accumulator = buffer[0] + buffer[1]; + +#elif defined _SLKP_USE_AVX_INTRINSICS // end _SLKP_USE_SSE_INTRINSICS + + __m256d sum256 = _mm256_setzero_pd(); + + + for (long c = 0; c < alphabetDimensionmod4; c+=4L) { + __m256d matrix_quad = _mm256_loadu_pd (tMatrix+c), + child_quad = _mm256_loadu_pd (childVector+c), + prod = _mm256_mul_pd (matrix_quad, child_quad); + sum256 = _mm256_add_pd (sum256,prod); } -#endif - _Parameter accumulator = 0.0; - - for (long c = 0; c < alphabetDimensionmod4; c+=4) { // 4 - unroll the loop - _Parameter pr1 = tMatrix[c] * childVector[c], - pr2 = tMatrix[c+1] * childVector[c+1], - pr3 = tMatrix[c+2] * childVector[c+2], - pr4 = tMatrix[c+3] * childVector[c+3]; - pr1 += pr2; - pr3 += pr4; - accumulator += pr1+pr3; + + + accumulator = _avx_sum_4(sum256); + //NOT sure why copy to doubles and add is faster + // than AVX istructions +#else // _SLKP_USE_AVX_INTRINSICS + + for (unsigned long c = 0UL; c < alphabetDimensionmod4; c+=4UL) { // 4 - unroll the loop + _Parameter pr1 = tMatrix[c] * childVector[c], + pr2 = tMatrix[c+1] * childVector[c+1], + pr3 = tMatrix[c+2] * 
childVector[c+2], + pr4 = tMatrix[c+3] * childVector[c+3]; + pr1 += pr2; + pr3 += pr4; + accumulator += pr1+pr3; } - +#endif // regular code + + for (long c = alphabetDimensionmod4; c < alphabetDimension; c++) { accumulator += tMatrix[c] * childVector[c]; } @@ -1506,13 +1568,17 @@ void _TheTree::ComputeBranchCache ( } - - //printf ("root name %s\n", ((_CalcNode *)flatTree(rootPath.lData[rootPath.lLength-2] - flatLeaves.lLength))->GetName()->sData); - - _Parameter const _hprestrict_ *rootConditionals = iNodeCache + (rootPath.lData[rootPath.lLength-2] - flatLeaves.lLength) * siteCount * alphabetDimension; +#ifdef _UBER_VERBOSE_LF_DEBUG + printf ("root name %s\n", ((_CalcNode *)flatTree(rootPath.lData[rootPath.lLength-2] - flatLeaves.lLength))->GetName()->sData); +#endif + + _Parameter const _hprestrict_ *rootConditionals = iNodeCache + (rootPath.lData[rootPath.lLength-2L] - flatLeaves.lLength) * siteCount * alphabetDimension; state = cache + alphabetDimension * siteCount; - for (unsigned long ii = siteFrom * alphabetDimension; ii < alphabetDimension*siteTo; ii++) { + + const unsigned long site_bound = alphabetDimension*siteTo; + + for (unsigned long ii = siteFrom * alphabetDimension; ii < site_bound; ii++) { state[ii] = rootConditionals[ii]; //printf ("Root conditional [%ld] = %g, node state [%ld] = %g\n", ii, state[ii], ii, cache[ii]); } @@ -1536,13 +1602,13 @@ const _CalcNode* _TheTree::GetNodeFromFlatIndex(long index) const { /*----------------------------------------------------------------------------------------------------------*/ _Parameter _TheTree::ComputeLLWithBranchCache ( - _SimpleList& siteOrdering, - long brID, - _Parameter* cache, + _SimpleList const& siteOrdering, + long const brID, + _Parameter const* cache, _DataSetFilter const* theFilter, - long siteFrom, + long const siteFrom, long siteTo, - long catID, + long const catID, _Parameter* storageVec ) { @@ -1560,14 +1626,20 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( _Parameter term; long 
site_frequency = theFilter->theFrequencies.Get(direct_index); if ( site_frequency > 1L) { - term = log(accumulator) * site_frequency - correction; + term = log(accumulator) * site_frequency - correction; } else { term = log(accumulator) - correction; } _Parameter temp_sum = result + term; correction = (temp_sum - result) - term; result = temp_sum; - //result += log(accumulator) * theFilter->theFrequencies [siteOrdering.lData[siteID]]; +/* + #ifdef _UBER_VERBOSE_LF_DEBUG + if (likeFuncEvalCallCount == 340) { + fprintf (stderr, "%s %ld\t%20.16g\t%20.16g\t%20.16g\n", siteID == siteTo - 1 ? "FINAL " : "", siteID, accumulator, term, result); + } +#endif +*/ } }; diff --git a/src/core/include/calcnode.h b/src/core/include/calcnode.h index 198026082..01b38d0f9 100644 --- a/src/core/include/calcnode.h +++ b/src/core/include/calcnode.h @@ -759,23 +759,23 @@ class _TheTree: public _TreeTopology long * lNodeFlags, _Parameter* scalingAdjustments, long* siteCorrectionCounts, - _GrowingVector* lNodeResolutions, + _GrowingVector const* lNodeResolutions, long& overallScaler, - long siteFrom, + long const siteFrom, long siteTo, - long catID, - _SimpleList* = nil, + long const catID, + _SimpleList const* = nil, _Parameter* = nil ); _Parameter ComputeLLWithBranchCache ( - _SimpleList& siteOrdering, - long brID, - _Parameter* cache, + _SimpleList const& siteOrdering, + long const brID, + _Parameter const* cache, _DataSetFilter const* theFilter, - long siteFrom, + long const siteFrom, long siteTo, - long catID, + long const catID, _Parameter* storageVec = nil ); diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index 34b86d115..1c8fc52c2 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -414,7 +414,7 @@ class _LikelihoodFunction: public BaseObj protected: - + void _TerminateAndDump (const _String& error); void ComputeParameterPenalty (void); diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index a9ab0ff40..bc6b33f5f 100644 --- 
a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -37,7 +37,7 @@ */ - //#define _UBER_VERBOSE_LF_DEBUG +//#define _UBER_VERBOSE_LF_DEBUG #include #include @@ -1960,13 +1960,13 @@ _Parameter _LikelihoodFunction::Compute (void) bool done = false; #ifdef _UBER_VERBOSE_LF_DEBUG - if (likeFuncEvalCallCount >= 12731) { + if (likeFuncEvalCallCount >= 335) { fprintf (stderr, "\n*** Likelihood function evaluation %ld ***\n", likeFuncEvalCallCount+1); for (unsigned long i=0; iHasChanged()) { fprintf (stderr, "[CHANGED] "); - fprintf (stderr, "%s = %15.12g\n", v->GetName()->sData, v->theValue); + fprintf (stderr, "%s = %15.12g (%15.12g)\n", v->GetName()->sData, GetIthIndependent(i), ((_Constant*) LocateVar (indexInd.lData[i])->Compute())->Value()); } } } @@ -1997,7 +1997,7 @@ _Parameter _LikelihoodFunction::Compute (void) ComputeSiteLikelihoodsForABlock (partID, siteResults->theData, siteScalerBuffer); #ifdef _UBER_VERBOSE_LF_DEBUG - fprintf (stderr, "Did compute %g\n", result); + fprintf (stderr, "Did compute %20.16g\n", result); #endif _Parameter blockResult = SumUpSiteLikelihoods (partID, siteResults->theData, siteScalerBuffer); UpdateBlockResult (partID, blockResult); @@ -2164,7 +2164,7 @@ _Parameter _LikelihoodFunction::Compute (void) evalsSinceLastSetup ++; PostCompute (); #ifdef _UBER_VERBOSE_LF_DEBUG - fprintf (stderr, "%g\n", result); + fprintf (stderr, "%20.16g\n", result); #endif if (isnan (result)) { ReportWarning ("Likelihood function evaluation encountered a NaN (probably due to a parameterization error or a bug)."); @@ -4769,7 +4769,23 @@ void _LikelihoodFunction::GetAllIndependent (_Matrix & storage) const { } } + +//_______________________________________________________________________________________ +void _LikelihoodFunction::_TerminateAndDump(const _String &error) { + this->DoneComputing(); + + _String sLF (8192L, true); + SerializeLF (sLF,_hyphyLFSerializeModeVanilla); + sLF.Finalize (); + + FILE * out = doFileOpen ("/tmp/hyphy.dump", "w"); + fwrite 
((void*)sLF.getStr(), 1, sLF.Length(), out); + fclose (out); + + WarnError (_String("Internal error, likelihood function calculation on the same parameter value returned different scores, dumping the offending likelihood function to /tmp/hyphy.dump") & error ); +} + //_______________________________________________________________________________________ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& middle, _Parameter& right, _Parameter& leftValue, _Parameter& middleValue, _Parameter& rightValue, _Parameter& initialStep, _Matrix* gradient) @@ -4849,8 +4865,6 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& saveM = NAN; } - - /*if (index < 0) { printf ("[Bracket bounds %g - %g (%g)/%g]\n", lowerBound, upperBound, practicalUB, middle); @@ -4861,7 +4875,10 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (verbosityLevel > 100) { char buf [512]; - snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) INITIAL BRACKET %15.12g <= %15.12g (current %15.12g) <= %15.12g]", index, middle-leftStep, middle, index>=0?GetIthIndependent (index):0.0, middle+rightStep); + snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld, eval %ld) INITIAL BRACKET %15.12g <= %15.12g (current %15.12g) <= %15.12g]", index, likeFuncEvalCallCount, middle-leftStep, middle, index>=0?GetIthIndependent (index):0.0, middle+rightStep); + if (likeFuncEvalCallCount == 376) { + printf ("\nLog(L) = %20.16g\n", Compute()); + } BufferToConsole (buf); } @@ -4928,6 +4945,11 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& middleValue = saveMV; } else { middleValue = SetParametersAndCompute (index, middle, &currentValues, gradient); + if (verbosityLevel > 100) { + char buf [512]; + snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) UPDATED middle to %15.12g, LogL = %15.12g]", index, middle, middleValue); + BufferToConsole (buf); + } 
} left = middle-leftStep; @@ -4940,7 +4962,12 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& leftValue = saveMV; } else { leftValue = SetParametersAndCompute (index, left, ¤tValues, gradient); - } + if (verbosityLevel > 100) { + char buf [512]; + snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) UPDATED left to %15.12g, LogL = %15.12g]", index, left, leftValue); + BufferToConsole (buf); + } + } right = middle+rightStep; @@ -4952,13 +4979,18 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& rightValue = saveMV; } else { rightValue = SetParametersAndCompute (index, right, ¤tValues, gradient); - } + if (verbosityLevel > 100) { + char buf [512]; + snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) UPDATED right to %15.12g, LogL = %15.12g]", index, right, rightValue); + BufferToConsole (buf); + } + } if (verbosityLevel > 100) { char buf [512]; - snprintf (buf, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld): BRACKET %g (LogL : %15.12g diff: %15.12g) - %g (logL: %15.12g) - %g (LogL : %15.12g diff: %15.12g)]", index, left, leftValue, leftValue-middleValue, middle, middleValue, right, rightValue, rightValue-middleValue); + snprintf (buf, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld): BRACKET %20.16g (LogL : %15.12g diff: %15.12g) - %20.16g (logL: %15.12g) - %20.16g (LogL : %15.12g diff: %15.12g)]", index, left, leftValue, leftValue-middleValue, middle, middleValue, right, rightValue, rightValue-middleValue); BufferToConsole (buf); } @@ -5043,7 +5075,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (verbosityLevel > 100) { char buf [256]; - snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue); + snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) recomputed 
the value to midpoint: L(%20.16g) = %20.16g [@%20.16g -> %20.16g:@%20.16g -> %20.16g]]", index, middle, middleValue, left, leftValue,right, rightValue); BufferToConsole (buf); //exit (0); } @@ -5054,26 +5086,19 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& } if (successful && !(rightValue<=middleValue && leftValue<=middleValue)) { - this->DoneComputing(); + + char buf[256], buf2[512]; snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue); - snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %d: %15.12g <= %15.12g <= %15.12g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); + snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %20.16g <= %20.16g >= %20.16g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? 
"SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); - _String sLF (8192L, true); - SerializeLF (sLF,_hyphyLFSerializeModeVanilla); - sLF.Finalize (); - - FILE * out = doFileOpen ("/tmp/hyphy.dump", "w"); - fwrite ((void*)sLF.getStr(), 1, sLF.Length(), out); - fclose (out); - - WarnError (_String("Internal error in _LikelihoodFunction::Bracket, likelihood function calculation on the same parameter value returned different scores, dumping the offending likelihood function to /tmp/hyphy.dump") & buf & "\n" & buf2 & "\nParameter name " & *GetIthIndependentName(index)); + _TerminateAndDump (_String (buf) & "\n" & buf2 & "\nParameter name " & *GetIthIndependentName(index)); } if (verbosityLevel > 100) { char buf [256]; - snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %d: %15.12g <= %15.12g <= %15.12g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); + snprintf (buf, 256, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %15.12g <= %15.12g <= %15.12g. Steps, L=%g, R=%g, values (delta-left) %20.16g : %20.16g : (delta-right) %20.16g]", index, successful ? 
"SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, middleValue-leftValue, middleValue, middleValue-rightValue); BufferToConsole (buf); } @@ -6383,7 +6408,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P if (FU<=FX) { // value at U is the new minimum if (verbosityLevel > 50) { char buf [256]; - snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (eval %ld) ACCEPT new try, confirm value %20.16g (delta = %20.16g)", likeFuncEvalCallCount, GetIthIndependent(index), FU-FX); + snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (eval %ld) ACCEPT new try, confirm value %20.16g (delta = %20.16g)", likeFuncEvalCallCount, GetIthIndependent(index), FX-FU); BufferToConsole (buf); } @@ -6401,7 +6426,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P } else { // value at X remains the minimum if (verbosityLevel > 50) { char buf [256]; - snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (eval %ld) REJECT new try (delta = %20.16g)", likeFuncEvalCallCount, X, FU-FX); + snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (eval %ld) REJECT new try (%20.16g) (delta = %20.16g)", likeFuncEvalCallCount, U, FX-FU); BufferToConsole (buf); } @@ -6435,12 +6460,19 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P middle = X; if (middleValue 50) { - char buf [256]; - snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (index %ld) RESETTING THE VALUE (worse log likelihood obtained) ]\n\n", index); - BufferToConsole (buf); - } - SetIthIndependent(index,bestVal); + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (index %ld) RESETTING THE VALUE (worse log likelihood obtained; current value %20.16g, best value %20.16g) ]\n\n", index, GetIthIndependent(index), bestVal); + + BufferToConsole (buf); + } + if (CheckEqual(GetIthIndependent(index), bestVal) && !CheckEqual 
(middleValue,maxSoFar)) { + char buf[256]; + snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::LocateTheBump (index %ld) current value %20.16g (parameter = %20.16g), best value %20.16g (parameter = %20.16g)); delta = %20.16g ]\n\n", index, middleValue, GetIthIndependent(index), maxSoFar, bestVal, maxSoFar - middleValue); + + _TerminateAndDump (_String (buf) & "\n" & "\nParameter name " & *GetIthIndependentName(index)); + } + SetIthIndependent(index,bestVal); } else { if (!CheckEqual(GetIthIndependent(index),middle)) { if (verbosityLevel > 50) { @@ -6449,6 +6481,7 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P BufferToConsole (buf); } SetIthIndependent (index,middle); + //Compute(); } else { if (verbosityLevel > 50) { char buf [256]; @@ -7120,7 +7153,7 @@ void _LikelihoodFunction::ScanAllVariables (void) if (_cv->GetUpperBound()>=u) { _cv->SetBounds(_cv->GetLowerBound(),DEFAULTPARAMETERUBOUND); } - //printf ("%s -> %d\n", _cv->theName->sData, rankVariables.GetXtra(rankVariables.Find ((BaseRef)indexInd.lData[i]))); + //printf ("[VARIABLE RANK] %s -> %d\n", _cv->theName->sData, rankVariables.GetXtra(rankVariables.Find ((BaseRef)indexInd.lData[i]))); } for (unsigned long i=0; ilData[ciid]; + + if (nodeID == 0 || nodeID == 1) { long snID = -1; @@ -7710,7 +7745,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, } #ifdef _UBER_VERBOSE_LF_DEBUG - fprintf (stderr, "\nCached %ld (%ld)/New %ld (%ld)\n", *cbid, nodeID, snID, matrices->lLength); + fprintf (stderr, "\nCached %s (nodeID = %lD)/New %s (touched matrices %ld) Eval id = %ld\n", *cbid >= 0 ? t->GetNodeFromFlatIndex (*cbid)->GetName()->getStr() : "None", nodeID, snID >= 0 ? 
t->GetNodeFromFlatIndex (snID)->GetName()->getStr() : "None", matrices->lLength, likeFuncEvalCallCount); #endif if (snID != *cbid) { RestoreScalingFactors (index, *cbid, patternCnt, scc, sccb); @@ -7734,7 +7769,9 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, } else { doCachedComp = nodeID; } - +#ifdef _UBER_VERBOSE_LF_DEBUG + fprintf (stderr, "\ndoCachedComp = %ld\n", doCachedComp); +#endif } else { RestoreScalingFactors (index, *cbid, patternCnt, scc, sccb); @@ -7775,7 +7812,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, if (doCachedComp >= 3) { #ifdef _UBER_VERBOSE_LF_DEBUG - fprintf (stderr, "CACHE compute branch %d\n",doCachedComp-3); + fprintf (stderr, "CACHE compute branch %ld (%s) Eval id = %ld\n\n",doCachedComp-3, t->GetNodeFromFlatIndex (doCachedComp-3)->GetName()->getStr(), likeFuncEvalCallCount); #endif sum = t->ComputeLLWithBranchCache (*sl, doCachedComp-3, @@ -7786,6 +7823,29 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, catID, siteRes) - _logLFScaler * overallScalingFactors.lData[index]; + + /*#ifdef _UBER_VERBOSE_LF_DEBUG + if (likeFuncEvalCallCount >= 340) { + fprintf (stderr, "\nCheck validity of cache compute... diff = %20.16g\n\n", sum - + t->ComputeTreeBlockByBranch (*sl, + *branches, + tcc, + df, + inc, + conditionalTerminalNodeStateFlag[index], + ssf, + (_GrowingVector*)conditionalTerminalNodeLikelihoodCaches(index), + overallScalingFactors.lData[index], + 0, + df->GetPatternCount(), + catID, + siteRes, + scc, + branchIndex, + branchIndex >= 0 ? 
branchValues->lData: nil)); + } + #endif*/ + return sum; } @@ -7890,7 +7950,9 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, //printf ("Set up %d\n", doCachedComp); *cbid = doCachedComp; - +#ifdef _UBER_VERBOSE_LF_DEBUG + fprintf (stderr, "PREPARING CACHE FOR NODE %ld (%s)\n" , doCachedComp, t->GetNodeFromFlatIndex (doCachedComp)->GetName()->getStr()); +#endif overallScalingFactorsBackup.lData[index] = overallScalingFactors.lData[index]; if (sccb) for (long recoverIndex = 0; recoverIndex < patternCnt; recoverIndex++) { @@ -7926,8 +7988,10 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, catID, siteRes) - _logLFScaler * overallScalingFactors.lData[index]; + + //fprintf (stderr, "CONGRUENCE CHECK %20.16g\n",fabs ((checksum-sum)/sum)); - if (fabs ((checksum-sum)/sum) > 0.00001) { + if (fabs ((checksum-sum)/sum) > 1e-12) { /*_Parameter check2 = t->ComputeTreeBlockByBranch (*sl, *branches, tcc, @@ -7948,11 +8012,16 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, _String* node_name = GetIthTree (index)->GetNodeFromFlatIndex(doCachedComp)->GetName(); - WarnError (_String("Internal error in ComputeBranchCache (branch ") & *node_name & - " ) reversible model cached likelihood = "& checksum & ", directly computed likelihood = " & sum & - ". This is most likely because a non-reversible model was incorrectly auto-detected (or specified by the model file in environment variables)."); - WarnError ("Bailing"); + + _TerminateAndDump (_String("Internal error in ComputeBranchCache (branch ") & *node_name & + " ) reversible model cached likelihood = "& _String (checksum, "%20.16g") & ", directly computed likelihood = " & _String (sum, "%20.16g") & + ". 
This is most likely because a non-reversible model was incorrectly auto-detected (or specified by the model file in environment variables)."); + return -A_LARGE_NUMBER; + } else { +#ifdef _UBER_VERBOSE_LF_DEBUG + fprintf (stderr, "=== Absolute caching error %20.16g\n", checksum-sum, "\n"); +#endif } } diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index cbdc3aff6..c938fcfda 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -1744,7 +1744,7 @@ bool _Matrix::IsReversible(_Matrix* freqs) { if (rc && cr) { _Polynomial *rcp = (_Polynomial *)rc->ConstructPolynomial(), - *crp = (_Polynomial *)cr->ConstructPolynomial(); + *crp = (_Polynomial *)cr->ConstructPolynomial(); if (rcp && crp) { _PMathObj tr = nil, @@ -1775,7 +1775,9 @@ bool _Matrix::IsReversible(_Matrix* freqs) { } if (tr && tc) { _Polynomial * rcpF = (_Polynomial*)rcp->Mult(tr), - * crpF = (_Polynomial*)crp->Mult(tc); + * crpF = (_Polynomial*)crp->Mult(tc); + + //fprintf (stderr, "%s : %s\n", _String ((_String*)rcpF->toStr()).getStr(), _String ((_String*)crpF->toStr()).getStr()); compResult = rcpF->Equal(crpF); DeleteObject (rcpF); diff --git a/src/core/polynoml.cpp b/src/core/polynoml.cpp index 3fa90797e..5d823b5e3 100644 --- a/src/core/polynoml.cpp +++ b/src/core/polynoml.cpp @@ -964,7 +964,7 @@ bool _Polynomial::Equal(_MathObject* m) if (diff) { _Constant * v = (_Constant*)diff->IsANumber(true); if (v!=nil) { - result = fabs (v->Value()) < 1.e-6; + result = CheckEqual (v->Value(),0.0); DeleteObject (v); } //_String * diffS = (_String*)diff->toStr(); diff --git a/tests/hbltests/SimpleOptimizations/SmallCodon.bf b/tests/hbltests/SimpleOptimizations/SmallCodon.bf index d511e1c77..dcbbf05e1 100644 --- a/tests/hbltests/SimpleOptimizations/SmallCodon.bf +++ b/tests/hbltests/SimpleOptimizations/SmallCodon.bf @@ -52,598 +52,604 @@ global AC=1; global CG:=AT; global GT:=AT; +global CT=1.176334358800697; +global R=0.2312658755546703; +global AT=0.1219371068878096; +global AC=0.239867475616316; 
+global CG:=AT; +global GT:=AT; MG94custom={61,61}; -MG94custom[0][1]:=AC*R*synRate*0.10767; +MG94custom[0][1]:=synRate*R*AC*0.1076704545454545; MG94custom[0][2]:=synRate*0.19375; -MG94custom[0][3]:=AT*R*synRate*0.220455; -MG94custom[0][4]:=AC*R*synRate*0.20142; -MG94custom[0][8]:=R*synRate*0.150568; -MG94custom[0][12]:=AT*R*synRate*0.261648; -MG94custom[0][16]:=AC*R*synRate*0.189773; -MG94custom[0][32]:=R*synRate*0.284375; -MG94custom[1][0]:=AC*R*synRate*0.478125; -MG94custom[1][2]:=CG*R*synRate*0.19375; -MG94custom[1][3]:=CT*synRate*0.220455; -MG94custom[1][5]:=AC*R*synRate*0.20142; -MG94custom[1][9]:=R*synRate*0.150568; -MG94custom[1][13]:=AT*R*synRate*0.261648; -MG94custom[1][17]:=AC*R*synRate*0.189773; -MG94custom[1][33]:=R*synRate*0.284375; -MG94custom[1][48]:=AT*R*synRate*0.176989; +MG94custom[0][3]:=synRate*R*AT*0.2204545454545455; +MG94custom[0][4]:=synRate*R*AC*0.2014204545454545; +MG94custom[0][8]:=synRate*R*0.1505681818181818; +MG94custom[0][12]:=synRate*R*AT*0.2616477272727273; +MG94custom[0][16]:=synRate*R*AC*0.1897727272727273; +MG94custom[0][32]:=synRate*R*0.284375; +MG94custom[1][0]:=synRate*R*AC*0.478125; +MG94custom[1][2]:=synRate*R*CG*0.19375; +MG94custom[1][3]:=synRate*CT*0.2204545454545455; +MG94custom[1][5]:=synRate*R*AC*0.2014204545454545; +MG94custom[1][9]:=synRate*R*0.1505681818181818; +MG94custom[1][13]:=synRate*R*AT*0.2616477272727273; +MG94custom[1][17]:=synRate*R*AC*0.1897727272727273; +MG94custom[1][33]:=synRate*R*0.284375; +MG94custom[1][48]:=synRate*R*AT*0.1769886363636364; MG94custom[2][0]:=synRate*0.478125; -MG94custom[2][1]:=CG*R*synRate*0.10767; -MG94custom[2][3]:=GT*R*synRate*0.220455; -MG94custom[2][6]:=AC*R*synRate*0.20142; -MG94custom[2][10]:=R*synRate*0.150568; -MG94custom[2][14]:=AT*R*synRate*0.261648; -MG94custom[2][18]:=AC*R*synRate*0.189773; -MG94custom[2][34]:=R*synRate*0.284375; -MG94custom[3][0]:=AT*R*synRate*0.478125; -MG94custom[3][1]:=CT*synRate*0.10767; -MG94custom[3][2]:=GT*R*synRate*0.19375; 
-MG94custom[3][7]:=AC*R*synRate*0.20142; -MG94custom[3][11]:=R*synRate*0.150568; -MG94custom[3][15]:=AT*R*synRate*0.261648; -MG94custom[3][19]:=AC*R*synRate*0.189773; -MG94custom[3][35]:=R*synRate*0.284375; -MG94custom[3][49]:=AT*R*synRate*0.176989; -MG94custom[4][0]:=AC*R*synRate*0.386364; -MG94custom[4][5]:=AC*synRate*0.10767; +MG94custom[2][1]:=synRate*R*CG*0.1076704545454545; +MG94custom[2][3]:=synRate*R*GT*0.2204545454545455; +MG94custom[2][6]:=synRate*R*AC*0.2014204545454545; +MG94custom[2][10]:=synRate*R*0.1505681818181818; +MG94custom[2][14]:=synRate*R*AT*0.2616477272727273; +MG94custom[2][18]:=synRate*R*AC*0.1897727272727273; +MG94custom[2][34]:=synRate*R*0.284375; +MG94custom[3][0]:=synRate*R*AT*0.478125; +MG94custom[3][1]:=synRate*CT*0.1076704545454545; +MG94custom[3][2]:=synRate*R*GT*0.19375; +MG94custom[3][7]:=synRate*R*AC*0.2014204545454545; +MG94custom[3][11]:=synRate*R*0.1505681818181818; +MG94custom[3][15]:=synRate*R*AT*0.2616477272727273; +MG94custom[3][19]:=synRate*R*AC*0.1897727272727273; +MG94custom[3][35]:=synRate*R*0.284375; +MG94custom[3][49]:=synRate*R*AT*0.1769886363636364; +MG94custom[4][0]:=synRate*R*AC*0.3863636363636364; +MG94custom[4][5]:=synRate*AC*0.1076704545454545; MG94custom[4][6]:=synRate*0.19375; -MG94custom[4][7]:=AT*synRate*0.220455; -MG94custom[4][8]:=CG*R*synRate*0.150568; -MG94custom[4][12]:=CT*R*synRate*0.261648; -MG94custom[4][20]:=AC*R*synRate*0.189773; -MG94custom[4][36]:=R*synRate*0.284375; -MG94custom[4][50]:=AT*R*synRate*0.176989; -MG94custom[5][1]:=AC*R*synRate*0.386364; -MG94custom[5][4]:=AC*synRate*0.478125; -MG94custom[5][6]:=CG*synRate*0.19375; -MG94custom[5][7]:=CT*synRate*0.220455; -MG94custom[5][9]:=CG*R*synRate*0.150568; -MG94custom[5][13]:=CT*R*synRate*0.261648; -MG94custom[5][21]:=AC*R*synRate*0.189773; -MG94custom[5][37]:=R*synRate*0.284375; -MG94custom[5][51]:=AT*R*synRate*0.176989; -MG94custom[6][2]:=AC*R*synRate*0.386364; +MG94custom[4][7]:=synRate*AT*0.2204545454545455; 
+MG94custom[4][8]:=synRate*R*CG*0.1505681818181818; +MG94custom[4][12]:=synRate*R*CT*0.2616477272727273; +MG94custom[4][20]:=synRate*R*AC*0.1897727272727273; +MG94custom[4][36]:=synRate*R*0.284375; +MG94custom[4][50]:=synRate*R*AT*0.1769886363636364; +MG94custom[5][1]:=synRate*R*AC*0.3863636363636364; +MG94custom[5][4]:=synRate*AC*0.478125; +MG94custom[5][6]:=synRate*CG*0.19375; +MG94custom[5][7]:=synRate*CT*0.2204545454545455; +MG94custom[5][9]:=synRate*R*CG*0.1505681818181818; +MG94custom[5][13]:=synRate*R*CT*0.2616477272727273; +MG94custom[5][21]:=synRate*R*AC*0.1897727272727273; +MG94custom[5][37]:=synRate*R*0.284375; +MG94custom[5][51]:=synRate*R*AT*0.1769886363636364; +MG94custom[6][2]:=synRate*R*AC*0.3863636363636364; MG94custom[6][4]:=synRate*0.478125; -MG94custom[6][5]:=CG*synRate*0.10767; -MG94custom[6][7]:=GT*synRate*0.220455; -MG94custom[6][10]:=CG*R*synRate*0.150568; -MG94custom[6][14]:=CT*R*synRate*0.261648; -MG94custom[6][22]:=AC*R*synRate*0.189773; -MG94custom[6][38]:=R*synRate*0.284375; -MG94custom[6][52]:=AT*R*synRate*0.176989; -MG94custom[7][3]:=AC*R*synRate*0.386364; -MG94custom[7][4]:=AT*synRate*0.478125; -MG94custom[7][5]:=CT*synRate*0.10767; -MG94custom[7][6]:=GT*synRate*0.19375; -MG94custom[7][11]:=CG*R*synRate*0.150568; -MG94custom[7][15]:=CT*R*synRate*0.261648; -MG94custom[7][23]:=AC*R*synRate*0.189773; -MG94custom[7][39]:=R*synRate*0.284375; -MG94custom[7][53]:=AT*R*synRate*0.176989; -MG94custom[8][0]:=R*synRate*0.386364; -MG94custom[8][4]:=CG*R*synRate*0.20142; -MG94custom[8][9]:=AC*R*synRate*0.10767; +MG94custom[6][5]:=synRate*CG*0.1076704545454545; +MG94custom[6][7]:=synRate*GT*0.2204545454545455; +MG94custom[6][10]:=synRate*R*CG*0.1505681818181818; +MG94custom[6][14]:=synRate*R*CT*0.2616477272727273; +MG94custom[6][22]:=synRate*R*AC*0.1897727272727273; +MG94custom[6][38]:=synRate*R*0.284375; +MG94custom[6][52]:=synRate*R*AT*0.1769886363636364; +MG94custom[7][3]:=synRate*R*AC*0.3863636363636364; +MG94custom[7][4]:=synRate*AT*0.478125; 
+MG94custom[7][5]:=synRate*CT*0.1076704545454545; +MG94custom[7][6]:=synRate*GT*0.19375; +MG94custom[7][11]:=synRate*R*CG*0.1505681818181818; +MG94custom[7][15]:=synRate*R*CT*0.2616477272727273; +MG94custom[7][23]:=synRate*R*AC*0.1897727272727273; +MG94custom[7][39]:=synRate*R*0.284375; +MG94custom[7][53]:=synRate*R*AT*0.1769886363636364; +MG94custom[8][0]:=synRate*R*0.3863636363636364; +MG94custom[8][4]:=synRate*R*CG*0.2014204545454545; +MG94custom[8][9]:=synRate*R*AC*0.1076704545454545; MG94custom[8][10]:=synRate*0.19375; -MG94custom[8][11]:=AT*R*synRate*0.220455; -MG94custom[8][12]:=GT*R*synRate*0.261648; -MG94custom[8][24]:=AC*synRate*0.189773; -MG94custom[8][40]:=R*synRate*0.284375; -MG94custom[9][1]:=R*synRate*0.386364; -MG94custom[9][5]:=CG*R*synRate*0.20142; -MG94custom[9][8]:=AC*R*synRate*0.478125; -MG94custom[9][10]:=CG*R*synRate*0.19375; -MG94custom[9][11]:=CT*synRate*0.220455; -MG94custom[9][13]:=GT*R*synRate*0.261648; -MG94custom[9][25]:=AC*R*synRate*0.189773; -MG94custom[9][41]:=R*synRate*0.284375; -MG94custom[9][54]:=AT*R*synRate*0.176989; -MG94custom[10][2]:=R*synRate*0.386364; -MG94custom[10][6]:=CG*R*synRate*0.20142; +MG94custom[8][11]:=synRate*R*AT*0.2204545454545455; +MG94custom[8][12]:=synRate*R*GT*0.2616477272727273; +MG94custom[8][24]:=synRate*AC*0.1897727272727273; +MG94custom[8][40]:=synRate*R*0.284375; +MG94custom[9][1]:=synRate*R*0.3863636363636364; +MG94custom[9][5]:=synRate*R*CG*0.2014204545454545; +MG94custom[9][8]:=synRate*R*AC*0.478125; +MG94custom[9][10]:=synRate*R*CG*0.19375; +MG94custom[9][11]:=synRate*CT*0.2204545454545455; +MG94custom[9][13]:=synRate*R*GT*0.2616477272727273; +MG94custom[9][25]:=synRate*R*AC*0.1897727272727273; +MG94custom[9][41]:=synRate*R*0.284375; +MG94custom[9][54]:=synRate*R*AT*0.1769886363636364; +MG94custom[10][2]:=synRate*R*0.3863636363636364; +MG94custom[10][6]:=synRate*R*CG*0.2014204545454545; MG94custom[10][8]:=synRate*0.478125; -MG94custom[10][9]:=CG*R*synRate*0.10767; 
-MG94custom[10][11]:=GT*R*synRate*0.220455; -MG94custom[10][14]:=GT*R*synRate*0.261648; -MG94custom[10][26]:=AC*synRate*0.189773; -MG94custom[10][42]:=R*synRate*0.284375; -MG94custom[10][55]:=AT*R*synRate*0.176989; -MG94custom[11][3]:=R*synRate*0.386364; -MG94custom[11][7]:=CG*R*synRate*0.20142; -MG94custom[11][8]:=AT*R*synRate*0.478125; -MG94custom[11][9]:=CT*synRate*0.10767; -MG94custom[11][10]:=GT*R*synRate*0.19375; -MG94custom[11][15]:=GT*R*synRate*0.261648; -MG94custom[11][27]:=AC*R*synRate*0.189773; -MG94custom[11][43]:=R*synRate*0.284375; -MG94custom[11][56]:=AT*R*synRate*0.176989; -MG94custom[12][0]:=AT*R*synRate*0.386364; -MG94custom[12][4]:=CT*R*synRate*0.20142; -MG94custom[12][8]:=GT*R*synRate*0.150568; -MG94custom[12][13]:=AC*synRate*0.10767; -MG94custom[12][14]:=R*synRate*0.19375; -MG94custom[12][15]:=AT*synRate*0.220455; -MG94custom[12][28]:=AC*R*synRate*0.189773; -MG94custom[12][44]:=R*synRate*0.284375; -MG94custom[12][57]:=AT*R*synRate*0.176989; -MG94custom[13][1]:=AT*R*synRate*0.386364; -MG94custom[13][5]:=CT*R*synRate*0.20142; -MG94custom[13][9]:=GT*R*synRate*0.150568; -MG94custom[13][12]:=AC*synRate*0.478125; -MG94custom[13][14]:=CG*R*synRate*0.19375; -MG94custom[13][15]:=CT*synRate*0.220455; -MG94custom[13][29]:=AC*R*synRate*0.189773; -MG94custom[13][45]:=R*synRate*0.284375; -MG94custom[13][58]:=AT*R*synRate*0.176989; -MG94custom[14][2]:=AT*R*synRate*0.386364; -MG94custom[14][6]:=CT*R*synRate*0.20142; -MG94custom[14][10]:=GT*R*synRate*0.150568; -MG94custom[14][12]:=R*synRate*0.478125; -MG94custom[14][13]:=CG*R*synRate*0.10767; -MG94custom[14][15]:=GT*R*synRate*0.220455; -MG94custom[14][30]:=AC*R*synRate*0.189773; -MG94custom[14][46]:=R*synRate*0.284375; -MG94custom[14][59]:=AT*R*synRate*0.176989; -MG94custom[15][3]:=AT*R*synRate*0.386364; -MG94custom[15][7]:=CT*R*synRate*0.20142; -MG94custom[15][11]:=GT*R*synRate*0.150568; -MG94custom[15][12]:=AT*synRate*0.478125; -MG94custom[15][13]:=CT*synRate*0.10767; 
-MG94custom[15][14]:=GT*R*synRate*0.19375; -MG94custom[15][31]:=AC*R*synRate*0.189773; -MG94custom[15][47]:=R*synRate*0.284375; -MG94custom[15][60]:=AT*R*synRate*0.176989; -MG94custom[16][0]:=AC*R*synRate*0.348864; -MG94custom[16][17]:=AC*R*synRate*0.10767; +MG94custom[10][9]:=synRate*R*CG*0.1076704545454545; +MG94custom[10][11]:=synRate*R*GT*0.2204545454545455; +MG94custom[10][14]:=synRate*R*GT*0.2616477272727273; +MG94custom[10][26]:=synRate*AC*0.1897727272727273; +MG94custom[10][42]:=synRate*R*0.284375; +MG94custom[10][55]:=synRate*R*AT*0.1769886363636364; +MG94custom[11][3]:=synRate*R*0.3863636363636364; +MG94custom[11][7]:=synRate*R*CG*0.2014204545454545; +MG94custom[11][8]:=synRate*R*AT*0.478125; +MG94custom[11][9]:=synRate*CT*0.1076704545454545; +MG94custom[11][10]:=synRate*R*GT*0.19375; +MG94custom[11][15]:=synRate*R*GT*0.2616477272727273; +MG94custom[11][27]:=synRate*R*AC*0.1897727272727273; +MG94custom[11][43]:=synRate*R*0.284375; +MG94custom[11][56]:=synRate*R*AT*0.1769886363636364; +MG94custom[12][0]:=synRate*R*AT*0.3863636363636364; +MG94custom[12][4]:=synRate*R*CT*0.2014204545454545; +MG94custom[12][8]:=synRate*R*GT*0.1505681818181818; +MG94custom[12][13]:=synRate*AC*0.1076704545454545; +MG94custom[12][14]:=synRate*R*0.19375; +MG94custom[12][15]:=synRate*AT*0.2204545454545455; +MG94custom[12][28]:=synRate*R*AC*0.1897727272727273; +MG94custom[12][44]:=synRate*R*0.284375; +MG94custom[12][57]:=synRate*R*AT*0.1769886363636364; +MG94custom[13][1]:=synRate*R*AT*0.3863636363636364; +MG94custom[13][5]:=synRate*R*CT*0.2014204545454545; +MG94custom[13][9]:=synRate*R*GT*0.1505681818181818; +MG94custom[13][12]:=synRate*AC*0.478125; +MG94custom[13][14]:=synRate*R*CG*0.19375; +MG94custom[13][15]:=synRate*CT*0.2204545454545455; +MG94custom[13][29]:=synRate*R*AC*0.1897727272727273; +MG94custom[13][45]:=synRate*R*0.284375; +MG94custom[13][58]:=synRate*R*AT*0.1769886363636364; +MG94custom[14][2]:=synRate*R*AT*0.3863636363636364; 
+MG94custom[14][6]:=synRate*R*CT*0.2014204545454545; +MG94custom[14][10]:=synRate*R*GT*0.1505681818181818; +MG94custom[14][12]:=synRate*R*0.478125; +MG94custom[14][13]:=synRate*R*CG*0.1076704545454545; +MG94custom[14][15]:=synRate*R*GT*0.2204545454545455; +MG94custom[14][30]:=synRate*R*AC*0.1897727272727273; +MG94custom[14][46]:=synRate*R*0.284375; +MG94custom[14][59]:=synRate*R*AT*0.1769886363636364; +MG94custom[15][3]:=synRate*R*AT*0.3863636363636364; +MG94custom[15][7]:=synRate*R*CT*0.2014204545454545; +MG94custom[15][11]:=synRate*R*GT*0.1505681818181818; +MG94custom[15][12]:=synRate*AT*0.478125; +MG94custom[15][13]:=synRate*CT*0.1076704545454545; +MG94custom[15][14]:=synRate*R*GT*0.19375; +MG94custom[15][31]:=synRate*R*AC*0.1897727272727273; +MG94custom[15][47]:=synRate*R*0.284375; +MG94custom[15][60]:=synRate*R*AT*0.1769886363636364; +MG94custom[16][0]:=synRate*R*AC*0.3488636363636364; +MG94custom[16][17]:=synRate*R*AC*0.1076704545454545; MG94custom[16][18]:=synRate*0.19375; -MG94custom[16][19]:=AT*R*synRate*0.220455; -MG94custom[16][20]:=AC*R*synRate*0.20142; -MG94custom[16][24]:=R*synRate*0.150568; -MG94custom[16][28]:=AT*R*synRate*0.261648; -MG94custom[16][32]:=CG*R*synRate*0.284375; -MG94custom[17][1]:=AC*R*synRate*0.348864; -MG94custom[17][16]:=AC*R*synRate*0.478125; -MG94custom[17][18]:=CG*R*synRate*0.19375; -MG94custom[17][19]:=CT*synRate*0.220455; -MG94custom[17][21]:=AC*R*synRate*0.20142; -MG94custom[17][25]:=R*synRate*0.150568; -MG94custom[17][29]:=AT*R*synRate*0.261648; -MG94custom[17][33]:=CG*R*synRate*0.284375; -MG94custom[17][48]:=CT*R*synRate*0.176989; -MG94custom[18][2]:=AC*R*synRate*0.348864; +MG94custom[16][19]:=synRate*R*AT*0.2204545454545455; +MG94custom[16][20]:=synRate*R*AC*0.2014204545454545; +MG94custom[16][24]:=synRate*R*0.1505681818181818; +MG94custom[16][28]:=synRate*R*AT*0.2616477272727273; +MG94custom[16][32]:=synRate*R*CG*0.284375; +MG94custom[17][1]:=synRate*R*AC*0.3488636363636364; +MG94custom[17][16]:=synRate*R*AC*0.478125; 
+MG94custom[17][18]:=synRate*R*CG*0.19375; +MG94custom[17][19]:=synRate*CT*0.2204545454545455; +MG94custom[17][21]:=synRate*R*AC*0.2014204545454545; +MG94custom[17][25]:=synRate*R*0.1505681818181818; +MG94custom[17][29]:=synRate*R*AT*0.2616477272727273; +MG94custom[17][33]:=synRate*R*CG*0.284375; +MG94custom[17][48]:=synRate*R*CT*0.1769886363636364; +MG94custom[18][2]:=synRate*R*AC*0.3488636363636364; MG94custom[18][16]:=synRate*0.478125; -MG94custom[18][17]:=CG*R*synRate*0.10767; -MG94custom[18][19]:=GT*R*synRate*0.220455; -MG94custom[18][22]:=AC*R*synRate*0.20142; -MG94custom[18][26]:=R*synRate*0.150568; -MG94custom[18][30]:=AT*R*synRate*0.261648; -MG94custom[18][34]:=CG*R*synRate*0.284375; -MG94custom[19][3]:=AC*R*synRate*0.348864; -MG94custom[19][16]:=AT*R*synRate*0.478125; -MG94custom[19][17]:=CT*synRate*0.10767; -MG94custom[19][18]:=GT*R*synRate*0.19375; -MG94custom[19][23]:=AC*R*synRate*0.20142; -MG94custom[19][27]:=R*synRate*0.150568; -MG94custom[19][31]:=AT*R*synRate*0.261648; -MG94custom[19][35]:=CG*R*synRate*0.284375; -MG94custom[19][49]:=CT*R*synRate*0.176989; -MG94custom[20][4]:=AC*R*synRate*0.348864; -MG94custom[20][16]:=AC*R*synRate*0.386364; -MG94custom[20][21]:=AC*synRate*0.10767; +MG94custom[18][17]:=synRate*R*CG*0.1076704545454545; +MG94custom[18][19]:=synRate*R*GT*0.2204545454545455; +MG94custom[18][22]:=synRate*R*AC*0.2014204545454545; +MG94custom[18][26]:=synRate*R*0.1505681818181818; +MG94custom[18][30]:=synRate*R*AT*0.2616477272727273; +MG94custom[18][34]:=synRate*R*CG*0.284375; +MG94custom[19][3]:=synRate*R*AC*0.3488636363636364; +MG94custom[19][16]:=synRate*R*AT*0.478125; +MG94custom[19][17]:=synRate*CT*0.1076704545454545; +MG94custom[19][18]:=synRate*R*GT*0.19375; +MG94custom[19][23]:=synRate*R*AC*0.2014204545454545; +MG94custom[19][27]:=synRate*R*0.1505681818181818; +MG94custom[19][31]:=synRate*R*AT*0.2616477272727273; +MG94custom[19][35]:=synRate*R*CG*0.284375; +MG94custom[19][49]:=synRate*R*CT*0.1769886363636364; 
+MG94custom[20][4]:=synRate*R*AC*0.3488636363636364; +MG94custom[20][16]:=synRate*R*AC*0.3863636363636364; +MG94custom[20][21]:=synRate*AC*0.1076704545454545; MG94custom[20][22]:=synRate*0.19375; -MG94custom[20][23]:=AT*synRate*0.220455; -MG94custom[20][24]:=CG*R*synRate*0.150568; -MG94custom[20][28]:=CT*R*synRate*0.261648; -MG94custom[20][36]:=CG*R*synRate*0.284375; -MG94custom[20][50]:=CT*R*synRate*0.176989; -MG94custom[21][5]:=AC*R*synRate*0.348864; -MG94custom[21][17]:=AC*R*synRate*0.386364; -MG94custom[21][20]:=AC*synRate*0.478125; -MG94custom[21][22]:=CG*synRate*0.19375; -MG94custom[21][23]:=CT*synRate*0.220455; -MG94custom[21][25]:=CG*R*synRate*0.150568; -MG94custom[21][29]:=CT*R*synRate*0.261648; -MG94custom[21][37]:=CG*R*synRate*0.284375; -MG94custom[21][51]:=CT*R*synRate*0.176989; -MG94custom[22][6]:=AC*R*synRate*0.348864; -MG94custom[22][18]:=AC*R*synRate*0.386364; +MG94custom[20][23]:=synRate*AT*0.2204545454545455; +MG94custom[20][24]:=synRate*R*CG*0.1505681818181818; +MG94custom[20][28]:=synRate*R*CT*0.2616477272727273; +MG94custom[20][36]:=synRate*R*CG*0.284375; +MG94custom[20][50]:=synRate*R*CT*0.1769886363636364; +MG94custom[21][5]:=synRate*R*AC*0.3488636363636364; +MG94custom[21][17]:=synRate*R*AC*0.3863636363636364; +MG94custom[21][20]:=synRate*AC*0.478125; +MG94custom[21][22]:=synRate*CG*0.19375; +MG94custom[21][23]:=synRate*CT*0.2204545454545455; +MG94custom[21][25]:=synRate*R*CG*0.1505681818181818; +MG94custom[21][29]:=synRate*R*CT*0.2616477272727273; +MG94custom[21][37]:=synRate*R*CG*0.284375; +MG94custom[21][51]:=synRate*R*CT*0.1769886363636364; +MG94custom[22][6]:=synRate*R*AC*0.3488636363636364; +MG94custom[22][18]:=synRate*R*AC*0.3863636363636364; MG94custom[22][20]:=synRate*0.478125; -MG94custom[22][21]:=CG*synRate*0.10767; -MG94custom[22][23]:=GT*synRate*0.220455; -MG94custom[22][26]:=CG*R*synRate*0.150568; -MG94custom[22][30]:=CT*R*synRate*0.261648; -MG94custom[22][38]:=CG*R*synRate*0.284375; -MG94custom[22][52]:=CT*R*synRate*0.176989; 
-MG94custom[23][7]:=AC*R*synRate*0.348864; -MG94custom[23][19]:=AC*R*synRate*0.386364; -MG94custom[23][20]:=AT*synRate*0.478125; -MG94custom[23][21]:=CT*synRate*0.10767; -MG94custom[23][22]:=GT*synRate*0.19375; -MG94custom[23][27]:=CG*R*synRate*0.150568; -MG94custom[23][31]:=CT*R*synRate*0.261648; -MG94custom[23][39]:=CG*R*synRate*0.284375; -MG94custom[23][53]:=CT*R*synRate*0.176989; -MG94custom[24][8]:=AC*synRate*0.348864; -MG94custom[24][16]:=R*synRate*0.386364; -MG94custom[24][20]:=CG*R*synRate*0.20142; -MG94custom[24][25]:=AC*synRate*0.10767; +MG94custom[22][21]:=synRate*CG*0.1076704545454545; +MG94custom[22][23]:=synRate*GT*0.2204545454545455; +MG94custom[22][26]:=synRate*R*CG*0.1505681818181818; +MG94custom[22][30]:=synRate*R*CT*0.2616477272727273; +MG94custom[22][38]:=synRate*R*CG*0.284375; +MG94custom[22][52]:=synRate*R*CT*0.1769886363636364; +MG94custom[23][7]:=synRate*R*AC*0.3488636363636364; +MG94custom[23][19]:=synRate*R*AC*0.3863636363636364; +MG94custom[23][20]:=synRate*AT*0.478125; +MG94custom[23][21]:=synRate*CT*0.1076704545454545; +MG94custom[23][22]:=synRate*GT*0.19375; +MG94custom[23][27]:=synRate*R*CG*0.1505681818181818; +MG94custom[23][31]:=synRate*R*CT*0.2616477272727273; +MG94custom[23][39]:=synRate*R*CG*0.284375; +MG94custom[23][53]:=synRate*R*CT*0.1769886363636364; +MG94custom[24][8]:=synRate*AC*0.3488636363636364; +MG94custom[24][16]:=synRate*R*0.3863636363636364; +MG94custom[24][20]:=synRate*R*CG*0.2014204545454545; +MG94custom[24][25]:=synRate*AC*0.1076704545454545; MG94custom[24][26]:=synRate*0.19375; -MG94custom[24][27]:=AT*synRate*0.220455; -MG94custom[24][28]:=GT*R*synRate*0.261648; -MG94custom[24][40]:=CG*R*synRate*0.284375; -MG94custom[25][9]:=AC*R*synRate*0.348864; -MG94custom[25][17]:=R*synRate*0.386364; -MG94custom[25][21]:=CG*R*synRate*0.20142; -MG94custom[25][24]:=AC*synRate*0.478125; -MG94custom[25][26]:=CG*synRate*0.19375; -MG94custom[25][27]:=CT*synRate*0.220455; -MG94custom[25][29]:=GT*R*synRate*0.261648; 
-MG94custom[25][41]:=CG*R*synRate*0.284375; -MG94custom[25][54]:=CT*R*synRate*0.176989; -MG94custom[26][10]:=AC*synRate*0.348864; -MG94custom[26][18]:=R*synRate*0.386364; -MG94custom[26][22]:=CG*R*synRate*0.20142; +MG94custom[24][27]:=synRate*AT*0.2204545454545455; +MG94custom[24][28]:=synRate*R*GT*0.2616477272727273; +MG94custom[24][40]:=synRate*R*CG*0.284375; +MG94custom[25][9]:=synRate*R*AC*0.3488636363636364; +MG94custom[25][17]:=synRate*R*0.3863636363636364; +MG94custom[25][21]:=synRate*R*CG*0.2014204545454545; +MG94custom[25][24]:=synRate*AC*0.478125; +MG94custom[25][26]:=synRate*CG*0.19375; +MG94custom[25][27]:=synRate*CT*0.2204545454545455; +MG94custom[25][29]:=synRate*R*GT*0.2616477272727273; +MG94custom[25][41]:=synRate*R*CG*0.284375; +MG94custom[25][54]:=synRate*R*CT*0.1769886363636364; +MG94custom[26][10]:=synRate*AC*0.3488636363636364; +MG94custom[26][18]:=synRate*R*0.3863636363636364; +MG94custom[26][22]:=synRate*R*CG*0.2014204545454545; MG94custom[26][24]:=synRate*0.478125; -MG94custom[26][25]:=CG*synRate*0.10767; -MG94custom[26][27]:=GT*synRate*0.220455; -MG94custom[26][30]:=GT*R*synRate*0.261648; -MG94custom[26][42]:=CG*R*synRate*0.284375; -MG94custom[26][55]:=CT*R*synRate*0.176989; -MG94custom[27][11]:=AC*R*synRate*0.348864; -MG94custom[27][19]:=R*synRate*0.386364; -MG94custom[27][23]:=CG*R*synRate*0.20142; -MG94custom[27][24]:=AT*synRate*0.478125; -MG94custom[27][25]:=CT*synRate*0.10767; -MG94custom[27][26]:=GT*synRate*0.19375; -MG94custom[27][31]:=GT*R*synRate*0.261648; -MG94custom[27][43]:=CG*R*synRate*0.284375; -MG94custom[27][56]:=CT*R*synRate*0.176989; -MG94custom[28][12]:=AC*R*synRate*0.348864; -MG94custom[28][16]:=AT*R*synRate*0.386364; -MG94custom[28][20]:=CT*R*synRate*0.20142; -MG94custom[28][24]:=GT*R*synRate*0.150568; -MG94custom[28][29]:=AC*synRate*0.10767; +MG94custom[26][25]:=synRate*CG*0.1076704545454545; +MG94custom[26][27]:=synRate*GT*0.2204545454545455; +MG94custom[26][30]:=synRate*R*GT*0.2616477272727273; 
+MG94custom[26][42]:=synRate*R*CG*0.284375; +MG94custom[26][55]:=synRate*R*CT*0.1769886363636364; +MG94custom[27][11]:=synRate*R*AC*0.3488636363636364; +MG94custom[27][19]:=synRate*R*0.3863636363636364; +MG94custom[27][23]:=synRate*R*CG*0.2014204545454545; +MG94custom[27][24]:=synRate*AT*0.478125; +MG94custom[27][25]:=synRate*CT*0.1076704545454545; +MG94custom[27][26]:=synRate*GT*0.19375; +MG94custom[27][31]:=synRate*R*GT*0.2616477272727273; +MG94custom[27][43]:=synRate*R*CG*0.284375; +MG94custom[27][56]:=synRate*R*CT*0.1769886363636364; +MG94custom[28][12]:=synRate*R*AC*0.3488636363636364; +MG94custom[28][16]:=synRate*R*AT*0.3863636363636364; +MG94custom[28][20]:=synRate*R*CT*0.2014204545454545; +MG94custom[28][24]:=synRate*R*GT*0.1505681818181818; +MG94custom[28][29]:=synRate*AC*0.1076704545454545; MG94custom[28][30]:=synRate*0.19375; -MG94custom[28][31]:=AT*synRate*0.220455; -MG94custom[28][44]:=CG*R*synRate*0.284375; -MG94custom[28][57]:=CT*synRate*0.176989; -MG94custom[29][13]:=AC*R*synRate*0.348864; -MG94custom[29][17]:=AT*R*synRate*0.386364; -MG94custom[29][21]:=CT*R*synRate*0.20142; -MG94custom[29][25]:=GT*R*synRate*0.150568; -MG94custom[29][28]:=AC*synRate*0.478125; -MG94custom[29][30]:=CG*synRate*0.19375; -MG94custom[29][31]:=CT*synRate*0.220455; -MG94custom[29][45]:=CG*R*synRate*0.284375; -MG94custom[29][58]:=CT*R*synRate*0.176989; -MG94custom[30][14]:=AC*R*synRate*0.348864; -MG94custom[30][18]:=AT*R*synRate*0.386364; -MG94custom[30][22]:=CT*R*synRate*0.20142; -MG94custom[30][26]:=GT*R*synRate*0.150568; +MG94custom[28][31]:=synRate*AT*0.2204545454545455; +MG94custom[28][44]:=synRate*R*CG*0.284375; +MG94custom[28][57]:=synRate*CT*0.1769886363636364; +MG94custom[29][13]:=synRate*R*AC*0.3488636363636364; +MG94custom[29][17]:=synRate*R*AT*0.3863636363636364; +MG94custom[29][21]:=synRate*R*CT*0.2014204545454545; +MG94custom[29][25]:=synRate*R*GT*0.1505681818181818; +MG94custom[29][28]:=synRate*AC*0.478125; +MG94custom[29][30]:=synRate*CG*0.19375; 
+MG94custom[29][31]:=synRate*CT*0.2204545454545455; +MG94custom[29][45]:=synRate*R*CG*0.284375; +MG94custom[29][58]:=synRate*R*CT*0.1769886363636364; +MG94custom[30][14]:=synRate*R*AC*0.3488636363636364; +MG94custom[30][18]:=synRate*R*AT*0.3863636363636364; +MG94custom[30][22]:=synRate*R*CT*0.2014204545454545; +MG94custom[30][26]:=synRate*R*GT*0.1505681818181818; MG94custom[30][28]:=synRate*0.478125; -MG94custom[30][29]:=CG*synRate*0.10767; -MG94custom[30][31]:=GT*synRate*0.220455; -MG94custom[30][46]:=CG*R*synRate*0.284375; -MG94custom[30][59]:=CT*synRate*0.176989; -MG94custom[31][15]:=AC*R*synRate*0.348864; -MG94custom[31][19]:=AT*R*synRate*0.386364; -MG94custom[31][23]:=CT*R*synRate*0.20142; -MG94custom[31][27]:=GT*R*synRate*0.150568; -MG94custom[31][28]:=AT*synRate*0.478125; -MG94custom[31][29]:=CT*synRate*0.10767; -MG94custom[31][30]:=GT*synRate*0.19375; -MG94custom[31][47]:=CG*R*synRate*0.284375; -MG94custom[31][60]:=CT*R*synRate*0.176989; -MG94custom[32][0]:=R*synRate*0.348864; -MG94custom[32][16]:=CG*R*synRate*0.189773; -MG94custom[32][33]:=AC*R*synRate*0.10767; +MG94custom[30][29]:=synRate*CG*0.1076704545454545; +MG94custom[30][31]:=synRate*GT*0.2204545454545455; +MG94custom[30][46]:=synRate*R*CG*0.284375; +MG94custom[30][59]:=synRate*CT*0.1769886363636364; +MG94custom[31][15]:=synRate*R*AC*0.3488636363636364; +MG94custom[31][19]:=synRate*R*AT*0.3863636363636364; +MG94custom[31][23]:=synRate*R*CT*0.2014204545454545; +MG94custom[31][27]:=synRate*R*GT*0.1505681818181818; +MG94custom[31][28]:=synRate*AT*0.478125; +MG94custom[31][29]:=synRate*CT*0.1076704545454545; +MG94custom[31][30]:=synRate*GT*0.19375; +MG94custom[31][47]:=synRate*R*CG*0.284375; +MG94custom[31][60]:=synRate*R*CT*0.1769886363636364; +MG94custom[32][0]:=synRate*R*0.3488636363636364; +MG94custom[32][16]:=synRate*R*CG*0.1897727272727273; +MG94custom[32][33]:=synRate*R*AC*0.1076704545454545; MG94custom[32][34]:=synRate*0.19375; -MG94custom[32][35]:=AT*R*synRate*0.220455; 
-MG94custom[32][36]:=AC*R*synRate*0.20142; -MG94custom[32][40]:=R*synRate*0.150568; -MG94custom[32][44]:=AT*R*synRate*0.261648; -MG94custom[33][1]:=R*synRate*0.348864; -MG94custom[33][17]:=CG*R*synRate*0.189773; -MG94custom[33][32]:=AC*R*synRate*0.478125; -MG94custom[33][34]:=CG*R*synRate*0.19375; -MG94custom[33][35]:=CT*synRate*0.220455; -MG94custom[33][37]:=AC*R*synRate*0.20142; -MG94custom[33][41]:=R*synRate*0.150568; -MG94custom[33][45]:=AT*R*synRate*0.261648; -MG94custom[33][48]:=GT*R*synRate*0.176989; -MG94custom[34][2]:=R*synRate*0.348864; -MG94custom[34][18]:=CG*R*synRate*0.189773; +MG94custom[32][35]:=synRate*R*AT*0.2204545454545455; +MG94custom[32][36]:=synRate*R*AC*0.2014204545454545; +MG94custom[32][40]:=synRate*R*0.1505681818181818; +MG94custom[32][44]:=synRate*R*AT*0.2616477272727273; +MG94custom[33][1]:=synRate*R*0.3488636363636364; +MG94custom[33][17]:=synRate*R*CG*0.1897727272727273; +MG94custom[33][32]:=synRate*R*AC*0.478125; +MG94custom[33][34]:=synRate*R*CG*0.19375; +MG94custom[33][35]:=synRate*CT*0.2204545454545455; +MG94custom[33][37]:=synRate*R*AC*0.2014204545454545; +MG94custom[33][41]:=synRate*R*0.1505681818181818; +MG94custom[33][45]:=synRate*R*AT*0.2616477272727273; +MG94custom[33][48]:=synRate*R*GT*0.1769886363636364; +MG94custom[34][2]:=synRate*R*0.3488636363636364; +MG94custom[34][18]:=synRate*R*CG*0.1897727272727273; MG94custom[34][32]:=synRate*0.478125; -MG94custom[34][33]:=CG*R*synRate*0.10767; -MG94custom[34][35]:=GT*R*synRate*0.220455; -MG94custom[34][38]:=AC*R*synRate*0.20142; -MG94custom[34][42]:=R*synRate*0.150568; -MG94custom[34][46]:=AT*R*synRate*0.261648; -MG94custom[35][3]:=R*synRate*0.348864; -MG94custom[35][19]:=CG*R*synRate*0.189773; -MG94custom[35][32]:=AT*R*synRate*0.478125; -MG94custom[35][33]:=CT*synRate*0.10767; -MG94custom[35][34]:=GT*R*synRate*0.19375; -MG94custom[35][39]:=AC*R*synRate*0.20142; -MG94custom[35][43]:=R*synRate*0.150568; -MG94custom[35][47]:=AT*R*synRate*0.261648; 
-MG94custom[35][49]:=GT*R*synRate*0.176989; -MG94custom[36][4]:=R*synRate*0.348864; -MG94custom[36][20]:=CG*R*synRate*0.189773; -MG94custom[36][32]:=AC*R*synRate*0.386364; -MG94custom[36][37]:=AC*synRate*0.10767; +MG94custom[34][33]:=synRate*R*CG*0.1076704545454545; +MG94custom[34][35]:=synRate*R*GT*0.2204545454545455; +MG94custom[34][38]:=synRate*R*AC*0.2014204545454545; +MG94custom[34][42]:=synRate*R*0.1505681818181818; +MG94custom[34][46]:=synRate*R*AT*0.2616477272727273; +MG94custom[35][3]:=synRate*R*0.3488636363636364; +MG94custom[35][19]:=synRate*R*CG*0.1897727272727273; +MG94custom[35][32]:=synRate*R*AT*0.478125; +MG94custom[35][33]:=synRate*CT*0.1076704545454545; +MG94custom[35][34]:=synRate*R*GT*0.19375; +MG94custom[35][39]:=synRate*R*AC*0.2014204545454545; +MG94custom[35][43]:=synRate*R*0.1505681818181818; +MG94custom[35][47]:=synRate*R*AT*0.2616477272727273; +MG94custom[35][49]:=synRate*R*GT*0.1769886363636364; +MG94custom[36][4]:=synRate*R*0.3488636363636364; +MG94custom[36][20]:=synRate*R*CG*0.1897727272727273; +MG94custom[36][32]:=synRate*R*AC*0.3863636363636364; +MG94custom[36][37]:=synRate*AC*0.1076704545454545; MG94custom[36][38]:=synRate*0.19375; -MG94custom[36][39]:=AT*synRate*0.220455; -MG94custom[36][40]:=CG*R*synRate*0.150568; -MG94custom[36][44]:=CT*R*synRate*0.261648; -MG94custom[36][50]:=GT*R*synRate*0.176989; -MG94custom[37][5]:=R*synRate*0.348864; -MG94custom[37][21]:=CG*R*synRate*0.189773; -MG94custom[37][33]:=AC*R*synRate*0.386364; -MG94custom[37][36]:=AC*synRate*0.478125; -MG94custom[37][38]:=CG*synRate*0.19375; -MG94custom[37][39]:=CT*synRate*0.220455; -MG94custom[37][41]:=CG*R*synRate*0.150568; -MG94custom[37][45]:=CT*R*synRate*0.261648; -MG94custom[37][51]:=GT*R*synRate*0.176989; -MG94custom[38][6]:=R*synRate*0.348864; -MG94custom[38][22]:=CG*R*synRate*0.189773; -MG94custom[38][34]:=AC*R*synRate*0.386364; +MG94custom[36][39]:=synRate*AT*0.2204545454545455; +MG94custom[36][40]:=synRate*R*CG*0.1505681818181818; 
+MG94custom[36][44]:=synRate*R*CT*0.2616477272727273; +MG94custom[36][50]:=synRate*R*GT*0.1769886363636364; +MG94custom[37][5]:=synRate*R*0.3488636363636364; +MG94custom[37][21]:=synRate*R*CG*0.1897727272727273; +MG94custom[37][33]:=synRate*R*AC*0.3863636363636364; +MG94custom[37][36]:=synRate*AC*0.478125; +MG94custom[37][38]:=synRate*CG*0.19375; +MG94custom[37][39]:=synRate*CT*0.2204545454545455; +MG94custom[37][41]:=synRate*R*CG*0.1505681818181818; +MG94custom[37][45]:=synRate*R*CT*0.2616477272727273; +MG94custom[37][51]:=synRate*R*GT*0.1769886363636364; +MG94custom[38][6]:=synRate*R*0.3488636363636364; +MG94custom[38][22]:=synRate*R*CG*0.1897727272727273; +MG94custom[38][34]:=synRate*R*AC*0.3863636363636364; MG94custom[38][36]:=synRate*0.478125; -MG94custom[38][37]:=CG*synRate*0.10767; -MG94custom[38][39]:=GT*synRate*0.220455; -MG94custom[38][42]:=CG*R*synRate*0.150568; -MG94custom[38][46]:=CT*R*synRate*0.261648; -MG94custom[38][52]:=GT*R*synRate*0.176989; -MG94custom[39][7]:=R*synRate*0.348864; -MG94custom[39][23]:=CG*R*synRate*0.189773; -MG94custom[39][35]:=AC*R*synRate*0.386364; -MG94custom[39][36]:=AT*synRate*0.478125; -MG94custom[39][37]:=CT*synRate*0.10767; -MG94custom[39][38]:=GT*synRate*0.19375; -MG94custom[39][43]:=CG*R*synRate*0.150568; -MG94custom[39][47]:=CT*R*synRate*0.261648; -MG94custom[39][53]:=GT*R*synRate*0.176989; -MG94custom[40][8]:=R*synRate*0.348864; -MG94custom[40][24]:=CG*R*synRate*0.189773; -MG94custom[40][32]:=R*synRate*0.386364; -MG94custom[40][36]:=CG*R*synRate*0.20142; -MG94custom[40][41]:=AC*synRate*0.10767; +MG94custom[38][37]:=synRate*CG*0.1076704545454545; +MG94custom[38][39]:=synRate*GT*0.2204545454545455; +MG94custom[38][42]:=synRate*R*CG*0.1505681818181818; +MG94custom[38][46]:=synRate*R*CT*0.2616477272727273; +MG94custom[38][52]:=synRate*R*GT*0.1769886363636364; +MG94custom[39][7]:=synRate*R*0.3488636363636364; +MG94custom[39][23]:=synRate*R*CG*0.1897727272727273; +MG94custom[39][35]:=synRate*R*AC*0.3863636363636364; 
+MG94custom[39][36]:=synRate*AT*0.478125; +MG94custom[39][37]:=synRate*CT*0.1076704545454545; +MG94custom[39][38]:=synRate*GT*0.19375; +MG94custom[39][43]:=synRate*R*CG*0.1505681818181818; +MG94custom[39][47]:=synRate*R*CT*0.2616477272727273; +MG94custom[39][53]:=synRate*R*GT*0.1769886363636364; +MG94custom[40][8]:=synRate*R*0.3488636363636364; +MG94custom[40][24]:=synRate*R*CG*0.1897727272727273; +MG94custom[40][32]:=synRate*R*0.3863636363636364; +MG94custom[40][36]:=synRate*R*CG*0.2014204545454545; +MG94custom[40][41]:=synRate*AC*0.1076704545454545; MG94custom[40][42]:=synRate*0.19375; -MG94custom[40][43]:=AT*synRate*0.220455; -MG94custom[40][44]:=GT*R*synRate*0.261648; -MG94custom[41][9]:=R*synRate*0.348864; -MG94custom[41][25]:=CG*R*synRate*0.189773; -MG94custom[41][33]:=R*synRate*0.386364; -MG94custom[41][37]:=CG*R*synRate*0.20142; -MG94custom[41][40]:=AC*synRate*0.478125; -MG94custom[41][42]:=CG*synRate*0.19375; -MG94custom[41][43]:=CT*synRate*0.220455; -MG94custom[41][45]:=GT*R*synRate*0.261648; -MG94custom[41][54]:=GT*R*synRate*0.176989; -MG94custom[42][10]:=R*synRate*0.348864; -MG94custom[42][26]:=CG*R*synRate*0.189773; -MG94custom[42][34]:=R*synRate*0.386364; -MG94custom[42][38]:=CG*R*synRate*0.20142; +MG94custom[40][43]:=synRate*AT*0.2204545454545455; +MG94custom[40][44]:=synRate*R*GT*0.2616477272727273; +MG94custom[41][9]:=synRate*R*0.3488636363636364; +MG94custom[41][25]:=synRate*R*CG*0.1897727272727273; +MG94custom[41][33]:=synRate*R*0.3863636363636364; +MG94custom[41][37]:=synRate*R*CG*0.2014204545454545; +MG94custom[41][40]:=synRate*AC*0.478125; +MG94custom[41][42]:=synRate*CG*0.19375; +MG94custom[41][43]:=synRate*CT*0.2204545454545455; +MG94custom[41][45]:=synRate*R*GT*0.2616477272727273; +MG94custom[41][54]:=synRate*R*GT*0.1769886363636364; +MG94custom[42][10]:=synRate*R*0.3488636363636364; +MG94custom[42][26]:=synRate*R*CG*0.1897727272727273; +MG94custom[42][34]:=synRate*R*0.3863636363636364; +MG94custom[42][38]:=synRate*R*CG*0.2014204545454545; 
MG94custom[42][40]:=synRate*0.478125; -MG94custom[42][41]:=CG*synRate*0.10767; -MG94custom[42][43]:=GT*synRate*0.220455; -MG94custom[42][46]:=GT*R*synRate*0.261648; -MG94custom[42][55]:=GT*R*synRate*0.176989; -MG94custom[43][11]:=R*synRate*0.348864; -MG94custom[43][27]:=CG*R*synRate*0.189773; -MG94custom[43][35]:=R*synRate*0.386364; -MG94custom[43][39]:=CG*R*synRate*0.20142; -MG94custom[43][40]:=AT*synRate*0.478125; -MG94custom[43][41]:=CT*synRate*0.10767; -MG94custom[43][42]:=GT*synRate*0.19375; -MG94custom[43][47]:=GT*R*synRate*0.261648; -MG94custom[43][56]:=GT*R*synRate*0.176989; -MG94custom[44][12]:=R*synRate*0.348864; -MG94custom[44][28]:=CG*R*synRate*0.189773; -MG94custom[44][32]:=AT*R*synRate*0.386364; -MG94custom[44][36]:=CT*R*synRate*0.20142; -MG94custom[44][40]:=GT*R*synRate*0.150568; -MG94custom[44][45]:=AC*synRate*0.10767; +MG94custom[42][41]:=synRate*CG*0.1076704545454545; +MG94custom[42][43]:=synRate*GT*0.2204545454545455; +MG94custom[42][46]:=synRate*R*GT*0.2616477272727273; +MG94custom[42][55]:=synRate*R*GT*0.1769886363636364; +MG94custom[43][11]:=synRate*R*0.3488636363636364; +MG94custom[43][27]:=synRate*R*CG*0.1897727272727273; +MG94custom[43][35]:=synRate*R*0.3863636363636364; +MG94custom[43][39]:=synRate*R*CG*0.2014204545454545; +MG94custom[43][40]:=synRate*AT*0.478125; +MG94custom[43][41]:=synRate*CT*0.1076704545454545; +MG94custom[43][42]:=synRate*GT*0.19375; +MG94custom[43][47]:=synRate*R*GT*0.2616477272727273; +MG94custom[43][56]:=synRate*R*GT*0.1769886363636364; +MG94custom[44][12]:=synRate*R*0.3488636363636364; +MG94custom[44][28]:=synRate*R*CG*0.1897727272727273; +MG94custom[44][32]:=synRate*R*AT*0.3863636363636364; +MG94custom[44][36]:=synRate*R*CT*0.2014204545454545; +MG94custom[44][40]:=synRate*R*GT*0.1505681818181818; +MG94custom[44][45]:=synRate*AC*0.1076704545454545; MG94custom[44][46]:=synRate*0.19375; -MG94custom[44][47]:=AT*synRate*0.220455; -MG94custom[44][57]:=GT*R*synRate*0.176989; -MG94custom[45][13]:=R*synRate*0.348864; 
-MG94custom[45][29]:=CG*R*synRate*0.189773; -MG94custom[45][33]:=AT*R*synRate*0.386364; -MG94custom[45][37]:=CT*R*synRate*0.20142; -MG94custom[45][41]:=GT*R*synRate*0.150568; -MG94custom[45][44]:=AC*synRate*0.478125; -MG94custom[45][46]:=CG*synRate*0.19375; -MG94custom[45][47]:=CT*synRate*0.220455; -MG94custom[45][58]:=GT*R*synRate*0.176989; -MG94custom[46][14]:=R*synRate*0.348864; -MG94custom[46][30]:=CG*R*synRate*0.189773; -MG94custom[46][34]:=AT*R*synRate*0.386364; -MG94custom[46][38]:=CT*R*synRate*0.20142; -MG94custom[46][42]:=GT*R*synRate*0.150568; +MG94custom[44][47]:=synRate*AT*0.2204545454545455; +MG94custom[44][57]:=synRate*R*GT*0.1769886363636364; +MG94custom[45][13]:=synRate*R*0.3488636363636364; +MG94custom[45][29]:=synRate*R*CG*0.1897727272727273; +MG94custom[45][33]:=synRate*R*AT*0.3863636363636364; +MG94custom[45][37]:=synRate*R*CT*0.2014204545454545; +MG94custom[45][41]:=synRate*R*GT*0.1505681818181818; +MG94custom[45][44]:=synRate*AC*0.478125; +MG94custom[45][46]:=synRate*CG*0.19375; +MG94custom[45][47]:=synRate*CT*0.2204545454545455; +MG94custom[45][58]:=synRate*R*GT*0.1769886363636364; +MG94custom[46][14]:=synRate*R*0.3488636363636364; +MG94custom[46][30]:=synRate*R*CG*0.1897727272727273; +MG94custom[46][34]:=synRate*R*AT*0.3863636363636364; +MG94custom[46][38]:=synRate*R*CT*0.2014204545454545; +MG94custom[46][42]:=synRate*R*GT*0.1505681818181818; MG94custom[46][44]:=synRate*0.478125; -MG94custom[46][45]:=CG*synRate*0.10767; -MG94custom[46][47]:=GT*synRate*0.220455; -MG94custom[46][59]:=GT*R*synRate*0.176989; -MG94custom[47][15]:=R*synRate*0.348864; -MG94custom[47][31]:=CG*R*synRate*0.189773; -MG94custom[47][35]:=AT*R*synRate*0.386364; -MG94custom[47][39]:=CT*R*synRate*0.20142; -MG94custom[47][43]:=GT*R*synRate*0.150568; -MG94custom[47][44]:=AT*synRate*0.478125; -MG94custom[47][45]:=CT*synRate*0.10767; -MG94custom[47][46]:=GT*synRate*0.19375; -MG94custom[47][60]:=GT*R*synRate*0.176989; -MG94custom[48][1]:=AT*R*synRate*0.348864; 
-MG94custom[48][17]:=CT*R*synRate*0.189773; -MG94custom[48][33]:=GT*R*synRate*0.284375; -MG94custom[48][49]:=CT*synRate*0.220455; -MG94custom[48][51]:=AC*R*synRate*0.20142; -MG94custom[48][54]:=R*synRate*0.150568; -MG94custom[48][58]:=AT*R*synRate*0.261648; -MG94custom[49][3]:=AT*R*synRate*0.348864; -MG94custom[49][19]:=CT*R*synRate*0.189773; -MG94custom[49][35]:=GT*R*synRate*0.284375; -MG94custom[49][48]:=CT*synRate*0.10767; -MG94custom[49][53]:=AC*R*synRate*0.20142; -MG94custom[49][56]:=R*synRate*0.150568; -MG94custom[49][60]:=AT*R*synRate*0.261648; -MG94custom[50][4]:=AT*R*synRate*0.348864; -MG94custom[50][20]:=CT*R*synRate*0.189773; -MG94custom[50][36]:=GT*R*synRate*0.284375; -MG94custom[50][51]:=AC*synRate*0.10767; +MG94custom[46][45]:=synRate*CG*0.1076704545454545; +MG94custom[46][47]:=synRate*GT*0.2204545454545455; +MG94custom[46][59]:=synRate*R*GT*0.1769886363636364; +MG94custom[47][15]:=synRate*R*0.3488636363636364; +MG94custom[47][31]:=synRate*R*CG*0.1897727272727273; +MG94custom[47][35]:=synRate*R*AT*0.3863636363636364; +MG94custom[47][39]:=synRate*R*CT*0.2014204545454545; +MG94custom[47][43]:=synRate*R*GT*0.1505681818181818; +MG94custom[47][44]:=synRate*AT*0.478125; +MG94custom[47][45]:=synRate*CT*0.1076704545454545; +MG94custom[47][46]:=synRate*GT*0.19375; +MG94custom[47][60]:=synRate*R*GT*0.1769886363636364; +MG94custom[48][1]:=synRate*R*AT*0.3488636363636364; +MG94custom[48][17]:=synRate*R*CT*0.1897727272727273; +MG94custom[48][33]:=synRate*R*GT*0.284375; +MG94custom[48][49]:=synRate*CT*0.2204545454545455; +MG94custom[48][51]:=synRate*R*AC*0.2014204545454545; +MG94custom[48][54]:=synRate*R*0.1505681818181818; +MG94custom[48][58]:=synRate*R*AT*0.2616477272727273; +MG94custom[49][3]:=synRate*R*AT*0.3488636363636364; +MG94custom[49][19]:=synRate*R*CT*0.1897727272727273; +MG94custom[49][35]:=synRate*R*GT*0.284375; +MG94custom[49][48]:=synRate*CT*0.1076704545454545; +MG94custom[49][53]:=synRate*R*AC*0.2014204545454545; 
+MG94custom[49][56]:=synRate*R*0.1505681818181818; +MG94custom[49][60]:=synRate*R*AT*0.2616477272727273; +MG94custom[50][4]:=synRate*R*AT*0.3488636363636364; +MG94custom[50][20]:=synRate*R*CT*0.1897727272727273; +MG94custom[50][36]:=synRate*R*GT*0.284375; +MG94custom[50][51]:=synRate*AC*0.1076704545454545; MG94custom[50][52]:=synRate*0.19375; -MG94custom[50][53]:=AT*synRate*0.220455; -MG94custom[50][57]:=CT*R*synRate*0.261648; -MG94custom[51][5]:=AT*R*synRate*0.348864; -MG94custom[51][21]:=CT*R*synRate*0.189773; -MG94custom[51][37]:=GT*R*synRate*0.284375; -MG94custom[51][48]:=AC*R*synRate*0.386364; -MG94custom[51][50]:=AC*synRate*0.478125; -MG94custom[51][52]:=CG*synRate*0.19375; -MG94custom[51][53]:=CT*synRate*0.220455; -MG94custom[51][54]:=CG*R*synRate*0.150568; -MG94custom[51][58]:=CT*R*synRate*0.261648; -MG94custom[52][6]:=AT*R*synRate*0.348864; -MG94custom[52][22]:=CT*R*synRate*0.189773; -MG94custom[52][38]:=GT*R*synRate*0.284375; +MG94custom[50][53]:=synRate*AT*0.2204545454545455; +MG94custom[50][57]:=synRate*R*CT*0.2616477272727273; +MG94custom[51][5]:=synRate*R*AT*0.3488636363636364; +MG94custom[51][21]:=synRate*R*CT*0.1897727272727273; +MG94custom[51][37]:=synRate*R*GT*0.284375; +MG94custom[51][48]:=synRate*R*AC*0.3863636363636364; +MG94custom[51][50]:=synRate*AC*0.478125; +MG94custom[51][52]:=synRate*CG*0.19375; +MG94custom[51][53]:=synRate*CT*0.2204545454545455; +MG94custom[51][54]:=synRate*R*CG*0.1505681818181818; +MG94custom[51][58]:=synRate*R*CT*0.2616477272727273; +MG94custom[52][6]:=synRate*R*AT*0.3488636363636364; +MG94custom[52][22]:=synRate*R*CT*0.1897727272727273; +MG94custom[52][38]:=synRate*R*GT*0.284375; MG94custom[52][50]:=synRate*0.478125; -MG94custom[52][51]:=CG*synRate*0.10767; -MG94custom[52][53]:=GT*synRate*0.220455; -MG94custom[52][55]:=CG*R*synRate*0.150568; -MG94custom[52][59]:=CT*R*synRate*0.261648; -MG94custom[53][7]:=AT*R*synRate*0.348864; -MG94custom[53][23]:=CT*R*synRate*0.189773; -MG94custom[53][39]:=GT*R*synRate*0.284375; 
-MG94custom[53][49]:=AC*R*synRate*0.386364; -MG94custom[53][50]:=AT*synRate*0.478125; -MG94custom[53][51]:=CT*synRate*0.10767; -MG94custom[53][52]:=GT*synRate*0.19375; -MG94custom[53][56]:=CG*R*synRate*0.150568; -MG94custom[53][60]:=CT*R*synRate*0.261648; -MG94custom[54][9]:=AT*R*synRate*0.348864; -MG94custom[54][25]:=CT*R*synRate*0.189773; -MG94custom[54][41]:=GT*R*synRate*0.284375; -MG94custom[54][48]:=R*synRate*0.386364; -MG94custom[54][51]:=CG*R*synRate*0.20142; -MG94custom[54][55]:=CG*R*synRate*0.19375; -MG94custom[54][56]:=CT*synRate*0.220455; -MG94custom[54][58]:=GT*R*synRate*0.261648; -MG94custom[55][10]:=AT*R*synRate*0.348864; -MG94custom[55][26]:=CT*R*synRate*0.189773; -MG94custom[55][42]:=GT*R*synRate*0.284375; -MG94custom[55][52]:=CG*R*synRate*0.20142; -MG94custom[55][54]:=CG*R*synRate*0.10767; -MG94custom[55][56]:=GT*R*synRate*0.220455; -MG94custom[55][59]:=GT*R*synRate*0.261648; -MG94custom[56][11]:=AT*R*synRate*0.348864; -MG94custom[56][27]:=CT*R*synRate*0.189773; -MG94custom[56][43]:=GT*R*synRate*0.284375; -MG94custom[56][49]:=R*synRate*0.386364; -MG94custom[56][53]:=CG*R*synRate*0.20142; -MG94custom[56][54]:=CT*synRate*0.10767; -MG94custom[56][55]:=GT*R*synRate*0.19375; -MG94custom[56][60]:=GT*R*synRate*0.261648; -MG94custom[57][12]:=AT*R*synRate*0.348864; -MG94custom[57][28]:=CT*synRate*0.189773; -MG94custom[57][44]:=GT*R*synRate*0.284375; -MG94custom[57][50]:=CT*R*synRate*0.20142; -MG94custom[57][58]:=AC*R*synRate*0.10767; +MG94custom[52][51]:=synRate*CG*0.1076704545454545; +MG94custom[52][53]:=synRate*GT*0.2204545454545455; +MG94custom[52][55]:=synRate*R*CG*0.1505681818181818; +MG94custom[52][59]:=synRate*R*CT*0.2616477272727273; +MG94custom[53][7]:=synRate*R*AT*0.3488636363636364; +MG94custom[53][23]:=synRate*R*CT*0.1897727272727273; +MG94custom[53][39]:=synRate*R*GT*0.284375; +MG94custom[53][49]:=synRate*R*AC*0.3863636363636364; +MG94custom[53][50]:=synRate*AT*0.478125; +MG94custom[53][51]:=synRate*CT*0.1076704545454545; 
+MG94custom[53][52]:=synRate*GT*0.19375; +MG94custom[53][56]:=synRate*R*CG*0.1505681818181818; +MG94custom[53][60]:=synRate*R*CT*0.2616477272727273; +MG94custom[54][9]:=synRate*R*AT*0.3488636363636364; +MG94custom[54][25]:=synRate*R*CT*0.1897727272727273; +MG94custom[54][41]:=synRate*R*GT*0.284375; +MG94custom[54][48]:=synRate*R*0.3863636363636364; +MG94custom[54][51]:=synRate*R*CG*0.2014204545454545; +MG94custom[54][55]:=synRate*R*CG*0.19375; +MG94custom[54][56]:=synRate*CT*0.2204545454545455; +MG94custom[54][58]:=synRate*R*GT*0.2616477272727273; +MG94custom[55][10]:=synRate*R*AT*0.3488636363636364; +MG94custom[55][26]:=synRate*R*CT*0.1897727272727273; +MG94custom[55][42]:=synRate*R*GT*0.284375; +MG94custom[55][52]:=synRate*R*CG*0.2014204545454545; +MG94custom[55][54]:=synRate*R*CG*0.1076704545454545; +MG94custom[55][56]:=synRate*R*GT*0.2204545454545455; +MG94custom[55][59]:=synRate*R*GT*0.2616477272727273; +MG94custom[56][11]:=synRate*R*AT*0.3488636363636364; +MG94custom[56][27]:=synRate*R*CT*0.1897727272727273; +MG94custom[56][43]:=synRate*R*GT*0.284375; +MG94custom[56][49]:=synRate*R*0.3863636363636364; +MG94custom[56][53]:=synRate*R*CG*0.2014204545454545; +MG94custom[56][54]:=synRate*CT*0.1076704545454545; +MG94custom[56][55]:=synRate*R*GT*0.19375; +MG94custom[56][60]:=synRate*R*GT*0.2616477272727273; +MG94custom[57][12]:=synRate*R*AT*0.3488636363636364; +MG94custom[57][28]:=synRate*CT*0.1897727272727273; +MG94custom[57][44]:=synRate*R*GT*0.284375; +MG94custom[57][50]:=synRate*R*CT*0.2014204545454545; +MG94custom[57][58]:=synRate*R*AC*0.1076704545454545; MG94custom[57][59]:=synRate*0.19375; -MG94custom[57][60]:=AT*R*synRate*0.220455; -MG94custom[58][13]:=AT*R*synRate*0.348864; -MG94custom[58][29]:=CT*R*synRate*0.189773; -MG94custom[58][45]:=GT*R*synRate*0.284375; -MG94custom[58][48]:=AT*R*synRate*0.386364; -MG94custom[58][51]:=CT*R*synRate*0.20142; -MG94custom[58][54]:=GT*R*synRate*0.150568; -MG94custom[58][57]:=AC*R*synRate*0.478125; 
-MG94custom[58][59]:=CG*R*synRate*0.19375; -MG94custom[58][60]:=CT*synRate*0.220455; -MG94custom[59][14]:=AT*R*synRate*0.348864; -MG94custom[59][30]:=CT*synRate*0.189773; -MG94custom[59][46]:=GT*R*synRate*0.284375; -MG94custom[59][52]:=CT*R*synRate*0.20142; -MG94custom[59][55]:=GT*R*synRate*0.150568; +MG94custom[57][60]:=synRate*R*AT*0.2204545454545455; +MG94custom[58][13]:=synRate*R*AT*0.3488636363636364; +MG94custom[58][29]:=synRate*R*CT*0.1897727272727273; +MG94custom[58][45]:=synRate*R*GT*0.284375; +MG94custom[58][48]:=synRate*R*AT*0.3863636363636364; +MG94custom[58][51]:=synRate*R*CT*0.2014204545454545; +MG94custom[58][54]:=synRate*R*GT*0.1505681818181818; +MG94custom[58][57]:=synRate*R*AC*0.478125; +MG94custom[58][59]:=synRate*R*CG*0.19375; +MG94custom[58][60]:=synRate*CT*0.2204545454545455; +MG94custom[59][14]:=synRate*R*AT*0.3488636363636364; +MG94custom[59][30]:=synRate*CT*0.1897727272727273; +MG94custom[59][46]:=synRate*R*GT*0.284375; +MG94custom[59][52]:=synRate*R*CT*0.2014204545454545; +MG94custom[59][55]:=synRate*R*GT*0.1505681818181818; MG94custom[59][57]:=synRate*0.478125; -MG94custom[59][58]:=CG*R*synRate*0.10767; -MG94custom[59][60]:=GT*R*synRate*0.220455; -MG94custom[60][15]:=AT*R*synRate*0.348864; -MG94custom[60][31]:=CT*R*synRate*0.189773; -MG94custom[60][47]:=GT*R*synRate*0.284375; -MG94custom[60][49]:=AT*R*synRate*0.386364; -MG94custom[60][53]:=CT*R*synRate*0.20142; -MG94custom[60][56]:=GT*R*synRate*0.150568; -MG94custom[60][57]:=AT*R*synRate*0.478125; -MG94custom[60][58]:=CT*synRate*0.10767; -MG94custom[60][59]:=GT*R*synRate*0.19375; +MG94custom[59][58]:=synRate*R*CG*0.1076704545454545; +MG94custom[59][60]:=synRate*R*GT*0.2204545454545455; +MG94custom[60][15]:=synRate*R*AT*0.3488636363636364; +MG94custom[60][31]:=synRate*R*CT*0.1897727272727273; +MG94custom[60][47]:=synRate*R*GT*0.284375; +MG94custom[60][49]:=synRate*R*AT*0.3863636363636364; +MG94custom[60][53]:=synRate*R*CT*0.2014204545454545; 
+MG94custom[60][56]:=synRate*R*GT*0.1505681818181818; +MG94custom[60][57]:=synRate*R*AT*0.478125; +MG94custom[60][58]:=synRate*CT*0.1076704545454545; +MG94custom[60][59]:=synRate*R*GT*0.19375; + vectorOfFrequencies={ -{ 0.0684634387476} -{ 0.0154174945249} -{ 0.0277433542637} -{ 0.0315672183411} -{ 0.0356916015236} -{ 0.00803750266039} -{ 0.0144632633625} -{ 0.0164567336793} -{ 0.0266806048061} -{ 0.00600828830748} -{ 0.0108117483528} -{ 0.0123019306771} -{ 0.046363843446} -{ 0.0104408179834} -{ 0.0187879627036} -{ 0.0213775059501} -{ 0.0372423266151} -{ 0.00838671526271} -{ 0.0150916617656} -{ 0.0171717441791} -{ 0.0194153011545} -{ 0.00437219200093} -{ 0.00786763837634} -{ 0.00895203428158} -{ 0.0145135537544} -{ 0.0032683522715} -{ 0.00588130936454} -{ 0.00669192971684} -{ 0.0252207226563} -{ 0.00567953290953} -{ 0.0102201621222} -{ 0.0116288061683} -{ 0.0558077379368} -{ 0.0125675179311} -{ 0.0226149003404} -{ 0.0257319100647} -{ 0.0290938869097} -{ 0.00655174280379} -{ 0.0117896796627} -{ 0.0134146501734} -{ 0.0217486037548} -{ 0.00489763566432} -{ 0.00881315969148} -{ 0.0100278767164} -{ 0.0377933284116} -{ 0.00851079706952} -{ 0.0153149435393} -{ 0.01742580086} -{ 0.00782174192914} -{ 0.0160149650053} -{ 0.0181073841606} -{ 0.00407765810865} -{ 0.00733763279709} -{ 0.00834897807998} -{ 0.00304817884003} -{ 0.00548511337442} -{ 0.00624112606825} -{ 0.0235217218785} -{ 0.00529692964466} -{ 0.00953167814686} -{ 0.0108454285073} -} -; +{0.06846343874760828} +{0.01541749452486247} +{0.02774335426373669} +{0.03156721834114321} +{0.03569160152356932} +{0.008037502660387861} +{0.01446326336249214} +{0.01645673367931657} +{0.02668060480605323} +{0.006008288307483169} +{0.01081174835277974} +{0.01230193067706317} +{0.04636384344599061} +{0.01044081798338113} +{0.01878796270360404} +{0.02137750595014184} +{0.03724232661514847} +{0.008386715262710204} +{0.01509166176561572} +{0.01717174417905836} +{0.0194153011545149} +{0.004372192000927598} +{0.007867638376339373} 
+{0.00895203428158263} +{0.01451355375443286} +{0.003268352271497358} +{0.005881309364541421} +{0.006691929716838918} +{0.02522072265628805} +{0.005679532909526541} +{0.01022016212215594} +{0.0116288061683182} +{0.05580773793677189} +{0.01256751793109718} +{0.02261490034039122} +{0.02573191006472667} +{0.02909388690968476} +{0.006551742803785218} +{0.01178967966274807} +{0.01341465017344942} +{0.0217486037547714} +{0.004897635664324634} +{0.00881315969147599} +{0.01002787671640084} +{0.03779332841159332} +{0.008510797069515073} +{0.01531494353933847} +{0.01742580086000975} +{0.007821741929144397} +{0.0160149650053194} +{0.01810738416057303} +{0.004077658108649541} +{0.007337632797094954} +{0.008348978079979009} +{0.003048178840034214} +{0.005485113374415129} +{0.006241126068249472} +{0.02352172187854409} +{0.005296929644663227} +{0.009531678146861006} +{0.0108454285072788} +}; Model MG94customModel=(MG94custom,vectorOfFrequencies,0); From 21fc15389298150a7bbe0bda846e9de243d0b97d Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 6 Jul 2018 19:07:51 -0400 Subject: [PATCH 22/53] Better error reporting --- src/core/likefunc.cpp | 4 ++-- tests/hbltests/SimpleOptimizations/IntermediateCodon.bf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index bc6b33f5f..707dd1f24 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -5088,8 +5088,8 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (successful && !(rightValue<=middleValue && leftValue<=middleValue)) { - char buf[256], buf2[512]; - snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue); + char buf[512], buf2[512]; + snprintf (buf, 512, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%20.16g) = 
%%20.16g [@%%20.16g -> %%20.16g:@%%20.16g -> %%20.16g]]", index, middle, middleValue, left, leftValue,right, rightValue); snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %20.16g <= %20.16g >= %20.16g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); _TerminateAndDump (_String (buf) & "\n" & buf2 & "\nParameter name " & *GetIthIndependentName(index)); diff --git a/tests/hbltests/SimpleOptimizations/IntermediateCodon.bf b/tests/hbltests/SimpleOptimizations/IntermediateCodon.bf index dccf7e593..c0a2b582d 100644 --- a/tests/hbltests/SimpleOptimizations/IntermediateCodon.bf +++ b/tests/hbltests/SimpleOptimizations/IntermediateCodon.bf @@ -616,7 +616,7 @@ Tree LargeNuc_tree = DATAFILE_TREE; DataSetFilter LargeNuc_part = CreateFilter(flu,3,"","","TAA,TAG,TGA"); -VERBOSITY_LEVEL = 1; +//VERBOSITY_LEVEL = 200; LikelihoodFunction LargeNuc_LF = (LargeNuc_part,LargeNuc_tree); //AUTO_PARALLELIZE_OPTIMIZE = 1; OPTIMIZATION_PRECISION = 0.001; From f6fb87c0423723ad0448ef82920d87a28eb6a5c1 Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Mon, 9 Jul 2018 06:20:28 +0900 Subject: [PATCH 23/53] Multiple tweaks to optimization code --- src/core/calcnode.cpp | 26 +- src/core/include/likefunc.h | 8 +- src/core/likefunc.cpp | 479 +++++++++++++++++++----------------- src/core/matrix.cpp | 62 +++-- 4 files changed, 324 insertions(+), 251 deletions(-) diff --git a/src/core/calcnode.cpp b/src/core/calcnode.cpp index 8052f8ab6..316d06788 100644 --- a/src/core/calcnode.cpp +++ b/src/core/calcnode.cpp @@ -6386,12 +6386,17 @@ void _TheTree::RecoverNodeSupportStates (_DataSetFilter const* dsf, long sit } vecPointer += cBase; } + + // TODO SLKP 20180703: ugly fix for underflow which WON'T work if category count > 1 for (long iNodeCount = 0L; iNodeCount < flatTree.lLength - 1; iNodeCount++) { node* 
thisINode = (node*)flatNodes.lData[iNodeCount]; + _Parameter sum = 0.; + for (long cc = 0; cc < cBase; cc++) { _Parameter tmp = 1.0; + for (long nc = 0; nc < thisINode->nodes.length; nc++) { _Parameter tmp2 = 0.0; _CalcNode * child = map_node_to_calcnode(thisINode->go_down(nc+1)); @@ -6405,8 +6410,17 @@ void _TheTree::RecoverNodeSupportStates (_DataSetFilter const* dsf, long sit tmp *= tmp2; } vecPointer[cc] = tmp; + sum += tmp; + } + + if (sum < _lfScalingFactorThreshold && sum > 0.0) { + for (long cc = 0; cc < cBase; cc++) { + vecPointer[cc] *= _lfScalerUpwards; + } } + vecPointer += cBase; + } RecoverNodeSupportStates2 (&GetRoot(),currentStateVector+globalShifter,currentStateVector,categoryCount>1?catCount:(-1)); } @@ -6425,7 +6439,8 @@ void _TheTree::RecoverNodeSupportStates2 (node* thisNode, _Parameter* if (thisNode->parent) { if (thisNode->parent->parent) { - for (long cc = 0; cc < cBase; cc++,vecPointer++) { + _Parameter sum = 0.; + for (long cc = 0; cc < cBase; cc++) { _Parameter tmp = 1.0; for (long nc = 0; nc < thisNode->parent->nodes.length; nc++) { _Parameter tmp2 = 0.0; @@ -6445,8 +6460,15 @@ void _TheTree::RecoverNodeSupportStates2 (node* thisNode, _Parameter* tmp *= tmp2; } - *vecPointer = tmp; + vecPointer[cc] = tmp; + sum += tmp; } + if (sum < _lfScalingFactorThreshold && sum > 0.0) { + for (long cc = 0; cc < cBase; cc++) { + vecPointer[cc] *= _lfScalerUpwards; + } + } + vecPointer += cBase; } else { for (long cc = 0; cc < cBase; cc++,vecPointer++) { _Parameter tmp = 1.0; diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index 1c8fc52c2..0da9691b7 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -259,7 +259,7 @@ class _LikelihoodFunction: public BaseObj long SequenceCount (long); unsigned long SiteCount (void) const; void Rebuild (bool = false); - void SerializeLF (_String&, char=0, _SimpleList* = nil, _SimpleList* = nil); + virtual void SerializeLF (_String&, char=0, _SimpleList* = nil, 
_SimpleList* = nil); _Formula* HasComputingTemplate (void) const{ return computingTemplate; } @@ -710,9 +710,9 @@ class _CustomFunction: public _LikelihoodFunction virtual _Parameter Compute (void); virtual void RescanAllVariables (void) {} - - - + virtual void SerializeLF (_String& res, char=0, _SimpleList* = nil, _SimpleList* = nil) { + res.AppendNewInstance ((_String*)myBody.toStr()); + } _Formula myBody; }; diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index bc6b33f5f..c3c3fabb1 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -190,7 +190,6 @@ globalStartingPoint ("GLOBAL_STARTING_POINT"), optimizeSummationOrder ("OPTIMIZE_SUMMATION_ORDER"), optimizePartitionSize ("OPTIMIZE_SUMMATION_ORDER_PARTITION"), maximumIterationsPerVariable ("MAXIMUM_ITERATIONS_PER_VARIABLE"), - optimizationPrecisionMethod ("OPTIMIZATION_PRECISION_METHOD"), likefuncOutput ("LIKELIHOOD_FUNCTION_OUTPUT"), dataFilePrintFormat ("DATA_FILE_PRINT_FORMAT"), dataFileDefaultWidth ("DATA_FILE_DEFAULT_WIDTH"), @@ -260,7 +259,6 @@ _SimpleList Fibonacci; _Parameter precision, - optimizationPrecMethod, maxItersPerVar, dFPrintFormat, dFDefaultWidth, @@ -3809,8 +3807,7 @@ DecideOnDivideBy (this); checkParameter (startingPrecision,currentPrecision,0.1); checkParameter (optimizationMethod,optMethodP,4.0); - checkParameter (optimizationPrecisionMethod,optimizationPrecMethod,0.0); - checkParameter (optimizationPrecision,precision,0.001); + checkParameter (optimizationPrecision,precision,0.001); checkParameter (maximumIterationsPerVariable,maxItersPerVar,5000.); ReportWarning (_String("Optimization settings:\n\t") & optimizationMethod & " = " & optMethodP & @@ -3877,6 +3874,7 @@ DecideOnDivideBy (this); current_precision *= 0.1; } ConjugateGradientDescent(precision, bestSoFar, true); + //ConjugateGradientDescent(1e-7, bestSoFar, true); } #if !defined __UNIX__ || defined __HEADLESS__ #ifdef __HYPHYMPI__ @@ -4594,13 +4592,6 @@ DecideOnDivideBy (this); result.Store 
(1,1,indexInd.lLength); result.Store (1,2,CountObjects(kLFCountGlobalVariables)); - _PMathObj pm; - for (unsigned long i=0UL; i= 0) { SetIthIndependent (index,value); } else { - if (value < 0) { + if (value < 0.) { WarnError ("Internal error in gradient bracket function\n"); return -A_LARGE_NUMBER; } @@ -4876,9 +4874,6 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (verbosityLevel > 100) { char buf [512]; snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld, eval %ld) INITIAL BRACKET %15.12g <= %15.12g (current %15.12g) <= %15.12g]", index, likeFuncEvalCallCount, middle-leftStep, middle, index>=0?GetIthIndependent (index):0.0, middle+rightStep); - if (likeFuncEvalCallCount == 376) { - printf ("\nLog(L) = %20.16g\n", Compute()); - } BufferToConsole (buf); } @@ -4892,23 +4887,24 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& while (1) { while (middle-leftStep < lowerBound) { - if (verbosityLevel > 100) { - char buf [512]; - snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) HANDLING LEFT BOUNDARY CASES] : LB = %g, current try = %g, current evaluated midpoint value = %g (%s)", index, lowerBound, middle-leftStep, middleValue, first ? "first" : "NOT first"); - BufferToConsole (buf); - } + if (verbosityLevel > 100) { + char buf [512]; + snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) HANDLING LEFT BOUNDARY CASES] : LB = %g, current try = %g, current evaluated midpoint value = %g (%s)", index, lowerBound, middle-leftStep, middleValue, first ? "first" : "NOT first"); + BufferToConsole (buf); + } leftStep*=.125; if ( leftStep0 || index < 0 && leftStep < STD_GRAD_STEP) { if (!first) { if (go2Bound) { middle = lowerBound==0.0 ? 
PERTURBATION_OF_ZERO : lowerBound; - middleValue = SetParametersAndCompute (index, middle, ¤tValues, gradient); + if (verbosityLevel > 100) { char buf [512]; - snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) UPDATED middle to %15.12g, LogL = %15.12g]", index, middle, middleValue); + snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket LEFT BOUNDARY (index %ld) UPDATED middle from %15.12g to %15.12g, LogL = %15.12g]", index, middle, middleValue); BufferToConsole (buf); } + middleValue = SetParametersAndCompute (index, middle, ¤tValues, gradient); } return -2; } else { @@ -5092,7 +5088,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue); snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %20.16g <= %20.16g >= %20.16g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); - _TerminateAndDump (_String (buf) & "\n" & buf2 & "\nParameter name " & *GetIthIndependentName(index)); + _TerminateAndDump (_String (buf) & "\n" & buf2 & "\nParameter name " & (index >= 0 ? 
*GetIthIndependentName(index) : "line optimization")); } @@ -5663,21 +5659,10 @@ void _LikelihoodFunction::GetGradientStepBound (_Matrix& gradient,_Parameter& //_______________________________________________________________________________________ -void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, _Matrix&unit, _Parameter& gradientStep, _Matrix& values,_SimpleList& freeze, long order, bool normalize) -{ +void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, _Matrix&unit, _Parameter& gradientStep, _Matrix& values,_SimpleList& freeze, long order, bool normalize) { _Parameter funcValue; - //CheckStep (gradientStep,unit,&values); - /*if (order>1) - { - _Matrix nG (unit); - nG*=-1; - CheckStep (gradientStep,nG,&values); - } - if (gradientStep==0) - return;*/ - - if (order==1) { + if (order==1L) { funcValue = Compute(); for (long index=0; index 50) - { - printf ("[GRADIENT @ %s, [%g-%g-%g], %g. der = %g]\n", cv->GetName()->sData, lb, currentValue, ub, testStep, gradient[index]); + /*if (verbosityLevel > 50) { + printf ("[GRADIENT @ %s, [%20.16g-%20.16g (%20.16g)-%20.16g], delta = %15.12g. der = %20.16g]\n", GetIthIndependentName(index)->getStr(), lb, currentValue, GetIthIndependentVar(index)->Value(), ub, testStep, gradient[index]); }*/ } } @@ -5746,7 +5730,7 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, _Matrix&unit, return; } - funcValue = 1/sqrt(funcValue); + funcValue = 1./sqrt(funcValue); for (long index=0; index=0 && (leftValue > middleValue || rightValue > middleValue)) { - WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: bracket reported successful (") & (long)outcome & "), but likelihood values are inconsistent with it. 
" & leftValue & " / " & middleValue & " / " & rightValue & " initial value = " & maxSoFar); - return; + WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: bracket reported successful (") & (long)outcome & "), but likelihood values are inconsistent with it. " & leftValue & " / " & middleValue & " / " & rightValue & " initial value = " & maxSoFar); + return; } - + //printf ("[LogL = %.20g GRADIENT BRACKET %g/%.20g, %g/%.20g, %g/%.20g; %d]\n",maxSoFar,lV,leftValue,ms,middleValue,rV,rightValue, outcome); - - left.AplusBx (gradient, lV); - middle.AplusBx (gradient, ms); - right.AplusBx (gradient, rV); - + + left_vector.AplusBx (gradient, left); + middle_vector.AplusBx (gradient, middle); + right_vector.AplusBx (gradient, right); + bool reset = false; - + if (outcome!=-1) { // successfull bracket // set up left, right, middle - - if (outcome == -2) { - if (middleValue>maxSoFar) { - maxSoFar = middleValue; - bestVal = middle; - SetAllIndependent (&middle); - } else { - SetAllIndependent (&bestVal); + + if (outcome == -2) { + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump BRACKET == -2 clause]"); + BufferToConsole (buf); + } + if (middleValue>maxSoFar) { + maxSoFar = middleValue; + bestVal = middle_vector; + SetAllIndependent (&middle_vector); + } else { + SetAllIndependent (&bestVal); + } + FlushLocalUpdatePolicy(); + return; } - FlushLocalUpdatePolicy(); - return; - } - - - - if (outcome == indexInd.lLength) { - reset = true; - } else { - _Parameter U,V,W,X=ms,E=0.,FX,FW,FV,XM,R,Q,P,ETEMP,D=0.,FU; - _Matrix currentBestPoint (newMiddle); - currentBestPoint.AplusBx(gradient, ms); - W = .0; - V = .0; - FX = -middleValue; - FV = FX; - FW = FX; - outcome = 0; - while (outcome < 20) { - // brentHistory.Store (-FX - initialValue); - bool parabolic_step = false; - XM = .5*(lV+rV); - - _Parameter tol1 = fabs (X) * MIN (gPrecision, 1e-4) + machineEps, - tol2 = 2.*tol1; + + + + if 
(outcome == indexInd.lLength) { + reset = true; + } else { + _Parameter U,V,W,X=middle,E=0.,FX,FW,FV,XM,R,Q,P,ETEMP,D=0.,FU; + _Matrix current_best_vector (prior_parameter_values); + current_best_vector.AplusBx(gradient, middle); + W = .0; + V = .0; + FX = -middleValue; + FV = FX; + FW = FX; + outcome = 0; + while (outcome < 20) { + // brentHistory.Store (-FX - initialValue); + bool parabolic_step = false; + XM = .5*(left+right); + + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump (current max = %20.16g) GOLDEN RATIO INTERVAL CHECK: %g <= %g (%g = %g) <= %g, span = %g]", maxSoFar, left, XM, X, fabs(X-XM), right, right-left); + BufferToConsole (buf); + } + + _Parameter tol1 = fabs (X) * MIN (gPrecision, 1e-7) + machineEps, + tol2 = 2.*tol1; - if (fabs(X-XM) <= tol2) { - break; - } + if (fabs(X-XM) <= tol2) { + break; + } + + + + if (fabs(E)>tol1) { + R = (X-W)*(FX-FV); + Q = (X-V)*(FX-FW); + P = (X-V)*Q-(X-W)*R; + Q = 2.0 * (Q-R); + if (Q>0.) { + P = -P; + } + Q = fabs(Q); + ETEMP = E; + E = D; + if (!(fabs (P) > fabs (.5*Q*ETEMP) || P <= Q * (left-X) || P >= Q *( right-X))) { + parabolic_step = true; + D = P/Q; + U = X+D; + if (U - left < tol2 || right - U < tol2) { + D = (XM - X >= 0.) ? tol1 : -tol1; + } + } + + } + + + if (!parabolic_step) { + E = (X >= XM ? left : right) - X; + D = GOLDEN_RATIO_C * E; + } + U = fabs (D) >= tol1 ? X + D : X + (D > 0. ? 
tol1 : -tol1); + + //for (index = 0; index < indexInd.lLength; index++) + // SetIthIndependent (index,middle.theData[index]+U*gradient.theData[index]); + FU = -SetParametersAndCompute (-1,U,&prior_parameter_values,&gradient); + //printf ("\n%g\n", FU); + + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump GOLDEN RATIO TRY: param %20.16g, log L %20.16g]", U, -FU); + BufferToConsole (buf); + } - if (fabs(E)>tol1) { - R = (X-W)*(FX-FV); - Q = (X-V)*(FX-FW); - P = (X-V)*Q-(X-W)*R; - Q = 2.0 * (Q-R); - if (Q>0.) { - P = -P; - } - Q = fabs(Q); - ETEMP = E; - E = D; - if (!(fabs (P) > fabs (.5*Q*ETEMP) || P <= Q * (lV-X) || P >= Q *( rV-X))) { - parabolic_step = true; - D = P/Q; - U = X+D; - if (U - lV < tol2 || rV - U < tol2) { - D = (XM - X >= 0.) ? tol1 : -tol1; - } - } + if (FU<=FX) { // accept the move - } + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump (eval %ld) ACCEPT new try, confirm value %20.16g (delta = %20.16g)", likeFuncEvalCallCount, U, FX-FU); + BufferToConsole (buf); + } + current_best_vector = prior_parameter_values; + current_best_vector.AplusBx(gradient, U); - if (!parabolic_step) { - E = (X >= XM ? lV : rV) - X; - D = GOLDEN_RATIO_C * E; - } - U = fabs (D) >= tol1 ? X + D : X + (D > 0. ? 
tol1 : -tol1); - - //for (index = 0; index < indexInd.lLength; index++) - // SetIthIndependent (index,middle.theData[index]+U*gradient.theData[index]); - FU = -SetParametersAndCompute (-1,U,&newMiddle,&gradient); - //printf ("\n%g\n", FU); - - if (FU<=FX) { // accept the move - currentBestPoint = newMiddle; - currentBestPoint.AplusBx(gradient, U); - //brentHistory.Store (1.); - //brentHistory.Store (U); - //brentHistory.Store (X); - if (U>=X) { - lV = X; - } else { - rV = X; + if (U>=X) { + left = X; + } else { + right = X; + } + V = W; + FV = FW; + W = X; + FW = FX; + X = U; + FX = FU; + } else { + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump (eval %ld) REJECT new try (%20.16g) (delta = %20.16g)", likeFuncEvalCallCount, U, FX-FU); + BufferToConsole (buf); + } + + if (U 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump GOLDEN RATIO SEARCH SUCCESSFUL: precision %g, parameter moved from %15.12g to %15.12g, Log L new/old = %15.12g/%15.12g ]\n\n", gPrecision, X, -FX, middleValue, maxSoFar); + BufferToConsole (buf); } - if (FU<=FW || W==X ) { - V = W; - FV = FW; - W = U; - FW = FU; + middleValue = -FX; + //brentHistory.Store (0.); + if (middleValue < maxSoFar ) { + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump RESETTING THE VALUE (worse log likelihood obtained; current value %20.16g, best value %20.16g) ]\n\n", middleValue, maxSoFar); + + BufferToConsole (buf); + } + SetAllIndependent (&bestVal); + maxSoFar = middleValue; } else { - if (FU<=FV || V==X || V==W) { - V = U; - FV = FU; - } + SetAllIndependent (¤t_best_vector); + maxSoFar = Compute(); + if (verbosityLevel > 50) { + char buf [256]; + snprintf (buf, 256, "\n\t[_LikelihoodFunction::GradientLocateTheBump moving parameter value (should trigger LL update) %15.12g ||L2|| move ]\n\n", (current_best_vector-bestVal).AbsValue()); + 
BufferToConsole (buf); + } + bestVal = current_best_vector; + } + + if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue, 10. * machineEps)) { + WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: in the Brent loop iteration ") & long(outcome) & ". " & _String (maxSoFar, "%15.12g") & " / " & _String (initialValue,"%15.12g") & ".\n");// & _String ((_String*)brentHistory.toStr())); + return; } - - } - outcome++; - - } - - middleValue = -FX; - //brentHistory.Store (0.); - if (middleValue <= maxSoFar || CheckEqual(maxSoFar, middleValue)) { - //brentHistory.Store (-1.); - //brentHistory.Store (middleValue-initialValue); - - SetAllIndependent (&bestVal); - maxSoFar = middleValue; - } else { - SetAllIndependent (¤tBestPoint); - maxSoFar = Compute(); - bestVal = currentBestPoint; - /*brentHistory.Store (1.); - brentHistory.Store (changed); - brentHistory.Store (X); - brentHistory.Store (maxSoFar-initialValue); - brentHistory.Store (-FX-initialValue);*/ - } - - if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue, 10. * machineEps)) { - WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: in the Brent loop iteration ") & long(outcome) & ". 
" & _String (maxSoFar, "%15.12g") & " / " & _String (initialValue,"%15.12g") & ".\n");// & _String ((_String*)brentHistory.toStr())); - return; + + //bestVal = middle; + //maxSoFar = middleValue; } - - //bestVal = middle; - //maxSoFar = middleValue; - } - //middle = X; + //middle = X; } - + else { - reset = true; - if (verbosityLevel>1) { - BufferToConsole ("Line optimization unsuccessful\n"); - } - if (leftValue>middleValue) { - middleValue = leftValue; - middle = left; - } - if (rightValue>middleValue) { - middleValue = rightValue; - middle = right; - } - - if (middleValue>maxSoFar) { - SetAllIndependent (&middle); - maxSoFar = middleValue; - reset = false; - } + reset = true; + if (verbosityLevel>1) { + BufferToConsole ("Line optimization unsuccessful\n"); + } + if (leftValue>middleValue) { + middleValue = leftValue; + middle_vector = left_vector; + } + if (rightValue>middleValue) { + middleValue = rightValue; + middle_vector = right_vector; + } + + if (middleValue>maxSoFar) { + SetAllIndependent (&middle_vector); + maxSoFar = middleValue; + reset = false; + } } - + if (reset) - SetAllIndependent (&bestVal); - + SetAllIndependent (&bestVal); + FlushLocalUpdatePolicy(); - } +} //_______________________________________________________________________________________ @@ -6335,8 +6353,16 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P unsigned long inCount = likeFuncEvalCallCount; int outcome = Bracket (index,left,middle,right,leftValue, middleValue, rightValue,bp); unsigned long bracketCount = likeFuncEvalCallCount - inCount; + + // ensure that we at least look within the bracket range + if (outcome != -1) { // successfull bracket + if (right - left < 4*brentPrec) { + brentPrec = (right-left) * 0.2; + //printf ("\nResetting brentPrec to %g\n", brentPrec, "\n"); + } + _Parameter U,V,W,X=middle,E=0.,FX,FW,FV,XM,R,Q,P,ETEMP,D=0.,FU; W = middle; V = middle; @@ -6358,6 +6384,7 @@ void _LikelihoodFunction::LocateTheBump (long 
index,_Parameter gPrecision, _P } if (fabs(X-XM) <= brentPrec) { + // enforce at least one iteration break; } diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index c938fcfda..ec6f58238 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -3743,6 +3743,8 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) } continue; } + + #endif const unsigned long @@ -3766,25 +3768,47 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) */ for (unsigned long i = 0UL, vector_index = c; i < secondArg.hDim; i += 4UL, vector_index += column_shift4) { - _Parameter c0 = secondArg.theData[vector_index], - c1 = secondArg.theData[vector_index+secondArg.vDim], - c2 = secondArg.theData[vector_index+column_shift2], - c3 = secondArg.theData[vector_index+column_shift3]; - - for (unsigned long r = 0UL; r < hDim; r ++) { - - unsigned long element = r*vDim + i; - - _Parameter r0 = theData[element] * c0, - r1 = theData[element+1] * c1, - r2 = theData[element+2] * c2, - r3 = theData[element+3] * c3; - - r0 += r1; - r2 += r3; - dest[r*vDim + c] += r0 + r2; - - } +/*#ifdef _SLKP_USE_AVX_INTRINSICS + + _Parameter quad1[4] __attribute__ ((aligned (32))); + quad1[0] = secondArg.theData[vector_index]; + quad1[1] = secondArg.theData[vector_index+secondArg.vDim], + quad1[2] = secondArg.theData[vector_index+column_shift2], + quad1[3] = secondArg.theData[vector_index+column_shift3]; + __m256d __attribute__ ((aligned (32))) col_buffer = _mm256_load_pd (quad1); + + for (unsigned long r = 0UL; r < hDim; r ++) { + + //unsigned long element = r*vDim + i; + __m256d __attribute__ ((aligned (32))) row_quad = _mm256_loadu_pd (theData + (r*vDim + i)); + dest[r*vDim + c] += _avx_sum_4(_mm256_mul_pd (col_buffer,row_quad)); + + } + +#else*/ + _Parameter c0 = secondArg.theData[vector_index], + c1 = secondArg.theData[vector_index+secondArg.vDim], + c2 = secondArg.theData[vector_index+column_shift2], + c3 = secondArg.theData[vector_index+column_shift3]; + + for (unsigned long r = 0UL; 
r < hDim; r ++) { + + unsigned long element = r*vDim + i; + + + _Parameter r0 = theData[element] * c0, + r1 = theData[element+1] * c1, + r2 = theData[element+2] * c2, + r3 = theData[element+3] * c3; + + r0 += r1; + r2 += r3; + dest[r*vDim + c] += r0 + r2; + + } +//#endif + + } } } else { From 3abf41689ab34b8a4cbeea9032c11bded064eb8b Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Mon, 9 Jul 2018 10:08:42 +0900 Subject: [PATCH 24/53] Maybe a fix for bracket numerical instability --- src/core/include/likefunc.h | 2 +- src/core/likefunc.cpp | 41 +++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index 0da9691b7..06e75224f 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -374,7 +374,7 @@ class _LikelihoodFunction: public BaseObj (_Parameter , _Matrix& , bool localOnly = false, long = 0x7fffffff,_SimpleList* only_these_parameters = nil, _Parameter check_lf = A_LARGE_NUMBER); _Parameter SetParametersAndCompute - (long, _Parameter, _Matrix* = nil, _Matrix* = nil); + (long, _Parameter, _Matrix* = nil, _Matrix* = nil, bool skip_compute = false); long CostOfPath (_DataSetFilter const*, _TheTree const* , _SimpleList&, _SimpleList* = nil) const; diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index c3c3fabb1..ae8217ec9 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -4734,7 +4734,7 @@ bool CheckEqual (_Parameter a, _Parameter b, _Parameter tolerance) { } //_______________________________________________________________________________________ -_Parameter _LikelihoodFunction::SetParametersAndCompute (long index, _Parameter value, _Matrix* baseLine, _Matrix* direction) +_Parameter _LikelihoodFunction::SetParametersAndCompute (long index, _Parameter value, _Matrix* baseLine, _Matrix* direction, bool skip_compute) { if (index >= 0) { SetIthIndependent (index,value); @@ -4749,6 +4749,9 @@ _Parameter 
_LikelihoodFunction::SetParametersAndCompute (long index, _Parameter } + if (skip_compute) { + return -A_LARGE_NUMBER; + } _Parameter logL = Compute(); //if (index >=0) // printf ("[SetParametersAndCompute %g = %g]\n", value, logL); @@ -4830,6 +4833,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& practicalUB = upperBound>DEFAULTPARAMETERUBOUND?DEFAULTPARAMETERUBOUND:upperBound; long funcCounts = likeFuncEvalCallCount; + _Parameter stash_middle; if (index >= 0) { middle = GetIthIndependent (index); @@ -4883,6 +4887,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& SetIthIndependent(6L, GetIthIndependent(6L)); } */ + while (1) { @@ -4904,7 +4909,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket LEFT BOUNDARY (index %ld) UPDATED middle from %15.12g to %15.12g, LogL = %15.12g]", index, middle, middleValue); BufferToConsole (buf); } - middleValue = SetParametersAndCompute (index, middle, &currentValues, gradient); + middleValue = stash_middle = SetParametersAndCompute (index, middle, &currentValues, gradient); } return -2; } else { @@ -4920,7 +4925,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (rightStep0 || index < 0 && rightStep < STD_GRAD_STEP) { if (!first) { if (go2Bound) { - middleValue = SetParametersAndCompute (index, middle=upperBound, &currentValues, gradient); + middleValue = stash_middle = SetParametersAndCompute (index, middle=upperBound, &currentValues, gradient); } return -2; } else { @@ -4934,13 +4939,13 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (CheckEqual(middle,saveL)) { - middleValue = saveLV; + stash_middle = middleValue = saveLV; } else if (CheckEqual(middle,saveR)) { - middleValue = saveRV; + stash_middle = middleValue = saveRV; } else if (CheckEqual(middle,saveM)) { - middleValue = saveMV; + stash_middle = middleValue = saveMV; } 
else { - middleValue = SetParametersAndCompute (index, middle, &currentValues, gradient); + middleValue = stash_middle = SetParametersAndCompute (index, middle, &currentValues, gradient); if (verbosityLevel > 100) { char buf [512]; snprintf (buf, sizeof(buf), "\n\t[_LikelihoodFunction::Bracket (index %ld) UPDATED middle to %15.12g, LogL = %15.12g]", index, middle, middleValue); @@ -5045,7 +5050,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& } if (middle>=practicalUB) { - middleValue = SetParametersAndCompute (index, middle = practicalUB, &currentValues, gradient); + stash_middle = middleValue = SetParametersAndCompute (index, middle = practicalUB, &currentValues, gradient); //if (index < 0) printf ("\nmiddle>=practicalUB\n"); break; } @@ -5062,12 +5067,12 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& if (curVar) { if (CheckAndSetIthIndependent(index,middle)) { - /*CheckAndSetIthIndependent(index,left); - _Parameter lc = Compute(); - CheckAndSetIthIndependent(index,right); - _Parameter rc = Compute(); - CheckAndSetIthIndependent(index,middle);*/ - middleValue = Compute(); + /*CheckAndSetIthIndependent(index,left); + _Parameter lc = Compute(); + CheckAndSetIthIndependent(index,right); + _Parameter rc = Compute(); + CheckAndSetIthIndependent(index,middle);*/ + middleValue = stash_middle;//Compute(); if (verbosityLevel > 100) { char buf [256]; @@ -5078,10 +5083,16 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& } } else { - middleValue = SetParametersAndCompute (index, middle, &currentValues, gradient); + middleValue = stash_middle; + SetParametersAndCompute (index, middle, &currentValues, gradient,false); + } if (successful && !(rightValue<=middleValue && leftValue<=middleValue)) { + /** SLKP 20180709 need to have a more permissive check, because sometimes if the change is too small + (or involves a paremeter that has very little effect on the LF), recomputation could be within numerical error + + **/ 
char buf[256], buf2[512]; From d796627e1a3bfe97bbcd631295c52b2806df3526 Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Mon, 9 Jul 2018 12:03:56 +0900 Subject: [PATCH 25/53] Redoing conjugate gradient descent --- src/core/include/likefunc.h | 2 +- src/core/likefunc.cpp | 136 ++++++++++++++++++++---------------- 2 files changed, 78 insertions(+), 60 deletions(-) diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index 06e75224f..fc7165f80 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -425,7 +425,7 @@ class _LikelihoodFunction: public BaseObj void ComputeBlockInt1 (long,_Parameter&,_TheTree*,_DataSetFilter*, char); void CheckStep (_Parameter&, _Matrix, _Matrix* selection = nil); void GetGradientStepBound (_Matrix&, _Parameter &, _Parameter &, long* = nil); - void ComputeGradient (_Matrix&, _Matrix&, _Parameter&, _Matrix&, _SimpleList&, + void ComputeGradient (_Matrix&, _Parameter&, _Matrix&, _SimpleList&, long, bool normalize = true); bool SniffAround (_Matrix& , _Parameter& , _Parameter&); void RecurseCategory (long,long,long,long,_Parameter diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index ae8217ec9..061c4e250 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -5670,7 +5670,7 @@ void _LikelihoodFunction::GetGradientStepBound (_Matrix& gradient,_Parameter& //_______________________________________________________________________________________ -void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, _Matrix&unit, _Parameter& gradientStep, _Matrix& values,_SimpleList& freeze, long order, bool normalize) { +void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, _Parameter& gradientStep, _Matrix& values,_SimpleList& freeze, long order, bool normalize) { _Parameter funcValue; if (order==1L) { @@ -5806,7 +5806,6 @@ bool _LikelihoodFunction::SniffAround (_Matrix& values, _Parameter& bestSoFar _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter 
precision, _Matrix& bestVal, bool localOnly, long iterationLimit, _SimpleList* only_these_parameters, _Parameter check_value) { _Parameter gradientStep = STD_GRAD_STEP, - temp, maxSoFar = Compute(), initial_value = maxSoFar, currentPrecision = localOnly?precision:.01; @@ -5837,49 +5836,49 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio - _Matrix unit (bestVal), - gradient (bestVal); long vl = verbosityLevel; char buffer[1024]; - unit.PopulateConstantMatrix (1.); - if (vl>1) { snprintf (buffer, sizeof(buffer),"\nConjugate Gradient Pass %d, precision %g, gradient step %g, max so far %15.12g\n",0,precision,gradientStep,maxSoFar); BufferToConsole (buffer); } - _Matrix G (bestVal), - H (bestVal), - S (bestVal); + const unsigned long dim = bestVal.GetHDim() * bestVal.GetVDim(); - _Parameter gradL; + _Matrix gradient (bestVal), + current_direction, + previous_direction, + previous_gradient; - ComputeGradient (gradient, unit, gradientStep, bestVal, freeze, 1, false); + _Parameter gradL; + ComputeGradient (gradient, gradientStep, bestVal, freeze, 1, false); gradL = gradient.AbsValue (); + if (gradL > 0.0) { + // NOT already are at an extremum + + //gradient *= (1./gradL); + current_direction = gradient; + // move down the gradient - if (gradL != 0.0) { - - gradient *= -1.; - G.Duplicate (&gradient); - H.Duplicate (&gradient); - - for (long index = 0; index<200 && index < iterationLimit; index++, currentPrecision*=0.25) { - temp = maxSoFar; + for (long index = 0; index< MAX (dim, 10) && index < iterationLimit; index++, currentPrecision*=0.25) { + _Parameter current_maximum = maxSoFar; if (currentPrecision < 0.00001) { currentPrecision = 0.00001; } + + //printf ("%s\n", _String ((_String*)gradient.toStr()).getStr()); + + _Parameter line_search_precision = localOnly?precision:currentPrecision; - S = gradient; - S *= -1./gradient.AbsValue(); - GradientLocateTheBump(localOnly?precision:currentPrecision, maxSoFar, bestVal, S); + 
GradientLocateTheBump(line_search_precision, maxSoFar, bestVal, current_direction); - LoggerAddGradientPhase (localOnly?precision:currentPrecision ); + LoggerAddGradientPhase (line_search_precision); LoggerAllVariables (); LoggerLogL (maxSoFar); @@ -5887,46 +5886,70 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio snprintf (buffer, sizeof(buffer),"Conjugate Gradient Pass %ld, precision %g, gradient step %g, max so far %15.12g\n",index+1,precision,gradientStep,maxSoFar); BufferToConsole (buffer); } - if (localOnly) { - if (fabs((maxSoFar-temp))<=precision) { - break; - } - } else if (fabs((maxSoFar-temp)/temp)<=precision) { + + //printf ("##### %g => %g %g\n", current_maximum, maxSoFar, maxSoFar-current_maximum); + + //if (localOnly) { + if (fabs(maxSoFar-current_maximum)<=precision) { break; } + //} else if (fabs((maxSoFar-current_maximum)/current_maximum)<=precision) { + // break; + //} - ComputeGradient (gradient, unit, gradientStep, bestVal, freeze, 1, false); - gradL =gradient.AbsValue (); + previous_gradient = gradient; + ComputeGradient (gradient, gradientStep, bestVal, freeze, 1, false); + + //gradL = gradient.AbsValue (); + + //printf (">>>> %g\n", gradL); + if (CheckEqual(gradL,0.0)) { + // already at the maximum break; } - S = gradient; - //gradL = S.AbsValue(); - //S *= 1.; - - _Parameter gg = 0., - dgg = 0.; + + //gradient *= (1./gradL); + + previous_direction = current_direction; + + _Parameter beta = 0., scalar_product = 0.; + + + // use Polak–Ribière direction - for (unsigned long k = 0; k < indexInd.lLength; k++) { - gg += G.theData[k]*G.theData[k]; - dgg += (S.theData[k] + G.theData[k])*S.theData[k]; + for (unsigned long i = 0UL; i < dim; i++) { + scalar_product += previous_gradient.theData[i] * previous_gradient.theData[i]; + beta += gradient.theData[i] * ( gradient.theData[i] - previous_gradient.theData[i]); } - - if (gg == 0.) 
{ - break; + + + // use Dai–Yuan + /*for (unsigned long i = 0UL; i < dim; i++) { + beta += gradient.theData[i] * gradient.theData[i]; + scalar_product += previous_direction.theData[i] * ( gradient.theData[i] - previous_gradient.theData[i]); } - - dgg /= gg; - - - for (unsigned long k = 0; k < indexInd.lLength; k++) { - G.theData[k] = -S.theData[k]; - gradient.theData[k] = H.theData[k] = G.theData[k] + dgg * H.theData[k]; + beta = -beta;*/ + + // Hestenes-Stiefel + /*for (unsigned long i = 0UL; i < dim; i++) { + beta += gradient.theData[i] * ( gradient.theData[i] - previous_gradient.theData[i]); + scalar_product += previous_direction.theData[i] * ( gradient.theData[i] - previous_gradient.theData[i]); } + beta = -beta;*/ + + + //printf ("=== %g/%g %g\n", beta, scalar_product, beta / scalar_product); + beta /= scalar_product; + beta = MAX (beta, 0.0); + previous_direction = current_direction; + + previous_gradient = previous_direction; + previous_gradient *= beta; + + current_direction = gradient; + current_direction += previous_gradient; - if (terminateExecution) { - return check_value; - } } } @@ -5960,16 +5983,11 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b freeze, countLC; - _Matrix unit (bestVal), - gradient (bestVal); + _Matrix gradient (bestVal); long vl = verbosityLevel, index; - for (index=0; index=gPrecision && freeze.lLength Date: Tue, 10 Jul 2018 09:01:34 +0900 Subject: [PATCH 26/53] RELAX failed contraint removal fix --- .../SelectionAnalyses/RELAX.bf | 4 +- src/core/likefunc.cpp | 68 +++++++++++++++++++ .../SimpleOptimizations/SmallCodon.bf | 3 + 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 2271e543e..4a35612df 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -28,6 +28,8 @@ LoadFunctionLibrary("libv3/convenience/math.bf"); 
utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE); +//utility.SetEnvVariable ("LF_SMOOTHING_SCALER", 0.01); +//utility.SetEnvVariable ("LF_SMOOTHING_REDUCTION", 1/2); /*------------------------------------------------------------------------------*/ @@ -459,7 +461,7 @@ if (relax.model_set == "All") { io.ReportProgressMessageMD ("RELAX", "pe", "Fitting the partitioned descriptive model (separate distributions for *test* and *reference* branches)"); parameters.RemoveConstraint (utility.Keys (relax.bound_weights)); - for (relax.i = 1; relax.i < relax.rate_classes; relax.i += 1) { + for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) { parameters.RemoveConstraint (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i))); } relax.pe.fit = estimators.FitExistingLF (relax.alternative_model.fit[terms.likelihood_function], relax.model_object_map); diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 061c4e250..4c82b23a4 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -6391,7 +6391,75 @@ void _LikelihoodFunction::LocateTheBump (long index,_Parameter gPrecision, _P brentPrec = (right-left) * 0.2; //printf ("\nResetting brentPrec to %g\n", brentPrec, "\n"); } + + + /*if (index >= 0) { + // try Newton Raphson + + if (middle - left > STD_GRAD_STEP && right - middle > STD_GRAD_STEP) { + + + _Parameter last_value, current_value = middle, current_fx = middleValue; + + auto store_max = [&] (_Parameter x, _Parameter fx) -> void { + if (fx > maxSoFar) { + maxSoFar = fx; + bestVal = x; + } + }; + + do { + last_value = current_value; + + _Parameter x_plus_h = last_value + STD_GRAD_STEP, + x_minus_h = last_value - STD_GRAD_STEP, + fx_plus_h = SetParametersAndCompute(index, x_plus_h), + fx_minus_h = SetParametersAndCompute(index, x_minus_h), + dFdX = (fx_plus_h - fx_minus_h) / (2. 
* STD_GRAD_STEP), + d2FdX2 = ((fx_plus_h - current_fx) + (fx_minus_h - current_fx)) / (STD_GRAD_STEP * STD_GRAD_STEP); + + + store_max (x_plus_h, fx_plus_h); + store_max (x_minus_h, fx_minus_h); + + if (CheckEqual(d2FdX2, 0.0)) { + current_value = last_value; + } else { + current_value = last_value - dFdX / d2FdX2; + } + + if (current_value < left || current_value > right) { + break; + } + + //printf ("\n\nf(%20.16g) = %20.16g; f(%20.16g) = %20.16g\n", x_plus_h, fx_plus_h, x_minus_h, fx_minus_h); + //printf ("f(%g) = %g; dF = %g, dF2 = %g\n", last_value, current_fx, dFdX, d2FdX2); + + if (CheckAndSetIthIndependent(index, current_value)) { + current_fx = SetParametersAndCompute (index, current_value); + } + //printf (" == move by %g with value %20.16g\n", current_value-last_value, current_fx); + + store_max (current_value, current_fx); + + + } while (fabs (current_value - last_value) >= brentPrec && current_value - left > STD_GRAD_STEP && right - current_value > STD_GRAD_STEP); + + if (fabs (current_value - last_value) < brentPrec && current_value - left > STD_GRAD_STEP && right - current_value > STD_GRAD_STEP) { + CheckAndSetIthIndependent(index, bestVal); + //printf (" == SUCCESS\n"); + FlushLocalUpdatePolicy (); + return; + } + CheckAndSetIthIndependent(index, bestVal); + middle = bestVal; + middleValue = maxSoFar; + //printf (" == FAILURE %20.16g -> %20.16g\n", middle, middleValue); + + } + }*/ + _Parameter U,V,W,X=middle,E=0.,FX,FW,FV,XM,R,Q,P,ETEMP,D=0.,FU; W = middle; V = middle; diff --git a/tests/hbltests/SimpleOptimizations/SmallCodon.bf b/tests/hbltests/SimpleOptimizations/SmallCodon.bf index dcbbf05e1..3690f44e5 100644 --- a/tests/hbltests/SimpleOptimizations/SmallCodon.bf +++ b/tests/hbltests/SimpleOptimizations/SmallCodon.bf @@ -668,6 +668,9 @@ USE_ADAPTIVE_VARIABLE_STEP = 1; Optimize (res,lf); +//Export (lfe, lf); +//fprintf ("/tmp/test.bf", CLEAR_FILE, lfe); + //fprintf (stdout, "\n\n", lf.trace, "\n\n"); /* test epilogue */ From 
c193cadae876ad9779e282adf10a464c7a28367d Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Thu, 12 Jul 2018 15:41:06 +0900 Subject: [PATCH 27/53] RELAX fixes; datasize aware checking for reversible speed-up checks; fix for not initializing cat var when computing means for branch length conversions --- .../SelectionAnalyses/RELAX.bf | 110 +++++++++++++++++- .../modules/grid_compute.ibf | 13 +++ res/TemplateBatchFiles/libv3/all-terms.bf | 4 +- .../libv3/tasks/estimators.bf | 6 +- src/core/category.cpp | 5 +- src/core/likefunc.cpp | 2 +- 6 files changed, 127 insertions(+), 13 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 4a35612df..20641ee33 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -24,6 +24,8 @@ LoadFunctionLibrary("modules/io_functions.ibf"); LoadFunctionLibrary("modules/selection_lib.ibf"); LoadFunctionLibrary("libv3/models/codon/BS_REL.bf"); LoadFunctionLibrary("libv3/convenience/math.bf"); +LoadFunctionLibrary ("libv3/tasks/mpi.bf"); + utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); @@ -34,8 +36,9 @@ utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE); /*------------------------------------------------------------------------------*/ relax.analysis_description = { - terms.io.info : "RELAX (a random effects test of selection relaxation) uses a random effects branch-site model framework to test whether a set of 'Test' branches evolves under relaxed selection relative to a set of 'Reference' branches (R), as measured by the relaxation parameter (K).", - terms.io.version : "2.0", + terms.io.info : "RELAX (a random effects test of selection relaxation) uses a random effects branch-site model framework to test whether a set of 'Test' branches evolves under relaxed selection relative to a set of 'Reference' branches (R), as measured by the relaxation parameter (K). 
+ Version 2.1 adds a check for stability in K estimates to try to mitigate convergence problems", + terms.io.version : "2.1", terms.io.reference : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832", terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group", terms.io.contact : "spond@temple.edu", @@ -63,6 +66,11 @@ terms.relax.k_range = { terms.lower_bound: "0", terms.upper_bound: "50" }; + +terms.relax.k_range1 = { + terms.lower_bound: "1", + terms.upper_bound: "50" + }; relax.p_threshold = 0.05; @@ -92,6 +100,7 @@ selection.io.startTimer (relax.json [terms.json.timers], "Overall", 0); namespace relax { LoadFunctionLibrary ("modules/shared-load-file.bf"); load_file ({utility.getGlobalValue("terms.prefix"): "relax", utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "relax.select_branches"}}); + LoadFunctionLibrary ("modules/grid_compute.ibf"); } @@ -177,6 +186,7 @@ utility.ForEachPair (relax.filter_specification, "_key_", "_value_", selection.io.stopTimer (relax.json [terms.json.timers], "Preliminary model fitting"); +parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50); if (relax.model_set == "All") { // run all the models @@ -206,16 +216,22 @@ if (relax.model_set == "All") { // run all the models parameters.SetStickBreakingDistribution (relax.distribution, relax.ge_guess); + relax.general_descriptive.fit = estimators.FitLF (relax.filter_names, relax.trees, { "0" : {"DEFAULT" : "relax.ge"}}, relax.final_partitioned_mg_results, relax.model_object_map, { - terms.run_options.apply_user_constraints: "relax.init.k" + terms.run_options.apply_user_constraints: "relax.init.k", + terms.run_options.retain_lf_object : TRUE + }); + + estimators.TraverseLocalParameters (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map, "relax.set.k2"); + 
relax.general_descriptive.fit = estimators.FitExistingLF (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map); selection.io.stopTimer (relax.json [terms.json.timers], "General descriptive model fitting"); @@ -291,7 +307,6 @@ relax.bound_weights = models.BindGlobalParameters ({"0" : relax.reference.bsrel_ models.BindGlobalParameters ({"0" : relax.test.bsrel_model, "1" : relax.reference.bsrel_model}, terms.nucleotideRate("[ACGT]","[ACGT]")); -parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50); model.generic.AddGlobal (relax.test.bsrel_model, relax.relaxation_parameter, terms.relax.k); @@ -358,10 +373,70 @@ io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution wa relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.test.bsrel_model)) % 0; selection.io.report_dnds (relax.inferred_distribution); + io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **reference** branches"); relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.reference.bsrel_model)) % 0; selection.io.report_dnds (relax.inferred_distribution_ref); +relax.lf.raw = relax.ComputeOnGrid ( relax.alternative_model.fit[terms.likelihood_function], + relax.grid.MatrixToDict ({200,1}["_MATRIX_ELEMENT_ROW_*0.025"]), + "relax.pass1.evaluator", + "relax.pass1.result_handler"); + +// FIND the difference between K < 1 and K > 1 + +relax.best_samples = {{-1e100,-1e100}}; + +for (relax.k = 0; relax.k < 40; relax.k += 1) { + relax.best_samples[0] = Max (relax.best_samples[0], relax.lf.raw[relax.k]); +} + +for (relax.k = 40; relax.k < 200; relax.k += 1) { + relax.best_samples[1] = Max (relax.best_samples[1], relax.lf.raw[relax.k]); +} + +//console.log (relax.best_samples); + +if (Abs (relax.best_samples[1] - relax.best_samples[0]) < 5.) 
{ // could be diagnostic of convergence problems + io.ReportProgressMessageMD("RELAX", "alt-2", "* Potential convergence issues due to flat likelihood surfaces; checking to see whether K > 1 or K < 1 is robustly inferred"); + if (relax.fitted.K > 1) { + parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.range01); + } else { + parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range1); + } + relax.alternative_model.fit.take2 = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map}, + relax.alternative_model.fit , + relax.model_object_map, + {terms.run_options.retain_lf_object: TRUE} + ); + + + + if (relax.alternative_model.fit.take2 [terms.fit.log_likelihood] > relax.alternative_model.fit[terms.fit.log_likelihood]) { + + io.ReportProgressMessageMD("RELAX", "alt-2", "\n### Potential for highly unreliable K inference due to multiple local maxima in the likelihood function, treat results with caution "); + io.ReportProgressMessageMD("RELAX", "alt-2", "> Relaxation parameter reset to opposite mode of evolution from that obtained in the initial optimization."); + io.ReportProgressMessageMD("RELAX", "alt-2", "* " + selection.io.report_fit (relax.alternative_model.fit.take2, 9, relax.codon_data_info[terms.data.sample_size])); + relax.fitted.K = estimators.GetGlobalMLE (relax.alternative_model.fit.take2,terms.relax.k); + io.ReportProgressMessageMD("RELAX", "alt-2", "* Relaxation/intensification parameter (K) = " + Format(relax.fitted.K,8,2)); + io.ReportProgressMessageMD("RELAX", "alt-2", "* The following rate distribution was inferred for **test** branches"); + relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.test.bsrel_model)) % 0; + selection.io.report_dnds (relax.inferred_distribution); + + + io.ReportProgressMessageMD("RELAX", "alt-2", "* The following rate 
distribution was inferred for **reference** branches"); + relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.reference.bsrel_model)) % 0; + selection.io.report_dnds (relax.inferred_distribution_ref); + + relax.alternative_model.fit = relax.alternative_model.fit.take2; + } + + + parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range); + + +} + relax.distribution_for_json = {relax.test_branches_name : utility.Map (utility.Range (relax.rate_classes, 0, 1), "_index_", "{terms.json.omega_ratio : relax.inferred_distribution [_index_][0], @@ -524,12 +599,23 @@ lfunction relax.extract.k(branch_info) { lfunction relax.set.k (tree_name, node_name, model_description) { if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.relax.k"), "String")) { k = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.relax.k")]; - parameters.SetValue (tree_name + "." + node_name + "." + k, 1); + parameters.SetConstraint (tree_name + "." + node_name + "." + k, utility.getGlobalValue ("relax.relaxation_parameter"), ""); parameters.SetRange (tree_name + "." + node_name + "." + k, utility.getGlobalValue ("terms.relax.k_range")); } return tree_name + "." + node_name + "." + k; } +//------------------------------------------------------------------------------ + +lfunction relax.set.k2 (tree_name, node_name, model_description) { + if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.relax.k"), "String")) { + k = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.relax.k")]; + parameters.RemoveConstraint (tree_name + "." + node_name + "." + k); + } + return tree_name + "." + node_name + "." 
+ k; +} + + //------------------------------------------------------------------------------ lfunction relax.init.k (lf_id, components, data_filter, tree, model_map, initial_values, model_objects) { @@ -639,6 +725,8 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) { if ( component < bs_rel[utility.getGlobalValue("terms.model.components")]) { model.generic.AddGlobal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component )); parameters.DeclareGlobalWithRanges (_aux[component-1], 0.5, 0, 1); + } else { + } models.codon.generic.DefineQMatrix(bs_rel, namespace); rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")]; @@ -720,3 +808,15 @@ lfunction relax.select_branches(partition_info) { return_set + tree_configuration; return return_set; } + +//------------------------------------------------------------------------------ + +lfunction relax.grid.MatrixToDict (grid) { + return utility.Map (utility.MatrixToListOfRows (grid), "_value_", + '{ terms.relax.k : { + terms.id : relax.relaxation_parameter, + terms.fit.MLE : _value_[1] + } + + }'); +} diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf b/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf index 47c830b36..2e9f81398 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf @@ -1,6 +1,7 @@ //------------------------------------------------------------------------------ lfunction ComputeOnGrid (lf_id, grid, handler, callback) { + jobs = mpi.PartitionIntoBlocks(grid); scores = {}; @@ -38,9 +39,21 @@ lfunction pass1.evaluator (lf_id, tasks, scores) { task_ids = utility.Keys (tasks); task_count = Abs (tasks); for (i = 0; i < task_count; i+=1) { + + //console.log (tasks[task_ids[i]]); + parameters.SetValues (tasks[task_ids[i]]); LFCompute (^lf_id, ll); results [task_ids[i]] = ll; + + /*if (ll < 
-1e10) { + LFCompute (^lf_id, LF_DONE_COMPUTE); + io.SpoolLF (lf_id, "/tmp/RELAX", "alt.lf"); + assert (0); + } + + console.log (ll + "\n\n");*/ + } LFCompute (^lf_id, LF_DONE_COMPUTE); diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index c7967e9a0..0e494af62 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -73,12 +73,12 @@ namespace terms{ range_gte1 = { lower_bound: "1", - upper_bound: "1e26" + upper_bound: "1e25" }; range_any = { lower_bound: "0", - upper_bound: "1e26" + upper_bound: "1e25" }; diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 2391a2c0f..8f9a1393a 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -695,13 +695,11 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o } //assert (0); + //Export (lf,likelihoodFunction); + //console.log (lf); Optimize (mles, likelihoodFunction); - /* - Export (lf,likelihoodFunction); - console.log (lf); - */ if (Type(initial_values) == "AssociativeList") { utility.ToggleEnvVariable("USE_LAST_RESULTS", None); diff --git a/src/core/category.cpp b/src/core/category.cpp index bd047235e..90c4e7bfa 100644 --- a/src/core/category.cpp +++ b/src/core/category.cpp @@ -1014,10 +1014,13 @@ void _CategoryVariable::ScanForGVariables (_AVLList& l) _Parameter _CategoryVariable::Mean (void) { _Parameter mean = 0.; + + UpdateIntervalsAndValues (); + _Matrix * wts = GetWeights(), * val = GetValues(); - for (long ii = 0; ii < intervals; ii++) { + for (long ii = 0; ii < intervals; ii++) { mean += wts->theData[ii] * val->theData[ii]; } diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 4c82b23a4..c3008da40 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -8115,7 +8115,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, 
_Parameter* siteRes, //fprintf (stderr, "CONGRUENCE CHECK %20.16g\n",fabs ((checksum-sum)/sum)); - if (fabs ((checksum-sum)/sum) > 1e-12) { + if (fabs ((checksum-sum)/sum) > 1.e-12 * df->GetPatternCount ()) { /*_Parameter check2 = t->ComputeTreeBlockByBranch (*sl, *branches, tcc, From 8b97f43a3383f63afc44d92491383397f8b62797 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 17 Jul 2018 00:52:27 -0400 Subject: [PATCH 28/53] Custom LF convergence conditions; better load balancing for partition optimizer --- .../libv3/tasks/estimators.bf | 7 +- src/core/include/likefunc.h | 2 +- src/core/likefunc.cpp | 125 ++++++++++++++++-- 3 files changed, 120 insertions(+), 14 deletions(-) diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 8f9a1393a..6aec71731 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -479,7 +479,7 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip if (Type((initial_values[terms.branch_length])[estimators.ApplyExistingEstimates.i]) == "AssociativeList") { // have branch lengths for this partition _application_type = None; - + if (Type (branch_length_conditions) == "AssociativeList") { if (Abs(branch_length_conditions) > estimators.ApplyExistingEstimates.i) { _application_type = branch_length_conditions[estimators.ApplyExistingEstimates.i]; @@ -810,10 +810,11 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, model_template, init this_namespace = this_namespace[0][Abs (this_namespace)-3]; df = estimators.CreateLFObject (this_namespace, data_filter, tree, model_template, initial_values, run_options, None); - + + /* Export (lfe, likelihoodFunction); - console.log (lfe); + fprintf ("/tmp/pogo-dump.fit", CLEAR_FILE, lfe); */ Optimize(mles, likelihoodFunction); diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index fc7165f80..61dfdaab0 100644 
--- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -170,7 +170,7 @@ class _LikelihoodFunction: public BaseObj // 4 - category variables - _Parameter GetIthIndependent (long) const; // get the value of i-th independent variable + _Parameter GetIthIndependent (long, bool = true) const; // get the value of i-th independent variable const _String* GetIthIndependentName (long) const; // get the name of i-th independent variable _Parameter GetIthDependent (long) const; // get the value of i-th dependent variable void GetAllIndependent (_Matrix&) const; // store all indepenent values in a matrix diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index d245ad85c..6ea9d31ee 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -240,7 +240,8 @@ globalStartingPoint ("GLOBAL_STARTING_POINT"), minimumSitesForAutoParallelize ("MINIMUM_SITES_FOR_AUTO_PARALLELIZE"), userSuppliedVariableGrouping ("PARAMETER_GROUPING"), addLFSmoothing ("LF_SMOOTHING_SCALER"), - reduceLFSmoothing ("LF_SMOOTHING_REDUCTION"); + reduceLFSmoothing ("LF_SMOOTHING_REDUCTION"), + custom_lf_convergence_criterion ("LF_CONVERGENCE_CRITERION"); extern _String useNexusFileData, @@ -1188,10 +1189,10 @@ void _LikelihoodFunction::GetGlobalVars (_SimpleList& rec) const { } //_______________________________________________________________________________________ -_Parameter _LikelihoodFunction::GetIthIndependent (long index) const { +_Parameter _LikelihoodFunction::GetIthIndependent (long index, bool map) const { _Parameter return_value; - if (parameterValuesAndRanges) { + if (parameterValuesAndRanges && map) { return_value = (*parameterValuesAndRanges)(index,1); } else { return_value = ((_Constant*) LocateVar (indexInd.lData[index])->Compute())->Value(); @@ -3269,17 +3270,50 @@ void _LikelihoodFunction::InitMPIOptimizer (void) perNode = 1L; overFlow = 0L; } + + /** benchmark partition level calculations and distribute partitions according to estimated computational weight **/ 
+ + _Matrix partition_weights (theDataFilters.lLength, 2, false, true); + + if (theDataFilters.lLength > slaveNodes) { + + for (unsigned long i = 0UL; i < theDataFilters.lLength; i++) { + //fprintf (stderr, "\nComputing block %ld\n", i); + TimeDifference timer; + ComputeBlock(i); + _Parameter timeDiff = timer.TimeSinceStart(); + partition_weights.Store (i, 0, timeDiff); + } + + } else { + for (unsigned long i = 0UL; i < theDataFilters.lLength; i++) { + partition_weights.Store (i, 0, 1.); + } + } + + _Constant * sum = (_Constant*)partition_weights.Sum(); + partition_weights *= (slaveNodes/sum->Value()); + for (unsigned long i = 0UL; i < theDataFilters.lLength; i++) { + partition_weights.Store (i, 1, i); + } + + sum->SetValue(0.); + _Matrix * sorted_by_weight = (_Matrix*)partition_weights.SortMatrixOnColumn(sum); + DeleteObject (sum); + + //fprintf (stderr, "%s\n", ((_String*)sorted_by_weight->toStr())->getStr()); + MPISwitchNodesToMPIMode (slaveNodes); /*if (overFlow) { overFlow = slaveNodes/overFlow; }*/ - ReportWarning (_String ("InitMPIOptimizer with:") & (long)theDataFilters.lLength & " partitions on " & (long)slaveNodes + /*ReportWarning (_String ("InitMPIOptimizer with:") & (long)theDataFilters.lLength & " partitions on " & (long)slaveNodes & " MPI computational nodes. 
" & perNode & " partitions per node (+1 for " & overFlow & " nodes)"); - MPISwitchNodesToMPIMode (slaveNodes); + for (long i = 1L; i= (totalNodeCount - i)); + + ReportWarning (_String ("InitMPIOptimizer sending partitions ") & _String ((_String*)my_part.toStr()) & " to node " & i); + + + //fprintf (stderr, "%s\n", _String ((_String*)my_part.toStr()).getStr()); + + + _String sLF (8192L, true); + SerializeLF (sLF,_hyphyLFSerializeModeVanilla,&my_part); + sLF.Finalize (); + + MPISendString (sLF,i); + parallelOptimizerTasks.AppendNewInstance (new _SimpleList); + } + + + DeleteObject (sorted_by_weight); } @@ -3708,6 +3774,7 @@ _Matrix* _LikelihoodFunction::Optimize () { SetupLFCaches (); SetupCategoryCaches (); + computationalResults.Clear(); #ifdef __HYPHYMPI__ @@ -3718,6 +3785,7 @@ _Matrix* _LikelihoodFunction::Optimize () { } } + if (hyphyMPIOptimizerMode == _hyphyLFMPIModeNone) { #endif SetReferenceNodes(); @@ -3731,7 +3799,6 @@ _Matrix* _LikelihoodFunction::Optimize () { } #endif - computationalResults.Clear(); #if !defined __UNIX__ || defined __HEADLESS__ || defined __HYPHYQT__ || defined __HYPHY_GTK__ SetStatusBarValue (0,maxSoFar,0); @@ -3756,6 +3823,21 @@ DecideOnDivideBy (this); checkParameter (useLastResults,keepStartingPoint,0.0); checkParameter (allowBoundary,go2Bound,1L); checkParameter (useInitialDistanceGuess,precision,1.); + + _FString * custom_convergence_callback_name = + (_FString*) FetchObjectFromVariableByType(&custom_lf_convergence_criterion, STRING); + + long custom_convergence_callback = custom_convergence_callback_name ? 
FindBFFunctionName(*custom_convergence_callback_name->theString) : -1L; + + + if (custom_convergence_callback >= 0) { + if (GetBFFunctionArgumentCount (custom_convergence_callback) != 2L) { + WarnError ("Custom convergence criterion convergence function must have exactly two arguments: current log-L, and an dictionary with id -> value mapping"); + return new _Matrix(); + } + } + + //fprintf (stderr, "Custom convergence %s %ld\n", custom_convergence_callback_name->theString->sData,custom_convergence_callback); if (CheckEqual (keepStartingPoint,1.0)) { for (unsigned long i=0UL; i= 0) { + // custom callback + _List arguments; + _AssociativeList * parameters = new _AssociativeList; + + for (unsigned long i=0UL; i MStore (*GetIthIndependentName(i), new _Constant (GetIthIndependent(i, false))); + } + + arguments << new _Constant (maxSoFar); + arguments.AppendNewInstance(parameters); + + _PMathObj convegence_check = custom_convergence_callback_name->Call (&arguments, nil); + + if (convegence_check->Value () <= precision/termFactor) { + inCount ++; + } else { inCount = 0; + } + DeleteObject (convegence_check); + } else { + if (maxSoFar-lastMaxValue<=precision/termFactor) { + inCount++; + } else { + inCount = 0; + } } lastMaxValue = maxSoFar; From 166a159897ea0337640a39711d5239948a808dc3 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 17 Jul 2018 08:48:58 -0400 Subject: [PATCH 29/53] Adding aBSREL that tests for negative selection --- .../SelectionAnalyses/aBSREL-NS.bf | 846 ++++++++++++++++++ .../SelectionAnalyses/aBSREL.bf | 4 +- 2 files changed, 848 insertions(+), 2 deletions(-) create mode 100644 res/TemplateBatchFiles/SelectionAnalyses/aBSREL-NS.bf diff --git a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL-NS.bf b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL-NS.bf new file mode 100644 index 000000000..54f366dae --- /dev/null +++ b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL-NS.bf @@ -0,0 +1,846 @@ +RequireVersion ("2.3.12"); + 
+// Libraries, environment flags and analysis-wide constants for the aBSREL-NS script.
+LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4
+
+
+LoadFunctionLibrary("libv3/UtilityFunctions.bf");
+LoadFunctionLibrary("libv3/IOFunctions.bf");
+LoadFunctionLibrary ("libv3/models/codon/MG_REV.bf");
+LoadFunctionLibrary("libv3/tasks/estimators.bf");
+LoadFunctionLibrary("libv3/tasks/alignments.bf");
+LoadFunctionLibrary("libv3/tasks/mpi.bf");
+LoadFunctionLibrary("libv3/tasks/trees.bf");
+
+LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf");
+LoadFunctionLibrary("SelectionAnalyses/modules/selection_lib.ibf");
+LoadFunctionLibrary("libv3/models/codon/BS_REL.bf");
+LoadFunctionLibrary("libv3/convenience/math.bf");
+
+
+utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE);
+utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE);
+
+
+/*------------------------------------------------------------------------------*/
+
+// Largest number of rate classes a branch may receive: mixture models are defined
+// for 2..max_rate_classes and the step-up loop stops once a branch reaches this count.
+absrel.max_rate_classes = 5;
+// Cut-off applied to the Holm-Bonferroni corrected p-values when branches are reported.
+absrel.p_threshold = 0.05;
+
+// Labels used as fit / branch-attribute names when results are stored in absrel.json.
+absrel.MG94 = "MG94xREV with separate omega for each branch";
+absrel.baseline_mg94xrev = "Baseline MG94xREV";
+absrel.baseline_omega_ratio = "Baseline MG94xREV omega ratio";
+absrel.full_adaptive_model = "Full adaptive model";
+absrel.rate_classes = "Rate classes";
+absrel.per_branch_omega = "Per-branch omega";
+
+// Attribute names for the purifying-selection test: the positive-selection names
+// with a " (purifying selection)" suffix appended.
+terms.absrel.neg_lrt = terms.LRT + " (purifying selection)";
+terms.absrel.neg_uncorrected_pvalue = terms.json.uncorrected_pvalue + " (purifying selection)";
+terms.absrel.neg_corrected_pvalue = terms.json.corrected_pvalue + " (purifying selection)";
+
+// Value looked up for each attribute and passed as the display-order argument of the
+// selection.io.json_store_* calls further down in this script.
+absrel.display_orders = {terms.original_name: -1,
+                         terms.json.nucleotide_gtr: 0,
+                         absrel.baseline_mg94xrev: 1,
+                         absrel.baseline_omega_ratio: 1,
+                         absrel.full_adaptive_model: 2,
+                         absrel.rate_classes: 2,
+                         terms.json.rate_distribution: 3,
+                         terms.LRT: 4,
+                         terms.json.uncorrected_pvalue: 5,
+                         terms.json.corrected_pvalue: 6,
+                         terms.absrel.neg_lrt : 7,
+                         terms.absrel.neg_uncorrected_pvalue : 8,
+                         terms.absrel.neg_corrected_pvalue : 9
+                        };
+
+
+
+/*------------------------------------------------------------------------------*/
+
+
+// Text shown in the start-up banner; also stored under terms.json.analysis in the output JSON.
+absrel.analysis_description = {terms.io.info : "aBSREL-NS (Adaptive branch-site random effects likelihood negative selection)
+                            uses an adaptive random effects branch-site model framework
+                            to test whether each branch has evolved under **purifying** or **positive** selection,
+                            using a procedure which infers an optimal number of rate categories per branch.",
+                            terms.io.version : "2.0.1",
+                            terms.io.reference : "Less Is More: An Adaptive Branch-Site Random Effects Model for Efficient Detection of Episodic Diversifying Selection (2015). Mol Biol Evol 32 (5): 1342-1353",
+                            terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
+                            terms.io.contact : "spond@temple.edu",
+                            terms.io.requirements : "in-frame codon alignment and a phylogenetic tree"
+                           };
+
+
+io.DisplayAnalysisBanner ( absrel.analysis_description );
+
+// Skeleton of the JSON report; the sections are filled in as the analysis proceeds.
+absrel.json = {
+    terms.json.analysis: absrel.analysis_description,
+    terms.json.input: {},
+    terms.json.fits : {},
+    terms.json.timers : {},
+    terms.json.test_results : {}
+};
+
+
+selection.io.startTimer (absrel.json [terms.json.timers], "Overall", 0);
+
+// load_file comes from shared-load-file.bf (not shown here). The code that follows reads
+// absrel.partitions_and_trees, absrel.selected_branches, absrel.codon_data_info,
+// absrel.filter_names and absrel.trees -- presumably all set by this call; verify there.
+namespace absrel {
+    LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf");
+    load_file ("absrel");
+}
+
+io.CheckAssertion("utility.Array1D (absrel.partitions_and_trees) == 1", "aBSREL only works on a single partition dataset");
+
+
+// For every partition keep only the branches labelled for testing and report them.
+// The progress message uses the same 'absrel' analysis tag as every other
+// io.ReportProgressMessageMD call in this script.
+utility.ForEachPair (absrel.selected_branches, "_partition_", "_selection_",
+    "_selection_ = utility.Filter (_selection_, '_value_', '_value_ == terms.tree_attributes.test');
+     io.ReportProgressMessageMD('absrel', 'selector', '* Selected ' + Abs(_selection_) + ' branches for testing: \\\`' + Join (', ',utility.Keys(_selection_)) + '\\\`')");
+
+
+/*
+absrel.srv = io.SelectAnOption ({
+ {"Yes", "Both synonymous and non-synonymous rates vary in a branch-site fashion (~5x more computationally expensive)"} {"No", "[Default] Synonymous rates vary from branch to branch, while the dN/dS ratio varies among branch-site combinations"}
+ }, "Enable synonymous rate variation?");
+*/
+
+selection.io.startTimer (absrel.json [terms.json.timers], "Preliminary model fitting", 1);
+
+
+// doGTR is provided by the shared module loaded above; absrel.gtr_results is read
+// right after this call -- presumably produced by it.
+namespace absrel {
+    doGTR ("absrel");
+}
+
+selection.io.stopTimer (absrel.json [terms.json.timers], "Preliminary model fitting");
+selection.io.startTimer (absrel.json [terms.json.timers], "Baseline model fitting", 2);
+
+estimators.fixSubsetOfEstimates(absrel.gtr_results, absrel.gtr_results[terms.global]);
+
+io.ReportProgressMessageMD ("absrel", "base", "Fitting the baseline model with a single dN/dS class per branch, and no site-to-site variation. ");
+
+// Baseline MG94xREV fit with local (per-branch) parameters, started from the GTR results.
+// The likelihood function and model objects are retained because later stages reuse them.
+absrel.base.results = estimators.FitMGREV (absrel.filter_names, absrel.trees, absrel.codon_data_info [terms.code], {
+    terms.run_options.model_type: terms.local,
+    terms.run_options.retain_lf_object: TRUE,
+    terms.run_options.retain_model_object : TRUE
+}, absrel.gtr_results);
+
+io.ReportProgressMessageMD("absrel", "base", "* " + selection.io.report_fit (absrel.base.results, 0, absrel.codon_data_info[terms.data.sample_size]));
+
+
+
+
+// Per-branch lengths and omega values extracted from the baseline fit (partition 0).
+absrel.baseline.branch_lengths = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "selection.io.branch.length");
+absrel.baseline.omegas = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "absrel.local.omega");
+
+absrel.omega_stats = math.GatherDescriptiveStats (utility.Map (utility.Values (absrel.baseline.omegas), "_value_", "0+_value_"));
+
+io.ReportProgressMessageMD("absrel", "base", "* Branch-level `terms.parameters.omega_ratio` distribution has median " +
+    Format (absrel.omega_stats[terms.math.median], 5,2) + ", and 95% of the weight in " + Format (absrel.omega_stats[terms.math._2.5], 5,2) + " - " + Format (absrel.omega_stats[terms.math._97.5], 5,2));
+
+
+
+selection.io.stopTimer (absrel.json [terms.json.timers], "Baseline model fitting");
+
+// Build absrel.names_sorted_by_length: branch names ordered via their baseline lengths.
+// TODO -- there's gotta be a better way to do this
+absrel.branch_count = Abs (absrel.baseline.branch_lengths);
+// One row per branch: column 0 = baseline branch length, column 1 = index into absrel.bnames.
+absrel.sorted_branch_lengths = {absrel.branch_count, 2};
+absrel.bnames = utility.Keys (absrel.baseline.branch_lengths);
+utility.ForEachPair (absrel.bnames, "_index_", "_value_",
+    '
+        absrel.sorted_branch_lengths [_index_[1]][0] = absrel.baseline.branch_lengths[_value_];
+        absrel.sorted_branch_lengths [_index_[1]][1] = _index_[1];
+    ');
+// NOTE(review): `% 0` presumably sorts the rows on column 0 (the length) -- confirm against the HBL docs.
+absrel.sorted_branch_lengths = absrel.sorted_branch_lengths % 0;
+absrel.names_sorted_by_length = {absrel.branch_count, 1};
+
+// Walk the sorted matrix from its last row to its first, so the names end up in the
+// reverse of the sorted order.
+for (absrel.i = absrel.branch_count - 1; absrel.i >= 0; absrel.i = absrel.i - 1) {
+    absrel.names_sorted_by_length [absrel.branch_count - 1 - absrel.i] = absrel.bnames [absrel.sorted_branch_lengths[absrel.i][1]];
+}
+
+// Summary statistics of the per-branch omega values, stored alongside the baseline fit.
+absrel.distribution_for_json = {absrel.per_branch_omega :
+                                    {terms.math.mean : absrel.omega_stats[terms.math.mean],
+                                     terms.math.median : absrel.omega_stats[terms.math.median],
+                                     terms.math._2.5 : absrel.omega_stats[terms.math._2.5],
+                                     terms.math._97.5 : absrel.omega_stats[terms.math._97.5]}
+                               };
+
+
+
+//Store MG94 to JSON
+selection.io.json_store_lf_GTR_MG94 (absrel.json,
+                                     absrel.baseline_mg94xrev,
+                                     absrel.base.results[terms.fit.log_likelihood],
+                                     absrel.base.results[terms.parameters] ,
+                                     absrel.codon_data_info[terms.data.sample_size],
+                                     absrel.distribution_for_json,
+                                     (absrel.base.results[terms.efv_estimate])["VALUEINDEXORDER"][0],
+                                     absrel.display_orders[absrel.baseline_mg94xrev]);
+
+
+// Baseline branch lengths and per-branch omega ratios as branch attributes (partition 0).
+selection.io.json_store_branch_attribute(absrel.json, absrel.baseline_mg94xrev, terms.branch_length, absrel.display_orders[absrel.baseline_mg94xrev],
+                                         0,
+                                         absrel.baseline.branch_lengths);
+
+selection.io.json_store_branch_attribute(absrel.json, absrel.baseline_omega_ratio, terms.json.branch_label, absrel.display_orders[absrel.baseline_omega_ratio],
+                                         0,
+                                         absrel.baseline.omegas);
+
+
+
+// define BS-REL models with up to N rate classes
+
+// Indexed by number of rate classes; filled in by the code that follows.
+absrel.model_defintions = {};
+
+// Reuse the likelihood function retained from the baseline fit. absrel.constrain_everything,
+// absrel.get_tree_name and absrel.get_model_id are defined elsewhere in this script (not shown here).
+absrel.likelihood_function_id = absrel.base.results [terms.likelihood_function];
+absrel.constrain_everything (absrel.likelihood_function_id);
+absrel.tree_id = absrel.get_tree_name (absrel.likelihood_function_id);
+absrel.model_id = absrel.get_model_id (absrel.likelihood_function_id);
+absrel.MG94.model = (absrel.base.results[terms.model])[(utility.Keys (absrel.base.results[terms.model]))[0]];
+
+// Nucleotide rate parameters of the baseline model seed the set of parameters that is
+// released again for the full adaptive fit.
+// NOTE(review): `dict - key` presumably drops the A<->G entry from absrel.temp -- confirm.
+absrel.temp = model.GetParameters_RegExp (absrel.MG94.model, terms.nucleotideRate("[ACGT]","[ACGT]"));
+absrel.temp - terms.nucleotideRate("A","G");
+absrel.full_model_parameters = {};
+utility.AddToSet (absrel.full_model_parameters, absrel.temp);
+
+selection.io.startTimer (absrel.json [terms.json.timers], "Complexity analysis", 3);
+
+// model id -> model object, handed to estimators.FitExistingLF for the full fit.
+absrel.model_object_map = {
+    absrel.MG94.model [terms.id] : absrel.MG94.model
+};
+
+// One model per class count: index 1 is the baseline MG94 model, 2..max are BS-REL mixtures
+// whose nucleotide rates are bound to those of the baseline model.
+absrel.model_defintions [1] = absrel.MG94.model;
+for (absrel.i = 2; absrel.i <= absrel.max_rate_classes; absrel.i += 1) {
+    absrel.model_defintions [absrel.i] = model.generic.DefineMixtureModel("absrel.BS_REL.ModelDescription",
+        "absrel.model." + absrel.i, {
+            "0": parameters.Quote(terms.local),
+            "1": absrel.codon_data_info[terms.code],
+            "2": parameters.Quote (absrel.i) // the number of rate classes
+        },
+        absrel.filter_names,
+        None);
+
+    absrel.model_object_map [(absrel.model_defintions [absrel.i])[terms.id]] = absrel.model_defintions [absrel.i];
+    models.BindGlobalParameters ({"1" : absrel.model_defintions [absrel.i], "0" : absrel.MG94.model}, terms.nucleotideRate("[ACGT]","[ACGT]"));
+}
+
+io.ReportProgressMessageMD ("absrel", "complexity", "Determining the optimal number of rate classes per branch using a step up procedure");
+
+
+// Running parameter count and best information-criterion score of the model accepted so far.
+absrel.current_parameter_count = absrel.base.results[terms.parameters];
+absrel.current_best_score = math.GetIC (absrel.base.results[terms.fit.log_likelihood], absrel.current_parameter_count, absrel.codon_data_info[terms.data.sample_size]);
+// branch name -> number of rate classes selected for that branch
+absrel.branch.complexity = {};
+
+utility.ToggleEnvVariable ("USE_LAST_RESULTS", TRUE);
+
+absrel.complexity_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: {
+        "0": 35,
+        "1": 10,
+        "2": 10,
+        "3": 20,
+        "4": 15,
+        "5": 15,
+        "6": 15
+    },
+    terms.number_precision : 2};
+
+fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Length", "Rates", "Max. dN/dS", "Log(L)", "AIC-c", "Best AIC-c so far"}}, absrel.complexity_table.settings));
+absrel.complexity_table.settings [terms.table_options.header] = FALSE;
+
+// Step-up search: visit branches in the order of absrel.names_sorted_by_length and keep
+// adding rate classes to a branch while the information criterion improves.
+for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch_id += 1) {
+
+    absrel.current_branch = absrel.names_sorted_by_length[absrel.branch_id];
+    absrel.current_branch_estimates = absrel.GetBranchEstimates (absrel.MG94.model, absrel.tree_id, absrel.current_branch);
+    absrel.report.row = {};
+    absrel.report.row [0] = absrel.current_branch;
+    absrel.report.row [1] = absrel.baseline.branch_lengths[absrel.current_branch];
+
+    absrel.current_rate_count = 2;
+    // TRUE only when the most recent step was rejected, i.e. the branch has to be rolled
+    // back to the previously accepted model. Comparing scores after the loop cannot tell
+    // this apart from "accepted at the class cap", where test score == best score.
+    absrel.last_step_rejected = FALSE;
+
+    while (TRUE) {
+        absrel.report.row [2] = Format(absrel.current_rate_count,0,0);
+        model.ApplyToBranch ((absrel.model_defintions [absrel.current_rate_count])[terms.id], absrel.tree_id, absrel.current_branch);
+        parameters.SetValues (absrel.current_branch_estimates);
+
+        absrel.initial_guess = absrel.ComputeOnAGrid (absrel.PopulateInitialGrid (absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch, absrel.current_branch_estimates), absrel.likelihood_function_id);
+
+        absrel.SetBranchConstraints (absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch);
+
+        Optimize (absrel.stepup.mles, ^absrel.likelihood_function_id);
+        // Each additional rate class is charged two extra parameters.
+        absrel.current_test_score = math.GetIC (absrel.stepup.mles[1][0], absrel.current_parameter_count + 2, absrel.codon_data_info[terms.data.sample_size]);
+
+        absrel.provisional_estimates = absrel.GetBranchEstimates(absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch);
+        absrel.dn_ds.distro = absrel.GetRateDistribution (absrel.provisional_estimates);
+        // The last row of the distribution is reported as the maximal dN/dS class (rate, weight).
+        if (absrel.dn_ds.distro[absrel.current_rate_count-1][0] < 1000) {
+            absrel.report.row [3] = Format (absrel.dn_ds.distro[absrel.current_rate_count-1][0],5,2) + " (" + Format (absrel.dn_ds.distro[absrel.current_rate_count-1][1]*100,5,2) + "%)";
+        } else {
+            absrel.report.row [3] = ">1000 (" + Format (absrel.dn_ds.distro[absrel.current_rate_count-1][1]*100,5,2) + "%)";
+        }
+        absrel.report.row [4] = absrel.stepup.mles[1][0];
+        absrel.report.row [5] = absrel.current_test_score;
+
+
+        if (absrel.current_test_score < absrel.current_best_score) {
+            // Accept the extra class.
+            absrel.current_branch_estimates = absrel.provisional_estimates;
+            absrel.current_best_score = absrel.current_test_score;
+            absrel.report.row [6] = absrel.current_test_score;
+            fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.complexity_table.settings));
+
+            // The accepted class is now part of the model, so its two parameters are
+            // counted even when the cap stops the search right here.
+            absrel.current_parameter_count += 2;
+
+            if (absrel.current_rate_count >= absrel.max_rate_classes) {
+                break;
+            }
+            absrel.current_rate_count += 1;
+        } else {
+            absrel.report.row [6] = absrel.current_best_score;
+            fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.complexity_table.settings));
+            absrel.last_step_rejected = TRUE;
+            break;
+        }
+    }
+
+    if (absrel.last_step_rejected) { // reset the model
+        absrel.current_rate_count = absrel.current_rate_count - 1;
+        if ( absrel.current_rate_count >= 2) {
+            model.ApplyToBranch ((absrel.model_defintions [absrel.current_rate_count])[terms.id], absrel.tree_id, absrel.current_branch);
+            absrel.SetBranchConstraints (absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch);
+        } else {
+            model.ApplyToBranch (absrel.MG94.model[terms.id], absrel.tree_id, absrel.current_branch);
+        }
+        parameters.SetValues (absrel.current_branch_estimates);
+    }
+
+    // Remember this branch's parameter ids so they can be released for the full fit.
+    utility.AddToSet (absrel.full_model_parameters,
+                      utility.Map (absrel.current_branch_estimates, "_parameter_", "_parameter_[terms.id]"));
+
+
+
+    absrel.branch.complexity [absrel.current_branch] = absrel.current_rate_count;
+    absrel.constrain_everything (absrel.likelihood_function_id);
+
+}
+
+selection.io.json_store_branch_attribute(absrel.json, absrel.rate_classes, terms.json.branch_label, absrel.display_orders[absrel.rate_classes],
+                                         0,
+                                         absrel.branch.complexity);
+
+
+io.ReportProgressMessageMD ("absrel", "complexity-summary", "Rate class analyses summary");
+utility.ForEachPair (utility.BinByValue (absrel.branch.complexity), "_rates_", "_branches_",
+    "io.ReportProgressMessageMD('absrel', 'complexity-summary', '* ' + Abs(_branches_) + ' branches with **' + _rates_ + '** rate classes')");
+
+selection.io.stopTimer (absrel.json [terms.json.timers], "Complexity analysis");
+
+
+// Full adaptive model: release the collected parameters and re-optimize everything jointly.
+selection.io.startTimer (absrel.json [terms.json.timers], "Full adaptive model fitting", 4);
+io.ReportProgressMessageMD ("absrel", "Full adaptive model", "Improving parameter estimates of the adaptive rate class model");
+parameters.RemoveConstraint (utility.Keys (absrel.full_model_parameters));
+
+
+absrel.full_model.fit = estimators.FitExistingLF (absrel.likelihood_function_id,absrel.model_object_map);
+
+// Snapshot of the fitted state; it is restored after every constrained (null) optimization below.
+absrel.full_model.mle_set = estimators.TakeLFStateSnapshot (absrel.likelihood_function_id);
+
+io.ReportProgressMessageMD("absrel", "Full adaptive model", "* " + selection.io.report_fit (absrel.full_model.fit, 9, absrel.codon_data_info[terms.data.sample_size]));
+
+selection.io.stopTimer (absrel.json [terms.json.timers], "Full adaptive model fitting");
+
+selection.io.json_store_branch_attribute(absrel.json, absrel.full_adaptive_model, terms.branch_length, absrel.display_orders[absrel.full_adaptive_model],
+                                         0,
+                                         selection.io.extract_branch_info((absrel.full_model.fit[terms.branch_length])[0], "selection.io.branch.length"));
+
+// branch name -> rate distribution matrix (column 0 = dN/dS, column 1 = weight)
+absrel.branch.rate_distributions = selection.io.extract_branch_info((absrel.full_model.fit[terms.branch_length])[0], "absrel.GetRateDistribution");
+
+
+selection.io.json_store_branch_attribute(absrel.json, terms.json.rate_distribution, terms.json.branch_label, absrel.display_orders[terms.json.rate_distribution],
+                                         0,
+                                         absrel.branch.rate_distributions);
+
+
+
+selection.io.json_store_lf (absrel.json,
+                            absrel.full_adaptive_model,
+                            absrel.full_model.fit[terms.fit.log_likelihood],
+                            absrel.full_model.fit[terms.parameters] + 9 ,
+                            absrel.codon_data_info[terms.data.sample_size],
+                            {},
+                            absrel.display_orders[absrel.full_adaptive_model]);
+
+/***
+    Testing individual branches for selection
+***/
+
+selection.io.startTimer (absrel.json [terms.json.timers], "Testing for selection", 5);
+io.ReportProgressMessageMD ("absrel", "testing", "Testing selected branches for selection");
+
+absrel.testing_table.settings = {terms.table_options.header : TRUE, terms.table_options.column_widths: {
+        "0": 35,
+        "1": 10,
+        "2": 20,
+        "3": 12,
+        "4": 20,
+        "5": 20
+    },
+    terms.number_precision : 2};
+
+fprintf (stdout, "\n", io.FormatTableRow ({{"Branch", "Rates", "dN/dS range", "Sel. mode", "Test LRT", "Uncorrected p-value"}}, absrel.testing_table.settings));
+
+absrel.testing_table.settings [terms.table_options.header] = FALSE;
+// Results of the positive-selection test, keyed by branch name.
+absrel.branch.p_values = {};
+absrel.branch.lrt = {};
+
+// Results of the purifying-selection test, keyed by branch name.
+absrel.branch.neg.p_values = {};
+absrel.branch.neg.lrt = {};
+
+
+for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch_id += 1) {
+    absrel.current_branch = absrel.names_sorted_by_length[absrel.branch_id];
+
+    absrel.report.row = {};
+
+    absrel.report.row [0] = absrel.current_branch;
+    absrel.report.row [1] = Format (absrel.branch.complexity[absrel.current_branch], 3, 0);
+    absrel.dn_ds.distro = absrel.branch.rate_distributions [absrel.current_branch];
+
+    // "dN/dS range" column: smallest class (row 0) : largest class (last row).
+    // absrel.branch.complexity is keyed by branch name, so the row is addressed directly.
+    if (absrel.dn_ds.distro[0][0] < 1000) {
+        absrel.report.row [2] = Format (absrel.dn_ds.distro[0][0],5,2);
+    } else {
+        absrel.report.row [2] = ">1000";
+    }
+
+    if (absrel.dn_ds.distro[absrel.branch.complexity[absrel.current_branch]-1][0] < 1000) {
+        absrel.report.row [2] += " : " + Format (absrel.dn_ds.distro[absrel.branch.complexity[absrel.current_branch]-1][0],5,2);
+    } else {
+        absrel.report.row [2] += " : >1000";
+    }
+
+    if ((absrel.selected_branches[0])[absrel.current_branch] == terms.tree_attributes.test) {
+        // Positive selection: only meaningful when the largest class has dN/dS > 1.
+        if (absrel.dn_ds.distro [absrel.branch.complexity[absrel.current_branch]-1][0] > 1) {
+            absrel.branch.ConstrainForTesting (absrel.model_defintions [absrel.branch.complexity[absrel.current_branch]], absrel.tree_id, absrel.current_branch);
+            Optimize (absrel.null.mles, ^absrel.likelihood_function_id);
+            absrel.branch.test = absrel.ComputeLRT ( absrel.full_model.fit[terms.fit.log_likelihood], absrel.null.mles[1][0], 1);
+            estimators.RestoreLFStateFromSnapshot (absrel.likelihood_function_id, absrel.full_model.mle_set);
+        } else {
+            absrel.branch.test = {terms.LRT : 0, terms.p_value : 1};
+        }
+        absrel.branch.p_values [ absrel.current_branch ] = absrel.branch.test [terms.p_value];
+        absrel.branch.lrt [absrel.current_branch] = absrel.branch.test [terms.LRT];
+        absrel.report.row [3] = "Positive";
+        absrel.report.row [4] = absrel.branch.test [terms.LRT];
+        absrel.report.row [5] = Format (absrel.branch.test [terms.p_value], 8, 5);
+        // The "Positive" row is printed here; the "Negative" row reuses the same
+        // row object and is printed by the shared fprintf at the end of the loop body.
+        fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.testing_table.settings));
+
+        // Purifying selection: only meaningful when the smallest class has dN/dS < 1.
+        if (absrel.dn_ds.distro [0][0] < 1) {
+            absrel.df = absrel.branch.ConstrainForTestingNS (absrel.model_defintions [absrel.branch.complexity[absrel.current_branch]], absrel.tree_id, absrel.current_branch, absrel.dn_ds.distro);
+            Optimize (absrel.null.mles, ^absrel.likelihood_function_id);
+            // NOTE(review): `lfe` is not read anywhere in the visible part of this script;
+            // this Export looks like a debugging leftover -- confirm before removing.
+            Export (lfe, ^absrel.likelihood_function_id);
+            absrel.branch.test = absrel.ComputeLRT ( absrel.full_model.fit[terms.fit.log_likelihood], absrel.null.mles[1][0], absrel.df);
+            estimators.RestoreLFStateFromSnapshot (absrel.likelihood_function_id, absrel.full_model.mle_set);
+        } else {
+            absrel.branch.test = {terms.LRT : 0, terms.p_value : 1};
+        }
+        absrel.branch.neg.p_values [ absrel.current_branch ] = absrel.branch.test [terms.p_value];
+        absrel.branch.neg.lrt [absrel.current_branch] = absrel.branch.test [terms.LRT];
+        absrel.report.row [3] = "Negative";
+        absrel.report.row [4] = absrel.branch.test [terms.LRT];
+        absrel.report.row [5] = Format (absrel.branch.test [terms.p_value], 8, 5);
+
+    } else {
+        // Untested branches are recorded as None for both tests, so the
+        // "None!=_value_" filters below treat the two result sets the same way.
+        absrel.branch.lrt [absrel.current_branch] = None;
+        absrel.branch.p_values [absrel.current_branch] = None;
+        absrel.branch.neg.lrt [absrel.current_branch] = None;
+        absrel.branch.neg.p_values [absrel.current_branch] = None;
+        absrel.report.row [3] = "";
+        absrel.report.row [4] = "Not selected";
+        absrel.report.row [5] = "for testing";
+    }
+
+    fprintf (stdout, io.FormatTableRow (absrel.report.row, absrel.testing_table.settings));
+
+}
+
+// Positive-selection results: raw LRT / p-values, then Holm-Bonferroni corrected p-values.
+selection.io.json_store_branch_attribute(absrel.json, terms.LRT, terms.json.branch_label, absrel.display_orders[terms.LRT],
+                                         0,
+                                         absrel.branch.lrt);
+
+selection.io.json_store_branch_attribute(absrel.json, terms.json.uncorrected_pvalue, terms.json.branch_label, absrel.display_orders[terms.json.uncorrected_pvalue],
+                                         0,
+                                         absrel.branch.p_values);
+
+absrel.branch.p_values.corrected = math.HolmBonferroniCorrection (absrel.branch.p_values);
+
+selection.io.json_store_branch_attribute (absrel.json, terms.json.corrected_pvalue, terms.json.branch_label, absrel.display_orders[terms.json.corrected_pvalue],
+                                          0,
+                                          absrel.branch.p_values.corrected);
+
+absrel.test.all = utility.Filter (absrel.branch.p_values.corrected, "_value_", "None!=_value_");
+absrel.test.positive = utility.Filter (absrel.test.all, "_value_", "_value_<=absrel.p_threshold");
+
+// Purifying-selection results, stored the same way under the "(purifying selection)" names.
+selection.io.json_store_branch_attribute(absrel.json, terms.absrel.neg_lrt, terms.json.branch_label, absrel.display_orders[terms.absrel.neg_lrt],
+                                         0,
+                                         absrel.branch.neg.lrt);
+
+selection.io.json_store_branch_attribute(absrel.json, terms.absrel.neg_uncorrected_pvalue, terms.json.branch_label, absrel.display_orders[terms.absrel.neg_uncorrected_pvalue],
+                                         0,
+                                         absrel.branch.neg.p_values);
+
+absrel.branch.p_values.neg.corrected = math.HolmBonferroniCorrection (absrel.branch.neg.p_values);
+
+selection.io.json_store_branch_attribute (absrel.json, terms.absrel.neg_corrected_pvalue, terms.json.branch_label, absrel.display_orders[terms.absrel.neg_corrected_pvalue],
+                                          0,
+                                          absrel.branch.p_values.neg.corrected);
+
+absrel.test.neg.all = utility.Filter (absrel.branch.p_values.neg.corrected, "_value_", "None!=_value_");
+// Branches whose corrected purifying-selection p-value is at or below the threshold.
+absrel.test.negative = utility.Filter (absrel.test.neg.all, "_value_", "_value_<=absrel.p_threshold");
+
+
+selection.io.stopTimer (absrel.json [terms.json.timers], "Testing for selection");
+
+
+
+console.log ("----\n### Adaptive branch site random effects likelihood test ");
+console.log ( "Likelihood ratio test for episodic diversifying positive selection at Holm-Bonferroni corrected _p = " + Format (absrel.p_threshold, 8, 4) + "_ found **" + Abs(absrel.test.positive) + "** branches under **positive** selection among **"+ Abs (absrel.test.all) + "** tested.\n");
+utility.ForEachPair (absrel.test.positive, "_name_", "_p_",
+    '
+        console.log ("* " + _name_ + ", p-value = " + Format (_p_, 8,5));
+    ');
+
+// NOTE(review): the "tested" count below is taken from absrel.test.all (the positive-test
+// set) rather than absrel.test.neg.all -- confirm this is intended.
+console.log ( "\nLikelihood ratio test for episodic purifying selection at Holm-Bonferroni corrected _p = " + Format (absrel.p_threshold, 8, 4) + "_ found **" + Abs(absrel.test.negative) + "** branches under **negative** selection among **"+ Abs (absrel.test.all) + "** tested.\n");
+utility.ForEachPair (absrel.test.negative, "_name_", "_p_",
+    '
+        console.log ("* " + _name_ + ", p-value = " + Format (_p_, 8,5));
+    ');
+
+
+// Only the positive-selection counts are written to the test_results section.
+absrel.json [terms.json.test_results] = {
+    terms.json.pvalue_threshold : absrel.p_threshold,
+    terms.json.tested : Abs (absrel.test.all),
+    terms.json.positive : Abs (absrel.test.positive)
+};
+
+/***
+    Cleanup
+***/
+
+selection.io.stopTimer (absrel.json [terms.json.timers], "Overall");
+utility.ToggleEnvVariable ("USE_LAST_RESULTS", None);
+io.SpoolJSON (absrel.json, absrel.codon_data_info [terms.json.json]);
+
+
+return absrel.json;
+
+//------------------------------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------------------------------
+
+/**
+ * Likelihood ratio test between an alternative and a null fit.
+ * @param ha log-likelihood of the alternative model
+ * @param h0 log-likelihood of the null model
+ * @param df number of constrained parameters as counted by the caller
+ * @returns a dictionary with terms.LRT = 2*(ha-h0) and terms.p_value.
+ *          For df == 1 the p-value is (1 - 0.4*CChi2(lrt,1) - 0.6*CChi2(lrt,2)) * 0.5;
+ *          otherwise it is 1 - CChi2(lrt, 2*df).
+ *          NOTE(review): CChi2 is presumably the chi-squared CDF -- confirm in the HBL docs.
+ */
+lfunction absrel.ComputeLRT (ha, h0,
 df) {
+    lrt = 2*(ha-h0);
+
+    if (df == 1) {
+        return {utility.getGlobalValue("terms.LRT") : lrt,
+                utility.getGlobalValue("terms.p_value") : (1-0.4*CChi2 (lrt,1)-0.6* CChi2 (lrt,2))*.5};
+    }
+
+    return {utility.getGlobalValue("terms.LRT") : lrt,
+            utility.getGlobalValue("terms.p_value") : (1-CChi2 (lrt,2*df))};
+
+}
+
+
+/**
+ * Collects the current values of a branch's local model parameters.
+ * @param model     model dictionary; its terms.parameters / terms.local entry maps description -> parameter id
+ * @param tree_id   name of the tree
+ * @param branch_id name of the branch
+ * @returns description -> {terms.fit.MLE : value of `tree.branch.id`, terms.id : "tree.branch.id"}
+ */
+lfunction absrel.GetBranchEstimates (model, tree_id, branch_id) {
+    values = {};
+    utility.ForEachPair ((model[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")],
+        "_description_",
+        "_id_",
+        "`&values`[_description_] = {
+            terms.fit.MLE : Eval (`&tree_id` + '.' + `&branch_id` + '.' + _id_),
+            terms.id : `&tree_id` + '.' + `&branch_id` + '.' + _id_
+        };");
+
+    return values;
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+
+/**
+ * Turns a branch's local parameter estimates into a rate distribution matrix.
+ * @param local_parameters description -> {terms.fit.MLE, ...}, as built by absrel.GetBranchEstimates
+ * @returns with more than one component: the stick-breaking distribution built from the
+ *          omega and auxiliary-weight estimates, passed through `% 0`;
+ *          otherwise the single row {{nonsynonymous/synonymous ratio, 1}}.
+ * NOTE(review): `$2` is presumably integer division by two and `% 0` a sort on column 0 -- confirm.
+ */
+lfunction absrel.GetRateDistribution (local_parameters) {
+
+    result = None;
+    component_count = (Abs (local_parameters))$2;
+    if (component_count > 1) {
+        rates = {"rates" : {component_count,1}, "weights" : {component_count,1}};
+        // Classes 1..component_count-1 have both an omega and an auxiliary weight;
+        // the last class only contributes an omega (its weight slot is left as allocated).
+        for (k = 1; k < component_count; k+=1) {
+            (rates["rates"])[k-1] = (local_parameters[terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), k)])[utility.getGlobalValue ("terms.fit.MLE")];
+            (rates["weights"])[k-1] = (local_parameters[terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), k)])[utility.getGlobalValue ("terms.fit.MLE")];
+        }
+        (rates["rates"])[component_count-1] = (local_parameters[terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component_count)])[utility.getGlobalValue ("terms.fit.MLE")];
+        result = parameters.GetStickBreakingDistribution (rates) % 0;
+    } else {
+        result = {{parameters.NormalizeRatio (
+                        (local_parameters[utility.getGlobalValue('terms.parameters.nonsynonymous_rate')])[utility.getGlobalValue ("terms.fit.MLE")],
+                        (local_parameters[utility.getGlobalValue('terms.parameters.synonymous_rate')])[utility.getGlobalValue ("terms.fit.MLE")]
+                   ),
+                   1}};
+    }
+    return result;
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+
+/**
+ * Sets the ranges of a branch's parameters under a mixture model:
+ * synonymous rate in [0, 50], omega classes 1..components-1 in terms.range01,
+ * and the last omega class in terms.range_any.
+ */
+lfunction absrel.SetBranchConstraints (model, tree_id, branch_id) {
+    component_count = model[utility.getGlobalValue ("terms.model.components")];
+    local_parameters = (model[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")];
+    parameters.SetRange ("`tree_id`.`branch_id`.`local_parameters[utility.getGlobalValue ('terms.parameters.synonymous_rate')]`", {utility.getGlobalValue ("terms.lower_bound") : "0", utility.getGlobalValue ("terms.upper_bound") : "50"});
+    for (k = 1; k < component_count; k+=1) {
+        omega_k = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), k);
+        parameters.SetRange ("`tree_id`.`branch_id`.`local_parameters[omega_k]`", utility.getGlobalValue ("terms.range01"));
+    }
+    // After the loop k == component_count, i.e. this addresses the last omega class.
+    omega_k = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), k);
+    parameters.SetRange ("`tree_id`.`branch_id`.`local_parameters[omega_k]`", utility.getGlobalValue ("terms.range_any"));
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+
+/**
+ * Null model for the positive-selection test on one branch: with several components the
+ * last omega class is constrained to 1; otherwise the nonsynonymous rate is constrained
+ * to equal the synonymous rate.
+ */
+lfunction absrel.branch.ConstrainForTesting (model, tree_id, branch_id) {
+    component_count = model[utility.getGlobalValue ("terms.model.components")];
+    local_parameters = (model[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")];
+    if (component_count > 1) {
+        omega_k = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component_count);
+        parameters.SetConstraint ("`tree_id`.`branch_id`.`local_parameters[omega_k]`", "1", '');
+    } else {
+        parameters.SetConstraint (
+            "`tree_id`.`branch_id`.`local_parameters[utility.getGlobalValue ('terms.parameters.nonsynonymous_rate')]`",
+            "`tree_id`.`branch_id`.`local_parameters[utility.getGlobalValue ('terms.parameters.synonymous_rate')]`", '');
+    }
+
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+
+/**
+ * Null model for the purifying-selection test on one branch.
+ * With several components, walks the rows of `distribution` from row 0 and constrains
+ * omega class (row + 1) to 1 until the first row whose rate is >= 1; with a single
+ * component the nonsynonymous rate is constrained to equal the synonymous rate.
+ * @param distribution rate distribution matrix of the branch (column 0 = rate)
+ * @returns the number of constrained omega classes (1 in the single-component case)
+ * NOTE(review): this assumes row i of `distribution` corresponds to omega class i+1 --
+ * verify, since absrel.GetRateDistribution applies `% 0` to the matrix.
+ */
+lfunction absrel.branch.ConstrainForTestingNS (model, tree_id, branch_id, distribution) {
+
+    component_count = model[utility.getGlobalValue ("terms.model.components")];
+    local_parameters = (model[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")];
+    if (component_count > 1) {
+        for (df = 0; df < component_count; df += 1) {
+            if (distribution[df][0] >= 1) {
+                break;
+            }
+            omega_k = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), (df + 1));
+            parameters.SetConstraint ("`tree_id`.`branch_id`.`local_parameters[omega_k]`", "1", '');
+        }
+
+    } else {
+        parameters.SetConstraint (
+            "`tree_id`.`branch_id`.`local_parameters[utility.getGlobalValue ('terms.parameters.nonsynonymous_rate')]`",
+            "`tree_id`.`branch_id`.`local_parameters[utility.getGlobalValue ('terms.parameters.synonymous_rate')]`", '');
+
+        df = 1;
+    }
+    return df;
+
+}
+
+//------------------------------------------------------------------------------------------------------------------------
+
+/**
+ * Builds the grid of starting values searched before a branch is optimized under `model`.
+ * @param current_estimates description -> {terms.fit.MLE, terms.id} for the branch
+ * @returns a dictionary "tree.branch.parameter" -> column of candidate values.
+ *          Two components: grids for the synonymous rate, both omegas and the mixture weight.
+ *          More components: grids only for the newly added (last) omega and mixture weight;
+ *          the previous omega is reset to 0.8 first if its estimate exceeds 1.
+ */
+lfunction absrel.PopulateInitialGrid (model, tree_id, branch_id, current_estimates) {
+
+    component_count = model[utility.getGlobalValue ("terms.model.components")];
+    local_parameters = (model[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")];
+
+    grid = {};
+
+    if (component_count == 2) {
+        omega1 = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), 1);
+        omega2 = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), 2);
+        mixture1 = terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), 1 );
+
+        grid ["`tree_id`.`branch_id`.`local_parameters[^'terms.parameters.synonymous_rate']`"] = {5,1}["(current_estimates[^'terms.parameters.synonymous_rate'])[^'terms.fit.MLE']*(1+(2-_MATRIX_ELEMENT_ROW_)*0.25)"];
+        grid ["`tree_id`.`branch_id`.`local_parameters[omega1]`"] = {5,1}["_MATRIX_ELEMENT_ROW_ * 0.2"];
+        grid ["`tree_id`.`branch_id`.`local_parameters[omega2]`"] = {7,1}["(1+(_MATRIX_ELEMENT_ROW_-3)^3)*(_MATRIX_ELEMENT_ROW_>=3)+(_MATRIX_ELEMENT_ROW_*0.25+0.25)*(_MATRIX_ELEMENT_ROW_<3)"];
+        grid ["`tree_id`.`branch_id`.`local_parameters[mixture1]`"] = {{0.98}{0.95}{0.90}{0.75}{0.5}};
+    } else {
+        omega_prev = current_estimates [terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component_count - 1)];
+        omega_last = terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component_count);
+        mixture_last = terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component_count - 1);
+        if (omega_prev [utility.getGlobalValue('terms.fit.MLE')] > 1) {
+            parameters.SetValue (omega_prev [utility.getGlobalValue('terms.id')], 0.8);
+        }
+        grid ["`tree_id`.`branch_id`.`local_parameters[omega_last]`"] = {10,1}["(1+(_MATRIX_ELEMENT_ROW_-5)^3)*(_MATRIX_ELEMENT_ROW_>=5)+(_MATRIX_ELEMENT_ROW_*0.15+0.15)*(_MATRIX_ELEMENT_ROW_<5)"];
+        grid ["`tree_id`.`branch_id`.`local_parameters[mixture_last]`"] = {{0.98}{0.95}{0.90}{0.75}{0.5}};
+    }
+
+    return grid;
+}
+
+
+//------------------------------------------------------------------------------------------------------------------------
+
+lfunction absrel.ComputeOnAGrid (grid_definition, lfname) {
+
+    parameter_names = utility.Keys (grid_definition);
+    parameter_count = Abs (grid_definition);
+    grid_dimensions = {};
+    total_grid_points = 1;
+
+    utility.ForEachPair (grid_definition, "_key_", "_value_",
+        '
+            `&grid_dimensions`[_key_] = utility.Array1D (_value_);
+            `&total_grid_points` = `&total_grid_points` * `&grid_dimensions`[_key_];
+        ');
+
+
+    best_val :> -1e100;
+    best_val = 
-1e100; + + + LFCompute (^lfname,LF_START_COMPUTE); + + + for (grid_point = 0; grid_point < total_grid_points; grid_point += 1) { + index = grid_point; + + current_state = grid_dimensions; + + for (p_id = 0; p_id < parameter_count; p_id += 1) { + p_name = parameter_names[p_id]; + current_state[p_name] = (grid_definition[p_name])[index % grid_dimensions[p_name]]; + index = index $ grid_dimensions[p_name]; + } + + absrel.SetValues (current_state); + + LFCompute (^lfname, try_value); + + if (try_value > best_val) { + best_state = current_state; + best_val = try_value; + } + } + + absrel.SetValues (best_state); + LFCompute(^lfname,LF_DONE_COMPUTE); + + return best_state; + +} + +function absrel.SetValues(set) { + if (Type (set) == "AssociativeList") { + utility.ForEachPair (set, "_key_", "_value_", + ' + parameters.SetValue (_key_, _value_); + '); + } +} + +//---------------------------------------------------- +lfunction absrel.get_tree_name (lf_id) { + GetString (info, ^lf_id, -1); + return (info["Trees"])[0]; +} + +lfunction absrel.get_model_id (lf_id) { + GetString (info, ^lf_id, -1); + return (info["Models"])[0]; +} + +function absrel.constrain_everything (lf_id) { + GetString (absrel.constrain_everything.info, ^lf_id, -1); + + utility.ForEach (absrel.constrain_everything.info [utility.getGlobalValue("terms.parameters.global_independent")], "_value_", + "parameters.SetConstraint (_value_, Eval (_value_), terms.global)"); + utility.ForEach (absrel.constrain_everything.info [utility.getGlobalValue("terms.parameters.local_independent")], "_value_", + "parameters.SetConstraint (_value_, Eval (_value_), '')"); +} + +lfunction absrel.local.omega(branch_info) { + return parameters.NormalizeRatio ((branch_info[utility.getGlobalValue ("terms.parameters.nonsynonymous_rate")])[utility.getGlobalValue("terms.fit.MLE")], + (branch_info[utility.getGlobalValue ("terms.parameters.synonymous_rate")])[utility.getGlobalValue("terms.fit.MLE")]); +} + 
+//------------------------------------------------------------------------------ + +lfunction absrel.BS_REL.ModelDescription (type, code, components) { + model = models.codon.BS_REL.ModelDescription(type, code, components); + model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.BS_REL._DefineQ"; + return model; +} + + + +//------------------------------------------------------------------------------ + +lfunction absrel.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, _tt, alpha, alpha_term, beta, beta_term, omega, omega_term) { + + p = {}; + diff = models.codon.diff(fromChar, toChar); + + if (None != diff) { + p[model_type] = {}; + p[utility.getGlobalValue("terms.global")] = {}; + + if (diff[utility.getGlobalValue("terms.diff.from")] > diff[utility.getGlobalValue("terms.diff.to")]) { + nuc_rate = "theta_" + diff[utility.getGlobalValue("terms.diff.to")] + diff[utility.getGlobalValue("terms.diff.from")]; + } else { + nuc_rate = "theta_" + diff[utility.getGlobalValue("terms.diff.from")] + diff[utility.getGlobalValue("terms.diff.to")]; + } + nuc_rate = parameters.ApplyNameSpace(nuc_rate, namespace); + (p[utility.getGlobalValue("terms.global")])[terms.nucleotideRate(diff[utility.getGlobalValue("terms.diff.from")], diff[utility.getGlobalValue("terms.diff.to")])] = nuc_rate; + + if (_tt[fromChar] != _tt[toChar]) { + if (model_type == utility.getGlobalValue("terms.global")) { + aa_rate = parameters.ApplyNameSpace(omega, namespace); + (p[model_type])[omega_term] = aa_rate; + } else { + aa_rate = omega + "*" + alpha; + (p[model_type])[omega_term] = omega; + } + p[utility.getGlobalValue("terms.model.rate_entry")] = nuc_rate + "*" + aa_rate; + } else { + if (model_type == utility.getGlobalValue("terms.local")) { + (p[model_type])[alpha_term] = alpha; + p[utility.getGlobalValue("terms.model.rate_entry")] = nuc_rate + "*" + alpha; + } else { + p[utility.getGlobalValue("terms.model.rate_entry")] = nuc_rate; + } + } + } + + + return p; +} + 
+//------------------------------------------------------------------------------ + +lfunction absrel.BS_REL._DefineQ(bs_rel, namespace) { + rate_matrices = {}; + + bs_rel [utility.getGlobalValue("terms.model.q_ij")] = &rate_generator; + bs_rel [utility.getGlobalValue("terms.mixture.mixture_components")] = {}; + + _aux = parameters.GenerateSequentialNames ("bsrel_mixture_aux", bs_rel[utility.getGlobalValue("terms.model.components")] - 1, "_"); + _wts = parameters.helper.stick_breaking (_aux, None); + mixture = {}; + + component_count = bs_rel[utility.getGlobalValue("terms.model.components")]; + + for (component = 1; component <= component_count; component += 1) { + key = "component_" + component; + ExecuteCommands (" + function rate_generator (fromChar, toChar, namespace, model_type, model) { + return absrel.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')], + 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'), + 'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component), + 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component)); + }" + ); + + if ( component < component_count) { + model.generic.AddLocal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component )); + parameters.SetRange (_aux[component-1], utility.getGlobalValue("terms.range_almost_01")); + } + + models.codon.generic.DefineQMatrix(bs_rel, namespace); + rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")]; + (bs_rel [^'terms.mixture.mixture_components'])[key] = _wts [component-1]; + } + + + bs_rel[utility.getGlobalValue("terms.model.rate_matrix")] = rate_matrices; + parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", ""); 
+ return bs_rel; +} diff --git a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf index d105d3f5b..ba3eab158 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf @@ -65,13 +65,13 @@ absrel.analysis_description = {terms.io.info : "aBSREL (Adaptive branch-site ran io.DisplayAnalysisBanner ( absrel.analysis_description ); absrel.json = { - terms.json.analysis: absrel.analysis_description, + terms.json.analysis: absrel.analysis_description, terms.json.input: {}, terms.json.fits : {}, terms.json.timers : {}, terms.json.test_results : {} }; - + selection.io.startTimer (absrel.json [terms.json.timers], "Overall", 0); From 0d8f981d7afbe78115be9a50a9177a01b80f2708 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 19 Jul 2018 19:28:31 -0400 Subject: [PATCH 30/53] Fixes to compile with clang --- CMakeLists.txt | 11 +- src/core/calcnode2.cpp | 330 ++++++++++++++++++++--------------------- src/core/likefunc2.cpp | 50 +++---- src/core/matrix.cpp | 72 +++++---- 4 files changed, 233 insertions(+), 230 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49e8cac2f..85453c77a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ endmacro(PCL_CHECK_FOR_AVX) set(CMAKE_MODULE_PATH cmake) set(HYPHY_VERSION 2.1) + #------------------------------------------------------------------------------- # setup the files we'll be using #------------------------------------------------------------------------------- @@ -94,7 +95,6 @@ set(DEFAULT_WARNING_FLAGS "-w") set(DEFAULT_DEBUG_WARNING_FLAGS "-Wall -Wno-int-to-pointer-cast -Wno-conversion-null -Wno-sign-compare -Wno-maybe-uninitialized") - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) execute_process( COMMAND ${CMAKE_CXX_COMPILER} -dumpversion @@ -106,7 +106,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) endif() if(${GCC_VERSION} VERSION_LESS 6.0) - 
set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -std=gnu++14") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g -std=gnu++14") else(${GCC_VERSION} VERSION_LESS 6.0) set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") endif(${GCC_VERSION} VERSION_LESS 6.0) @@ -175,6 +175,13 @@ if(NOT DEFINED DEFAULT_WARNING_FLAGS) set(DEFAULT_WARNING_FLAGS "") endif(NOT DEFINED DEFAULT_WARNING_FLAGS) +include (CheckLibraryExists) + +CHECK_LIBRARY_EXISTS( "libamdlibm.so" "log" "" HAS_AMD_LIBM) +if(HAS_AMD_LIBM) + set(DEFAULT_LINK_FLAGS "${DEFAULT_LINK_FLAGS} -lamdlibm") +endif(HAS_AMD_LIBM) + #------------------------------------------------------------------------------- # OpenMP support diff --git a/src/core/calcnode2.cpp b/src/core/calcnode2.cpp index ef52aa3bb..3ee00b6ac 100644 --- a/src/core/calcnode2.cpp +++ b/src/core/calcnode2.cpp @@ -7,7 +7,7 @@ Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) @@ -106,7 +106,7 @@ inline void _handle4x4_pruning_case (double const* childVector, double const* tM childVector[1], childVector[2], childVector[3]}; - + __m128d buffer0 = _mm_loadu_pd (tv), buffer1 = _mm_loadu_pd (tv+2), matrix01 = _mm_loadu_pd (tMatrix), @@ -115,7 +115,7 @@ inline void _handle4x4_pruning_case (double const* childVector, double const* tM matrix56 = _mm_loadu_pd (tMatrix+6), reg_storage = _mm_mul_pd (buffer0, matrix01), reg_storage2 = _mm_mul_pd (buffer0, matrix34); - + matrix34 = _mm_mul_pd(buffer1, matrix12); matrix56 = _mm_mul_pd(buffer1, matrix56); reg_storage = _mm_add_pd (reg_storage, matrix34); @@ -124,72 +124,72 @@ inline void _handle4x4_pruning_case (double const* childVector, double const* tM matrix01 = _mm_loadu_pd (parentConditionals); matrix01 = _mm_mul_pd (reg_storage, matrix01); _mm_storeu_pd (parentConditionals, matrix01); - - - + + + matrix01 = _mm_loadu_pd (tMatrix+8); 
matrix12 = _mm_loadu_pd (tMatrix+10); matrix34 = _mm_loadu_pd (tMatrix+12); matrix56 = _mm_loadu_pd (tMatrix+14); reg_storage = _mm_mul_pd (buffer0, matrix01); - reg_storage2 = _mm_mul_pd (buffer0, matrix34); - + reg_storage2 = _mm_mul_pd (buffer0, matrix34); + matrix34 = _mm_mul_pd(buffer1, matrix12); matrix56 = _mm_mul_pd(buffer1, matrix56); reg_storage = _mm_add_pd (reg_storage, matrix34); reg_storage2 = _mm_add_pd (reg_storage2, matrix56); reg_storage = _mm_hadd_pd (reg_storage,reg_storage2); - + matrix01 = _mm_loadu_pd (parentConditionals+2); matrix01 = _mm_mul_pd (reg_storage, matrix01); _mm_storeu_pd (parentConditionals+2, matrix01); - + /* A1*B1 + A2*B2 + A3*B3 + A4*B4, where A4 = 1-A1-A2-A3 can be done with three multipications and 3 extra additions, like - + A1*(B1-B4) + A2*(B2-B4) + A3*(B3-B4) + B4 - + */ #elif defined _SLKP_USE_AVX_INTRINSICS - + __m256d c3 = _mm256_set1_pd(childVector[3]), c0 = _mm256_sub_pd(_mm256_set1_pd(childVector[0]),c3), c1 = _mm256_sub_pd(_mm256_set1_pd(childVector[1]),c3), c2 = _mm256_sub_pd(_mm256_set1_pd(childVector[2]),c3), t0,t1,t2; - + if (transposed_mx) { t0 = ((__m256d*)transposed_mx)[0]; t1 = ((__m256d*)transposed_mx)[1]; t2 = ((__m256d*)transposed_mx)[2]; - + } else { t0 = (__m256d) {tMatrix[0],tMatrix[4],tMatrix[8],tMatrix[12]}; t1 = (__m256d) {tMatrix[1],tMatrix[5],tMatrix[9],tMatrix[13]}; t2 = (__m256d) {tMatrix[2],tMatrix[6],tMatrix[10],tMatrix[14]}; } - + // load transition matrix by column - + __m256d sum01 = _mm256_add_pd (_mm256_mul_pd(c0,t0),_mm256_mul_pd(c1,t1)), sum23 = _mm256_add_pd (_mm256_mul_pd(c2,t2), c3); - + _mm256_storeu_pd(parentConditionals, _mm256_mul_pd (_mm256_loadu_pd (parentConditionals), _mm256_add_pd (sum01, sum23))); - + #else // 12 multiplications, 16 additions, 3 subtractions - + _Parameter t1 = childVector[0] - childVector[3], t2 = childVector[1] - childVector[3], t3 = childVector[2] - childVector[3], t4 = childVector[3]; - + parentConditionals [0] *= (tMatrix[0] * t1 + tMatrix[1] * t2) 
+ (tMatrix[2] * t3 + t4); parentConditionals [1] *= (tMatrix[4] * t1 + tMatrix[5] * t2) + (tMatrix[6] * t3 + t4); parentConditionals [2] *= (tMatrix[8] * t1 + tMatrix[9] * t2) + (tMatrix[10] * t3 + t4); @@ -223,7 +223,7 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI { _List matrixQueue, nodesToDo; - + _SimpleList isExplicitForm; bool hasExpForm = false; @@ -243,13 +243,13 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI } } } - + //printf ("%ld %d\n", nodesToDo.lLength, hasExpForm); unsigned long matrixID; - + _List * computedExponentials = hasExpForm? new _List (matrixQueue.lLength) : nil; - + #ifdef _OPENMP unsigned long nt = cBase<20?1:(MIN(tc, matrixQueue.lLength / 3 + 1)); matrixExpCount += matrixQueue.lLength; @@ -263,15 +263,15 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI (*computedExponentials) [matrixID] = ((_Matrix*)matrixQueue(matrixID))->Exponentiate(); } } - - - + + + if (computedExponentials) { _CalcNode * current_node = nil; _List buffered_exponentials; - + for (unsigned long mx_index = 0; mx_index < nodesToDo.lLength; mx_index++) { if (isExplicitForm.lData[mx_index]) { _CalcNode *next_node = (_CalcNode*) nodesToDo (mx_index); @@ -300,7 +300,7 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI DeleteObject(computedExponentials); #ifdef _UBER_VERBOSE_DUMP_MATRICES if (likeFuncEvalCallCount == _UBER_VERBOSE_DUMP) { - fprintf (stderr, "\n T_MATRIX = {"); + fprintf (stderr, "\n T_MATRIX = {"); for (unsigned long nodeID = 0; nodeID < flatLeaves.lLength + flatTree.lLength - 1; nodeID++) { bool isLeaf = nodeID < flatLeaves.lLength; @@ -309,10 +309,10 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI if (nodeID) { fprintf (stderr, ","); } - fprintf (stderr, "\n\"%s\":%s", current_node->GetName()->sData, _String((_String*)current_node->GetCompExp()->toStr()).sData); - + fprintf (stderr, "\n\"%s\":%s", 
current_node->GetName()->sData, _String((_String*)current_node->GetCompExp()->toStr()).sData); + } - fprintf (stderr, "\n};\n"); + fprintf (stderr, "\n};\n"); } #endif @@ -430,7 +430,7 @@ void _TheTree::FillInConditionals (_DataSetFilter const* th /*----------------------------------------------------------------------------------------------------------*/ #ifdef MDSOCL -_Parameter _TheTree::OCLLikelihoodEvaluator ( _SimpleList& updateNodes, +_Parameter _TheTree::OCLLikelihoodEvaluator ( _SimpleList& updateNodes, _DataSetFilter* theFilter, _Parameter* iNodeCache, long * lNodeFlags, @@ -637,7 +637,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL if (siteTo > siteCount) { siteTo = siteCount; } - + /* #ifdef _UBER_VERBOSE_LF_DEBUG printf ("\n\n_TheTree::ComputeTreeBlockByBranch\n"); @@ -660,7 +660,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL // mark the parent for update and clear its conditionals if needed { taggedInternals.lData[parentCode] = 1; - _Parameter _hprestrict_ *localScalingFactor = scalingAdjustments + parentCode*siteCount; + _Parameter * _hprestrict_ localScalingFactor = scalingAdjustments + parentCode*siteCount; bool matchSet = (parentCode == setBranch); @@ -710,7 +710,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL _Parameter const * transitionMatrix = currentTreeNode->GetCompExp(catID)->theData; _Parameter * childVector, * lastUpdatedSite; - + #ifdef _SLKP_USE_AVX_INTRINSICS __m256d tmatrix_transpose [4] = { (__m256d) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]}, @@ -761,7 +761,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL /* if (likeFuncEvalCallCount == 4 || likeFuncEvalCallCount == 5) { printf ("\nSKIPPED site %ld @ eval %ld\n", siteID, likeFuncEvalCallCount); - }*/ + }*/ continue; } parentTCCIBit++; @@ -860,25 +860,25 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL } else { _Parameter sum = 0.0; if (alphabetDimension > 
alphabetDimensionmod4){ - - + + for (long p = 0L; p < alphabetDimension; p++) { _Parameter accumulator = 0.0; - + #ifdef _SLKP_USE_SSE_INTRINSICS - + __m128d buffer1, buffer2, buffer3 = _mm_setzero_pd(), buffer4 = _mm_setzero_pd(), - load1, + load1, load2, load3, load4; - - + + if (((long int)tMatrix & 0b1111) == 0 && ((long int)childVector & 0b1111) == 0){ for (long c = 0; c < alphabetDimensionmod4; c+=4) { load1 = _mm_load_pd (tMatrix+c); @@ -889,7 +889,7 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL buffer2 = _mm_mul_pd (load2, load4); buffer3 = _mm_add_pd (buffer1,buffer3); buffer4 = _mm_add_pd (buffer2,buffer4); - } + } } else { for (long c = 0; c < alphabetDimensionmod4; c+=4) { load1 = _mm_loadu_pd (tMatrix+c); @@ -900,35 +900,35 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL buffer2 = _mm_mul_pd (load2, load4); buffer3 = _mm_add_pd (buffer1,buffer3); buffer4 = _mm_add_pd (buffer2,buffer4); - } - + } + } - - buffer3 = _mm_add_pd (buffer3, buffer4); + + buffer3 = _mm_add_pd (buffer3, buffer4); double buffer[2] __attribute__ ((aligned (16))); _mm_store_pd (buffer, buffer3); accumulator = buffer[0] + buffer[1]; - + #elif defined _SLKP_USE_AVX_INTRINSICS // end _SLKP_USE_SSE_INTRINSICS - + __m256d sum256 = _mm256_setzero_pd(); - + for (long c = 0; c < alphabetDimensionmod4; c+=4L) { __m256d matrix_quad = _mm256_loadu_pd (tMatrix+c), child_quad = _mm256_loadu_pd (childVector+c), prod = _mm256_mul_pd (matrix_quad, child_quad); - + sum256 = _mm256_add_pd (sum256,prod); } - - + + accumulator = _avx_sum_4(sum256); //NOT sure why copy to doubles and add is faster // than AVX istructions #else // _SLKP_USE_AVX_INTRINSICS - - + + for (unsigned long c = 0UL; c < alphabetDimensionmod4; c+=4UL) { // 4 - unroll the loop // 4 - unroll the loop _Parameter pr1 = tMatrix[c] * childVector[c], @@ -958,30 +958,30 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL _mm256_loadu_pd(tMatrix+8UL), _mm256_loadu_pd(tMatrix+12UL), 
_mm256_loadu_pd(tMatrix+16UL)}, - + c_vector[5] = {_mm256_loadu_pd(childVector), _mm256_loadu_pd(childVector+4UL), _mm256_loadu_pd(childVector+8UL), _mm256_loadu_pd(childVector+12UL), _mm256_loadu_pd(childVector+16UL)}; - + t_matrix[0] = _mm256_mul_pd(t_matrix[0], c_vector[0]); t_matrix[1] = _mm256_mul_pd(t_matrix[1], c_vector[1]); t_matrix[2] = _mm256_mul_pd(t_matrix[2], c_vector[2]); t_matrix[3] = _mm256_mul_pd(t_matrix[3], c_vector[3]); t_matrix[4] = _mm256_mul_pd(t_matrix[4], c_vector[4]); - + t_matrix[0] = _mm256_add_pd (t_matrix[0],t_matrix[1]); t_matrix[2] = _mm256_add_pd (t_matrix[2],t_matrix[3]); t_matrix[0] = _mm256_add_pd (t_matrix[0],t_matrix[2]); - + tMatrix += 20UL; sum += (parentConditionals[p] *= _avx_sum_4(_mm256_add_pd (t_matrix[0],t_matrix[4]))); } } else #endif // _SLKP_USE_AVX_INTRINSICS - + for (long p = 0; p < alphabetDimension; p++) { _Parameter accumulator = 0.0; @@ -1065,14 +1065,14 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL // assemble the entire likelihood - _Parameter _hprestrict_ * rootConditionals = iNodeCache + alphabetDimension * (siteFrom + (flatTree.lLength-1) * siteCount); + _Parameter * _hprestrict_ rootConditionals = iNodeCache + alphabetDimension * (siteFrom + (flatTree.lLength-1) * siteCount); _Parameter result = 0.0, correction = 0.0; for (long siteID = siteFrom, rootIndex = 0L; siteID < siteTo; siteID++) { _Parameter accumulator = 0.; - + if (setBranch == flatTree.lLength-1) { long rootState = setBranchTo[siteOrdering.lData[siteID]]; accumulator = rootConditionals[rootIndex + rootState] * theProbs[rootState]; @@ -1105,10 +1105,10 @@ _Parameter _TheTree::ComputeTreeBlockByBranch ( _SimpleL } break; } - + _Parameter term; long site_frequency = theFilter->theFrequencies (siteOrdering.lData[siteID]); - + if (site_frequency > 1L) { term = log(accumulator) * site_frequency - correction; } else { @@ -1156,11 +1156,11 @@ void _TheTree::ComputeBranchCache ( _Parameter* siteRes ) { - + /* the cache matrix (linearized 
into a vector) will have TWO rows with siteCount blocks of alphabetDimension doubles, storing the conditional likelihoods of individual sites at a given branch in the virtually rerooted tree - + cache -> Row 0 [brID node -- the branch that is being rerooted on] Row 1 [conditional likelihoods for the new root] @@ -1173,7 +1173,7 @@ void _TheTree::ComputeBranchCache ( rootPath; long myParent = brID -flatLeaves.lLength; - + const long alphabetDimension = theFilter->GetDimension(), alphabetDimensionmod4 = alphabetDimension - alphabetDimension % 4, siteCount = theFilter->GetPatternCount(); @@ -1208,7 +1208,7 @@ void _TheTree::ComputeBranchCache ( printf ("\n"); echoNodeList (nodesToProcess,flatLeaves,flatNodes); #endif - + _Parameter * state = cache + alphabetDimension * siteFrom, * childVector; @@ -1269,7 +1269,7 @@ void _TheTree::ComputeBranchCache ( taggedNodes.Populate (flatTree.lLength, 0, 0); rootPath.Flip (); - + long const node_count = nodesToProcess.lLength + rootPath.lLength - 2L; for (long nodeID = 0; nodeID < node_count; nodeID++) { @@ -1277,8 +1277,8 @@ void _TheTree::ComputeBranchCache ( long nodeCode = notPassedRoot?nodesToProcess.lData [nodeID]:rootPath.lData[nodeID-nodesToProcess.lLength], parentCode = notPassedRoot?flatParents.lData [nodeCode]:(rootPath.lData[nodeID-nodesToProcess.lLength+1] - flatLeaves.lLength); - - + + bool isLeaf = nodeCode < flatLeaves.lLength; if (!isLeaf) { @@ -1310,14 +1310,14 @@ void _TheTree::ComputeBranchCache ( } } } - + _CalcNode * currentTreeNode = (_CalcNode*) (isLeaf? 
flatCLeaves (nodeCode): flatTree (notPassedRoot?nodeCode:parentCode)); #ifdef _UBER_VERBOSE_LF_DEBUG printf ("isLeaf = %d, not passedRoot = %d, nodeCode = %ld, parentCode = %ld, matrix from %s, parent name %s\n", isLeaf, notPassedRoot, nodeCode, parentCode, currentTreeNode->GetName()->sData, ((_CalcNode *)flatTree(parentCode))->GetName()->sData); #endif _Parameter const * _hprestrict_ transitionMatrix = currentTreeNode->GetCompExp(catID)->theData; - + #ifdef _SLKP_USE_AVX_INTRINSICS __m256d tmatrix_transpose [4] = { (__m256d) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]}, @@ -1326,7 +1326,7 @@ void _TheTree::ComputeBranchCache ( (__m256d) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]} }; #endif - + _Parameter * childVector, * lastUpdatedSite; @@ -1336,7 +1336,7 @@ void _TheTree::ComputeBranchCache ( #endif lastUpdatedSite = childVector = iNodeCache + (siteFrom + nodeCode * siteCount) * alphabetDimension; } - + long currentTCCIndex , currentTCCBit , @@ -1451,7 +1451,7 @@ void _TheTree::ComputeBranchCache ( _Parameter accumulator = 0.0; #ifdef _SLKP_USE_SSE_INTRINSICS - + __m128d buffer1, buffer2, buffer3 = _mm_setzero_pd(), @@ -1460,8 +1460,8 @@ void _TheTree::ComputeBranchCache ( load2, load3, load4; - - + + if (((long int)tMatrix & 0b1111) == 0 && ((long int)childVector & 0b1111) == 0){ for (long c = 0; c < alphabetDimensionmod4; c+=4) { load1 = _mm_load_pd (tMatrix+c); @@ -1484,33 +1484,33 @@ void _TheTree::ComputeBranchCache ( buffer3 = _mm_add_pd (buffer1,buffer3); buffer4 = _mm_add_pd (buffer2,buffer4); } - + } - + buffer3 = _mm_add_pd (buffer3, buffer4); double buffer[2] __attribute__ ((aligned (16))); _mm_store_pd (buffer, buffer3); accumulator = buffer[0] + buffer[1]; - + #elif defined _SLKP_USE_AVX_INTRINSICS // end _SLKP_USE_SSE_INTRINSICS - + __m256d sum256 = _mm256_setzero_pd(); - - + + for (long c = 0; c < alphabetDimensionmod4; c+=4L) { __m256d matrix_quad = _mm256_loadu_pd 
(tMatrix+c), child_quad = _mm256_loadu_pd (childVector+c), prod = _mm256_mul_pd (matrix_quad, child_quad); - + sum256 = _mm256_add_pd (sum256,prod); } - - + + accumulator = _avx_sum_4(sum256); //NOT sure why copy to doubles and add is faster // than AVX istructions #else // _SLKP_USE_AVX_INTRINSICS - + for (unsigned long c = 0UL; c < alphabetDimensionmod4; c+=4UL) { // 4 - unroll the loop _Parameter pr1 = tMatrix[c] * childVector[c], pr2 = tMatrix[c+1] * childVector[c+1], @@ -1521,8 +1521,8 @@ void _TheTree::ComputeBranchCache ( accumulator += pr1+pr3; } #endif // regular code - - + + for (long c = alphabetDimensionmod4; c < alphabetDimension; c++) { accumulator += tMatrix[c] * childVector[c]; } @@ -1533,7 +1533,7 @@ void _TheTree::ComputeBranchCache ( } childVector += alphabetDimension; - + if (canScale) { if (sum < _lfScalingFactorThreshold && sum > 0.0) { _Parameter tryScale = scalingAdjustments [nodeCode*siteCount + siteID] * _lfScalerUpwards; @@ -1567,17 +1567,17 @@ void _TheTree::ComputeBranchCache ( } } - + #ifdef _UBER_VERBOSE_LF_DEBUG printf ("root name %s\n", ((_CalcNode *)flatTree(rootPath.lData[rootPath.lLength-2] - flatLeaves.lLength))->GetName()->sData); #endif - - _Parameter const _hprestrict_ *rootConditionals = iNodeCache + (rootPath.lData[rootPath.lLength-2L] - flatLeaves.lLength) * siteCount * alphabetDimension; + + _Parameter const * _hprestrict_ rootConditionals = iNodeCache + (rootPath.lData[rootPath.lLength-2L] - flatLeaves.lLength) * siteCount * alphabetDimension; state = cache + alphabetDimension * siteCount; - + const unsigned long site_bound = alphabetDimension*siteTo; - + for (unsigned long ii = siteFrom * alphabetDimension; ii < site_bound; ii++) { state[ii] = rootConditionals[ii]; //printf ("Root conditional [%ld] = %g, node state [%ld] = %g\n", ii, state[ii], ii, cache[ii]); @@ -1586,7 +1586,7 @@ void _TheTree::ComputeBranchCache ( if (!siteCorrectionCounts && localScalerChange) { #pragma omp atomic overallScaler += localScalerChange; 
- + //#pragma omp atomic // printf ("Rescale in ComputeBranchCache at branch %ld %ld\n", brID, localScalerChange); } @@ -1613,16 +1613,16 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( ) { auto bookkeeping = [&siteOrdering, &storageVec, &theFilter] (const long siteID, const _Parameter accumulator, _Parameter& correction, _Parameter& result) -> void { - + long direct_index = siteOrdering.lData[siteID]; - + if (storageVec) { storageVec [direct_index] = accumulator; } else { if (accumulator <= 0.0) { throw (1L+direct_index); } - + _Parameter term; long site_frequency = theFilter->theFrequencies.Get(direct_index); if ( site_frequency > 1L) { @@ -1650,8 +1650,8 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( siteTo = siteCount; } - _Parameter const _hprestrict_ *branchConditionals = cache + siteFrom * alphabetDimension; - _Parameter const _hprestrict_ *rootConditionals = branchConditionals + siteCount * alphabetDimension; + _Parameter const * _hprestrict_ branchConditionals = cache + siteFrom * alphabetDimension; + _Parameter const * _hprestrict_ rootConditionals = branchConditionals + siteCount * alphabetDimension; _Parameter result = 0.0, correction = 0.0; @@ -1662,15 +1662,15 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( _Parameter const * _hprestrict_ transitionMatrix = givenTreeNode->GetCompExp(catID)->theData; - + // cases by alphabet dimension try { switch (alphabetDimension) { /**** - + NUCLEOTIDES - + ****/ case 4UL: { #ifdef _SLKP_USE_AVX_INTRINSICS @@ -1694,7 +1694,7 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( s01 = _mm256_add_pd ( _mm256_mul_pd (b_cond0, tmatrix_transpose[0]), _mm256_mul_pd (b_cond1, tmatrix_transpose[1])), s23 = _mm256_add_pd ( _mm256_mul_pd (b_cond2, tmatrix_transpose[2]), _mm256_mul_pd (b_cond3, tmatrix_transpose[3])); accumulator = _avx_sum_4(_mm256_mul_pd (_mm256_mul_pd (root_c, probs), _mm256_add_pd (s01,s23))); - + #else accumulator = rootConditionals[0] * theProbs[0] * (branchConditionals[0] * transitionMatrix[0] + 
branchConditionals[1] * transitionMatrix[1] + branchConditionals[2] * transitionMatrix[2] + branchConditionals[3] * transitionMatrix[3]) + @@ -1712,9 +1712,9 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( } break; /**** - + AMINOACIDS - + ****/ case 20UL: { for (unsigned long siteID = siteFrom; siteID < siteTo; siteID++) { @@ -1725,59 +1725,59 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( _mm256_loadu_pd(branchConditionals+8UL), _mm256_loadu_pd(branchConditionals+12UL), _mm256_loadu_pd(branchConditionals+16UL)}; - - + + _Parameter const * tm = transitionMatrix; - + for (unsigned long p = 0UL; p < 20UL; p++, rootConditionals++) { - + __m256d t_matrix[5] = { _mm256_loadu_pd(tm), _mm256_loadu_pd(tm+4UL), _mm256_loadu_pd(tm+8UL), _mm256_loadu_pd(tm+12UL), _mm256_loadu_pd(tm+16UL)}; - - + + t_matrix[0] = _mm256_mul_pd(t_matrix[0], bc_vector[0]); t_matrix[1] = _mm256_mul_pd(t_matrix[1], bc_vector[1]); t_matrix[2] = _mm256_mul_pd(t_matrix[2], bc_vector[2]); t_matrix[3] = _mm256_mul_pd(t_matrix[3], bc_vector[3]); t_matrix[4] = _mm256_mul_pd(t_matrix[4], bc_vector[4]); - + t_matrix[0] = _mm256_add_pd (t_matrix[0],t_matrix[1]); t_matrix[1] = _mm256_add_pd (t_matrix[2],t_matrix[3]); t_matrix[3] = _mm256_add_pd (t_matrix[0],t_matrix[1]); - + tm += 20UL; - + accumulator += *rootConditionals * theProbs[p] * _avx_sum_4(_mm256_add_pd (t_matrix[3],t_matrix[4])); } #else // _SLKP_USE_AVX_INTRINSICS unsigned long rmx = 0UL; for (unsigned long p = 0UL; p < 20UL; p++,rootConditionals++) { _Parameter r2 = 0.; - + for (unsigned long c = 0UL; c < 20UL; c+=4UL, rmx +=4UL) { r2 += (branchConditionals[c] * transitionMatrix[rmx] + branchConditionals[c+1] * transitionMatrix[rmx+1]) + (branchConditionals[c+2] * transitionMatrix[rmx+2] + branchConditionals[c+3] * transitionMatrix[rmx+3]); } - + accumulator += *rootConditionals * theProbs[p] * r2; } #endif // _SLKP_USE_AVX_INTRINSICS branchConditionals += 20UL; bookkeeping (siteID, accumulator, correction, result); - + } // siteID - + } 
// case 20 break; /**** - + CODONS - + ****/ case 60UL: case 61UL: @@ -1785,35 +1785,35 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( case 63UL: { for (unsigned long siteID = siteFrom; siteID < siteTo; siteID++) { _Parameter accumulator = 0.; - + unsigned long rmx = 0UL; for (unsigned long p = 0UL; p < alphabetDimension; p++,rootConditionals++) { _Parameter r2 = 0.; unsigned long c = 0UL; - + #ifdef _SLKP_USE_AVX_INTRINSICS - + __m256d sum256 = _mm256_setzero_pd (); - + for (; c < 60UL; c+=12UL, rmx +=12UL) { - + __m256d branches0 = _mm256_loadu_pd (branchConditionals+c), branches1 = _mm256_loadu_pd (branchConditionals+c+4), branches2 = _mm256_loadu_pd (branchConditionals+c+8), matrix0 = _mm256_loadu_pd (transitionMatrix+rmx), matrix1 = _mm256_loadu_pd (transitionMatrix+rmx+4), matrix2 = _mm256_loadu_pd (transitionMatrix+rmx+8); - + branches0 = _mm256_mul_pd(branches0, matrix0); branches1 = _mm256_mul_pd(branches1, matrix1); branches2 = _mm256_mul_pd(branches2, matrix2); - + branches0 = _mm256_add_pd (branches0,branches2); sum256 = _mm256_add_pd (branches0,_mm256_add_pd (sum256, branches1)); } - + r2 = _avx_sum_4(sum256); - + #else // _SLKP_USE_AVX_INTRINSICS for (; c < 60UL; c+=4UL, rmx +=4UL) { r2 += (branchConditionals[c] * transitionMatrix[rmx] + @@ -1822,67 +1822,67 @@ _Parameter _TheTree::ComputeLLWithBranchCache ( branchConditionals[c+3] * transitionMatrix[rmx+3]); } #endif - + for (; c < alphabetDimension; c++, rmx ++) { r2 += branchConditionals[c] * transitionMatrix[rmx]; } - + accumulator += *rootConditionals * theProbs[p] * r2; } - + branchConditionals += alphabetDimension; bookkeeping (siteID, accumulator, correction, result); - + } } // cases 60-63 break; default: { // valid alphabetDimension >= 2 - + if (alphabetDimension % 2) { // odd unsigned long alphabetDimension_minus1 = alphabetDimension-1; for (unsigned long siteID = siteFrom; siteID < siteTo; siteID++) { _Parameter accumulator = 0.; - + unsigned long rmx = 0UL; for (unsigned long p = 0UL; 
p < alphabetDimension; p++,rootConditionals++) { _Parameter r2 = 0.; - + for (unsigned long c = 0UL; c < alphabetDimension_minus1; c+=2UL, rmx +=2UL) { r2 += branchConditionals[c] * transitionMatrix[rmx] + branchConditionals[c+1] * transitionMatrix[rmx+1]; } - + r2 += branchConditionals[alphabetDimension_minus1] * transitionMatrix[rmx++]; - + accumulator += *rootConditionals * theProbs[p] * r2; } - + branchConditionals += alphabetDimension; bookkeeping (siteID, accumulator, correction, result); - + } } else { for (unsigned long siteID = siteFrom; siteID < siteTo; siteID++) { _Parameter accumulator = 0.; - + unsigned long rmx = 0UL; for (unsigned long p = 0UL; p < alphabetDimension; p++,rootConditionals++) { _Parameter r2 = 0.; - + for (unsigned long c = 0UL; c < alphabetDimension; c+=2UL, rmx +=2UL) { r2 += branchConditionals[c] * transitionMatrix[rmx] + branchConditionals[c+1] * transitionMatrix[rmx+1]; } - + accumulator += *rootConditionals * theProbs[p] * r2; } - + branchConditionals += alphabetDimension; bookkeeping (siteID, accumulator, correction, result); - + } } - + } // default } // switch (alphabetDimension) } catch (long site) { @@ -1932,7 +1932,7 @@ _Parameter _TheTree::ComputeTwoSequenceLikelihood long siteState1 = lNodeFlags[siteOrdering.lData[siteID]], siteState2 = lNodeFlags[siteCount + siteOrdering.lData[siteID]]; - + if (siteState1 >= 0) // a single character state; sweep down the appropriate column { @@ -2164,7 +2164,7 @@ _List* _TheTree::RecoverAncestralSequences (_DataSetFilter const* dsf, siteCount = dsf->GetSiteCountInUnits (), allNodeCount = 0, stateCacheDim = (alsoDoLeaves? (iNodeCount + leafCount): (iNodeCount)); - + long *stateCache = new long [patternCount*(iNodeCount-1)*alphabetDimension], *leafBuffer = new long [(alsoDoLeaves?leafCount*patternCount:1)*alphabetDimension]; @@ -2204,7 +2204,7 @@ _List* _TheTree::RecoverAncestralSequences (_DataSetFilter const* dsf, } _CalcNode * currentTreeNode = isLeaf? 
((_CalcNode*) flatCLeaves (nodeCode)):((_CalcNode*) flatTree (nodeCode)); - + _Parameter const* transitionMatrix = nil; if (!catAssignments) { _Matrix * cexp = currentTreeNode->GetCompExp(); @@ -2260,7 +2260,7 @@ _List* _TheTree::RecoverAncestralSequences (_DataSetFilter const* dsf, long *stateBuffer = isLeaf?leafBuffer:stateCache; // check for degeneracy - + bool completely_unresolved = ArrayAll (childVector, alphabetDimension, [] (_Parameter x, unsigned long) {return x == 1.;}); if (completely_unresolved) { @@ -2323,7 +2323,7 @@ _List* _TheTree::RecoverAncestralSequences (_DataSetFilter const* dsf, result->AppendNewInstance (new _String(siteCount*unitLength,false)); } - _Parameter _hprestrict_ * rootConditionals = iNodeCache + alphabetDimension * ((iNodeCount-1) * patternCount); + _Parameter * _hprestrict_ rootConditionals = iNodeCache + alphabetDimension * ((iNodeCount-1) * patternCount); _SimpleList parentStates (stateCacheDim,0,0), conversion; @@ -2418,7 +2418,7 @@ void _CalcNode::SetupCategoryMap (_List& containerVariables, _SimpleList& cl //for (long k = 0; kGetName()->sData); - + if (catCount<0) { remapMyCategories.Clear(); } else { @@ -2612,9 +2612,9 @@ bool _TreeTopology::ConvertToPSW (_AVLListX& nodeMap, _List* inames, _Sim iNodeCount = -1; _SimpleList levelBuffer; - + node_iterator ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER); - + while (node * currentNode = ni.Next (&levelBuffer)) { _String nodeName = GetNodeName (currentNode); @@ -2823,11 +2823,11 @@ _VariableContainer* _CalcNode::ParentTree(void) { /* class _TreeIterator { private: - + node_iterator iterator; int traverser; _SimpleList history; - + public:*/ _TreeIterator::_TreeIterator (_TheTree const* source, int traversal_type): iterator(source->theRoot, traversal_type & _HY_TREE_TRAVERSAL_MASK) { @@ -2843,9 +2843,9 @@ void _TreeIterator:: Reset (void) { _CalcNode * _TreeIterator:: Next (void) { - + node * nn = iterator.Next(&history); - + if (nn) { if (nn->is_root() && (flags & 
_HY_TREE_TRAVERSAL_SKIP_ROOT)) { return Next(); @@ -2853,7 +2853,7 @@ _CalcNode * _TreeIterator:: Next (void) { if (!nn->is_leaf() && (flags & _HY_TREE_TRAVERSAL_LEAVES)) { return Next(); } - + return map_node_to_calcnode(nn); } return nil; diff --git a/src/core/likefunc2.cpp b/src/core/likefunc2.cpp index 5b78b241f..3bc20703d 100644 --- a/src/core/likefunc2.cpp +++ b/src/core/likefunc2.cpp @@ -7,7 +7,7 @@ Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) @@ -57,12 +57,12 @@ void _LikelihoodFunction::DetermineLocalUpdatePolicy (void) { for (unsigned long k = 0; k < theTrees.lLength; k ++) { unsigned long catCount = ((_TheTree*)LocateVar(theTrees(k)))->categoryCount; - + _List * lup = new _List, * mte = new _List; computedLocalUpdatePolicy.AppendNewInstance (new _SimpleList (catCount,0,0)); - + for (unsigned long l = 0; l < catCount; l++) { lup->AppendNewInstance (new _SimpleList); mte->AppendNewInstance (new _List); @@ -86,10 +86,10 @@ void _LikelihoodFunction::ComputeParameterPenalty (void){ mp = 0.5*(lb+ub), span = ub-lb, v = GetIthIndependent(k); - + _Parameter term = exp (50*log (2.*fabs (v-mp)/span)); /*if (term > 0.0) { - printf ("\n[_LikelihoodFunction::ComputeParameterPenalty %lu: %g %g %g %g]\n", k, lb, ub, v, term); + printf ("\n[_LikelihoodFunction::ComputeParameterPenalty %lu: %g %g %g %g]\n", k, lb, ub, v, term); }*/ smoothingPenalty += term; // (2.*(v-mp)/span)^50 @@ -140,7 +140,7 @@ long _LikelihoodFunction::TotalRateClassesForAPartition (long part } } return hmmCats; - + } } } else if (partIndex < 0) { @@ -188,7 +188,7 @@ void _LikelihoodFunction::SetupCategoryCaches (void) hmmCatCount = 1L, catVarFlags = 0L, varIndex; - + try { for ( varIndex = 0; varIndex < myCats.lLength; varIndex++) { @@ -222,7 +222,7 @@ void _LikelihoodFunction::SetupCategoryCaches (void) if 
(varIndex < myCats.lLength) { throw ("Currently, HyPhy can support at most one HMM or Constant on Partition variable per partition"); - + } (*catVarCounts) << totalCatCount; @@ -254,7 +254,7 @@ void _LikelihoodFunction::SetupCategoryCaches (void) BatchDelete (catVarReferences,catVarCounts,catVarOffsets,hmmAndCOP,varType,container); WarnError (error); return; - + } } @@ -283,7 +283,7 @@ void _LikelihoodFunction::RestoreScalingFactors (long index, long branchID, l bool _LikelihoodFunction::ProcessPartitionList (_SimpleList& partsToDo, _Matrix* partitionList, _String const & caller) const { long partCount = CountObjects(kLFCountPartitions); - + if (partitionList) { partitionList->ConvertToSimpleList (partsToDo); partsToDo.Sort(); @@ -420,7 +420,7 @@ void _LikelihoodFunction::ReconstructAncestors (_DataSet &target,_SimpleList& _String * sampledString = (_String*)thisSet->GetItem(0); - + for (long siteIdx = 0; siteIdxsLength; siteIdx++) { target.AddSite (sampledString->sData[siteIdx]); } @@ -637,7 +637,7 @@ void _LikelihoodFunction::PopulateConditionalProbabilities (long in if (runMode == _hyphyLFConditionProbsRawMatrixMode || runMode == _hyphyLFConditionProbsScaledMatrixMode) // populate the matrix of conditionals and scaling factors { - _Parameter _hprestrict_ *bufferForThisCategory = buffer + indexShifter; + _Parameter * _hprestrict_ bufferForThisCategory = buffer + indexShifter; ComputeBlock (index, bufferForThisCategory, useThisPartitonIndex, branchIndex, branchValues); if (usedCachedResults) { @@ -719,7 +719,7 @@ void _LikelihoodFunction::PopulateConditionalProbabilities (long in for (long r1 = lowerBound, r2 = lowerBound2; r1 < upperBound; r1++,r2++) { if (siteCorrectors) { long scv = *siteCorrectors; - + if (currentRateCombo == 0L) { // first entry buffer[r1] = currentRateWeight * buffer[r2]; scalers.lData[r1] = scv; @@ -735,7 +735,7 @@ void _LikelihoodFunction::PopulateConditionalProbabilities (long in } } } - + siteCorrectors++; } else { buffer[r1] += 
currentRateWeight * buffer[r2]; @@ -820,7 +820,7 @@ _List* _LikelihoodFunction::RecoverAncestralSequencesMarginal (long index, _Ma { _DataSetFilter const* dsf = GetIthFilter(index); - + _TheTree *blockTree = (_TheTree*)LocateVar(theTrees.lData[index]); long patternCount = dsf->GetPatternCount (), @@ -1199,7 +1199,7 @@ void _LikelihoodFunction::SetupParameterMapping (void) if (smoothingReduction <= 0.0 || smoothingReduction >= 1.0) { smoothingReduction = 0.8; } - + for (unsigned long pIndex = 0; pIndex < indexInd.lLength; pIndex++) { _Variable* cv = GetIthIndependentVar(pIndex); @@ -1276,8 +1276,8 @@ _Parameter _LikelihoodFunction::SumUpSiteLikelihoods (long index, const _Paramet WarnError ("Constant-on-partition categories are currently not supported by the evaluation engine"); } else { for (unsigned long patternID = 0UL; patternID < pattern_count; patternID++) { - - + + long patternFrequency = index_filter->GetFrequency(patternID); if (patternFrequency > 1) { logL += myLog(patternLikelihoods[patternID])*patternFrequency; @@ -1321,11 +1321,11 @@ _Parameter _LikelihoodFunction::SumUpSiteLikelihoods (long index, const _Paramet _AssociativeList* _LikelihoodFunction::CollectLFAttributes (void) const { _AssociativeList * result = new _AssociativeList; - + _List model_list, filter_list, frequency_list; - + _SimpleList aux_list; InsertVarIDsInList (result, "Categories", GetCategoryVars ()); @@ -1347,10 +1347,10 @@ _AssociativeList* _LikelihoodFunction::CollectLFAttributes (void) const { aux_list << ith_tree->GetAVariable(); filter_list < new _String (*GetIthFilterName (component)); frequency_list < new _String (*GetIthFrequenciesName (component)); - + _SimpleList component_models; ith_tree->CompileListOfModels(component_models); - + if (component_models.lLength == 1UL) { model_list << modelNames (component_models(0)); } else { @@ -1359,10 +1359,10 @@ _AssociativeList* _LikelihoodFunction::CollectLFAttributes (void) const { } InsertVarIDsInList (result, "Trees", aux_list); 
InsertStringListIntoAVL (result, "Models", _SimpleList (model_list.lLength,0,1), model_list); - + aux_list.Clear(); aux_list.Populate (partition_count, 0, 1); - + InsertStringListIntoAVL (result, "Datafilters", aux_list, filter_list); InsertStringListIntoAVL (result, "Base frequencies", aux_list, frequency_list); @@ -1378,7 +1378,7 @@ void _LikelihoodFunction::UpdateBlockResult (long index, _Parameter new_value) { while (computationalResults.GetUsed() <= index) { computationalResults.Store (0.0); } - + computationalResults.theData[index] = new_value; } diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index ec6f58238..722a33cff 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -1776,7 +1776,7 @@ bool _Matrix::IsReversible(_Matrix* freqs) { if (tr && tc) { _Polynomial * rcpF = (_Polynomial*)rcp->Mult(tr), * crpF = (_Polynomial*)crp->Mult(tc); - + //fprintf (stderr, "%s : %s\n", _String ((_String*)rcpF->toStr()).getStr(), _String ((_String*)crpF->toStr()).getStr()); compResult = rcpF->Equal(crpF); @@ -3359,8 +3359,8 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract } } else { - _Parameter _hprestrict_ * argData = secondArg.theData; - _Parameter _hprestrict_ * stData = storage.theData; + _Parameter * _hprestrict_ argData = secondArg.theData; + _Parameter * _hprestrict_ stData = storage.theData; long upto = secondArg.lDim - secondArg.lDim%4; @@ -3576,8 +3576,8 @@ void _Matrix::Multiply (_Matrix& storage, _Parameter c) { if (storageType == 1) { // numbers - _Parameter _hprestrict_ * destination = storage.theData; - _Parameter _hprestrict_ * source = theData; + _Parameter * _hprestrict_ destination = storage.theData; + _Parameter * _hprestrict_ source = theData; if (theIndex) { for (long k = 0; k < lDim; k++) @@ -3667,15 +3667,15 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) /* two square dense matrices */ { unsigned long cumulativeIndex = 0UL; - + const unsigned long dimm4 = (vDim >> 2) << 2; const 
_Parameter * row = theData; _Parameter * dest = storage.theData; - + #ifndef _SLKP_SSE_VECTORIZATION_ - + if (dimm4 == vDim) { InitializeArray (dest, lDim, 0.0); @@ -3686,18 +3686,18 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) if (vDim == 20UL) { // special case for amino-acids __m256d __attribute__ ((aligned (32))) col_buffer[5]; - + _Parameter quad1[4] __attribute__ ((aligned (32))), quad2[4] __attribute__ ((aligned (32))), quad3[4] __attribute__ ((aligned (32))), quad4[4] __attribute__ ((aligned (32))), quad5[4] __attribute__ ((aligned (32))); - + quad1 [0] = secondArg.theData[c]; quad1 [1] = secondArg.theData[c + 20UL]; quad1 [2] = secondArg.theData[c + 40UL]; quad1 [3] = secondArg.theData[c + 60UL]; - + quad2 [0] = secondArg.theData[c + 80UL]; quad2 [1] = secondArg.theData[c + 100UL]; quad2 [2] = secondArg.theData[c + 120UL]; @@ -3724,8 +3724,8 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) col_buffer[3] = _mm256_load_pd (quad4); col_buffer[4] = _mm256_load_pd (quad5); // - - + + _Parameter const * p = theData; for (unsigned long r = 0UL; r < 20UL; r ++, p += 20UL) { @@ -3743,8 +3743,8 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) } continue; } - - + + #endif const unsigned long @@ -3769,46 +3769,46 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) for (unsigned long i = 0UL, vector_index = c; i < secondArg.hDim; i += 4UL, vector_index += column_shift4) { /*#ifdef _SLKP_USE_AVX_INTRINSICS - + _Parameter quad1[4] __attribute__ ((aligned (32))); quad1[0] = secondArg.theData[vector_index]; quad1[1] = secondArg.theData[vector_index+secondArg.vDim], quad1[2] = secondArg.theData[vector_index+column_shift2], quad1[3] = secondArg.theData[vector_index+column_shift3]; __m256d __attribute__ ((aligned (32))) col_buffer = _mm256_load_pd (quad1); - + for (unsigned long r = 0UL; r < hDim; r ++) { - + //unsigned long element = r*vDim + i; __m256d __attribute__ ((aligned (32))) row_quad = _mm256_loadu_pd 
(theData + (r*vDim + i)); dest[r*vDim + c] += _avx_sum_4(_mm256_mul_pd (col_buffer,row_quad)); - + } - + #else*/ _Parameter c0 = secondArg.theData[vector_index], c1 = secondArg.theData[vector_index+secondArg.vDim], c2 = secondArg.theData[vector_index+column_shift2], c3 = secondArg.theData[vector_index+column_shift3]; - + for (unsigned long r = 0UL; r < hDim; r ++) { - + unsigned long element = r*vDim + i; - - + + _Parameter r0 = theData[element] * c0, r1 = theData[element+1] * c1, r2 = theData[element+2] * c2, r3 = theData[element+3] * c3; - + r0 += r1; r2 += r3; dest[r*vDim + c] += r0 + r2; - + } //#endif - - + + } } } else { @@ -3878,11 +3878,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) #endif for (long r = 0; r < hDim; r ++) { #ifdef _OPENMP -#if GCC_VERSION > 40400 #pragma omp parallel for default(none) shared(r,secondArg,storage) schedule(static) if (nt>1) num_threads (nt) -#else -#pragma omp parallel for default(none) shared(r) schedule(static) if (nt>1) num_threads (nt) -#endif #endif for (long c = 0; c < secondArg.vDim; c+= _HY_MATRIX_CACHE_BLOCK) { _Parameter cacheBlockInMatrix2 [_HY_MATRIX_CACHE_BLOCK][_HY_MATRIX_CACHE_BLOCK]; @@ -3978,8 +3974,8 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) _Parameter value = theData[k]; - _Parameter _hprestrict_ *res = storage.theData + (m-i); - _Parameter _hprestrict_ *secArg = secondArg.theData + i*vDim; + _Parameter * _hprestrict_ res = storage.theData + (m-i); + _Parameter * _hprestrict_ secArg = secondArg.theData + i*vDim; #ifdef _SLKP_USE_AVX_INTRINSICS __m256d value_op = _mm256_set1_pd (value); @@ -4013,8 +4009,8 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix& secondArg) // in the form of A_rc * B_cc' _Parameter value = theData[k]; - _Parameter _hprestrict_ *res = storage.theData + (m-i); - _Parameter _hprestrict_ *secArg = secondArg.theData + i*vDim; + _Parameter * _hprestrict_ res = storage.theData + (m-i); + _Parameter * _hprestrict_ secArg = secondArg.theData + 
i*vDim; for (unsigned long i = 0UL; i < loopBound; i+=4) { res[i] += value * secArg[i]; @@ -5847,8 +5843,8 @@ _Parameter _Matrix::Sqr (_Parameter* _hprestrict_ stash) { // loop interchange rocks! - _Parameter _hprestrict_ * column = stash+lDim; - _Parameter const _hprestrict_ * source = theData; + _Parameter * _hprestrict_ column = stash+lDim; + _Parameter const * _hprestrict_ source = theData; for (long j = 0; j < vDim; j++) { for (long c = 0; c < vDim; c++) { From 8819f1a9bf0159903ff79c14f84fb8271bf32618 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 20 Jul 2018 08:41:21 -0400 Subject: [PATCH 31/53] Relaxing stringency of block compute checks so that single sites (LEISR) don't fail --- src/core/likefunc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 6ea9d31ee..b00807a43 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -8220,7 +8220,7 @@ _Parameter _LikelihoodFunction::ComputeBlock (long index, _Parameter* siteRes, //fprintf (stderr, "CONGRUENCE CHECK %20.16g\n",fabs ((checksum-sum)/sum)); - if (fabs ((checksum-sum)/sum) > 1.e-12 * df->GetPatternCount ()) { + if (fabs ((checksum-sum)/sum) > 1.e-10 * df->GetPatternCount ()) { /*_Parameter check2 = t->ComputeTreeBlockByBranch (*sl, *branches, tcc, From bc7b63247ad9cbb92e3c6418d9f17025633589c6 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 20 Jul 2018 17:05:41 -0400 Subject: [PATCH 32/53] Fixing a small memory leak when executing NEXUS-encapsulated BFs --- res/TemplateBatchFiles/BGM.bf | 413 ++++++++++++++-------------------- src/core/batchlan.cpp | 42 ++-- src/core/include/batchlan.h | 2 + src/core/site.cpp | 2 +- 4 files changed, 191 insertions(+), 268 deletions(-) diff --git a/res/TemplateBatchFiles/BGM.bf b/res/TemplateBatchFiles/BGM.bf index f1a28382e..2a4255702 100644 --- a/res/TemplateBatchFiles/BGM.bf +++ b/res/TemplateBatchFiles/BGM.bf @@ -1,248 +1,165 @@ 
-ExecuteAFile("bayesgraph.ibf"); - -/* ________________________________________________________________ */ - -function checkNode (nID) -{ - if (nodeList[nID] == 0) - { - nodeList [nID] = 1; - fprintf (LAST_FILE_PATH, nID, "; "); - } - return 0; -} - -/* ________________________________________________________________ */ - -function obtainBGMParameters (_lfID) -{ - ChoiceList (ambChoice, "Treatment of Ambiguities",1,SKIP_NONE, - "Averaged","All possible resolutions are considered and averaged.", - "Resolved","The most frequent (for that site) resolution is chosen."); - - ExecuteAFile ("Utility/AncestralMapper.bf"); - ExecuteAFile ("Utility/DescriptiveStatistics.bf"); - ExecuteAFile ("Utility/GrabBag.bf"); - - site_map = {}; - _SITE_RESULTS = obtainSubstitutionMatrix ("lf",0,site_map,_OBSERVED_NS_); - site_map = {}; - branchCount = Rows(_SITE_RESULTS); - nodeCount = Columns(_SITE_RESULTS); - - substitution_counts = ({1,branchCount}["1"])*_SITE_RESULTS; - substitution_stats = GatherDescriptiveStats (substitution_counts); - - PrintDescriptiveStats ("Counts of inferred non-synonymous substitution by site",substitution_stats); - /* determine the appropriate lower and upper bounds */ - nontrivial_sites = substitution_counts["_MATRIX_ELEMENT_VALUE_>0"] * ({nodeCount,1})["1"]; - - if (nontrivial_sites[0] < 2) - { - fprintf (stdout, "\nERROR: BGM analysis requires at least 2 sites with non-synonymous susbtitutions\n"); - return site_map; - } - - for (k = 0+substitution_stats["Max"]; k >= 1; k=k-1) - { - nontrivial_sites = (substitution_counts["_MATRIX_ELEMENT_VALUE_>=k"]) * ({nodeCount,1})["1"]; - if (nontrivial_sites[0] >= 2) - { - break; - } - } - - cutoff = prompt_for_a_value ("Include only sites with at least this many total substitutions",Max(1,substitution_stats["Median"]), Max(1,substitution_stats["Min"]), k, 1); - - for (h=0; h=cutoff) - { - site_map[Abs(site_map)] = h; - } - } - - nodeCount = Abs (site_map); - fprintf (stdout, "\nFound ", nodeCount, " sites with at 
least one non-synonymous mutation\n"); - ChoiceList (num_parents,"Maximum parents",1,NO_SKIP, - /*0*/ "1","Each site can be conditionally dependant on at most ONE other site. This setting permits the processing of large datasets quickly", - /*1*/ "2","Each site can be conditionally dependant on at most TWO other sites. This setting permits the recovery of more complex dependancies, but is computationally costly. It may be too slow/memory hungry for more than 100 sites."); - - if (num_parents < 0) - { - return site_map; - } - num_parents = num_parents + 1; - BGM_MCMC_DURATION = prompt_for_a_value ("Run the MCMC chain for this many iterations",100000, 1000, 1e26, 1); - BGM_MCMC_BURNIN = prompt_for_a_value ("How many burn-in steps before the main chain is run",BGM_MCMC_DURATION$10, 100, 1e26, 1); - BGM_MCMC_SAMPLES = prompt_for_a_value ("Sample from the chain every so many steps",BGM_MCMC_DURATION$100, 10, BGM_MCMC_DURATION, 1); - - BGM_MCMC_SAMPLES = Max(1,(BGM_MCMC_DURATION-BGM_MCMC_BURNIN)$BGM_MCMC_SAMPLES); - - ChoiceList (resample,"Ancestral Resampling",1,NO_SKIP, - /*0*/ "No","Base inference on the maximum likelihood ancestal reconstruction only", - /*1*/ "Yes","In addition to maximum likelihood ancestral states, sample a number (S) of alternative ancestral reconstructions to assess robustness. 
Runs S additional BGM analyses [MPI Enabled]"); - - if (resample < 0) - { - return site_map; - } - if (resample > 0) - { - resample = prompt_for_a_value ("How many ancestral samples?",100,1,1e26,1); - } - - fprintf (stdout, "\nRunning a BGM on ", nodeCount, " nodes with", - "\n\t", Format(num_parents,20,0), " maximum parents per node", - "\n\t", Format(BGM_MCMC_BURNIN,20,0), " burn-in steps", - "\n\t", Format(BGM_MCMC_DURATION,20,0), " chain length", - "\n\t", Format(BGM_MCMC_SAMPLES,20,0), " samples\n"); - - if (resample > 0) - { - fprintf (stdout, "\nWill generate ", resample, " ancestral samples\n"); - } - _bgm_data = {}; - _bgm_data ["MAP"] = site_map; - _bgm_data ["MATRIX"] = _SITE_RESULTS; - _bgm_data ["BGM_MCMC_DURATION"] = BGM_MCMC_DURATION; - _bgm_data ["BGM_MCMC_BURNIN"] = BGM_MCMC_BURNIN; - _bgm_data ["BGM_MCMC_SAMPLES"] = BGM_MCMC_SAMPLES; - _bgm_data ["PARENTS"] = num_parents; - _bgm_data ["RESAMPLE"] = resample; - return _bgm_data; -} - -/* ________________________________________________________________ */ - -function obtainSubstitutionMatrix (_lfID, sample_flag, site_map, _filterMatrix) -{ - _ancestral_id = _buildAncestralCacheInternal (_lfID, 0, _sample_flag); - - _fd = _filterDimensions (_ancestral_id); - if (Abs(site_map) == 0) - { - for (_k = 0; _k < _fd[0]; _k=_k+1) - { - site_map[_k] = _k; - } - } - else - { - _fd[0] = Abs (site_map); - } - _theMatrix = {_fd[1],_fd[0]}; - - for (_k = 0; _k < Abs(site_map); _k = _k+1) - { - _subsitution_matrix = _countSubstitutionsByBranchSite (_ancestral_id,site_map[_k],_filterMatrix); - for (_j = 0; _j < _fd[1]; _j=_j+1) - { - _theMatrix [_j][_k] = _subsitution_matrix[_j]; - } - } - - _destroyAncestralCache (_ancestral_id); - return _theMatrix; -} - -/* ________________________________________________________________ */ - -function handleMPIBGM (_bgm_data, jobID) -{ - if (MPI_NODE_COUNT <= 1) - { - if (jobID >= 0) - { - _sample_results [jobID] = runBGM(_bgm_data); - fprintf (stdout, "Ancestral sample ", jobID 
+ 1, "\n"); - } - } - else - { - mpiNode = 0; - jobToSend = ""; - if (jobID >= 0) - { - bgmFilePath = HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "BGM.bf"; - jobToSend * 128; - jobToSend * ("ExecuteAFile (\""+bgmFilePath+"\");"); - jobToSend * (""+_bgm_data); - jobToSend * ("; return runBGM(_hyphyAssociativeArray);"); - jobToSend * 0; - for (mpiNode = 0; mpiNode < MPI_NODE_COUNT-1; mpiNode=mpiNode+1) - { - if (bgm_MPI[mpiNode] < 0) - { - break; - } - } - } - doReceive = (jobID < 0) || (mpiNode == MPI_NODE_COUNT-1); - if (doReceive) - { - MPIReceive (-1, mpiNode, _jobResult); - mpiNode = mpiNode-1; - receivedID = bgm_MPI [mpiNode]; - fprintf (stdout, "Ancestral sample ", receivedID + 1, " from node ", mpiNode+1, "\n"); - ExecuteCommands ("_sample_results [" + receivedID + "] = " + _jobResult); - bgm_MPI[mpiNode] = -1; - } - - if (Abs(jobToSend)) - { - bgm_MPI[mpiNode] = jobID; - MPISend (mpiNode+1,jobToSend); - } - } - return 0; -} - - -function runBGM (_bgm_data) -{ - num_nodes = Abs (_bgm_data["MAP"]); - num_parents = _bgm_data["PARENTS"]; - num_parents = num_parents$1; - - branches = Rows(_bgm_data["MATRIX"]); - - BGM_MCMC_DURATION = _bgm_data ["BGM_MCMC_DURATION"]; - BGM_MCMC_BURNIN = _bgm_data ["BGM_MCMC_BURNIN"]; - BGM_MCMC_SAMPLES = _bgm_data ["BGM_MCMC_SAMPLES"]; - - /* convert data to matrix form */ - bgm_data_matrix = {branches,num_nodes}; - - for (k = 0; k < num_nodes; k=k+1) - { - i = (_bgm_data["MAP"])[k]; - for (j = 0; j < branches; j=j+1) - { - bgm_data_matrix[j][k] = (_bgm_data["MATRIX"])[j][i]; - } - } - - nodes = {}; - for (k = 0; k < num_nodes; k = k+1) - { - /* Arguments: - 1. node name, must be a string - 2. maximum number of parents - 3. prior sample size - always uninformative (count split evenly across levels) - - if we were truly Bayesian, we would let the user set informative priors.. - 4. 
number of levels - always binary in this case (substitution mapped to branch) - */ - node_name = ""+k; - nodes[Abs(nodes)] = add_discrete_node (node_name, num_parents, 0, 2); - } - - BayesianGraphicalModel gen_bgm = (nodes); - - // no imputation of missing data (setting args to 0) - attach_data("gen_bgm", bgm_data_matrix, 0, 0, 0); - - bgm_result = order_MCMC("gen_bgm", BGM_MCMC_DURATION, BGM_MCMC_BURNIN, BGM_MCMC_SAMPLES); - - return bgm_result; -} +RequireVersion("2.3.13"); + +// ---- load library files -------------------------------- +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/IOFunctions.bf"); +LoadFunctionLibrary("libv3/stats.bf"); + +LoadFunctionLibrary("libv3/tasks/ancestral.bf"); +LoadFunctionLibrary("libv3/tasks/alignments.bf"); +LoadFunctionLibrary("libv3/tasks/estimators.bf"); +LoadFunctionLibrary("libv3/tasks/trees.bf"); +LoadFunctionLibrary("libv3/tasks/mpi.bf"); + +LoadFunctionLibrary("libv3/models/codon/MG_REV.bf"); + +LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf"); +LoadFunctionLibrary("SelectionAnalyses/modules/selection_lib.ibf"); + +LoadFunctionLibrary("bayesgraph.ibf"); + + + + + +// --- display analysis information ----------------------- + +bgm.analysis_description = { + terms.io.info: "BGM (Bayesian Graphical Model) uses a + maximum likelihood ancestral state reconstruction to + map non-synonymous substitution events to branches in the + phylogeny and then analyzes the joint distribution of the + substitution map using a Bayesian graphical model (network). + Next, a Markov chain Monte Carlo analysis is used to generate + a random sample of network structures from the posterior + distribution given the data. Each node in the network + represents a codon site in the alignment, and links (edges) + between nodes indicate high posterior support for correlated + substitutions at the two sites over time, which implies + coevolution. 
+ ", + terms.io.version: "1.0", + terms.io.reference: "Spidermonkey: rapid detection of co-evolving sites using Bayesian graphical models (2008). _Bioinformatics_ 24(17): 1949-1950", + terms.io.authors: "Art FY Poon, Fraser I Lewis, Simon DW Frost and Sergei LK Pond", + terms.io.contact: "apoon42@uwo.ca", + terms.io.requirements: "in-frame codon alignment and a phylogenetic tree" +}; +io.DisplayAnalysisBanner(bgm.analysis_description); + + + +// --- enviornment setup ------------------------- + + + +// --- globals ---------------------------- + +bgm.samples = 10; +bgm.pvalue = 0.1; + +bgm.json = { + terms.json.analysis: bgm.analysis_description, + terms.json.fits: {}, + terms.json.timers: {}, +}; + +bgm.scaler_prefix = "BGM.scaler"; + +bgm.by_site = "by-site"; +bgm.AVERAGED = "AVERAGED"; +bgm.RESOLVED = "RESOLVED"; + +bgm.nsteps = io.PromptUser("\n>Select the number of MCMC steps to sample [default 100000]", 1e5, 0, 1e9, TRUE); +bgm.burnin = io.PromptUser("\n>Select the number of MCMC steps to discard as burn-in [default 10000]", 1e4, 0, 1e9, TRUE); +bgm.nsamples = io.PromptUser("\n>Select the number of steps to extract from the chain sample [default 100]", 100, 0, bgm.nsteps, TRUE); +bgm.max_parents = io.PromptUser ("\n>Select the maximum number of parents allowed per node [default 1]", 1, 1, 3, TRUE); + + +// --- execution ------------------------- + +// load and pre-process codon alignment +namespace bgm { + LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); + load_file ("bgm"); +} + + +// fit nucleotide general time-reversible model +// we always re-estimate branch lengths? Can we constrain to scale? +namespace bgm { + doGTR ("bgm"); +} + +// what does this do? 
+estimators.fixSubsetOfEstimates(bgm.gtr_results, bgm.gtr_results[terms.global]); + +bgm.user_tree = bgm.trees["0"]; + +namespace bgm { + doPartitionedMG("bgm", TRUE); // keep LF +} + + +// --- ancestral reconstruction -------------------- +bgm.ancestors = ancestral.build (bgm.partitioned_mg_results[terms.likelihood_function], 0, None); + + +bgm.code = bgm.codon_data_info[utility.getGlobalValue("terms.code")]; + +function bgm.nsfilter(state1, state2, ancestral_data) { + if (bgm.code[state1] != bgm.code[state2] && + bgm.code[state1] != genetic_code.stop_code && + bgm.code[state2] != genetic_code.stop_code) { + return 1; + } else { + return 0; + } +} + +bgm.counts = ancestral.ComputeSubstitutionCounts( + bgm.ancestors, + None, // all branches + "bgm.nsfilter", // substitution filter + None // site filter (e.g., MinCount) +); + + + +// --- BGM analysis ------------------------------- + +lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { + /* convert data to matrix form */ + nodes = {}; + num_nodes = Abs (_bgm_data["Sites"]); + for (k = 0; k < num_nodes; k = k+1) + { + /* Arguments: + 1. node name, must be a string + 2. maximum number of parents + 3. prior sample size - always uninformative (count split evenly across levels) + - if we were truly Bayesian, we would let the user set informative priors.. + 4. 
number of levels - always binary in this case (substitution mapped to branch) + */ + node_name = ""+ ((_bgm_data["Sites"])[k] + 1); + nodes + add_discrete_node (node_name, max_parents, 0, 2); + } + + BayesianGraphicalModel gen_bgm = (nodes); + attach_data("gen_bgm", _bgm_data["Counts"], 0, 0, 0); + bgm_result = order_MCMC("gen_bgm", nsteps, burnin, nsamples); + return bgm_result; +} + + +bgm.results = bgm.run (bgm.counts, bgm.burnin, bgm.nsteps, bgm.nsamples, bgm.max_parents); + + +// --- process BGM results ------------------------------- + +bgm.trace = {1, bgm.nsamples}; // row vector +for (bgm.i = 0; bgm.i < bgm.nsamples; bgm.i += 1) { + bgm.trace[bgm.i] = bgm.results[bgm.i][0]; +} + + + diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index c1313dc8c..bbd8636b6 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1125,29 +1125,33 @@ void _ExecutionList::Init (_String* namespaceID) { } - //____________________________________________________________________________________ -_ExecutionList::~_ExecutionList (void) -{ - if (cli) { - delete [] cli->values; - delete [] cli->stack; - delete cli; - cli = nil; - } - - if (profileCounter) { - DeleteObject (profileCounter); - profileCounter = nil; - } +void _ExecutionList::ClearExecutionList (void) { + if (cli) { + delete [] cli->values; + delete [] cli->stack; + delete cli; + cli = nil; + } + + if (profileCounter) { + DeleteObject (profileCounter); + profileCounter = nil; + } + + DeleteAndZeroObject (stdinRedirect); + DeleteAndZeroObject (stdinRedirectAux); + DeleteAndZeroObject (nameSpacePrefix); + + ResetFormulae(); + DeleteAndZeroObject (result); +} - DeleteObject (stdinRedirect); - DeleteObject (stdinRedirectAux); - DeleteObject (nameSpacePrefix); +//____________________________________________________________________________________ - ResetFormulae(); - DeleteObject (result); +_ExecutionList::~_ExecutionList (void) { + ClearExecutionList(); } 
//____________________________________________________________________________________ diff --git a/src/core/include/batchlan.h b/src/core/include/batchlan.h index e3768539b..79bfc3768 100644 --- a/src/core/include/batchlan.h +++ b/src/core/include/batchlan.h @@ -84,6 +84,8 @@ class _ExecutionList: public _List // a sequence of commands to be executed virtual ~_ExecutionList (void); + + void ClearExecutionList (void); virtual BaseRef makeDynamic (void); diff --git a/src/core/site.cpp b/src/core/site.cpp index f82061fb1..765e375ac 100644 --- a/src/core/site.cpp +++ b/src/core/site.cpp @@ -5415,7 +5415,7 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, if (nexusBF != ex) { DeleteObject (nexusBF); } else { - ex->Clear(); + ex->ClearExecutionList(); } nexusBFBody = emptyString; } else if (execBF == 0) { From 246be2cdbb419bbbc9363ef469580a3b1a3da2df Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 20 Jul 2018 17:22:45 -0400 Subject: [PATCH 33/53] Fixing the previous bug fix (so no seg fault); modifying CMakeLists to properly do SSE and AVX checks when clang is the compiler --- src/core/site.cpp | 723 +++++++++++++++++++++++----------------------- 1 file changed, 362 insertions(+), 361 deletions(-) diff --git a/src/core/site.cpp b/src/core/site.cpp index 765e375ac..7dc44b157 100644 --- a/src/core/site.cpp +++ b/src/core/site.cpp @@ -1,21 +1,21 @@ /* - + HyPhy - Hypothesis Testing Using Phylogenies. 
- + Copyright (C) 1997-now Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) - + Significant contributions from: Spencer V Muse (muse@stat.ncsu.edu) Simon DW Frost (sdf22@cam.ac.uk) - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -23,10 +23,10 @@ distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -34,7 +34,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- + */ #define HYPHY_SITE_DEFAULT_BUFFER_SIZE 256 @@ -118,7 +118,7 @@ _List _TranslationTable::_list_of_default_tables ( _List() < "ACGT" const _String& _TranslationTable::GetDefaultTable(long tableType) { - + switch (tableType) { case HY_TRANSLATION_TABLE_BINARY: return *(_String*)_list_of_default_tables (3); @@ -129,9 +129,9 @@ const _String& _TranslationTable::GetDefaultTable(long tableType) { case HY_TRANSLATION_TABLE_DNA: return *(_String*)_list_of_default_tables (0); } - + return emptyString; - + } _TranslationTable::_TranslationTable (void) @@ -191,7 +191,7 @@ long _TranslationTable::TokenCode (char token) const for (unsigned long i = 0; i < resolution_count; i++) { theCode |= (1L << receptacle[i]); // set the right bit } - + return theCode; } @@ -270,15 +270,15 @@ long _TranslationTable::LengthOfAlphabet (void) const { const _String& _TranslationTable::ExpandToken (char token) const { long buf [256]; - + long resolution_count = TokenResolutions (token, buf); _String const * base_set = &GetAlphabetString(); _String expansion (base_set->sLength, true); - + for (unsigned long tc = 0; tc < resolution_count; tc++) { expansion << base_set->sData[buf[tc]]; } - + expansion.Finalize(); return expansion; } @@ -290,29 +290,29 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r if (tokens.sLength == 1UL) { return TokenResolutions (tokens.getChar(0UL), receptacle, gapToOnes); } else { - + long * large_store, large_store_static [HYPHY_SITE_DEFAULT_BUFFER_SIZE]; - - + + if (baseLength * tokens.sLength + tokens.sLength >= HYPHY_SITE_DEFAULT_BUFFER_SIZE) { large_store = new long [baseLength * tokens.sLength + tokens.sLength]; } else { large_store = large_store_static; } - + /* large_store is a linear array which stores the following data - + [0,unitLength) -- the number of resolutions for the i-th character - + [unitLength,unitLength + baseLength] -- the actual resolutions for the 1st char [unitLength + baseLength, unitLength + 2*baseLength] -- 
the actual resolutions for the 2nd char ... */ - + long resolution_count = 1L; - + for (unsigned long char_index = 0; char_index < tokens.sLength ; char_index++) { large_store [char_index] = TokenResolutions (tokens.sData[char_index], large_store + tokens.sLength + baseLength * char_index, gapToOnes); if (gapToOnes && large_store [char_index] == 0) { @@ -321,12 +321,12 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r } resolution_count *= large_store [char_index] > 0 ? large_store [char_index] : 0; } - + if (resolution_count == 1L) { for (unsigned long char_index = 0; char_index < tokens.sLength ; char_index++) { large_store[char_index] = large_store[tokens.sLength + baseLength * char_index]; } - + if (receptacle) { receptacle [0] = CombineDigits(large_store, tokens.sLength, baseLength); } else { @@ -335,17 +335,17 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r } else { if (receptacle) { // handle cases of 2 and 3 characters separately since they are the most common - + if (resolution_count > HYPHY_SITE_DEFAULT_BUFFER_SIZE) { FlagError(_String ("Too many ambiguous states in call to ") & _String (__PRETTY_FUNCTION__).Enquote()); return -1L; } - + if (tokens.sLength == 3) { long digits[3], *resolution_arrays [3] = {large_store + tokens.sLength, large_store + tokens.sLength + baseLength,large_store + tokens.sLength + 2*baseLength}, resolutions_index = 0L; - + for (long digit1 = 0L; digit1 < large_store[0]; digit1 ++) { for (long digit2 = 0L; digit2 < large_store[1]; digit2 ++) { for (long digit3 = 0L; digit3 < large_store[2]; digit3 ++) { @@ -353,28 +353,28 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r } } } - + } else { if (tokens.sLength == 2) { long digits[2], *resolution_arrays [2] = {large_store + tokens.sLength,large_store + tokens.sLength + baseLength}, resolutions_index = 0L; - + for (long digit1 = 0L; digit1 < large_store[0]; digit1 ++) { for (long digit2 = 
0L; digit2 < large_store[1]; digit2 ++) { receptacle[resolutions_index++] = resolution_arrays[0][digit1] * baseLength + resolution_arrays[1][digit2]; } } } else { // more than 3 tokens [rare!] - + if (tokens.sLength >= 32) { FlagError(_String ("The token string is too long in call to ") & _String (__PRETTY_FUNCTION__).Enquote()); return -1L; } - + long digits[32] {}, resolutions_index = 0L; - + do { // assemble the current token, backwards long this_resolution = 0L, @@ -383,9 +383,9 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r this_resolution += weight * *(large_store + tokens.sLength + baseLength * digit + digits[digit]); weight *= tokens.sLength; } - + receptacle[resolutions_index++] = this_resolution; - + for (long digit = tokens.sLength - 1; digit >= 0; digit --) { if (++digits[digit] < large_store[digit]) { break; @@ -394,21 +394,21 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r digits[digit] = 0L; } } - + } while (digits[0] < large_store[0]); - + } } } else { resolution_count = -1L; } } - - + + if (large_store != large_store_static) { delete [] large_store; } - + return resolution_count; } @@ -417,10 +417,10 @@ long _TranslationTable::MultiTokenResolutions (_String const& tokens, long* r //_________________________________________________________ long _TranslationTable::TokenResolutions (char token, long* receptacle, bool gapToOnes) const { - + long custom_code = tokensAdded.sLength ? 
tokensAdded.Find (token):-1; long resolution_counter = -1L; - + if (custom_code != -1) { resolution_counter = 0L; unsigned long shifter = 1L; @@ -431,107 +431,107 @@ long _TranslationTable::TokenResolutions (char token, long* receptacle, bool shifter >>= 1; } } else { - + if (baseSet.sLength) { - + long base_char = baseSet.Find(token); // OPTIMIZE FLAG linear search: // SLKP 20071002 should really be a 256 char lookup table - + if ( base_char !=-1 ) { resolution_counter = 1; receptacle[0] = base_char; } } else { - + if (baseLength==4) { - + switch (token) { case 'A': resolution_counter = 1L; receptacle[0]=0; break; - + case 'C': resolution_counter = 1L; receptacle[0]=1; break; - + case 'G': resolution_counter = 1L; receptacle[0]=2; break; - + case 'T': case 'U': resolution_counter = 1L; receptacle[0]=3; break; - + case 'Y': resolution_counter = 2L; receptacle[0]=1; receptacle[1]=3; break; - + case 'R': resolution_counter = 2L; receptacle[0]=0; receptacle[1]=2; break; - + case 'W': resolution_counter = 2L; receptacle[0]=0; receptacle[1]=3; break; - + case 'S': resolution_counter = 2L; receptacle[0]=1; receptacle[1]=2; break; - + case 'K': resolution_counter = 2L; receptacle[0]=2; receptacle[1]=3; break; - + case 'M': resolution_counter = 2L; receptacle[0]=0; receptacle[1]=1; break; - + case 'B': resolution_counter = 3L; receptacle[0]=1; receptacle[1]=2; receptacle[2]=3; break; - + case 'D': resolution_counter = 3L; receptacle[0]=0; receptacle[1]=2; receptacle[2]=3; break; - + case 'H': resolution_counter = 3L; receptacle[0]=0; receptacle[1]=1; receptacle[2]=3; break; - + case 'V': resolution_counter = 3L; receptacle[0]=0; receptacle[1]=1; receptacle[2]=2; break; - + case 'X': case 'N': case '?': @@ -543,128 +543,128 @@ long _TranslationTable::TokenResolutions (char token, long* receptacle, bool receptacle[2]=2; receptacle[3]=3; break; - + case '-': resolution_counter = 0L; break; } } else { if (baseLength==20) { - - + + switch (token) { case 'A': resolution_counter 
= 1L; receptacle[0]=0; break; - + case 'B': resolution_counter = 2L; receptacle[0]=2; receptacle[1]=11; break; - + case 'C': resolution_counter = 1L; receptacle[0]=1; break; - + case 'D': resolution_counter = 1L; receptacle[0]=2; break; - + case 'E': resolution_counter = 1L; receptacle[0]=3; break; - + case 'F': resolution_counter = 1L; receptacle[0]=4; break; - + case 'G': resolution_counter = 1L; receptacle[0]=5; break; - + case 'H': resolution_counter = 1L; receptacle[0]=6; break; - + case 'I': resolution_counter = 1L; receptacle[0]=7; break; - + case 'K': resolution_counter = 1L; receptacle[0]=8; break; - + case 'L': resolution_counter = 1L; receptacle[0]=9; break; - + case 'M': resolution_counter = 1L; receptacle[0]=10; break; - + case 'N': resolution_counter = 1L; receptacle[0]=11; break; - + case 'P': resolution_counter = 1L; receptacle[0]=12; break; - + case 'Q': resolution_counter = 1L; receptacle[0]=13; break; - + case 'R': resolution_counter = 1L; receptacle[0]=14; break; - + case 'S': resolution_counter = 1L; receptacle[0]=15; break; - + case 'T': resolution_counter = 1L; receptacle[0]=16; break; - + case 'V': resolution_counter = 1L; receptacle[0]=17; break; - + case 'W': resolution_counter = 1L; receptacle[0]=18; break; - + case 'Y': resolution_counter = 1L; receptacle[0]=19; break; - + case 'Z': resolution_counter = 2L; receptacle[0]=3; receptacle[1]=13; break; - + case 'X': case '?': case '.': @@ -683,18 +683,18 @@ long _TranslationTable::TokenResolutions (char token, long* receptacle, bool } else // binary { - + switch (token) { case '0': resolution_counter = 1L; receptacle[0]=0; break; - + case '1': resolution_counter = 1L; receptacle[0]=1; break; - + case 'X': case '?': case '.': @@ -709,21 +709,21 @@ long _TranslationTable::TokenResolutions (char token, long* receptacle, bool } break; } - + } } } } - + if (resolution_counter == 0L && gapToOnes) { for (unsigned long i = 0UL; i < baseLength; i++) { receptacle[i] = i; } return baseLength; } - + 
return resolution_counter; - + } //_________________________________________________________ @@ -765,7 +765,7 @@ const _String& _TranslationTable::GetAlphabetString (void) const { if (baseSet.sLength) { return baseSet; } - + if (baseLength == 4) { return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA); } else if (baseLength == 20) { @@ -773,7 +773,7 @@ const _String& _TranslationTable::GetAlphabetString (void) const { } else { return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY); } - + return emptyString; } @@ -1662,7 +1662,7 @@ _Parameter _DataSet::CheckAlphabetConsistency(void) total = 0; bool checks [256]; - + char gapChar = theTT->GetGapChar(); _String baseSymbols; @@ -1775,7 +1775,7 @@ void _DataSet::MatchIndices (_Formula&f, _SimpleList& receptacle, bool isVert _String varName = isVert ? "siteIndex" : "speciesIndex"; varName = AppendContainerName(varName, scope); _Variable *v = CheckReceptacle (&varName, emptyString, false); - + //fprintf (stderr, "\n_DataSet::MatchIndices %d %s [%s] %s\n", isVert, scope ? 
scope->sData : "none", varName.sData, ((_String*)f.toStr())->sData); for (long i=0L; itheTT = joint_table; @@ -1949,7 +1949,7 @@ _DataSet* _DataSet::Combine (_SimpleList const& ref) { unsigned long max_sites = 0UL, total_species_count = 0UL; - + char emptyStringSlot = joint_table->GetSkipChar(); @@ -1964,7 +1964,7 @@ _DataSet* _DataSet::Combine (_SimpleList const& ref) { _DataSet const *current_data_set = (_DataSet const*)dataSetList(ref.Element (set_index)); unsigned long sites_in_this_set = current_data_set->NoOfColumns(), sequences_in_this_set = current_data_set->NoOfSpecies(); - + for (unsigned long seq_index = 0UL; seq_index < sequences_in_this_set; seq_index++) { combined_data->AddName (*current_data_set->GetSequenceName(seq_index)); if (seq_index == 0UL && set_index == 0UL) { @@ -2214,33 +2214,33 @@ void _DataSetFilter::SetDimensions (void) //_______________________________________________________________________ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _SimpleList& map, _SimpleList& counts, short mode) const { indices.Clear(); map.Clear(); counts.Clear(); - + unsigned long sites = theMap.lLength, seqs = theNodeMap.lLength, unit = GetUnitLength(); - + if (mode == 0) { - _SimpleList hashSupport; + _SimpleList hashSupport; _AVLListXL sequenceHashes (&hashSupport); - + for (unsigned long sequenceIndex = 0; sequenceIndex < seqs; sequenceIndex ++){ _String * thisSequence = GetSequenceCharacters (sequenceIndex); - + long sequenceHash = thisSequence->Adler32(), f = sequenceHashes.Find ((BaseRef)sequenceHash), rawSequenceIdx = theNodeMap.lData[sequenceIndex]; - + DeleteObject (thisSequence); - + _SimpleList * sameScore = nil; if (f>=0) { sameScore = (_SimpleList*)sequenceHashes.GetXtra (f); for (long k = 0; klLength; k++) { bool fit = true; f = sameScore->lData[k]; - + long fRaw = theNodeMap.lData[indices.lData[f]]; - + for (unsigned long site = 0; site < sites && fit; site++){ for (unsigned long unitIndex = 0; unitIndex < unit; 
unitIndex ++){ _Site * thisSite = theData->GetSite(theMap.lData[unit*site+unitIndex]); @@ -2250,11 +2250,11 @@ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _Si } } } - + if (fit) { map << f; counts.lData[f] ++; - + } else { f = -1; } @@ -2265,39 +2265,39 @@ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _Si sameScore = (_SimpleList*)checkPointer(new _SimpleList); sequenceHashes.Insert ((BaseRef)sequenceHash,(long)sameScore,false); } - + (*sameScore) << indices.lLength; map << indices.lLength; indices << sequenceIndex; counts << 1; } } - + } else{ long vd = GetDimension(true); - + _Parameter *translatedVector = (_Parameter*)checkPointer(new _Parameter [vd]), *translatedVector2= (_Parameter*)checkPointer(new _Parameter [vd]); - + _String state1 (unit,false), state2 (unit,false); - + sites = sites / unit; - + for (long sequenceIndex = 0; sequenceIndex < seqs; sequenceIndex++) { bool checkState = false; for (long idx=0; idx=0 && idx1 >=0) { if (idx1==idx2) { continue; @@ -2306,33 +2306,33 @@ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _Si break; } } else { - - // check for equal ambigs + + // check for equal ambigs long k = 0; for (; k < vd; k++){ if (translatedVector[k] != translatedVector2[k]){ break; } } - + if (k == vd) continue; - + if (mode == 1){ - + long count1 = 0, count2 = 0; - + for (long t = 0; t0.0; count2 += translatedVector2[t]>0.0; } - + if (count1 < vd && count2 < vd) { checkState = false; break; } - + } else { bool first = mode==2, second = mode==2; @@ -2357,7 +2357,7 @@ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _Si if (translatedVector2[t]>0.0) { first |= (translatedVector[t]>0.0); } - } + } if (!(first&&second)) { checkState = false; break; @@ -2366,27 +2366,27 @@ unsigned long _DataSetFilter::FindUniqueSequences (_SimpleList& indices, _Si } } } - + if (checkState) { map << idx; counts.lData[idx] ++; break; } } - + if (!checkState){ map 
<< indices.lLength; indices << sequenceIndex; counts << 1; } - + } - + delete [] translatedVector; - delete [] translatedVector2; + delete [] translatedVector2; } - - + + return indices.lLength; } @@ -2526,7 +2526,7 @@ void _DataSetFilter::SetFilter (_DataSet const * ds, unsigned char unit, _Sim long f = siteIndices.Find ((BaseRef)colIndex); _SimpleList * sameScore = nil; - + if (f>=0) { sameScore = (_SimpleList*)siteIndices.GetXtra (f); for (long k = 0; klLength; k++) { @@ -2534,8 +2534,8 @@ void _DataSetFilter::SetFilter (_DataSet const * ds, unsigned char unit, _Sim f = sameScore->lData[k]; for (long j=0; fit&&(jGetSite(verticalList.lData[i+j]), - * site2 = ds->GetSite(theMap.lData[unit*f+j]); - + * site2 = ds->GetSite(theMap.lData[unit*f+j]); + for (long k=0; ksData[theNodeMap.lData[k]]!=site2->sData[theNodeMap.lData[k]]) { fit = false; @@ -2606,7 +2606,7 @@ long _DataSetFilter::FindSpeciesName (_List& s, _SimpleList& r) const { break; } } - + return r.lLength; } @@ -2822,11 +2822,11 @@ void _DataSetFilter::SetExclusions (_String* theList, bool filter) _AVLList exclusions (&holder); for (long k = 0; k < tokens.lLength; k++) { - + _String* kth_token = (_String*)tokens.GetItem(k); - + long posMarker = MapStringToCharIndex(*kth_token); - + if (posMarker < 0) { ReportWarning (_String("Exclusion request for '") & *kth_token &"' does not represent a unique state and will therefore be ignored."); @@ -2850,7 +2850,7 @@ void _DataSetFilter::SetExclusions (_String* theList, bool filter) _String* _DataSetFilter::GetExclusions (void) const { _String * res = new _String (16L, true); - + if (theExclusions.lLength) { for (long k=0; k= 0 && seqID < theNodeMap.lLength) { @@ -3723,10 +3723,10 @@ _String* _DataSetFilter::GetSequenceCharacters (long seqID) const{ //_______________________________________________________________________ _String* _DataSet::GetSequenceCharacters (long seqID) const{ - + unsigned long upTo = NoOfColumns(); _String * aSequence = new _String (upTo,true); 
- + if (seqID >= 0 && seqID < noOfSpecies) { for (unsigned long k2=0UL; k2getChar (seqID); @@ -3786,88 +3786,88 @@ void _DataSetFilter::UnFreeze (long site) //_________________________________________________________ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset_filter_ambiguity_resolution resolution_option) const { - + try { - + if (unitLength > 3) { throw _String("ComputePairwiseDifferences is not implemented for data filters with unit size > 3"); } - + long mxDim = GetDimension (true); - + _Matrix *res = new _Matrix (mxDim,mxDim,false,true); - + _Parameter *sm1 = new _Parameter[mxDim], *sm2 = new _Parameter[mxDim]; - - - + + + _String state1 (unitLength,false), state2 (unitLength,false); - - + + if (conversionCache.lLength == 0) { throw _String ("ComputePairwiseDifferences called on a filter with emptyString conversionCache"); } - + long *tcodes = conversionCache.lData+89, *ccodes = conversionCache.lData+1, ccount = conversionCache.lData[0]; - + for (unsigned long site_pattern = 0UL; site_pattern < theFrequencies.lLength; site_pattern++) { long s1 = -1, s2 = -1; - + int c1, c2; - + c1 = (((_String**)theData->lData)[theData->theMap.lData[theMap.lData[unitLength*site_pattern]]])->sData[theNodeMap.lData[i]], c2 = (((_String**)theData->lData)[theData->theMap.lData[theMap.lData[unitLength*site_pattern]]])->sData[theNodeMap.lData[j]]; - + if (unitLength == 1) { s1 = conversionCache.lData[(c1-40)*(undimension+1)+undimension], s2 = conversionCache.lData[(c2-40)*(undimension+1)+undimension]; } else { int c12 = (((_String**)theData->lData)[theData->theMap.lData[theMap.lData[unitLength*site_pattern+1]]])->sData[theNodeMap.lData[i]], c22 = (((_String**)theData->lData)[theData->theMap.lData[theMap.lData[unitLength*site_pattern+1]]])->sData[theNodeMap.lData[j]]; - - + + state1.sData[0] = c1; state1.sData[1] = c12; - + state2.sData[0] = c2; state2.sData[1] = c22; - + c1 = ccodes[c1-40]; c12 = ccodes[c12-40]; - + c2 = ccodes[c2-40]; c22 = 
ccodes[c22-40]; - + if (unitLength == 2) { if ((c1>=0)&&(c12>=0)) { s1 = tcodes[c1*ccount+c12]; } - + if ((c2>=0)&&(c22>=0)) { s2 = tcodes[c2*ccount+c22]; } } else { int c13 = (((_String**)theData->lData)[theData->theMap.lData[theMap.lData[unitLength*site_pattern+2]]])->sData[theNodeMap.lData[i]], c23 = (((_String**)theData->lData)[theData->theMap.lData[theMap.lData[unitLength*site_pattern+2]]])->sData[theNodeMap.lData[j]]; - + //printf ("\n%c %c", c13, c23); - + state1.sData[2] = c13; state2.sData[2] = c23; - + c13 = ccodes[c13-40]; c23 = ccodes[c23-40]; - + //printf (" %d %d %s %s\n", c13, c23, state1.sData, state2.sData); - + if ((c1>=0)&&(c12>=0)&&(c13>=0)) { s1 = tcodes[ccount*(c1*ccount+c12)+c13]; } - + if ((c2>=0)&&(c22>=0)&&(c23>=0)) { s2 = tcodes[ccount*(c2*ccount+c22)+c23]; } @@ -3884,28 +3884,28 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset res = dsf->ComputePairwiseDifferences (seq,site,0); }*/ - + if (s1>=0 && s2>=0) { // one to one res->theData[s1*mxDim+s2] += theFrequencies.lData[site_pattern]; } else { if (resolution_option != kAmbiguityHandlingSkip) { _Matrix * freqsAtSite = nil; - + if (resolution_option != kAmbiguityHandlingResolve) { _SimpleList //seqList, siteList; - - + + for (long si = 0; si < unitLength; si++) { siteList << theMap.lData[unitLength*site_pattern+si]; } - + _SimpleList copy_node_oder (theNodeMap); freqsAtSite = theData->HarvestFrequencies (unitLength, unitLength, 0, copy_node_oder, siteList); if (theExclusions.lLength) { long k = 0, u = GetDimension (false); - + for (long i = 0; itheData, freqsAtSite->theData, mxDim); } - + if (s1>=0) { // one to many if (unitLength>1) { @@ -3924,29 +3924,29 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset } else { Translate2Frequencies (c2,sm1,false); } - + if (freqsAtSite) { if (resolution_option == kAmbiguityHandlingAverageFrequencyAware) { _Parameter totalW = 0.0; - + for (long m=0; m0.0) { totalW += 
freqsAtSite->theData[m]; } - + if (totalW>0.0) { s1 = s1*mxDim; - + for (long m=0; m0.0) { res->theData[s1] += theFrequencies.lData[site_pattern]*freqsAtSite->theData[m]/totalW; } } - + } else { _Parameter maxW = 0.0; long maxIdx = -1; - + for (long m=0; m0.0) { _Parameter myWeight = freqsAtSite->theData[m]; @@ -3956,7 +3956,7 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset } } } - + if (maxIdx>=0) { res->theData[s1*mxDim+maxIdx] += theFrequencies.lData[site_pattern]; } @@ -3966,7 +3966,7 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset - if ambig resolves to one s1 - count as a match - otherwise - count all contributions equally */ - + if (sm1[s1] > 0.0) { res->theData[s1*mxDim+s1] += theFrequencies.lData[site_pattern]; } else { @@ -3976,11 +3976,11 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset ambCount ++; } } - + s1 *= mxDim; - + _Parameter addFac = theFrequencies.lData[site_pattern]/(_Parameter)ambCount; - + for (long m=0; m0.0) { res->theData[s1] += addFac; @@ -3996,27 +3996,27 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset } else { Translate2Frequencies (c1,sm1,false); } - + if (freqsAtSite) { if (resolution_option == kAmbiguityHandlingAverageFrequencyAware) { _Parameter totalW = 0.0; - + for (long m=0; m0.0) { totalW += freqsAtSite->theData[m]; } - + if (totalW>0.0) { for (long m=0; m0.0) { res->theData[s2] += theFrequencies.lData[site_pattern]*freqsAtSite->theData[m]/totalW; } } - + } else { _Parameter maxW = 0.0; long maxIdx = -1; - + for (long m=0; m0.0) { _Parameter myWeight = freqsAtSite->theData[m]; @@ -4026,7 +4026,7 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset } } } - + if (maxIdx>=0) { res->theData[maxIdx*mxDim+s2] += theFrequencies.lData[site_pattern]; } @@ -4040,7 +4040,7 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset if 
(sm1[m]>0.0) { ambCount ++; } - + _Parameter addFac = theFrequencies.lData[site_pattern]/(_Parameter)ambCount; { for (long m=0; m0) for (long m2=0; m20) { totalW += freqsAtSite->theData[m]*freqsAtSite->theData[m2]; } - + if (totalW>0.0) { for (long m=0; m0) @@ -4080,12 +4080,12 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset res->theData[m*mxDim+m2] += theFrequencies.lData[site_pattern]*freqsAtSite->theData[m]*freqsAtSite->theData[m2]/totalW; } } - + } else { _Parameter maxW = 0.0; long maxIdx = -1, maxIdx2 = -1; - + for (long m=0; m0) for (long m2=0; m2=0) { res->theData[maxIdx*mxDim+maxIdx2] += theFrequencies.lData[site_pattern]; } @@ -4106,7 +4106,7 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset long ambCount = 0, ambCount2 = 0, m = 0; - + for (; m0.0) { if (sm2[m]>0.0) { @@ -4118,10 +4118,10 @@ _Matrix* _DataSetFilter::ComputePairwiseDifferences (long i, long j, _hy_dataset ambCount2 ++; } } - + if (m==mxDim) { _Parameter addFac = theFrequencies.lData[site_pattern]/(_Parameter)(ambCount*ambCount2); - + for (long m=0; m0) for (long m2=0; m2b) { EXCHANGE (a,b); } - + if (a==b) { target[0]+=fc; } else { @@ -4349,29 +4349,29 @@ long _DataSetFilter::Translate2Frequencies (_String const& str, _Parameter* p long store [HYPHY_SITE_DEFAULT_BUFFER_SIZE], resolution_count = -1L; - + InitializeArray(parvect, dimension, 0.); - + if (unitLength == 1) { resolution_count = theData->theTT->TokenResolutions (str.sData[0],store,smear); } else { resolution_count = theData->theTT->MultiTokenResolutions(str,store, smear); } - + long mapped_resolution_count = theExclusions.lLength ? 
theExclusions.CorrectForExclusions(store, resolution_count) : resolution_count; - + /* handle the cases when no unambiguous resolutions were available */ for (long i = 0L; i < mapped_resolution_count; i++) { parvect[store[i]] = 1.; } - + if (mapped_resolution_count == 1L) { return store[0]; } if (mapped_resolution_count == 0L && resolution_count == 0L && smear) { InitializeArray(parvect, dimension, 1.); } - + return -1L; } @@ -4379,23 +4379,23 @@ long _DataSetFilter::Translate2Frequencies (_String const& str, _Parameter* p //_______________________________________________________________________ long _DataSetFilter::MapStringToCharIndex (_String& str) const { - + long store [HYPHY_SITE_DEFAULT_BUFFER_SIZE], resolution_count = -1L; - - + + if (unitLength == 1) { resolution_count = theData->theTT->TokenResolutions (str.sData[0],store); } else { resolution_count = theData->theTT->MultiTokenResolutions(str,store); } - + long mapped_resolution_count = theExclusions.lLength ? theExclusions.CorrectForExclusions(store, resolution_count) : resolution_count; - + if (mapped_resolution_count == 1L) { return store[0]; } - + return -1L; } @@ -4404,22 +4404,22 @@ long _DataSetFilter::MapStringToCharIndex (_String& str) const { long _DataSetFilter::Translate2Frequencies (char s, _Parameter* parvect, bool smear) const { long store [HYPHY_SITE_DEFAULT_BUFFER_SIZE], resolution_count = theData->theTT->TokenResolutions (s,store,smear); - + long mapped_resolution_count = theExclusions.lLength ? 
theExclusions.CorrectForExclusions(store, resolution_count) : resolution_count; - + if (mapped_resolution_count == 0L) { if (smear) { InitializeArray(parvect, dimension, 1.); return -1; } } - + InitializeArray (parvect, dimension, 0.); - + for (long i = 0L; i < mapped_resolution_count; i++) { parvect[store[i]] = 1.; } - + return resolution_count==1L?1L:-1L; } @@ -4458,12 +4458,12 @@ void _DataSetFilter::SetupConversion (void) while(c<127) { //InitializeArray(temp, undimension + 1UL, 0.0); - + Translate2Frequencies(c, temp, true); - + long resolution_count = -1; - - + + for (unsigned long i=0UL; itheTT->GetAlphabetString(); unsigned long alphabet_dim = alphabet.sLength; - + long ccache [88], uncorrected_dimension = GetDimension(false) ; @@ -4999,12 +4999,12 @@ void TrimPhylipLine (_String& CurrentLine, _DataSet& ds) { int fNS = CurrentLine.FirstNonSpaceIndex(), space2 = CurrentLine.FirstSpaceIndex (fNS + 1); - + // hack for PAML support if (space2 > fNS && isspace(CurrentLine.getChar (space2+1))) { _String sequence_name (CurrentLine,fNS, space2); CurrentLine.Trim(space2+2,-1); // chop out the name - ds.AddName(sequence_name); + ds.AddName(sequence_name); } else { _String sequence_name (CurrentLine,fNS, fNS+9); CurrentLine.Trim(fNS+10,-1); // chop out the name @@ -5393,7 +5393,7 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, lastNexusDataMatrix = result; long bfl = GetBFFunctionCount (); - + _ExecutionList * nexusBF = ex ? 
ex : new _ExecutionList; if (namespaceID) { nexusBF->SetNameSpace(*namespaceID); @@ -5416,6 +5416,7 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, DeleteObject (nexusBF); } else { ex->ClearExecutionList(); + ex->Clear(); } nexusBFBody = emptyString; } else if (execBF == 0) { @@ -5440,14 +5441,14 @@ BaseRef _DataSetFilter::toStr (unsigned long) //_________________________________________________________ void _DataSetFilter::PatternToSiteMapper (void* source, void* target, char mode, long padup) const { - + unsigned long site_count = GetSiteCountInUnits(); - + switch (mode) { case 0: { _Parameter * target_array = (_Parameter*) target, * source_array = (_Parameter*) source; - + for (unsigned site = 0UL; site < site_count; site++ ) { target_array [site] = source_array [duplicateMap.lData[site]]; } @@ -5460,20 +5461,20 @@ void _DataSetFilter::PatternToSiteMapper (void* source, void* target, char mo case 1: { long * target_array = (long*) target, * source_array = (long*) source; - + for (unsigned site = 0UL; site < site_count; site++ ) { target_array [site] = source_array [duplicateMap.lData[site]]; } for (long site = duplicateMap.lLength; site < padup; site++) { target_array [site] = 0; } - + break; } case 2: { long * target_array = (long*) target; _Parameter * source_array = (_Parameter*) source; - + for (unsigned site = 0UL; site < site_count; site++ ) { target_array [site] = source_array [duplicateMap.lData[site]]; } @@ -5505,7 +5506,7 @@ long _DataSetFilter::GetOriginalToShortMap(long index) //_________________________________________________________ _String const _DataSetFilter::GenerateConsensusString (_SimpleList* majority) const { - + if (unitLength > 3) { return emptyString; } @@ -5515,9 +5516,9 @@ _String const _DataSetFilter::GenerateConsensusString (_SimpleList* majority) co long char_states = GetDimension(false), *translation_buffer = new long [char_states]; - + _Parameter* count_buffer = new _Parameter [char_states]; - + 
for (unsigned long site_pattern = 0UL; site_patterntheTT->TokenResolutions ((*theData)(index_in_dataset, theNodeMap.lData[sequence_index],1),translation_buffer, false); - - + + if (resolution_count>1L) { _Parameter equal_weight = 1./resolution_count; for (long resolution_index = 0L; resolution_index < resolution_count; resolution_index++) { @@ -5540,11 +5541,11 @@ _String const _DataSetFilter::GenerateConsensusString (_SimpleList* majority) co } // find the residue with the highest frequency - + _Parameter max_weight = -1.; InitializeArray (translation_buffer, char_states, 0L); long max_char_count = 0L; - + for (unsigned long char_index = 0UL; char_index < char_states; char_index++) { if (StoreIfGreater(max_weight, count_buffer[char_index])) { max_char_count = 1; @@ -5555,7 +5556,7 @@ _String const _DataSetFilter::GenerateConsensusString (_SimpleList* majority) co } } } - + if (max_char_count > 1L) { pattern_consensus.sData[site_pattern]=theData->theTT->AmbigToLetter(translation_buffer, max_char_count); } else { @@ -5598,7 +5599,7 @@ void _DataSetFilter::ConvertCodeToLettersBuffered (long code, char unit, char lookup->Insert ((BaseRef)code, (long)newT, false); lookupV = newT->sData; } - + if (unit == 1) { storage[0] = lookupV[0]; } else { @@ -5616,48 +5617,48 @@ void _DataSetFilter::ConvertCodeToLettersBuffered (long code, char unit, char //_________________________________________________________ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { - - + + auto trim_to_10 = [] (const _String& seq_name) -> _String const& { if (seq_name.Length() >= 10) { return seq_name.Cut (0,9) & ' '; } return seq_name & _String (_String (" "), 11-seq_name.Length()); }; - + // write out the file with this dataset filter checkParameter (dataFilePrintFormat,dFPrintFormat,6.0); checkParameter (dataFileDefaultWidth,dFDefaultWidth,50.0); _Parameter gW; - + long outputFormat = dFPrintFormat, printWidth = dFDefaultWidth, gapWidth; - + unsigned long sequence_count = 
NumberSpecies(), site_count = GetSiteCount(); - + checkParameter (dataFileGapWidth,gW,10.0); if(!printWidth) { printWidth = 50; } - + gapWidth = gW; if (gapWidth<=0) { gapWidth = printWidth; } - + StringFileWrapper write_here (file ? nil : & string_buffer, file); - + if (outputFormat < 4 || outputFormat > 8) { // not NEXUS or serial if (!(theData->theTT->IsStandardNucleotide() || theData->theTT->IsStandardAA())) { _String * bSet = &theData->theTT->baseSet; - + write_here << "$BASESET:\"" << *bSet << "\"\n"; - + if (theData->theTT->tokensAdded.sLength) { for (long at = 0; at < theData->theTT->tokensAdded.sLength; at++) { write_here << "$TOKEN:\"" @@ -5669,33 +5670,33 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { } } } - + switch (outputFormat) { case 1: // hash-mark interleaved case 10: { // FASTA interleaved - + long sitesDone = 0, upTo; - + char seqDelimiter = (outputFormat==1)?'#':'>'; - + for (unsigned long i = 0UL; itheOriginalOrder.lLength) { upTo = theOriginalOrder.lLength; } - + for (unsigned long i = 0UL; iEnquote('\'') << ' '; } - + write_here << ";\nEND;\n\nBEGIN CHARACTERS;\n\tDIMENSIONS NCHAR = " << _String((long)theOriginalOrder.lLength) << ";\n\tFORMAT\n\t\t"; - + if (theData->theTT->IsStandardNucleotide()) { write_here << "DATATYPE = DNA\n"; } else { @@ -5828,7 +5829,7 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { write_here << "DATATYPE = BINARY\n"; } else { long alphabet_length = theData->theTT->baseSet.sLength; - + write_here << "\t\tSYMBOLS = \""; for (unsigned long bc = 0UL; bc < alphabet_length-1; bc++) { write_here << theData->theTT->baseSet.getChar (bc) @@ -5836,7 +5837,7 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { } write_here << theData->theTT->baseSet.getChar (alphabet_length-1) << "\"\n"; - + if (theData->theTT->tokensAdded.sLength) for (long at = 0; at < theData->theTT->tokensAdded.sLength; at++) { write_here << "\nEQUATE =\"" @@ -5859,27 
+5860,27 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { if (outputFormat%2) { write_here << "\n\t\tINTERLEAVE"; } - + write_here << "\n\t;\n\nMATRIX"; - - - + + + //compute space alignment for different taxa names // two passes - one to locate the max length and 2nd to compute padding lengths - + unsigned long max_length = 0UL; - + for (unsigned long i=0UL; isLength); } - + _SimpleList taxaNamesPadding; - + for (unsigned long i=0UL; isLength; } - - + + if (outputFormat%2==0) { // sequential for (unsigned long i=0UL; i< sequence_count; i++) { if (outputFormat == 4) { // labels @@ -5887,8 +5888,8 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { << GetSequenceName(i) << '\'' << _String (" ", taxaNamesPadding (i)); - - + + } else { write_here << kStringFileWrapperNewLine; } @@ -5899,15 +5900,15 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { } } else { long sitesDone = 0, upTo; - + while (sitesDone< site_count) { upTo = sitesDone+printWidth; - + if (upTo>site_count) { upTo = site_count; } - - + + for (unsigned long i=0UL; i< sequence_count; i++) { if (outputFormat == 5) { // labels write_here << "\n\t'" @@ -5917,22 +5918,22 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { } else { write_here << kStringFileWrapperNewLine; } - + write_here << ' '; for (long site_index = sitesDone; site_index < upTo; site_index++) { write_here << (*theData)(theOriginalOrder.lData[site_index],theNodeMap.lData[i],1); } - + } write_here << kStringFileWrapperNewLine << kStringFileWrapperNewLine; sitesDone = upTo; } - + } write_here << ";\nEND;"; break; } - + case 8: { for (unsigned long i = 0UL; i< sequence_count; i++) { write_here << (*theData)(theOriginalOrder(0),theNodeMap(i),1); @@ -5943,10 +5944,10 @@ void _DataSetFilter::internalToStr (FILE * file ,_String& string_buffer) { } break; } - + default: { // hash-mark sequential char seqDelimiter = (outputFormat==9)?'>':'#'; 
- + for (unsigned long i = 0UL; i< sequence_count; i++) { write_here << seqDelimiter << GetSequenceName(i); for (unsigned long j = 0UL; jNoOfUniqueColumns () != ds->NoOfUniqueColumns() || existingDS->GetTT () != ds->GetTT(); - - + + for (AVLListXLIteratorKeyValue filter_key_value : ObjectIndexer (HY_BL_DATASET_FILTER)) { _DataSetFilter * filter = (_DataSetFilter*) filter_key_value.get_object(); if (filter->GetData() == existingDS) { @@ -6039,19 +6040,19 @@ bool StoreADataSet (_DataSet* ds, _String* setName) { } } } - + dataSetList.Replace(pos,ds,false); } - + _Parameter normalizeSeqNames = 1.; checkParameter (normalizeSequenceNames, normalizeSeqNames, 1.0); - + CheckReceptacleAndStore (*setName&".mapping",emptyString,false, new _MathObject, false); if (normalizeSeqNames > 0.1) { _List _id_mapping; _AVLListXL id_mapping (&_id_mapping); bool did_something = false; - + for (unsigned long i = 0UL; i < ds->NoOfSpecies(); i ++) { _String * old_name = new _String (*ds->GetSequenceName (i)); if (! 
old_name->IsValidIdentifier(false) ) { @@ -6067,14 +6068,14 @@ bool StoreADataSet (_DataSet* ds, _String* setName) { *ds->GetSequenceName (i) = new_name; did_something = true; } - + ds->GetSequenceName (i)->AddAReference(); id_mapping.Insert (ds->GetSequenceName (i), (long)old_name, false, false); } - + if (did_something) { _AssociativeList * mapping = new _AssociativeList(); - + _SimpleList history; long t, current_index = id_mapping.Traverser(history, t, id_mapping.GetRoot()); @@ -6083,7 +6084,7 @@ bool StoreADataSet (_DataSet* ds, _String* setName) { mapping->MStore(*(_String*)_id_mapping.GetItem (current_index), *(_String*)id_mapping.GetXtra(current_index)); current_index = id_mapping.Traverser(history, t); } - + CheckReceptacleAndStore (*setName&".mapping",emptyString,false, mapping, false); } } @@ -6100,7 +6101,7 @@ bool StoreADataSet (_DataSet* ds, _String* setName) { _Matrix * _DataSet::HarvestFrequencies (unsigned char unit, unsigned char atom, bool posSpec, _SimpleList& hSegmentation, _SimpleList& vSegmentation, bool countGaps) const { - + if (hSegmentation.lLength == 0L || vSegmentation.lLengthMultiTokenResolutions(unit_for_counting, static_store, countGaps); - + if (resolution_count > 0UL) { - + _Parameter normalized = 1./resolution_count; for (long resolution_index = 0UL; resolution_index < resolution_count; resolution_index ++) { @@ -6167,7 +6168,7 @@ _Matrix * _DataSet::HarvestFrequencies (unsigned char unit, unsigned char atom, unsigned long row_count = out->GetHDim(), column_count = out->GetVDim(); - + for (unsigned long column =0UL; column < column_count; column++) { // normalize each _column_ to sum to 1. 
_Parameter sum = 0.0; From d0db54089e1013812a2bf6f53e6bd9d00254f378 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 20 Jul 2018 17:25:58 -0400 Subject: [PATCH 34/53] Fixing the previous bug fix (so no seg fault); modifying CMakeLists to properly do SSE and AVX checks when clang is the compiler --- CMakeLists.txt | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 85453c77a..4dab1a456 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,9 +20,9 @@ macro(PCL_CHECK_FOR_SSE3) include(CheckCXXSourceRuns) set(CMAKE_REQUIRED_FLAGS) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "-msse3") - endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG) + endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") check_cxx_source_runs(" #include @@ -46,9 +46,9 @@ macro(PCL_CHECK_FOR_AVX) include(CheckCXXSourceRuns) set(CMAKE_REQUIRED_FLAGS) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "-march=corei7-avx -mtune=corei7-avx") - endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG) + endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") check_cxx_source_runs(" #include @@ -141,16 +141,16 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") PCL_CHECK_FOR_AVX() if(${HAVE_AVX_EXTENSIONS}) - set(DEFAULT_COMPILE_FLAGS 
"${DEFAULT_COMPILE_FLAGS} -march=corei7-avx -mtune=corei7-avx") + set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=corei7-avx -mtune=corei7-avx ") add_definitions (-D_SLKP_USE_AVX_INTRINSICS) else(${HAVE_AVX_EXTENSIONS}) PCL_CHECK_FOR_SSE3() if(${HAVE_SSE3_EXTENSIONS}) add_definitions (-D_SLKP_USE_SSE_INTRINSICS) - set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3 ") + set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3 -g") endif(${HAVE_SSE3_EXTENSIONS}) endif (${HAVE_AVX_EXTENSIONS}) @@ -177,9 +177,10 @@ endif(NOT DEFINED DEFAULT_WARNING_FLAGS) include (CheckLibraryExists) -CHECK_LIBRARY_EXISTS( "libamdlibm.so" "log" "" HAS_AMD_LIBM) +CHECK_LIBRARY_EXISTS( "libamdlibm.so" "amd_log" "" HAS_AMD_LIBM) if(HAS_AMD_LIBM) set(DEFAULT_LINK_FLAGS "${DEFAULT_LINK_FLAGS} -lamdlibm") + add_definitions(-D__USEAMDLOG__) endif(HAS_AMD_LIBM) From 23fc8b59160dd6d6ad1ea01a5635c96e1159bec3 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Mon, 23 Jul 2018 21:07:49 -0400 Subject: [PATCH 35/53] More sensible check for roundoff erros in ConjugateGradientDescent --- res/TemplateBatchFiles/BGM.bf | 440 +++++++++++------- .../modules/shared-load-file.bf | 19 +- res/TemplateBatchFiles/libv3/all-terms.bf | 15 +- .../libv3/models/parameters.bf | 14 +- .../libv3/tasks/estimators.bf | 2 +- src/core/likefunc.cpp | 8 +- 6 files changed, 314 insertions(+), 184 deletions(-) diff --git a/res/TemplateBatchFiles/BGM.bf b/res/TemplateBatchFiles/BGM.bf index 2a4255702..896998651 100644 --- a/res/TemplateBatchFiles/BGM.bf +++ b/res/TemplateBatchFiles/BGM.bf @@ -1,165 +1,275 @@ -RequireVersion("2.3.13"); - -// ---- load library files -------------------------------- -LoadFunctionLibrary("libv3/UtilityFunctions.bf"); -LoadFunctionLibrary("libv3/IOFunctions.bf"); -LoadFunctionLibrary("libv3/stats.bf"); - -LoadFunctionLibrary("libv3/tasks/ancestral.bf"); -LoadFunctionLibrary("libv3/tasks/alignments.bf"); -LoadFunctionLibrary("libv3/tasks/estimators.bf"); 
-LoadFunctionLibrary("libv3/tasks/trees.bf"); -LoadFunctionLibrary("libv3/tasks/mpi.bf"); - -LoadFunctionLibrary("libv3/models/codon/MG_REV.bf"); - -LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf"); -LoadFunctionLibrary("SelectionAnalyses/modules/selection_lib.ibf"); - -LoadFunctionLibrary("bayesgraph.ibf"); - - - - - -// --- display analysis information ----------------------- - -bgm.analysis_description = { - terms.io.info: "BGM (Bayesian Graphical Model) uses a - maximum likelihood ancestral state reconstruction to - map non-synonymous substitution events to branches in the - phylogeny and then analyzes the joint distribution of the - substitution map using a Bayesian graphical model (network). - Next, a Markov chain Monte Carlo analysis is used to generate - a random sample of network structures from the posterior - distribution given the data. Each node in the network - represents a codon site in the alignment, and links (edges) - between nodes indicate high posterior support for correlated - substitutions at the two sites over time, which implies - coevolution. - ", - terms.io.version: "1.0", - terms.io.reference: "Spidermonkey: rapid detection of co-evolving sites using Bayesian graphical models (2008). 
_Bioinformatics_ 24(17): 1949-1950", - terms.io.authors: "Art FY Poon, Fraser I Lewis, Simon DW Frost and Sergei LK Pond", - terms.io.contact: "apoon42@uwo.ca", - terms.io.requirements: "in-frame codon alignment and a phylogenetic tree" -}; -io.DisplayAnalysisBanner(bgm.analysis_description); - - - -// --- enviornment setup ------------------------- - - - -// --- globals ---------------------------- - -bgm.samples = 10; -bgm.pvalue = 0.1; - -bgm.json = { - terms.json.analysis: bgm.analysis_description, - terms.json.fits: {}, - terms.json.timers: {}, -}; - -bgm.scaler_prefix = "BGM.scaler"; - -bgm.by_site = "by-site"; -bgm.AVERAGED = "AVERAGED"; -bgm.RESOLVED = "RESOLVED"; - -bgm.nsteps = io.PromptUser("\n>Select the number of MCMC steps to sample [default 100000]", 1e5, 0, 1e9, TRUE); -bgm.burnin = io.PromptUser("\n>Select the number of MCMC steps to discard as burn-in [default 10000]", 1e4, 0, 1e9, TRUE); -bgm.nsamples = io.PromptUser("\n>Select the number of steps to extract from the chain sample [default 100]", 100, 0, bgm.nsteps, TRUE); -bgm.max_parents = io.PromptUser ("\n>Select the maximum number of parents allowed per node [default 1]", 1, 1, 3, TRUE); - - -// --- execution ------------------------- - -// load and pre-process codon alignment -namespace bgm { - LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); - load_file ("bgm"); -} - - -// fit nucleotide general time-reversible model -// we always re-estimate branch lengths? Can we constrain to scale? -namespace bgm { - doGTR ("bgm"); -} - -// what does this do? 
-estimators.fixSubsetOfEstimates(bgm.gtr_results, bgm.gtr_results[terms.global]); - -bgm.user_tree = bgm.trees["0"]; - -namespace bgm { - doPartitionedMG("bgm", TRUE); // keep LF -} - - -// --- ancestral reconstruction -------------------- -bgm.ancestors = ancestral.build (bgm.partitioned_mg_results[terms.likelihood_function], 0, None); - - -bgm.code = bgm.codon_data_info[utility.getGlobalValue("terms.code")]; - -function bgm.nsfilter(state1, state2, ancestral_data) { - if (bgm.code[state1] != bgm.code[state2] && - bgm.code[state1] != genetic_code.stop_code && - bgm.code[state2] != genetic_code.stop_code) { - return 1; - } else { - return 0; - } -} - -bgm.counts = ancestral.ComputeSubstitutionCounts( - bgm.ancestors, - None, // all branches - "bgm.nsfilter", // substitution filter - None // site filter (e.g., MinCount) -); - - - -// --- BGM analysis ------------------------------- - -lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { - /* convert data to matrix form */ - nodes = {}; - num_nodes = Abs (_bgm_data["Sites"]); - for (k = 0; k < num_nodes; k = k+1) - { - /* Arguments: - 1. node name, must be a string - 2. maximum number of parents - 3. prior sample size - always uninformative (count split evenly across levels) - - if we were truly Bayesian, we would let the user set informative priors.. - 4. 
number of levels - always binary in this case (substitution mapped to branch) - */ - node_name = ""+ ((_bgm_data["Sites"])[k] + 1); - nodes + add_discrete_node (node_name, max_parents, 0, 2); - } - - BayesianGraphicalModel gen_bgm = (nodes); - attach_data("gen_bgm", _bgm_data["Counts"], 0, 0, 0); - bgm_result = order_MCMC("gen_bgm", nsteps, burnin, nsamples); - return bgm_result; -} - - -bgm.results = bgm.run (bgm.counts, bgm.burnin, bgm.nsteps, bgm.nsamples, bgm.max_parents); - - -// --- process BGM results ------------------------------- - -bgm.trace = {1, bgm.nsamples}; // row vector -for (bgm.i = 0; bgm.i < bgm.nsamples; bgm.i += 1) { - bgm.trace[bgm.i] = bgm.results[bgm.i][0]; -} - - - +RequireVersion("2.3.12"); + +// ---- load library files -------------------------------- +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/IOFunctions.bf"); +LoadFunctionLibrary("libv3/stats.bf"); + +LoadFunctionLibrary("libv3/tasks/ancestral.bf"); +LoadFunctionLibrary("libv3/tasks/alignments.bf"); +LoadFunctionLibrary("libv3/tasks/estimators.bf"); +LoadFunctionLibrary("libv3/tasks/trees.bf"); + + +LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf"); + +namespace bgm { + LoadFunctionLibrary ("libv3/tasks/bayesgraph.ibf"); + /* + namespace terms { + namespace settings { + nsteps + } + } + */ + +} + + + + +// --- display analysis information ----------------------- + +bgm.analysis_description = { + terms.io.info: +"BGM (Bayesian Graphical Model) uses a maximum likelihood ancestral state reconstruction to +map substitution (non-synonymous only for coding data) events to branches in the +phylogeny and then analyzes the joint distribution of the +substitution map using a Bayesian graphical model (network). +Next, a Markov chain Monte Carlo analysis is used to generate +a random sample of network structures from the posterior +distribution given the data. 
Each node in the network +represents a codon site in the alignment, and links (edges) +between nodes indicate high posterior support for correlated +substitutions at the two sites over time, which implies coevolution.", + terms.io.version: "1.0", + terms.io.reference: "Spidermonkey: rapid detection of co-evolving sites using Bayesian graphical models (2008). _Bioinformatics_ 24(17): 1949-1950", + terms.io.authors: "Art FY Poon, Fraser I Lewis, Simon DW Frost and Sergei L Kosakovsky Pond", + terms.io.contact: "apoon42@uwo.ca", + terms.io.requirements: "in-frame codon alignment and a phylogenetic tree" +}; +io.DisplayAnalysisBanner(bgm.analysis_description); + + +namespace bgm { + LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); +} + +bgm.json = { + terms.json.analysis: bgm.analysis_description, + terms.json.fits: {}, + terms.json.timers: {}, +}; + +bgm.data_types = {terms.nucleotide : "Nucleotide multiple sequence alignment", + terms.amino_acid : "Protein multiple sequence alignment", + terms.codon : "Codon multiple sequence alignment"}; + +bgm.run_type = io.SelectAnOption (bgm.data_types, "Data type"); + +SetDialogPrompt ("Specify a `bgm.run_type` multiple sequence alignment file"); + +bgm.fit_options = {terms.run_options.retain_lf_object : TRUE}; + + +if (bgm.run_type == "nucleotide") { + bgm.alignment_info = alignments.ReadNucleotideDataSet ("bgm.dataset", None); + bgm.substitution_model_generator = "models.DNA.GTR.ModelDescription"; +} else { + if (bgm.run_type == "amino-acid") { + + } else { // codon + LoadFunctionLibrary("libv3/models/codon/MG_REV.bf"); + } +} + +bgm.name_mapping = bgm.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]; + + +selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.json.file, bgm.alignment_info [terms.data.file]); +selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.json.sequences, bgm.alignment_info [terms.data.sequences]); 
+selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.json.sites, bgm.alignment_info [terms.data.sites]); +selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.data_type, bgm.run_type); + +bgm.alignment_info[terms.json.json] = bgm.alignment_info[terms.data.file] + ".BGM.json"; + +bgm.path.base = (bgm.json [terms.json.input])[terms.json.file]; + + +bgm.sample_size = bgm.alignment_info[terms.data.sequences] * bgm.alignment_info[terms.data.sites]; +alignments.EnsureMapping ("bgm.dataset", bgm.alignment_info); + +bgm.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions ( + bgm.alignment_info[utility.getGlobalValue("terms.data.partitions")], + bgm.alignment_info[utility.getGlobalValue("terms.data.name_mapping")] + ); + + + +io.CheckAssertion ("Abs (bgm.partitions_and_trees) == 1", "BGM cannot be run on data with multiple site partitions (and trees)"); +bgm.filter_specification = alignments.DefineFiltersForPartitions (bgm.partitions_and_trees, "bgm.dataset" , "bgm.filter.", bgm.alignment_info); +bgm.store_tree_information(); + +io.ReportProgressMessageMD ("BGM", "Data", "Loaded **" + + bgm.alignment_info [terms.data.sequences] + "** `bgm.run_type` sequences, **" + + bgm.alignment_info [terms.data.sites] + "** sites, from \`" + bgm.alignment_info [terms.data.file] + "\`"); + +bgm.initial_values = parameters.helper.tree_lengths_to_initial_values (bgm.trees, None); + + + +console.log ( "\n> BGM will write result file to `bgm.alignment_info[terms.json.json]`\n"); + +bgm.selected_branches = selection.io.defineBranchSets ( bgm.partitions_and_trees ); + +bgm.nsteps = io.PromptUser("\n>Select the number of MCMC steps to sample [default 100000]", 1e5, 0, 1e9, TRUE); +bgm.burnin = io.PromptUser("\n>Select the number of MCMC steps to discard as burn-in [default 10000]", 1e4, 0, 1e9, TRUE); +bgm.nsamples = io.PromptUser("\n>Select the number of steps to extract from the chain sample [default 100]", 100, 0, bgm.nsteps, 
TRUE); +bgm.max_parents = io.PromptUser ("\n>Select the maximum number of parents allowed per node [default 1]", 1, 1, 3, TRUE); +bgm.min_subs = io.PromptUser ("\n>Select the minium number of substitutions per site to include it in the analysis", 1, 1, 1e5, TRUE); + + +// FIT THE BASELINE MODEL + +if (bgm.run_type == "nucleotide") { + bgm.initial_values = utility.Extend (bgm.initial_values, + { + utility.getGlobalValue ("terms.global") : { + terms.nucleotideRate ("A","C") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.nucleotideRate ("A","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.nucleotideRate ("C","G") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.nucleotideRate ("G","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} + } + }); + + } + +bgm.baseline_fit = estimators.FitSingleModel_Ext ( + bgm.filter_names, + bgm.trees, + bgm.substitution_model_generator , + bgm.initial_values, + bgm.fit_options + ); + +bgm.ancestral_cache = ancestral.build (bgm.baseline_fit[terms.likelihood_function], 0, None); +bgm.branch_filter = utility.Filter (bgm.selected_branches[0], "_class_", "_class_ == terms.tree_attributes.test"); + + +if (bgm.run_type != "codon") { + bgm.counts = ancestral.ComputeSubstitutionCounts( + bgm.ancestral_cache, + bgm.branch_filter, // selected branches + None, // substitution filter + "bgm.min_sub_filter" // site filter (e.g., MinCount) + ); +} else { + +} + +if (Abs (bgm.counts["Sites"]) <= 2) { + +} else { + +} + +console.log (bgm.counts); + +return 0; + + +// --- execution ------------------------- + +// load and pre-process codon alignment +namespace bgm { + LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); + load_file ("bgm"); +} + + +// fit nucleotide general time-reversible model +// we always re-estimate branch lengths? Can we constrain to scale? +namespace bgm { + doGTR ("bgm"); +} + +// what does this do? 
+estimators.fixSubsetOfEstimates(bgm.gtr_results, bgm.gtr_results[terms.global]); + +bgm.user_tree = bgm.trees["0"]; + +namespace bgm { + doPartitionedMG("bgm", TRUE); // keep LF +} + + +// --- ancestral reconstruction -------------------- +bgm.ancestors = ancestral.build (bgm.partitioned_mg_results[terms.likelihood_function], 0, None); + + +bgm.code = bgm.codon_data_info[utility.getGlobalValue("terms.code")]; + + + +bgm.counts = ancestral.ComputeSubstitutionCounts( + bgm.ancestors, + None, // all branches + "bgm.nsfilter", // substitution filter + "bgm.min_sub_filter" // site filter (e.g., MinCount) +); + + + +// --- BGM analysis ------------------------------- + +lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { + /* convert data to matrix form */ + nodes = {}; + num_nodes = Abs (_bgm_data["Sites"]); + for (k = 0; k < num_nodes; k = k+1) + { + /* Arguments: + 1. node name, must be a string + 2. maximum number of parents + 3. prior sample size - always uninformative (count split evenly across levels) + - if we were truly Bayesian, we would let the user set informative priors.. + 4. 
number of levels - always binary in this case (substitution mapped to branch) + */ + node_name = ""+ ((_bgm_data["Sites"])[k] + 1); + nodes + add_discrete_node (node_name, max_parents, 0, 2); + } + + BayesianGraphicalModel gen_bgm = (nodes); + attach_data(&gen_bgm, _bgm_data["Counts"], 0, 0, 0); + bgm_result = order_MCMC(&gen_bgm, nsteps, burnin, nsamples); + return bgm_result; +} + + +bgm.results = bgm.run (bgm.counts, bgm.burnin, bgm.nsteps, bgm.nsamples, bgm.max_parents); + +// --- process BGM results ------------------------------- + +bgm.trace = {1, bgm.nsamples}; // row vector +for (bgm.i = 0; bgm.i < bgm.nsamples; bgm.i += 1) { + bgm.trace[bgm.i] = bgm.results[bgm.i][0]; +} + +// ==== HELPER FUNCTIONS ==== + +lfunction bgm.nsfilter(state1, state2, ancestral_data) { + if (bgm.code[state1] != bgm.code[state2] && + bgm.code[state1] != genetic_code.stop_code && + bgm.code[state2] != genetic_code.stop_code) { + return TRUE; + } else { + return FALSE; + } +} + +function bgm.min_sub_filter (counts) { + return (+counts) > bgm.min_subs; +} + + + diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index bd1d2653f..25223944c 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -69,7 +69,10 @@ function load_file (prefix) { } */ - codon_data_info[utility.getGlobalValue("terms.data.sample_size")] = codon_data_info[utility.getGlobalValue("terms.data.sites")] * codon_data_info[utility.getGlobalValue("terms.data.sequences")]; + + "`prefix`.sample_size" = codon_data_info[utility.getGlobalValue("terms.data.sites")] * codon_data_info[utility.getGlobalValue("terms.data.sequences")]; + + codon_data_info[utility.getGlobalValue("terms.data.sample_size")] = "`prefix`.sample_size"; upper_prefix = prefix && 1; //uppercase the prefix for json name 
codon_data_info[utility.getGlobalValue("terms.json.json")] = codon_data_info[utility.getGlobalValue("terms.data.file")] + "."+upper_prefix+".json"; @@ -272,7 +275,8 @@ function doGTR (prefix) { trees, gtr_results); - io.ReportProgressMessageMD (prefix, "nuc-fit", "* " + selection.io.report_fit (gtr_results, 0, 3*(^"`prefix`.codon_data_info")[utility.getGlobalValue ("terms.data.sample_size")])); + io.ReportProgressMessageMD (prefix, "nuc-fit", "* " + + selection.io.report_fit (gtr_results, 0, 3*(^"`prefix`.sample_size"))); @@ -281,24 +285,19 @@ function doGTR (prefix) { utility.Map (gtr_results[utility.getGlobalValue("terms.global")], "_value_", ' {terms.fit.MLE : _value_[terms.fit.MLE]}'), "_value_", "_value_[terms.fit.MLE]"); + efv = (gtr_results[utility.getGlobalValue("terms.efv_estimate")])["VALUEINDEXORDER"][0]; + selection.io.json_store_lf_GTR_MG94 (json, utility.getGlobalValue ("terms.json.nucleotide_gtr"), gtr_results[utility.getGlobalValue ("terms.fit.log_likelihood")], gtr_results[utility.getGlobalValue ("terms.parameters")] , - codon_data_info[utility.getGlobalValue ("terms.data.sample_size")], + 3*(^"`prefix`.sample_size"), gtr_rates, efv, display_orders[utility.getGlobalValue ("terms.json.nucleotide_gtr")]); - /* TODO: Why does this not work here? 
*/ - /* - utility.ForEachPair (filter_specification, "_key_", "_value_", - 'selection.io.json_store_branch_attribute(json, utility.getGlobalValue ("terms.json.nucleotide_gtr"), utility.getGlobalValue ("terms.branch_length"), display_orders[terms.json.nucleotide_gtr], - _key_, - selection.io.extract_branch_info((gtr_results[utility.getGlobalValue ("terms.branch_length")])[_key_], "selection.io.branch.length"));'); - */ /* Store branch lengths */ for (partition_index = 0; partition_index < Abs(filter_specification); partition_index += 1) { diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 0e494af62..8708d86d0 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -23,6 +23,7 @@ namespace terms{ nonsynonymous_sub_count = "nonsynonymous substitution count"; original_name = "original name"; replicates = "replicates"; + data_type = "datatype"; category = "category"; mixture = "mixture"; @@ -470,13 +471,25 @@ namespace terms{ } /* Terms associated with tree labeling */ - namespace tree_attributes{ + namespace tree_attributes { internal = "internal"; leaf = "leaf"; test = "test"; background = "background"; } + /* Terms associated with BGMs */ + + namespace bgm { + namespace node { + id = "NodeID"; + type = "NodeType"; + max_parents = "MaxParents"; + prior_size = "PriorSize"; + levels = "NumLevels"; + } + } + } diff --git a/res/TemplateBatchFiles/libv3/models/parameters.bf b/res/TemplateBatchFiles/libv3/models/parameters.bf index 9a8d8a210..082b545ba 100644 --- a/res/TemplateBatchFiles/libv3/models/parameters.bf +++ b/res/TemplateBatchFiles/libv3/models/parameters.bf @@ -368,8 +368,8 @@ lfunction parameters.GenerateSequentialNames(prefix, count, delimiter) { * @param ranges * @returns nothing */ -function parameters.SetRange(id, ranges) { - +function parameters.SetRange(id, ranges) { + if (Type(id) == "String") { if (Abs(id)) { if (Type(ranges) == "AssociativeList") { 
@@ -545,7 +545,7 @@ lfunction parameters.SetStickBreakingDistribution (parameters, values) { for (i = 0; i < rate_count; i += 1) { - + parameters.SetValue ((parameters["rates"])[i], values[i][0]); if (i < rate_count - 1) { break_here = values[i][1] / left_over; @@ -642,8 +642,14 @@ lfunction parameters.helper.tree_lengths_to_initial_values(dict, type) { for (i = 0; i < components; i += 1) { this_component = {}; - utility.ForEachPair((dict[i])[ utility.getGlobalValue("terms.branch_length")], "_branch_name_", "_branch_length_", "`&this_component`[_branch_name_] = {utility.getGlobalValue('terms.fit.MLE') : `&factor`*_branch_length_}"); + + + utility.ForEachPair((dict[i])[ utility.getGlobalValue("terms.branch_length")], "_branch_name_", "_branch_length_", + " + `&this_component`[_branch_name_] = {utility.getGlobalValue('terms.fit.MLE') : `&factor`*_branch_length_} + "); result[i] = this_component; + } return { utility.getGlobalValue("terms.branch_length"): result diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 6aec71731..79c3a3236 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -858,7 +858,7 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, model_template, init */ lfunction estimators.FitGTR_Ext (data_filter, tree, initial_values, run_options) { - return estimators.FitSingleModel_Ext (data_filter, tree, "models.DNA.GTR.ModelDescription", initial_values, run_options) + return estimators.FitSingleModel_Ext (data_filter, tree, "models.DNA.GTR.ModelDescription", initial_values, run_options); } /** diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index b00807a43..f465a6039 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -6059,10 +6059,12 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio } SetAllIndependent (&bestVal); - if (maxSoFar < initial_value && 
CheckEqual(maxSoFar, initial_value) == false) { - WarnError (_String("Internal optimization error in _LikelihoodFunction::ConjugateGradientDescent. Worsened likelihood score from ") & initial_value & " to " & maxSoFar); + if (maxSoFar < initial_value && CheckEqual(maxSoFar, initial_value, machineEps * 100.) == false) { + WarnError (_String("Internal optimization error in _LikelihoodFunction::ConjugateGradientDescent. Worsened likelihood score from ") & + _String (initial_value,"%20.16g") & " to " & _String(maxSoFar,"%20.16g")); } - + + if (vl>1) { BufferToConsole("\n"); From 12705d6a15b8ad25ce006e3b7c3d5d4bcdd709df Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 24 Jul 2018 06:29:02 -0400 Subject: [PATCH 36/53] Sample size definition fix --- .../SelectionAnalyses/modules/shared-load-file.bf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index 25223944c..e7ba05c9b 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -70,9 +70,9 @@ function load_file (prefix) { */ - "`prefix`.sample_size" = codon_data_info[utility.getGlobalValue("terms.data.sites")] * codon_data_info[utility.getGlobalValue("terms.data.sequences")]; + sample_size=codon_data_info[utility.getGlobalValue("terms.data.sites")]*codon_data_info[utility.getGlobalValue("terms.data.sequences")]; - codon_data_info[utility.getGlobalValue("terms.data.sample_size")] = "`prefix`.sample_size"; + codon_data_info[utility.getGlobalValue("terms.data.sample_size")] = sample_size; upper_prefix = prefix && 1; //uppercase the prefix for json name codon_data_info[utility.getGlobalValue("terms.json.json")] = codon_data_info[utility.getGlobalValue("terms.data.file")] + "."+upper_prefix+".json"; From fa60d9b609caa0c4deabf9327bb738214f5d90aa Mon 
Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 24 Jul 2018 08:07:06 -0400 Subject: [PATCH 37/53] Progress on BGM.bf --- res/TemplateBatchFiles/BGM.bf | 115 +++++++++++++--------------------- 1 file changed, 45 insertions(+), 70 deletions(-) diff --git a/res/TemplateBatchFiles/BGM.bf b/res/TemplateBatchFiles/BGM.bf index 896998651..7d01e2016 100644 --- a/res/TemplateBatchFiles/BGM.bf +++ b/res/TemplateBatchFiles/BGM.bf @@ -13,17 +13,8 @@ LoadFunctionLibrary("libv3/tasks/trees.bf"); LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf"); -namespace bgm { - LoadFunctionLibrary ("libv3/tasks/bayesgraph.ibf"); - /* - namespace terms { - namespace settings { - nsteps - } - } - */ +LoadFunctionLibrary ("libv3/tasks/bayesgraph.ibf"); -} @@ -74,7 +65,7 @@ bgm.fit_options = {terms.run_options.retain_lf_object : TRUE}; if (bgm.run_type == "nucleotide") { bgm.alignment_info = alignments.ReadNucleotideDataSet ("bgm.dataset", None); - bgm.substitution_model_generator = "models.DNA.GTR.ModelDescription"; + bgm.baseline_model = "models.DNA.GTR.ModelDescription"; } else { if (bgm.run_type == "amino-acid") { @@ -105,7 +96,6 @@ bgm.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions ( ); - io.CheckAssertion ("Abs (bgm.partitions_and_trees) == 1", "BGM cannot be run on data with multiple site partitions (and trees)"); bgm.filter_specification = alignments.DefineFiltersForPartitions (bgm.partitions_and_trees, "bgm.dataset" , "bgm.filter.", bgm.alignment_info); bgm.store_tree_information(); @@ -131,6 +121,8 @@ bgm.min_subs = io.PromptUser ("\n>Select the minium number of substitutions p // FIT THE BASELINE MODEL +io.ReportProgressMessageMD("bgm", "phylo", "Performing initial model fit to obtain branch lengths and rate parameters"); + if (bgm.run_type == "nucleotide") { bgm.initial_values = utility.Extend (bgm.initial_values, { @@ -147,11 +139,36 @@ if (bgm.run_type == "nucleotide") { bgm.baseline_fit = estimators.FitSingleModel_Ext ( 
bgm.filter_names, bgm.trees, - bgm.substitution_model_generator , + bgm.baseline_model , bgm.initial_values, bgm.fit_options ); + +io.ReportProgressMessageMD ("bgm", "phylo", ">Fitted an alignment-wide model. " + selection.io.report_fit (bgm.baseline_fit, 0, bgm.sample_size ) + "\n\nTotal tree lengths by partition\n"); +utility.ForEachPair (bgm.baseline_fit[terms.branch_length], "_part_", "_value_", +' + io.ReportProgressMessageMD ("bgm", "phylo", "Partition " + (1+_part_) + ". " + Format (+(utility.Map (_value_, "_data_", + " + _data_ [terms.fit.MLE] + ")) + ,6,3) + " subs/site." + ) +' +); + +selection.io.json_store_lf(bgm.json, bgm.baseline_model,bgm.baseline_fit[terms.fit.log_likelihood], + bgm.baseline_fit[terms.parameters], + bgm.sample_size, None, 0); + +utility.ForEachPair (bgm.filter_specification, "_key_", "_value_", + 'selection.io.json_store_branch_attribute(bgm.json, bgm.baseline_model, terms.branch_length, 0, + _key_, + selection.io.extract_branch_info((bgm.baseline_fit[terms.branch_length])[_key_], "selection.io.branch.length"));'); + + +io.ReportProgressMessageMD("bgm", "ancestral", "Performing joint ancestral state reconstruction and mapping substitutions"); + bgm.ancestral_cache = ancestral.build (bgm.baseline_fit[terms.likelihood_function], 0, None); bgm.branch_filter = utility.Filter (bgm.selected_branches[0], "_class_", "_class_ == terms.tree_attributes.test"); @@ -168,57 +185,21 @@ if (bgm.run_type != "codon") { } if (Abs (bgm.counts["Sites"]) <= 2) { - + console.log ("###ERROR: NOT ENOUGH SUBSTITUTIONS###"); + console.log ("\n>BGM requires at least three sites to have accumulated sufficient substitutions to run network inference"); } else { + bgm.site_count (bgm.counts); -} - -console.log (bgm.counts); - -return 0; - - -// --- execution ------------------------- - -// load and pre-process codon alignment -namespace bgm { - LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); - load_file ("bgm"); -} - - -// fit nucleotide 
general time-reversible model -// we always re-estimate branch lengths? Can we constrain to scale? -namespace bgm { - doGTR ("bgm"); -} + bgm.raw_results = bgm.run (bgm.counts, bgm.burnin, bgm.nsteps, bgm.nsamples, bgm.max_parents); + bgm.mcmc_trace = utility.Map ({1, bgm.nsamples}["_MATRIX_ELEMENT_COLUMN_"], "_index_", "bgm.raw_results[_index_][0]"); -// what does this do? -estimators.fixSubsetOfEstimates(bgm.gtr_results, bgm.gtr_results[terms.global]); -bgm.user_tree = bgm.trees["0"]; -namespace bgm { - doPartitionedMG("bgm", TRUE); // keep LF + //bgm.processed_results = { } -// --- ancestral reconstruction -------------------- -bgm.ancestors = ancestral.build (bgm.partitioned_mg_results[terms.likelihood_function], 0, None); - - -bgm.code = bgm.codon_data_info[utility.getGlobalValue("terms.code")]; - - - -bgm.counts = ancestral.ComputeSubstitutionCounts( - bgm.ancestors, - None, // all branches - "bgm.nsfilter", // substitution filter - "bgm.min_sub_filter" // site filter (e.g., MinCount) -); - - +return 0; // --- BGM analysis ------------------------------- @@ -226,8 +207,7 @@ lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { /* convert data to matrix form */ nodes = {}; num_nodes = Abs (_bgm_data["Sites"]); - for (k = 0; k < num_nodes; k = k+1) - { + for (k = 0; k < num_nodes; k += 1) { /* Arguments: 1. node name, must be a string 2. maximum number of parents @@ -236,25 +216,20 @@ lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { 4. 
number of levels - always binary in this case (substitution mapped to branch) */ node_name = ""+ ((_bgm_data["Sites"])[k] + 1); - nodes + add_discrete_node (node_name, max_parents, 0, 2); + nodes + bgm.add_discrete_node (node_name, max_parents, 0, 2); } - BayesianGraphicalModel gen_bgm = (nodes); - attach_data(&gen_bgm, _bgm_data["Counts"], 0, 0, 0); - bgm_result = order_MCMC(&gen_bgm, nsteps, burnin, nsamples); - return bgm_result; -} - -bgm.results = bgm.run (bgm.counts, bgm.burnin, bgm.nsteps, bgm.nsamples, bgm.max_parents); + BayesianGraphicalModel gen_bgm = (nodes); + bgm.attach_data (&gen_bgm, _bgm_data["Counts"], 0, 0, 0); + bgm_result = bgm.order_MCMC(&gen_bgm, nsteps, burnin, nsamples); -// --- process BGM results ------------------------------- + console.log (Rows(bgm_result)); -bgm.trace = {1, bgm.nsamples}; // row vector -for (bgm.i = 0; bgm.i < bgm.nsamples; bgm.i += 1) { - bgm.trace[bgm.i] = bgm.results[bgm.i][0]; + return bgm_result; } + // ==== HELPER FUNCTIONS ==== lfunction bgm.nsfilter(state1, state2, ancestral_data) { From 2b570c626de9e51dc75e72f0208cda8a39c8ca96 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 24 Jul 2018 14:49:54 -0400 Subject: [PATCH 38/53] Fixing issues with number bin-op not-number memory errors --- src/core/constant.cpp | 150 +++++++++++++++--------------------- src/core/include/constant.h | 10 +++ src/core/matrix.cpp | 11 +-- 3 files changed, 72 insertions(+), 99 deletions(-) diff --git a/src/core/constant.cpp b/src/core/constant.cpp index df63bfa44..10eb3fe5e 100644 --- a/src/core/constant.cpp +++ b/src/core/constant.cpp @@ -53,7 +53,6 @@ _Formula *chi2 = nil, *derchi2 = nil; -long randomCount = 0; extern _Parameter machineEps; extern _Parameter tolerance; @@ -129,10 +128,10 @@ _Constant::_Constant (void) //} //__________________________________________________________________________________ -_Parameter _Constant::Value (void) -{ +_Parameter _Constant::Value (void) { return theValue; } + 
//__________________________________________________________________________________ BaseRef _Constant::toStr(unsigned long) { @@ -140,112 +139,89 @@ BaseRef _Constant::toStr(unsigned long) } //__________________________________________________________________________________ -_PMathObj _Constant::Add (_PMathObj theObj) -{ +_PMathObj _Constant::Add (_PMathObj theObj) { + if (theObj->ObjectClass() == STRING) { return new _Constant ((theValue+((_FString*)theObj)->theString->toNum())); - } else { - return new _Constant ((theValue+((_Constant*)theObj)->theValue)); } + + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter {return a + b;}); } //__________________________________________________________________________________ -_PMathObj _Constant::Sub (_PMathObj theObj) -{ - //if (theObj) return nil; - return new _Constant ((theValue-((_Constant*)theObj)->theValue)); - //else - // return nil; - //return (_PMathObj)result.makeDynamic(); +_PMathObj _Constant::Sub (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter {return a - b;}); } //__________________________________________________________________________________ -_PMathObj _Constant::Minus (void) -{ +_PMathObj _Constant::Minus (void) { return new _Constant (-Value()); } //__________________________________________________________________________________ -_PMathObj _Constant::Sum (void) -{ +_PMathObj _Constant::Sum (void) { return new _Constant (Value()); } //__________________________________________________________________________________ -_PMathObj _Constant::Mult (_PMathObj theObj) -{ -// if (!theObj) return nil; - return new _Constant ((theValue*((_Constant*)theObj)->theValue)); +_PMathObj _Constant::Mult (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter {return a * b;}); + } //__________________________________________________________________________________ 
-_PMathObj _Constant::Div (_PMathObj theObj) -{ -// if (!theObj) return nil; - return new _Constant ((theValue/((_Constant*)theObj)->theValue)); +_PMathObj _Constant::Div (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter {return a / b;}); } - //__________________________________________________________________________________ -_PMathObj _Constant::lDiv (_PMathObj theObj) // % -{ - if (theObj) { - long denom = ((_Constant*)theObj)->theValue; - return denom?new _Constant ((long)(Value())%denom):new _Constant ((long)(Value())); - } else { - return nil; - } +_PMathObj _Constant::lDiv (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter { + long denom = b; + return denom != 0L ? (long(a) % denom): a; + }); } //__________________________________________________________________________________ -_PMathObj _Constant::longDiv (_PMathObj theObj) // div -{ - if (theObj) { - long denom = ((_Constant*)theObj)->theValue; - return denom?new _Constant ((long)(Value())/denom):new _Constant (0.0); - } else { - return nil; - } +_PMathObj _Constant::longDiv (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter { + long denom = b; + return denom != 0L ? 
(long(a) / denom): 0.0; + }); } + //__________________________________________________________________________________ _PMathObj _Constant::Raise (_PMathObj theObj) { - if (!theObj) { - return nil; - } - - _Parameter base = Value(), - expon = theObj->Value(); - - if (base>0.0) { - return new _Constant (exp (log(base)*(expon)));; - } else { - if (base<0.0) { - if (CheckEqual (expon, (long)expon)) { - return new _Constant (((((long)expon)%2)?-1:1)*exp (log(-base)*(expon))); - } else { - _String errMsg ("An invalid base/exponent pair passed to ^"); - WarnError (errMsg.sData); + return _check_type_and_compute (theObj, [] (_Parameter base, _Parameter expon) -> _Parameter { + if (base>0.0) { + return exp (log(base)*(expon)); + } else { + if (base<0.0) { + if (CheckEqual (expon, (long)expon)) { + return ((((long)expon)%2)?-1:1)*exp (log(-base)*(expon)); + } else { + _String errMsg ("An invalid base/exponent pair passed to ^"); + WarnError (errMsg.sData); + } } + + if (expon != 0.0) + return 0.0; + else + return 1.0; } - - if (expon != 0.0) - return new _Constant (0.0); - else - return new _Constant (1.0); - } + }); } //__________________________________________________________________________________ -_PMathObj _Constant::Random (_PMathObj upperB) -{ - if (randomCount == 0) { - randomCount++; - } - _Parameter l = theValue, u=((_Constant*)upperB)->theValue,r = l; - if (u>l) { - r=genrand_int32(); - r/=RAND_MAX_32; - r =l+(u-l)*r; - } - return new _Constant (r); - +_PMathObj _Constant::Random (_PMathObj theObj) { + + return _check_type_and_compute (theObj, [] (_Parameter l, _Parameter u) -> _Parameter { + _Parameter r = l; + if (u>l) { + r=genrand_int32(); + r/=RAND_MAX_32; + r =l+(u-l)*r; + } + return r; + }); } //__________________________________________________________________________________ @@ -256,26 +232,22 @@ void _Constant::Assign (_PMathObj theObj) } //__________________________________________________________________________________ -bool _Constant::Equal 
(_PMathObj theObj) -{ +bool _Constant::Equal (_PMathObj theObj) { return theValue==((_Constant*)theObj)->theValue; } //__________________________________________________________________________________ -_PMathObj _Constant::Abs (void) -{ +_PMathObj _Constant::Abs (void) { return new _Constant (fabs(theValue)); } //__________________________________________________________________________________ -_PMathObj _Constant::Sin (void) -{ +_PMathObj _Constant::Sin (void) { return new _Constant (sin(theValue)); } //__________________________________________________________________________________ -_PMathObj _Constant::Cos (void) -{ +_PMathObj _Constant::Cos (void){ return new _Constant (cos(theValue)); } @@ -332,13 +304,11 @@ _PMathObj _Constant::FormatNumberString (_PMathObj p, _PMathObj p2) return new _FString (new _String (buffer)); } //__________________________________________________________________________________ -_PMathObj _Constant::Log (void) -{ +_PMathObj _Constant::Log (void) { return new _Constant (log(theValue)); } //__________________________________________________________________________________ -_PMathObj _Constant::Sqrt (void) -{ +_PMathObj _Constant::Sqrt (void) { return new _Constant (sqrt(theValue)); } //__________________________________________________________________________________ diff --git a/src/core/include/constant.h b/src/core/include/constant.h index 4bbca66a3..c9b0b98be 100644 --- a/src/core/include/constant.h +++ b/src/core/include/constant.h @@ -43,6 +43,16 @@ #include "mathobj.h" class _Constant : public _MathObject { + + +private: + template <class T> _PMathObj _check_type_and_compute (_PMathObj operand, T functor) { + if (operand && operand->ObjectClass() == NUMBER) { + return new _Constant (functor (Value (), ((_Constant*)operand)->Value())); + } + WarnError ("Not a numeric 'X' type in a <'constant' operation 'X'> call"); + return new _MathObject; + } public: diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index 722a33cff..30dade0b9 
100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -2200,15 +2200,8 @@ _Matrix::_Matrix (_String& s, bool isNumeric, _VariableContainer const* theP) { _Formula* theTerm = new _Formula (lterm, theP); - if (isAConstant) { - // there is hope that this matrix is of numbers - if (theTerm->ObjectClass() == NUMBER) { - isAConstant = theTerm->IsAConstant(); - } else { - isAConstant = false; - } - } - + isAConstant = isAConstant && theTerm->IsAConstant() && theTerm->ObjectClass() == NUMBER; + ((_Formula**)theData)[vDim*hPos+vPos] = theTerm; } From 5252a98a317734526bf2d0d8c90e8f615f70a26a Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Tue, 24 Jul 2018 15:27:22 -0400 Subject: [PATCH 39/53] Resolving an issue with selection.io.SelectAllBranches --- .../SelectionAnalyses/modules/io_functions.ibf | 2 ++ tests/hbltests/UnitTests/HBLCommands/GetString.bf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf b/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf index db84c7220..bd00129ca 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf @@ -90,6 +90,8 @@ lfunction selection.io.defineBranchSets(partition_info) { //------------------------------------------------------------------------------ lfunction selection.io.SelectAllBranches(partition_info) { + return_set = {}; + for (k = 0; k < Abs (partition_info); k+=1) { tree_configuration = {}; tree_for_analysis = (partition_info[k])[utility.getGlobalValue("terms.data.tree")]; diff --git a/tests/hbltests/UnitTests/HBLCommands/GetString.bf b/tests/hbltests/UnitTests/HBLCommands/GetString.bf index cb919da0e..707284c9a 100644 --- a/tests/hbltests/UnitTests/HBLCommands/GetString.bf +++ b/tests/hbltests/UnitTests/HBLCommands/GetString.bf @@ -39,7 +39,7 @@ function runTest () 
//----------------------------------------------------------------------------------------------------------------- GetString (versionString, HYPHY_VERSION, 0); - assert ((versionString$"^[0-9]+\\.[0-9a-zA-Z]+$")[0]==0, "The short version string must be of the form major.minor[beta]. Had " + versionString); + assert ((versionString$"^[0-9]+\\.[0-9]+\\.[0-9a-zA-Z]*$")[0]==0, "The short version string must be of the form major.minor.release[beta]. Had " + versionString); GetString (versionString, HYPHY_VERSION, 1); assert ((versionString$"^HYPHY\\ [0-9]+\\.[0-9a-zA-Z]+.+\\ for .+$")[0]==0, "The full version string must be of the form major.minor[beta][MP] for platform description. Had " + versionString); From 6089b642337b0cdb9b7d0290d10385e6c1685db1 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 24 Jul 2018 19:23:15 -0400 Subject: [PATCH 40/53] Adding partially implemented bayesgraph.ibf --- .../libv3/tasks/bayesgraph.ibf | 835 ++++++++++++++++++ 1 file changed, 835 insertions(+) create mode 100644 res/TemplateBatchFiles/libv3/tasks/bayesgraph.ibf diff --git a/res/TemplateBatchFiles/libv3/tasks/bayesgraph.ibf b/res/TemplateBatchFiles/libv3/tasks/bayesgraph.ibf new file mode 100644 index 000000000..195053aed --- /dev/null +++ b/res/TemplateBatchFiles/libv3/tasks/bayesgraph.ibf @@ -0,0 +1,835 @@ +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/IOFunctions.bf"); +LoadFunctionLibrary("libv3/all-terms.bf"); + +utility.SetEnvVariable ("USE_MPI_CACHING", TRUE); + +namespace bgm { + + lfunction add_discrete_node (node_id, max_parents, sample_size, nlevels) { + return { + utility.getGlobalValue ("terms.bgm.node.id") : node_id, + utility.getGlobalValue ("terms.bgm.node.type") : 0, + utility.getGlobalValue ("terms.bgm.node.max_parents") : max_parents, + utility.getGlobalValue ("terms.bgm.node.prior_size") : sample_size, + utility.getGlobalValue ("terms.bgm.node.levels") : nlevels + }; + } + + function add_gaussian_node (node_id, 
max_parents, sample_size, mean, precision, scale) { + node = {}; + node["NodeID"] = node_id; + node["NodeType"] = 1; + node["MaxParents"] = max_parents; + node["PriorSize"] = sample_size; + node["PriorMean"] = mean; + node["PriorPrecision"] = precision; + node["PriorScale"] = scale; + return node; + } + + + + /* utility functions from ReadDelimitedFiles.bf */ + function ReadCSVTable (fileName, haveHeader) { + if (Abs(fileName) == 0) + { + fscanf (PROMPT_FOR_FILE, "Lines", inData); + } + else + { + fscanf (fileName, "Lines", inData); + } + if (haveHeader) + { + output = {}; + output[0] = splitOnRegExp (inData[0],"\\,"); + } + felMXString = ""; + felMXString * 256; + felMXString * "_tempMatrix={"; + for (lineID = haveHeader; lineID < Columns(inData); lineID = lineID + 1) + { + felMXString * ("{" + inData[lineID] + "}\n"); + } + felMXString * "}"; + felMXString * 0; + ExecuteCommands (felMXString); + felMXString = 0; + inData = 0; + if (haveHeader) + { + output[1] = _tempMatrix; + _tempMatrix = 0; + return output; + } + return _tempMatrix; + } + + + function splitOnRegExp (string, splitter) { + matched = string || splitter; + splitBits = {}; + if (matched [0] < 0) + { + splitBits[0] = string; + } + else + { + mc = 0; + if (matched[0] == 0) + { + fromPos = matched[1]+1; + mc = 2; + } + else + { + fromPos = 0; + toPos = 0; + } + for (; mc < Rows (matched); mc = mc+2) + { + toPos = matched[mc]-1; + splitBits [Abs(splitBits)] = string[fromPos][toPos]; + fromPos = matched[mc+1]+1; + } + splitBits [Abs(splitBits)] = string[fromPos][Abs(string)-1]; + } + return splitBits; + } + + + /* a wrapper around ReadCSVTable */ + function import_data (inData, hasHeader) { + timer0 = Time(0); + file_input = ReadCSVTable (inData, hasHeader); + + bgm_data_matrix = {{}}; + names = {{}}; + num_nodes = 0; + + if (hasHeader) + { + names = file_input["0"]; + bgm_data_matrix = file_input["1"]; + + fprintf (stdout, "Read ", Rows(bgm_data_matrix), " cases from file.\n"); + + num_nodes = 
Columns(bgm_data_matrix); + + if (Abs(file_input["0"]) != num_nodes) + { + fprintf (stdout, "ERROR! Number of items in header does not match the number of items in the data matrix."); + return 0; + } + + fprintf (stdout, "Detected ", num_nodes, " variables.\n"); + } + else + { + bgm_data_matrix = file_input; + + fprintf (stdout, "Read ", Rows(bgm_data_matrix), " cases from file.\n"); + + num_nodes = Columns(bgm_data_matrix); + names = {num_nodes, 1}; + + for (i = 0; i < num_nodes; i = i+1) + { + names[i] = i; + } + + fprintf (stdout, "Detected ", num_nodes, " variables.\n"); + } + + return bgm_data_matrix; + } + + + + function import_cache (filename, cache_name) { + fscanf (filename, "Raw", cacheStr); + ExecuteCommands(cache_name+" = "+cacheStr+";"); + return 0; + } + + + + function attach_cache (_bgm, cache) { + ExecuteCommands ("SetParameter("+_bgm+", BGM_SCORE_CACHE, cache);"); + return 0; + } + + + + + /* ____________________________________________________________ */ + /* accessor functions */ + function setStructure (_bgm, graph_matrix) { + ExecuteCommands("SetParameter ("+_bgm+", BGM_GRAPH_MATRIX, graph_matrix);"); + } + + function setOrder (_bgm, order_matrix) { + if (Rows(order_matrix) > 1) + { + if (Columns(order_matrix) == 1) + { + t_order_matrix = Transpose(order_matrix); + ExecuteCommands("SetParameter ("+_bgm+", BGM_NODE_ORDER, t_order_matrix);"); + } + else + { + fprintf (stdout, "Warning: expecting row vector matrix, received non-vector matrix"); + fprintf (stdout, " with dimensions ", Rows(order_matrix), " x ", Columns(order_matrix), "\n"); + fprintf (stdout, "Node order not set!\n"); + } + } + else + { + ExecuteCommands ("SetParameter ("+_bgm+", BGM_NODE_ORDER, order_matrix);"); + } + } + + + function setConstraints (_bgm, constraint_matrix) { + ExecuteCommands("SetParameter ("+_bgm+", BGM_CONSTRAINT_MATRIX, constraint_matrix);"); + return 0; + } + + + /* ____________________________________________________________ */ + /* Assign data matrix to 
_BayesianGraphicalModel object */ + lfunction attach_data (_bgm, data, impute_max, impute_burn, impute_samp) { + utility.SetEnvVariable ("BGM_IMPUTE_MAXSTEPS", impute_max$1); + utility.SetEnvVariable ("BGM_IMPUTE_BURNIN" ,impute_burn$1); + utility.SetEnvVariable ("BGM_IMPUTE_SAMPLES", impute_samp$1); + + SetParameter (^_bgm, BGM_DATA_MATRIX, data); + return 0; + } + + + + /* + Structural (graph) MCMC by Metropolis-Hastings + Returns matrix object containing chain trace, edge + marginal posterior probabilities, and best graph as + adjacency matrix. + + rand_tolerance = maximum number of failed steps in graph randomization + to tolerate + + prob_swap = probability of reversing an edge, instead of adding or deleting an edge + + with_order = a vector containing node ordering to constrain graph MCMC + set to 0 to have unconstrained chain sample + */ + + + function graph_MCMC (_bgm, duration, burnin, num_samples, with_order=0) + { + BGM_MCMC_MAXFAILS = 100; + BGM_MCMC_PROBSWAP = 0.1; + if (Rows(with_order) * Columns(with_order) > 0) + { + /* fixed node order */ + ExecuteCommands("setOrder ("+_bgm+", with_order);"); + BGM_OPTIMIZATION_METHOD = 2; + } + else + { + /* shuffle node order */ + BGM_OPTIMIZATION_METHOD = 3; + } + + BGM_MCMC_MAXSTEPS = duration; + BGM_MCMC_BURNIN = burnin; + BGM_MCMC_SAMPLES = num_samples; + + ExecuteCommands("Optimize(res, "+_bgm+");"); + + return res; + } + + + /* + Order (node precedence permutation) MCMC by Metropolis-Hastings + */ + lfunction order_MCMC (_bgm, duration, burnin, num_samples) { + + utility.SetEnvVariable ("BGM_OPTIMIZATION_METHOD", 4); + utility.SetEnvVariable ("BGM_MCMC_MAXSTEPS", duration); + utility.SetEnvVariable ("BGM_MCMC_BURNIN", burnin); + utility.SetEnvVariable ("BGM_MCMC_SAMPLES", num_samples); + Optimize (res, ^_bgm); + return res; + } + + + + + + function display_MCMC_chain (res) + { + if (Rows(res)*Columns(res) == 0) + { + fprintf (stdout, "ERROR: Cannot display MCMC chain for empty matrix\n"); + return 1; + } + 
+ pp_trace = res[-1][0]; + min_trace = pp_trace[0]; + max_trace = pp_trace[0]; + + /* locate min/max and end of trace */ + for (k = 0; k < Rows(pp_trace); k = k+1) + { + if (pp_trace[k] == 0) + { + break; + } + if (pp_trace[k] < min_trace) + { + min_trace = pp_trace[k]; + } + if (pp_trace[k] > max_trace) + { + max_trace = pp_trace[k]; + } + } + k = k-1; + pp_trace = pp_trace[{{0,0}}][{{k-1,0}}]; + + + columnHeaders = {{"MCMC chain","sample;1;2;3;4;5;6;7;8;9"}}; + + OpenWindow (CHARTWINDOW,{{"Posterior probability"} + {"columnHeaders"} + {"pp_trace"} + {"Step Plot"} + {"Index"} + {"MCMC chain"} + {"chain sample step"} + {"posterior prob."} + {""} + {"0"} + {""} + {"0;0"} + {"10;1.309;0.785398"} + {"Times:12:0;Times:10:0;Times:12:2"} + {"0;0;13816530;16777215;0;0;6579300;11842740;13158600;14474460;0;3947580;16777215;15670812;6845928;16771158;2984993;9199669;7018159;1460610;16748822;11184810;14173291"} + {"16,"+min_trace+","+max_trace} + }, + "405;462;105;100"); + + return 0; + } + + + function get_MCMC_graph (res, num_nodes, mode) + { + /* mode = -1 : best_graph + mode = 0 : last_graph + 0 < mode <= 1 : marginal posterior graph with threshold = mode (e.g. 
0.9) + */ + graph = {num_nodes, num_nodes}; + + if (mode > 0) + { + for (row = 0; row < num_nodes * num_nodes; row = row+1) + { + if (res[row][1] >= mode) + { + graph[row $ num_nodes][row % num_nodes] = 1; + } + } + } + else + { + for (row = 0; row < num_nodes; row = row+1) + { + for (col = 0; col < num_nodes; col = col+1) + { + graph[row][col] = res[row*num_nodes+col][mode+3]; + } + } + } + + return graph; + } + + + function write_edgelist (filename,res,num_nodes,directed) + { + fprintf (filename, CLEAR_FILE, KEEP_OPEN); + if (directed) + { + for (row = 0; row < num_nodes; row = row+1) + { + for (col = 0; col < num_nodes; col = col+1) + { + fprintf (filename, names[row], ",", names[col], ",", res[row*num_nodes+col][1], "\n"); + } + } + } + else + { + for (row = 0; row < num_nodes-1; row = row+1) + { + for (col = row+1; col < num_nodes; col = col+1) + { + fprintf (filename, names[row], ",", names[col], ",", res[row*num_nodes+col][1] + res[col*num_nodes+row][1], "\n"); + } + } + } + fprintf (filename, CLOSE_FILE); + return 0; + } + + + function mcmc_graph_to_dotfile (filename, threshold, res, nodes) + { + fprintf (filename, CLEAR_FILE); + fprintf (filename, "digraph foo\n{\n"); + fprintf (filename, "\tnode [fontname=\"Helvetica\" style=\"filled\" fillcolor=\"white\"];\n"); + fprintf (filename, "\tedge [labelfontname=\"Helvetica\" labelangle=30 labeldistance=2];\n"); + + for (_n = 0; _n < Abs(nodes); _n+=1) { + fprintf (filename, "\t", (nodes[_n])["NodeID"]); + if ((nodes[_n])["NodeType"]==0) { + fprintf (filename, " [shape=\"Msquare\"];\n"); + } else { + fprintf (filename, " [shape=\"circle\"];\n"); + } + } + + + // sum edge posteriors in both directions between nodes X and Y, + // and assign direction to the greater value + for (row = 0; row < num_nodes-1; row = row+1) { + for (col = row+1; col < num_nodes; col = col+1) { + xy = res[row*num_nodes+col][1]; + yx = res[col*num_nodes+row][1]; + if (xy+yx > threshold) { + /* + This is really annoying - order MCMC 
reports edge marginal matrix with rows = child + whereas graph MCMC reports rows = parent + */ + if ( xy > yx ) { + fprintf (filename, "\t", (nodes[row])["NodeID"], "->", (nodes[col])["NodeID"], ";\n"); + } else { + fprintf (filename, "\t", (nodes[col])["NodeID"], "->", (nodes[row])["NodeID"], ";\n"); + } + } + } + } + + fprintf (filename, "}\n"); + return 0; + } + + + /* argument must be string identifier of BGM object */ + function get_network_parameters (_bgm) + { + ExecuteCommands("GetString (res, "+_bgm+", 1);"); + ExecuteCommands(res); + /* returns string identifier to associative array */ + ExecuteCommands("params="+_bgm+"_export;"); + return params; + } + + + function get_node_score_cache (_bgm) + { + ExecuteCommands("GetString (res, "+_bgm+", 0);"); + return res; + } + + + /* + function getStructure (_bgm) + { + ExecuteCommands("GetInformation (s, "+_bgm+", 0);"); + return s; + } + + function getNodeOrder (_bgm) + { + ExecuteCommands("GetInformation (s, "+_bgm+", 1);"); + return s; + } + + */ + + + + + /* + Simulation of data based on the inferred network + structure and parameters. + mode = 0 (local) : for each case, instantiate parameters de novo. + Better for assessing uncertainty. + mode = 1 (global) : instantiate all parameters once. + Assuming known network. 
+ */ + function instantiate_CPDFs (params) + { + node_names = Rows(params); + + /* instantiate network parameters from conditional posterior distribution functions */ + for (i = 0; i < Abs(params); i = i + 1) { + /* stores instantiations */ + ExecuteCommands("(params[\""+node_names[i]+"\"])[\"Parameters\"] = {};"); + + /* number of parent combinations */ + //ExecuteCommands("npac = Columns((params[\""+node_names[i]+"\"])[\"CPDFs\"]);"); + ExecuteCommands("npac = (params[\""+node_names[i]+"\"])[\"NParentCombs\"];"); // safe version + + for (pa = 0; pa < npac; pa = pa+1) { + ExecuteCommands("_p = " + ((params[node_names[i]])["CPDFs"])[pa] + ";"); + ExecuteCommands("((params[\""+node_names[i]+"\"])[\"Parameters\"])[\""+pa+"\"] = "+_p+";"); + } + + //ExecuteCommands("((params[\""+node_names[i]+"\"])[\"Levels\"] = Columns( ((params[\""+node_names[i]+"\"])[\"Parameters\"])[0] ));"); + } + return 0; + } + + + /* + Return a parameter vector for conditional Gaussian (CG) node given + hyperparameters passed as arguments. 
+ */ + function cg_params (mean_vec, rho, phi, tau) { + ExecuteCommands("sigma = Random({{"+phi+"}}, {\"PDF\":\"InverseWishart\", \"ARG0\":{{"+rho+"}} });"); + ExecuteCommands("em = Random("+mean_vec+", {\"PDF\":\"Gaussian\", \"ARG0\":(Inverse("+tau+") * "+sigma[0]+") } );"); + return ({"EM":em, "SIGMA":sigma}); + } + + + + function simulate_data (params, num_cases) + { + // prepare matrix to store simulated data + result = {num_cases, Abs(params)}; + + node_names = Rows(params); + if ( Columns(Rows((params[node_names[0]])["Parameters"])) == 0 ) + { + /* parameters have not been instantiated yet */ + instantiate_CPDFs(params); + } + + + // initialize State variables and generate root states + for (case = 0; case < num_cases; case = case+1) { + + for (i = 0; i < Abs(params); i = i + 1) { + // set to String as a placeholder + (params[node_names[i]])["State"] = ""; + + if ( Type((params[node_names[i]])["Parents"]) == "AssociativeList" ) { + // if condition is true then this is a root node (no parents) + if ( (params[node_names[i]])["NodeType"] == 0 ) { + // discrete node, parameters define conditional probability table + urn = Random(0,1); + cpt = ((params[node_names[i]])["Parameters"])[0]; + r_i = Columns(cpt); + for (k = 0; k < r_i; k = k+1) + { + if ( urn <= cpt[k] ) + { + (params[node_names[i]])["State"] = k; + break; + } + urn = urn - cpt[k]; + } + } else { + // conditional Gaussian node, parameter defines intercept + em = (((params[node_names[i]])["Parameters"])[0])["EM"]; + sigma = (((params[node_names[i]])["Parameters"])[0])["SIGMA"]; + (params[node_names[i]])["State"] = (Random(em, {"PDF":"Gaussian", "ARG0":sigma}))[0]; + } + } + } + + while (1) + { + all_done = 1; + + /* loop until parameters are instantiated for all nodes */ + for (i = 0; i < Abs(params); i = i+1) + { + if (Type(params[node_names[i]])["State"] == "String") + { + // Type String indicates no value - replace placeholder with NoneType when it becomes available + + all_done = 0; + ok_to_go = 1; 
+ + parents = (params[node_names[i]])["Parents"]; + num_parent_combos = 1; + pa_index = 0; + + for (p = 0; p < Abs(Rows(parents)); p = p+1) + { + pid = parents[p]; + if ( Type(params[pid])["State"] == "String" ) + { + // parents not resolved, skipping + ok_to_go = 0; + break; + } + + // compute parental index for discrete parents + if ( (params[pid])["NodeType"] == 0 ) { + pa_index = pa_index + (params[pid])["State"] * num_parent_combos; + num_parent_combos = num_parent_combos * (params[pid])["Levels"]; + } + } + + + if (ok_to_go) + { + // instantiate this node's parameters + if ( (params[node_names[i]])["NodeType"] == 0 ) { + urn = Random(0,1); + cpt = ((params[node_names[i]])["Parameters"])[pa_index]; + r_i = Columns(cpt); + for (k = 0; k < r_i; k = k+1) { + if ( urn <= cpt[k] ) { + (params[node_names[i]])["State"] = k; + break; + } + urn = urn - cpt[k]; + } + } else { + em = ( ((params[node_names[i]])["Parameters"])[pa_index] )["EM"]; + sigma = ( ((params[node_names[i]])["Parameters"])[pa_index] )["SIGMA"]; + zvec = {Columns(em), 1}; + zvec[0] = 1; + + // get states of continuous parents + cpar = 0; + for (p = 0; p < Abs(Rows(parents)); p += 1) { + pid = parents[p]; + if ( (params[pid])["NodeType"] == 1 ) { + zvec[cpar+1] = (params[pid])["State"]; + cpar += 1; + } + } + + // conditional mean + cond_mean = em * zvec; + (params[node_names[i]])["State"] = (Random(cond_mean, {"PDF":"Gaussian", "ARG0":sigma}))[0]; + } + } + } + } + /* end for loop */ + + if (all_done) break; + } + /* end while */ + + /* add case to result */ + for (i = 0; i < Abs(params); i = i+1) { + result[case][i] = (params[node_names[i]])["State"]; + } + } + + return result; + } + + + /* + Example: + import_xmlbif("/Users/apoon/svn/hyphy/HBL/art/BGM/alarm/alarm.xml", "Alarm"); + */ + function import_xmlbif (filename, newname) + { + ExecuteCommands(newname+"={};"); + + fscanf (filename, "Raw", input); + + var_tags = input||" tag absent from XML, exiting.."); + return 1; + } + + ntags = 
Rows(var_tags)$2; + + + for (tag = 0; tag < ntags; tag = tag+1) + { + /* + search for tag - note that we use an arbitrary character limit (1000) + for the last entry because if we use the rest of the XML file, it causes the + regular expression search to fail! - afyp, October 26, 2011 + */ + start_char = var_tags[tag*2+1]; + if (tag == ntags-1) { end_char = start_char+1000; } + else { end_char = var_tags[(tag+1)*2]; } + substr = input[start_char][end_char]; + + /* create node */ + name_tag = substr||".+"; + node_name = substr[name_tag[0]+6][name_tag[1]-7]; + + + ExecuteCommands(newname+"[\""+node_name+"\"]= {};"); + + outcome_tags = substr||""; + ExecuteCommands("("+newname+"[\""+node_name+"\"])[\"Levels\"]= "+Rows(outcome_tags)$2+";"); + } + + + def_tags = input||""; + if (def_tags[0] < 0) + { + fprintf (stdout, "ERROR: tag absent from XML, exiting.."); + return 1; + } + + ntags = Rows(def_tags)$2; + for (tag = 0; tag < ntags; tag = tag+1) + { + /* parse definition tags */ + start_char = def_tags[tag*2+1]; + if (tag == ntags-1) { end_char = Abs(input); } + else { end_char = def_tags[(tag+1)*2]; } + substr = input[start_char][end_char]; + + /* start a new node */ + for_tag = substr||".+"; + node_name = substr[for_tag[0]+5][for_tag[1]-6]; + + /* assign parents */ + exec_str = ""; + exec_str * 256; + exec_str * "("; + exec_str * newname; + exec_str * "[\""; + exec_str * node_name; + exec_str * "\"])[\"Parents\"]={"; + given_tags = substr||""; + if (given_tags[0] >= 0) + { + for (gt = 1; gt < Rows(given_tags); gt = gt+4) + { + exec_str * "{\""; + exec_str * substr[given_tags[gt]+1][given_tags[gt+1]-3]; + exec_str * "\"}"; + if (gt < Rows(given_tags)-4) { exec_str * ","; } + } + } + exec_str * "};"; + exec_str * 0; + ExecuteCommands(exec_str); + + + /* assign conditional probability table - child state cycles fastest, then parents */ + table_tag = substr||".+
"; + table_str = substr[table_tag[0]+7][table_tag[1]-8]; + prob_tags = table_str||"[01]\.[0-9]+"; + + n_parent_combos = 1; + ExecuteCommands("parents = ("+newname+"[\""+node_name+"\"])[\"Parents\"];"); + for (par = 0; par < Abs(Rows(parents)); par=par+1) + { + ExecuteCommands("n_parent_combos = n_parent_combos * ("+newname+"[\""+parents[par]+"\"])[\"Levels\"];"); + } + ExecuteCommands("n_levels = ("+newname+"[node_name])[\"Levels\"];"); + + ExecuteCommands("("+newname+"[\""+node_name+"\"])[\"Parameters\"]= {};"); + + for (pa = 0; pa < n_parent_combos; pa = pa+1) + { + ExecuteCommands("(("+newname+"[\""+node_name+"\"])[\"Parameters\"])[\""+pa+"\"]={1,n_levels};"); + for (lev = 0; lev < n_levels; lev=lev+1) + { + foo = lev * n_parent_combos + pa; + /* fprintf (stdout, lev, ",", pa, ",", table_str[prob_tags[foo*2]][prob_tags[foo*2+1]], "\n"); */ + ExecuteCommands("((("+newname+"[\""+node_name+"\"])[\"Parameters\"])[\""+pa+"\"])["+lev+"]="+table_str[prob_tags[foo*2]][prob_tags[foo*2+1]]+";"); + } + } + + + } + + return 0; + } + + + + function list2adjmat (alist) { + /* + convert associative list returned by import_xmlbif into an adjacency matrix + */ + num_nodes = Abs(alist); + res = {num_nodes, num_nodes}; + node_names = Rows(alist); + name2index = {}; + + // for indexing into adjacency matrix + for (node = 0; node < num_nodes; node += 1) { + name2index[node_names[node]] = node; + } + + for (child = 0; child < num_nodes; child += 1) { + parents = (alist[node_names[child]])["Parents"]; + if (Type(parents) == "Matrix") { + for (par = 0; par < Rows(parents); par += 1) { + parent = name2index[parents[par]]; + res[parent][child] = 1; + } + } + } + + return res; + } + + + function check_edgelist (results, adjmat, cutoff) { + // extract edge marginal posteriors vector from results matrix (in column 1) + edgep = results[-1][1]; + num_nodes = Rows(adjmat); + true_pos = 0; + false_pos = 0; + true_neg = 0; + false_neg = 0; + + for (parent = 0; parent < (num_nodes-1); parent += 
1) { + for (child = (parent+1); child < num_nodes; child += 1) { + x = edgep[parent * num_nodes + child] + edgep[child * num_nodes + parent]; + + if (adjmat[parent][child] > 0 || adjmat[child][parent] > 0) { + if ( x > cutoff ) { + true_pos += 1; + } else { + false_neg += 1; + } + } else { + if ( x > cutoff ) { + false_pos += 1; + } else { + true_neg += 1; + } + } + } + } + + result = {4,1}; /* TP, FN, FP, TN */ + result[0] = true_pos; + result[1] = false_neg; + result[2] = false_pos; + result[3] = true_neg; + + return (result); + } +} + + From d38d6a61ddd66bc0d20ac34dab561aa860e52958 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Tue, 24 Jul 2018 19:33:15 -0400 Subject: [PATCH 41/53] Fixing lack of return value in fubar.scalers.SetBranchLength; updating FUBAR.wbf to use VB and check results --- res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf | 3 ++- res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf | 1 + tests/hbltests/libv3/FUBAR.wbf | 8 +++----- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf index 9029d52c1..3b573d1e0 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf @@ -253,7 +253,7 @@ if (utility.Has (fubar.cache, terms.fubar.cache.grid, "Matrix") && utility.Has ( estimators.ApplyExistingEstimates ("fubar.lf.codon", fubar.model_id_to_object, fubar.gtr_results, None); estimators.TraverseLocalParameters ("fubar.lf.codon", fubar.model_id_to_object, "fubar.scalers.Constrain"); - + fubar.pass1 = Max (fubar.ComputeOnGrid ("fubar.lf.codon", fubar.grid.MatrixToDict (fubar.grid.matrix), "fubar.pass1.evaluator", @@ -720,6 +720,7 @@ lfunction fubar.scalers.SetBranchLength (model, value, parameter) { parameters.RemoveConstraint (beta); parameters.SetValue ("`parameter`.`alpha`", s); parameters.SetValue ("`parameter`.`beta`", ^beta); + return 1; } 
//------------------------------------------------------------------------------ diff --git a/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf b/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf index 6e92955ea..ab319bf19 100644 --- a/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf +++ b/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf @@ -117,6 +117,7 @@ function models.codon.MG_REV._DefineQ(mg_rev, namespace) { * @returns {Number} 0 */ function models.codon.MG_REV.set_branch_length(model, value, parameter) { + if (model[terms.model.type] == terms.global) { return models.generic.SetBranchLength(model, value, parameter); } diff --git a/tests/hbltests/libv3/FUBAR.wbf b/tests/hbltests/libv3/FUBAR.wbf index d81f77340..3fbba43c3 100644 --- a/tests/hbltests/libv3/FUBAR.wbf +++ b/tests/hbltests/libv3/FUBAR.wbf @@ -2,11 +2,9 @@ LoadFunctionLibrary("SelectionAnalyses/FUBAR.bf", { "0": "Universal", "1": PATH_TO_CURRENT_BF + "data/CD2.nex", "2": "20", - "3": "Metropolis-Hastings", - "4": "5", - "5": "2000000", - "6": "1000000", - "7": "100", + "3": "Variational Bayes", "8": "0.5", }); +assert (utility.Array1D (fubar.report.sites_found) == 13, "Expected a different number of sites under selection"); +assert (fubar.partition_results[33][4] > 0.9, "Expected site 34 to be under selection"); From 086125d9ff855a091e706ef93e3871edf0ad38f6 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Wed, 25 Jul 2018 13:02:26 -0400 Subject: [PATCH 42/53] Further refactoring and type checks in constant.cpp --- src/core/constant.cpp | 686 ++++++++++++++++-------------------- src/core/include/constant.h | 8 + 2 files changed, 308 insertions(+), 386 deletions(-) diff --git a/src/core/constant.cpp b/src/core/constant.cpp index 10eb3fe5e..29cfc6fa0 100644 --- a/src/core/constant.cpp +++ b/src/core/constant.cpp @@ -62,7 +62,15 @@ long lastMatrixDeclared = -1, dummyVariable2, expressionsParsed = 0; -_Parameter gammaCoeff [7] = { + + + + 
+//__________________________________________________________________________________ + + +_Parameter _gamma (_Parameter alpha) { + _Parameter static gammaCoeff [7] = { 2.50662827463100050, 190.9551718944012, -216.8366818451899, @@ -70,16 +78,209 @@ _Parameter gammaCoeff [7] = { -3.087513097785903, 0.003029460875352382, -0.00001345152485367085 -}; + }; + + _Parameter theV = alpha >=1.0? alpha : 2.-alpha, + result = gammaCoeff[0], + temp = theV; + + for (int i = 1; i < 7; ++i , temp += 1.) { + result += gammaCoeff[i] / temp; + } + + temp = theV + 4.5; + result *= exp(-temp+log(temp)*(theV-.5)); + + if (alpha >= 1.0) { + return result; + } + temp = pi_const * (1-alpha); + return temp / result / sin (temp); +} + + //__________________________________________________________________________________ + -_Parameter lngammaCoeff [6] = { +_Parameter _ln_gamma (_Parameter alpha) { + // obtained from Numerical Recipes in C, p. 214 by afyp, February 7, 2007 + + _Parameter static lngammaCoeff [6] = { 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5 -}; + }; + + + _Parameter x, y, tmp, ser; + + y = x = alpha; + tmp = x + 5.5; + tmp -= (x+0.5) * log(tmp); + ser = 1.000000000190015; + + for (int j = 0; j < 6 ; ++j ) { + ser += lngammaCoeff[j] / ( y += 1. ); + } + + return -tmp + log(2.506628274631005*ser/x); + + +} + +//__________________________________________________________________________________ + +_Parameter _ibeta (_Parameter x, _Parameter a, _Parameter b) { + // check ranges + if (x > 0. && x < 1.) { // in range + + + _Parameter aa, + c, + d, + del, + h, + qab, + qam, + qap, + FPMIN = 1e-100; + + + bool swap = false; + + + if (x >= (a+1.)/(a+b+2.)) { + swap = true; + c = b; + b = a; + a = c; + x = 1. - x; + } + + qab = a+b; + qap = a+1.; + qam = a-1.; + c = 1.; + d = 1. 
- qab*x/qap; + if ((d-FPMIN)) { + d = FPMIN; + } + d = 1./d; + h = d; + + for (int m=1; m<100; m++) { + _Parameter m2 = 2*m; + aa = m*(b-m)*x / ((qam+m2)*(a+m2)); + d = 1.+aa*d; + if ((d-FPMIN)) { + d = FPMIN; + } + c = 1.+aa/c; + if ((c-FPMIN)) { + c = FPMIN; + } + d = 1./d; + h*= d*c; + aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2)); + d = 1.+aa*d; + if ((d-FPMIN)) { + d = FPMIN; + } + c = 1.+aa/c; + if ((c-FPMIN)) { + c = FPMIN; + } + d = 1./d; + del = d*c; + h*= del; + del -= 1.; + if ((del<1.e-14)&&(del>-1.e-14)) { + break; + } + } + + c = exp (a*log(x)+b*log(1.-x)+_ln_gamma(a+b)-_ln_gamma(a)-_ln_gamma(b)); + + if (swap) { + return 1.-c*h/a; + } else { + return c*h/a; + } + } + + + if (x <= 0.) { + if ( x < 0.) { + ReportWarning (_String ("IBeta is defined for x in [0,1]. Had x = ") & x); + } + return 0.; + } + + if ( x > 1.) { + ReportWarning (_String ("IBeta is defined for x in [0,1]. Had x = ") & x); + } + + return 1.; +} + +//__________________________________________________________________________________ + +_Parameter _igamma (_Parameter a, _Parameter x) { + _Parameter sum = 0.; + if (x>1e25) { + x=1.e25; + } else if (x<0.) { + WarnError ("The domain of x is {x>0} for IGamma (a,x)"); + return 0.0; + } else if (x==0.0) { + return 0.0; + } + + + _Parameter gamma = _gamma (a); + + if (x <= a + 1.) { + // use the series representation + // IGamma (a,x)=exp(-x) x^a \sum_{n=0}^{\infty} \frac{\Gamma((a)}{\Gamma(a+1+n)} x^n + + _Parameter term = 1.0/a, den = a+1.; + + for (int count = 0; fabs (term) >= fabs (sum) * machineEps && count < 500; ++ count) { + sum+=term; + term*=x/den; + den += 1.0; + } + + return sum * exp (-x + a * log (x)) / gamma; + + } + // use the continue fraction representation + // IGamma (a,x)=exp(-x) x^a 1/x+/1-a/1+/1/x+/2-a/1+/2/x+... 
+ + _Parameter lastTerm = 0., a0 = 1.0, a1 = x, b0 = 0.0, b1 = 1.0, factor = 1.0, an, ana, anf; + for (int count = 1; count<500; ++count) { + an = count; + ana = an - a; + a0 = (a1+a0*ana)*factor; + b0 = (b1+b0*ana)*factor; + anf = an*factor; + a1 = x*a0+anf*a1; + b1 = x*b0+anf*b1; + if (a1!=0.0) { + factor=1.0/a1; + sum = b1*factor; + if (fabs(sum-lastTerm)/sum=1.0?theValue:2-theValue, result = gammaCoeff[0], temp = theV; - - for (long i=1; i<7; i++, temp+=1.0) { - result+=gammaCoeff[i]/temp; - } - - temp = theV+4.5; - result *= exp(-temp+log(temp)*(theV-.5)); - if (theValue>=1.0) { - return new _Constant (result); - } - - else { - temp = pi_const*(1-theValue); +//__________________________________________________________________________________ - return new _Constant (temp/result/sin(temp)); - } - return nil; +_PMathObj _Constant::Gamma (void) { + return new _Constant (_gamma (theValue)); } //__________________________________________________________________________________ -_PMathObj _Constant::LnGamma (void) -{ - // obtained from Numerical Recipes in C, p. 
214 by afyp, February 7, 2007 - _Parameter x, y, tmp, ser; - - y = x = theValue; - tmp = x + 5.5; - tmp -= (x+0.5) * log(tmp); - ser = 1.000000000190015; - - for (long j = 0; j <= 5; j++) { - ser += lngammaCoeff[j] / ++y; - } - - return new _Constant (-tmp + log(2.506628274631005*ser/x)); +_PMathObj _Constant::LnGamma (void) { + return new _Constant (_ln_gamma (theValue)); } //__________________________________________________________________________________ -_PMathObj _Constant::Beta (_PMathObj arg) -{ - if (arg->ObjectClass()!=NUMBER) { - WarnError ("A non-numerical argument passed to Beta(x,y)"); - return nil; - } - - _Constant xy = _Constant (theValue + ((_Constant*)arg)->theValue); - - _Constant * lnGammaX = (_Constant *)LnGamma(), - * lnGammaY = (_Constant *)arg->LnGamma(), - * lnGammaXY = (_Constant *)xy.LnGamma(), - * result = new _Constant (exp (lnGammaX->theValue + lnGammaY->theValue - lnGammaXY->theValue)); - - DeleteObject (lnGammaX); - DeleteObject (lnGammaY); - DeleteObject (lnGammaXY); - - return result; +_PMathObj _Constant::Beta (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { + return exp (_ln_gamma (a) + _ln_gamma (b) - _ln_gamma (a + b)); + }); } //__________________________________________________________________________________ -_PMathObj _Constant::IBeta (_PMathObj arg1, _PMathObj arg2) -{ - if (theValue<=0.0) { - if (theValue < 0.0) { - _String errMsg; - errMsg = _String ("IBeta is defined for x betweeen 0 and 1. Had: ") & theValue; - ReportWarning (errMsg); - } - return new _Constant (0.0); - } - - if (theValue>=1.0) { - if (theValue>1.0) { - _String errMsg; - errMsg = _String ("IBeta is defined for x betweeen 0 and 1. 
Had: ") & theValue; - ReportWarning (errMsg); - } - return new _Constant (1.0); - } - - - if ((arg1->ObjectClass()!=NUMBER)||(arg2->ObjectClass()!=NUMBER)) { - _String errMsg ("IBeta called with a non-scalar argument."); - WarnError (errMsg); - return nil; - } - - _Constant *ga = (_Constant*)arg1->LnGamma(), - *gb = (_Constant*)arg2->LnGamma(); - - if (ga&&gb) { - _Constant *ac = (_Constant*)arg1, - *bc = (_Constant*)arg2; - - _Parameter a = ac->Value(), - b = bc->Value(), - x = theValue, - aa, - c, - d, - del, - h, - qab, - qam, - qap, - FPMIN = 1e-100; - - bool swap = false; - - long m, - m2; - - if (x >= (a+1.)/(a+b+2.)) { - swap = true; - c = b; - b = a; - a = c; - x = 1. - x; - } - - qab = a+b; - qap = a+1.; - qam = a-1.; - c = 1.; - d = 1. - qab*x/qap; - if ((d-FPMIN)) { - d = FPMIN; - } - d = 1./d; - h = d; - - for (m=1; m<100; m++) { - m2 = 2*m; - aa = m*(b-m)*x / ((qam+m2)*(a+m2)); - d = 1.+aa*d; - if ((d-FPMIN)) { - d = FPMIN; - } - c = 1.+aa/c; - if ((c-FPMIN)) { - c = FPMIN; - } - d = 1./d; - h*= d*c; - aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2)); - d = 1.+aa*d; - if ((d-FPMIN)) { - d = FPMIN; - } - c = 1.+aa/c; - if ((c-FPMIN)) { - c = FPMIN; - } - d = 1./d; - del = d*c; - h*= del; - del -= 1.; - if ((del<1.e-14)&&(del>-1.e-14)) { - break; - } - } - - _Constant * res = new _Constant (a+b); - ac = (_Constant*)res->LnGamma(); - c = exp (a*log(x)+b*log(1-x)+ac->Value()-ga->Value()-gb->Value()); - - if (swap) { - res->theValue = 1.-c*h/a; - } else { - res->theValue = c*h/a; - } - - DeleteObject (ac); - DeleteObject (ga); - DeleteObject (gb); - return res; - } - DeleteObject (ga); - DeleteObject (gb); - return nil; +_PMathObj _Constant::IBeta (_PMathObj arg1, _PMathObj arg2) { + return _check_type_and_compute_3 (arg1, arg2, [] (_Parameter a, _Parameter b, _Parameter c) -> _Parameter { + return _ibeta(a,b,c); + }); } //__________________________________________________________________________________ -_PMathObj _Constant::IGamma (_PMathObj arg) -{ - if 
(arg->ObjectClass()!=NUMBER) { - _String errMsg ("A non-numerical argument passed to IGamma(a,x)"); - WarnError (errMsg); - return new _Constant (0.0); - } - _Parameter x = ((_Constant*)arg)->theValue, sum=0.0; - if (x>1e25) { - x=1e25; - } else if (x<0) { - _String errMsg ("The domain of x is {x>0} for IGamma (a,x)"); - WarnError (errMsg); - return new _Constant (0.0); - } else if (x==0.0) { - return new _Constant (0.0); - } - - - if (x<=theValue+1) // use the series representation - // IGamma (a,x)=exp(-x) x^a \sum_{n=0}^{\infty} \frac{\Gamma((a)}{\Gamma(a+1+n)} x^n - { - _Parameter term = 1.0/theValue, den = theValue+1; - long count = 0; - while ((fabs(term)>=fabs(sum)*machineEps)&&(count<500)) { - sum+=term; - term*=x/den; - den += 1.0; - count++; - } - } else // use the continue fraction representation - // IGamma (a,x)=exp(-x) x^a 1/x+/1-a/1+/1/x+/2-a/1+/2/x+... - { - _Parameter lastTerm = 0, a0 = 1.0, a1 = x, b0 = 0.0, b1 = 1.0, factor = 1.0, an, ana, anf; - for (long count = 1; count<500; count++) { - an = count; - ana = an - theValue; - a0 = (a1+a0*ana)*factor; - b0 = (b1+b0*ana)*factor; - anf = an*factor; - a1 = x*a0+anf*a1; - b1 = x*b0+anf*b1; - if (a1!=0.0) { - factor=1.0/a1; - sum = b1*factor; - if (fabs(sum-lastTerm)/sumSetValue(sum*exp(-x+theValue*log(x))/result->theValue); - if (x>theValue+1) { - result->SetValue (1.0-result->theValue); - } - return result; +_PMathObj _Constant::IGamma (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { return _igamma(a, b);}); } //__________________________________________________________________________________ -_PMathObj _Constant::Erf (void) -{ - _Parameter lV = theValue; - _Constant half (.5), sq = (lV*lV); - _PMathObj IG = half.IGamma(&sq); - lV = ((_Constant*)IG)->theValue; - if (theValue<0) { - lV=-lV; +_PMathObj _Constant::Erf (void) { + + _Parameter ig = _igamma (0.5, theValue * theValue); + if (theValue < 0.) 
{ + ig = -ig; } - ((_Constant*)IG)->SetValue(lV); - return (_PMathObj)IG; + return new _Constant (ig); } //__________________________________________________________________________________ -_PMathObj _Constant::ZCDF (void) -{ - _Parameter lV = theValue; - - _Constant half (.5), - sq (lV*lV/2); - - _PMathObj IG = half.IGamma(&sq); - lV = ((_Constant*)IG)->theValue/2; - - if (theValue>0) { - ((_Constant*)IG)->SetValue(lV+.5); - } else { - ((_Constant*)IG)->SetValue(.5-lV); +_PMathObj _Constant::ZCDF (void) { + _Parameter ig = _igamma (0.5, theValue * theValue * 0.5); + + if (theValue > 0) { + return new _Constant (0.5 * (ig + 1.)); } - return (_PMathObj)IG; -} + return new _Constant (0.5 * ( 1. - ig)); + } //__________________________________________________________________________________ -_PMathObj _Constant::Time (void) -{ - _Constant result; +_PMathObj _Constant::Time (void) { + _Constant * result = new _Constant; if (theValue<1.0) { - result.theValue = ((_Parameter)clock()/CLOCKS_PER_SEC); + result->theValue = ((_Parameter)clock()/CLOCKS_PER_SEC); } else { time_t tt; - result.theValue = ((_Parameter)time(&tt)); + result->theValue = ((_Parameter)time(&tt)); } - return (_PMathObj)result.makeDynamic(); + return result; } //__________________________________________________________________________________ -_PMathObj _Constant::Less (_PMathObj theObj) -{ - if (theObj) { - return new _Constant (theValue<((_Constant*)theObj)->theValue); - } else { - return nil; - } +_PMathObj _Constant::Less (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter a, _Parameter b) -> _Parameter {return a < b;}); } //__________________________________________________________________________________ -_PMathObj _Constant::Greater (_PMathObj theObj) -{ - if (theObj) { - return new _Constant (theValue>((_Constant*)theObj)->theValue); - } else { - return nil; - } +_PMathObj _Constant::Greater (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter 
a, _Parameter b) -> _Parameter {return a > b;}); } //__________________________________________________________________________________ -_PMathObj _Constant::GammaDist (_PMathObj alpha, _PMathObj beta) -{ - _Parameter x = theValue, a = ((_Constant*)alpha)->theValue, - b = ((_Constant*)beta)->theValue, gd = exp(a * log(b) -b*x +(a-1)*log(x)); - _Constant * c = (_Constant*)alpha->Gamma(); - gd/=c->theValue; - c->SetValue(gd); - return c; +_PMathObj _Constant::GammaDist (_PMathObj alpha, _PMathObj beta) { + return _check_type_and_compute_3 (alpha, beta, [] (_Parameter x, _Parameter a, _Parameter b) -> _Parameter { + _Parameter gamma_dist = exp(a * log(b) -b*x +(a-1.)*log(x)); + return gamma_dist / _gamma (a); + }); } //__________________________________________________________________________________ -_PMathObj _Constant::CGammaDist (_PMathObj alpha, _PMathObj beta) -{ - _Parameter arg = theValue*((_Constant*)beta)->theValue; - /*if (arg==0) - { - _Constant zer (0); - return (_PMathObj)zer.makeDynamic(); - }*/ - _Constant newX (arg); - return alpha->IGamma( &newX); -} +_PMathObj _Constant::CGammaDist (_PMathObj alpha, _PMathObj beta) { + return _check_type_and_compute_3 (alpha, beta, [] (_Parameter x, _Parameter a, _Parameter b) -> _Parameter { + return _igamma (a, b * x); + }); + } //__________________________________________________________________________________ -_PMathObj _Constant::CChi2 (_PMathObj n) -// chi^2 n d.f. probability up to x -{ - _Constant halfn (((_Constant*)n)->theValue*.5), - halfx (theValue*0.5); - - if (theValue < 0. || halfn.theValue <= 0.) { +_PMathObj _Constant::CChi2 (_PMathObj theObj) { + return _check_type_and_compute (theObj, [] (_Parameter x, _Parameter b) -> _Parameter { + if (x < 0.0 || b <= 0.) 
{ ReportWarning ("CChi2(x,n) only makes sense for both arguments positive"); - return new _Constant (0.0); - } - return halfn.IGamma( &halfx); + return 0.0; + } + return _igamma( b*0.5 , x * 0.5); + }); } //__________________________________________________________________________________ -_PMathObj _Constant::InvChi2 (_PMathObj n) +_PMathObj _Constant::InvChi2 (_PMathObj n) { // chi^2 n d.f. probability up to x -{ if (!chi2) { _String fla ("IGamma(_n_,_x_)"); chi2 = new _Formula (fla, nil); @@ -693,98 +638,67 @@ _PMathObj _Constant::InvChi2 (_PMathObj n) } //__________________________________________________________________________________ -_PMathObj _Constant::LessEq (_PMathObj theObj) -{ - if (theObj) { - return new _Constant (theValue<=((_Constant*)theObj)->theValue); - } else { - return nil; - } +_PMathObj _Constant::LessEq (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { return a <= b;}); } //__________________________________________________________________________________ -_PMathObj _Constant::GreaterEq (_PMathObj theObj) -{ - if (theObj) { - return new _Constant (theValue>=((_Constant*)theObj)->theValue); - } else { - return nil; - } +_PMathObj _Constant::GreaterEq (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { return a >= b;}); } //__________________________________________________________________________________ -_PMathObj _Constant::AreEqual (_PMathObj theObj) -{ - if (!theObj) { - return nil; - } - - _Parameter a = theValue, - b = ((_Constant*)theObj)->theValue; - +_PMathObj _Constant::AreEqual (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { if (a==0.0) { - return new _Constant (b==0.0); + return b==0.0; } - - return new _Constant(fabs ((a-b)/a)theValue; +//__________________________________________________________________________________ +_PMathObj _Constant::NotEqual (_PMathObj arg) 
{ + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { if (a==0.0) { - return new _Constant (b!=0.0); + return b!=0.0; } - - return new _Constant(fabs ((a-b)/a)>=tolerance); + + return fabs ((a-b)/a)>=tolerance; + }); } + //__________________________________________________________________________________ -_PMathObj _Constant::LAnd (_PMathObj theObj) -{ - if (!theObj) { - return nil; - } - return new _Constant ((long)(theValue)&&(long)(((_Constant*)theObj)->theValue)); +_PMathObj _Constant::LAnd (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { + return long (a) && long (b); + }); } -//__________________________________________________________________________________ -_PMathObj _Constant::LOr (_PMathObj theObj) -{ - if (!theObj) { - return nil; - } - return new _Constant ((long)(theValue)||(long)(((_Constant*)theObj)->theValue)); + + //__________________________________________________________________________________ +_PMathObj _Constant::LOr (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { + return long (a) || long (b); + }); } //__________________________________________________________________________________ -_PMathObj _Constant::LNot () -{ +_PMathObj _Constant::LNot () { return new _Constant (CheckEqual(theValue, 0.0)); } //__________________________________________________________________________________ -_PMathObj _Constant::Min (_PMathObj theObj) -{ - if (!theObj) { - return nil; - } - if (theValue<((_Constant*)theObj)->theValue) { - return (_PMathObj) makeDynamic(); - } - return (_PMathObj) theObj->makeDynamic(); +_PMathObj _Constant::Min (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { + return a < b ? 
a : b; + }); } //__________________________________________________________________________________ -_PMathObj _Constant::Max (_PMathObj theObj) -{ - if (!theObj) { - return nil; - } - if (theValue>((_Constant*)theObj)->theValue) { - return (_PMathObj) makeDynamic(); - } - return (_PMathObj) theObj->makeDynamic(); +_PMathObj _Constant::Max (_PMathObj arg) { + return _check_type_and_compute (arg, [] (_Parameter a, _Parameter b) -> _Parameter { + return a > b ? a : b; + }); } diff --git a/src/core/include/constant.h b/src/core/include/constant.h index c9b0b98be..6130bff73 100644 --- a/src/core/include/constant.h +++ b/src/core/include/constant.h @@ -54,6 +54,14 @@ class _Constant : public _MathObject { return new _MathObject; } + template _PMathObj _check_type_and_compute_3 (_PMathObj operand, _PMathObj operand2, T functor) { + if (operand && operand2 && operand->ObjectClass() == NUMBER && operand2->ObjectClass() == NUMBER) { + return new _Constant (functor (Value (), ((_Constant*)operand)->Value(), ((_Constant*)operand2)->Value())); + } + WarnError ("Not a numeric 'X' type in a <'constant' operation 'X'> call"); + return new _MathObject; + } + public: _Constant (_Parameter); From 12536577331c070e140167e23c0e46f14c82363d Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 26 Jul 2018 11:50:25 -0400 Subject: [PATCH 43/53] Memory leak fixes --- CMakeLists.txt | 3 +- .../SelectionAnalyses/SLAC.bf | 6 +- .../modules/shared-load-file.bf | 3 +- .../libv3/UtilityFunctions.bf | 2 +- .../libv3/tasks/alignments.bf | 55 ++++++++++--------- .../libv3/tasks/estimators.bf | 2 +- src/core/formula.cpp | 44 ++++++++------- src/core/matrix.cpp | 8 ++- src/core/operation.cpp | 16 +++--- src/core/site.cpp | 2 +- 10 files changed, 78 insertions(+), 63 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dab1a456..21350572c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) 
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g -fsanitize=address -fno-omit-frame-pointer ") + set(DEFAULT_LINK_FLAGS " -g -fsanitize=address -fno-omit-frame-pointer ") PCL_CHECK_FOR_AVX() if(${HAVE_AVX_EXTENSIONS}) set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=corei7-avx -mtune=corei7-avx ") diff --git a/res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf b/res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf index d64b6166b..9286e4a01 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf @@ -19,6 +19,7 @@ LoadFunctionLibrary("libv3/models/codon/MG_REV.bf"); LoadFunctionLibrary("modules/io_functions.ibf"); LoadFunctionLibrary("modules/selection_lib.ibf"); + /*------------------------------------------------------------------------------ Display analysis information */ @@ -111,13 +112,12 @@ slac.table_headers = {{"ES", "Expected synonymous sites"} slac.table_screen_output = {{"Codon", "Partition", "S", "N", "dS", "dN", "Selection detected?"}}; slac.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width : 16, terms.table_options.align : "center"}; + namespace slac { LoadFunctionLibrary ("modules/shared-load-file.bf"); load_file ("slac"); } - - slac.samples = io.PromptUser ("\n>Select the number of samples used to assess ancestral reconstruction uncertainty [select 0 to skip]",100,0,100000,TRUE); slac.pvalue = io.PromptUser ("\n>Select the p-value threshold to use when testing for selection",0.1,0,1,FALSE); @@ -132,6 +132,8 @@ selection.io.startTimer (slac.json [terms.json.timers], "Model fitting",1 ); namespace slac { doGTR ("slac"); } + + estimators.fixSubsetOfEstimates(slac.gtr_results, slac.gtr_results[terms.global]); namespace slac { diff --git 
a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf index e7ba05c9b..ab9c69609 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf @@ -132,7 +132,6 @@ function load_file (prefix) { io.ReportProgressMessage ("", ">Loaded a multiple sequence alignment with **" + codon_data_info[utility.getGlobalValue("terms.data.sequences")] + "** sequences, **" + codon_data_info[utility.getGlobalValue("terms.data.sites")] + "** codons, and **" + partition_count + "** partitions from \`" + codon_data_info[utility.getGlobalValue("terms.data.file")] + "\`"); - if (utility.Has (settings, utility.getGlobalValue("terms.settings.branch_selector"), "String")) { selected_branches = Call (settings[utility.getGlobalValue("terms.settings.branch_selector")], partitions_and_trees); } else { @@ -238,6 +237,8 @@ function store_tree_information () { selection.io.json_store_key_value_pair (json, None, utility.getGlobalValue("terms.json.partitions"), filter_specification); trees = utility.Map (partitions_and_trees, "_partition_", '_partition_[terms.data.tree]'); + + filter_names = utility.Map (filter_specification, "_partition_", '_partition_[terms.data.name]'); /* Store original name mapping */ diff --git a/res/TemplateBatchFiles/libv3/UtilityFunctions.bf b/res/TemplateBatchFiles/libv3/UtilityFunctions.bf index eade32e1a..78682c03c 100644 --- a/res/TemplateBatchFiles/libv3/UtilityFunctions.bf +++ b/res/TemplateBatchFiles/libv3/UtilityFunctions.bf @@ -58,7 +58,7 @@ lfunction utility.Array1D (m) { return Abs (m); } } - return None; + return 0; } /** diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 381014cb8..cd021d3d5 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -408,6 
+408,8 @@ lfunction alignments.DefineFiltersForPartitions(partitions, source_data, prefix, part_count = utility.Array1D(partitions); filters = {}; if (utility.CheckKey(data_info, utility.getGlobalValue("terms.code"), "Matrix")) { + + for (i = 0; i < part_count; i += 1) { this_filter = {}; DataSetFilter test = CreateFilter( ^ source_data, 1, (partitions[i])[utility.getGlobalValue("terms.data.filter_string")]); @@ -424,6 +426,7 @@ lfunction alignments.DefineFiltersForPartitions(partitions, source_data, prefix, } else { for (i = 0; i < part_count; i += 1) { + this_filter = {}; this_filter[utility.getGlobalValue("terms.data.name")] = prefix + (partitions[i])[utility.getGlobalValue("terms.data.name")]; DataSetFilter ^ (this_filter[utility.getGlobalValue("terms.data.name")]) = CreateFilter( ^ source_data, 1, (partitions[i])[utility.getGlobalValue("terms.data.filter_string")]); @@ -554,29 +557,29 @@ lfunction alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, offse * @param {Dictionary} code - genetic code description (e.g. 
returned by alignments.LoadGeneticCode) * @param {lookup} code - resolution lookup dictionary * @returns {Dict} for each reading frame F in {0, 1, 2} returns - + F -> { terms.data.sequence: translated sequence (always choose X if available, otherwise first sense resolution) terms.sense_codons : N, // number of sense A/A terms.stop_codons : N // number of stop codons } */ - + lfunction alignments.TranslateCodonsToAminoAcidsWithAmbigsAllFrames (sequence, code, lookup) { - + result = {}; - - + + for (frame = 0; frame < 3; frame += 1) { try_run = alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, frame, code, lookup); - + translation = ""; translation * 128; - + frame_result = {utility.getGlobalValue ("terms.sense_codons") : 0, utility.getGlobalValue ("terms.stop_codons") : 0 }; - - upper_bound = Abs (try_run); + + upper_bound = Abs (try_run); for (i = 0; i < upper_bound; i+=1) { if (try_run[i] / "X") { // has_stop translation * "X"; @@ -586,8 +589,8 @@ lfunction alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, offse frame_result [^"terms.sense_codons"] += 1; } } - - + + translation * 0; frame_result [utility.getGlobalValue ("terms.data.sequence")] = translation; result[frame] = frame_result; @@ -595,7 +598,7 @@ lfunction alignments.TranslateCodonsToAminoAcidsWithAmbiguities (sequence, offse return result; } - + /** * @name alignments.MapAlignmentToReferenceCoordinates @@ -771,26 +774,26 @@ lfunction alignments.StripGaps (sequence) { /** * @name alignments.alignment.MapCodonsToAA * Map in-frame nucleotides onto a protein alignment string - + * @param {String} codon_sequence - the codon sequence to map - * @param {String} aa_sequence - the matching aligned a.a. sequence + * @param {String} aa_sequence - the matching aligned a.a. 
sequence * @param {Number} no more than this many mismatches - the codon sequence to map * @param {Dict} mapping - code ["terms.code.mapping"] - + * @returns {String} the mapped sequence - - * @example + + * @example GCAAAATCATTAGGGACTATGGAAAACAGA -AKSLGTMEN-R - - maps to - + + maps to + ---GCAAAATCATTAGGGACTATGGAAAAC---AGA */ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, mapping) { - + seqLen = Abs(aa_sequence); translString = ""; translString * (seqLen); @@ -800,13 +803,13 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map seqPos = 0; codon = codon_sequence[seqPos][seqPos + 2]; currentAA = mapping[codon]; - + mismatch_count = 0; for (aaPos = 0; aaPos < seqLen && seqPos < seqLenN; aaPos += 1) { advance = 1; copy_codon = 1; - + if (currentAA != 0) { if (aa_sequence[aaPos] == "-") { //if (currentAA != "X") { @@ -849,12 +852,12 @@ lfunction alignment.MapCodonsToAA(codon_sequence, aa_sequence, this_many_mm, map currentAA = mapping[codon]; } } - + for (; aaPos < seqLen; aaPos += 1) { translString * "---"; } - + translString * 0; return translString; -} \ No newline at end of file +} diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 79c3a3236..dcef14aaf 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -103,7 +103,6 @@ lfunction estimators.GetGlobalMLE_RegExp(results, re) { * @returns nothing */ function estimators.copyGlobals2(key2, value2) { - if (Type((estimators.ExtractMLEs.results[terms.global])[key2]) == "AssociativeList") { key2 = "[`key`] `key2`"; } @@ -730,6 +729,7 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o } lfunction estimators.CreateLFObject (context, data_filter, tree, model_template, initial_values, run_options, model_objects) { + if (Type(data_filter) == "String") { return estimators.CreateLFObject (context, { { 
diff --git a/src/core/formula.cpp b/src/core/formula.cpp index d5d47cc40..c2df273f6 100644 --- a/src/core/formula.cpp +++ b/src/core/formula.cpp @@ -624,21 +624,25 @@ void _Formula::internalToStr (_String& result, node* currentNode, char opL return; } _PMathObj opValue = thisNodeOperation->GetANumber(); - _String* conv = (_String*)opValue->toStr(); - if (opValue->ObjectClass()==STRING) { - result<<'"'; - result<ObjectClass() == NUMBER && opValue->Value() < 0.0) { - result<<'('; + if (opValue) { + _String* conv = (_String*)opValue->toStr(); + if (opValue->ObjectClass()==STRING) { + result<<'"'; result<ObjectClass() == NUMBER && opValue->Value() < 0.0) { + result<<'('; + result<"; } - DeleteObject(conv); } //__________________________________________________________________________________ bool _Formula::IsEmpty(void) const { @@ -887,7 +891,7 @@ _Parameter _Formula::Brent(_Variable* unknown, _Parameter a, _Parameter b, _Pa if (fa*fb<0.0) { fc = fb; c = b; - + for (it = 0; it < MAX_BRENT_ITERATES; it++) { if (fb*fc>0.0) { fc = fa; @@ -927,7 +931,7 @@ _Parameter _Formula::Brent(_Variable* unknown, _Parameter a, _Parameter b, _Pa q = -q; } p = fabs (p); - + if (p<0.0) { p = -p; } @@ -1292,7 +1296,7 @@ long _Formula::ExtractMatrixExpArguments (_List* storage) { if (! 
cacheUpdated && nextOp->CanResultsBeCached(thisOp)) { /*if (likeFuncEvalCallCount == 12733 && i == 13) { _Matrix * this_matrix = (_Matrix *)LocateVar (thisOp->GetAVariable())->GetValue(); - + _String buffer (1024UL, true), id ("TEMP"); this_matrix->Serialize(buffer, id); buffer.Finalize(); @@ -1764,9 +1768,9 @@ _Parameter _Formula::ComputeSimple (_SimpleFormulaDatum* stack, _SimpleFormulaDa if (!theFormula.lLength) { return 0.0; } - + long stackTop = 0; - + for (int i=0; itheNumber) { @@ -1800,7 +1804,7 @@ _Parameter _Formula::ComputeSimple (_SimpleFormulaDatum* stack, _SimpleFormulaDa fprintf (stderr, "[_Formula::ComputeSimple] Computing step %d (two op function), value %g\n", i, stack[stackTop-1].value ); } #endif - + } else { switch (thisOp->numberOfTerms) { case -2 : { @@ -1818,7 +1822,7 @@ _Parameter _Formula::ComputeSimple (_SimpleFormulaDatum* stack, _SimpleFormulaDa theFunc = (void(*)(Ptr,_Parameter,_Parameter))thisOp->opCode; if (stackTop != 2 || i != theFormula.lLength - 1) { WarnError ("Internal error in _Formula::ComputeSimple - stack underflow or MCoord command is not the last one.)"); - + return 0.0; } //stackTop = 0; @@ -1838,12 +1842,12 @@ _Parameter _Formula::ComputeSimple (_SimpleFormulaDatum* stack, _SimpleFormulaDa ++stackTop; } } - + } } } } - + return stack[0].value; } diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp index 30dade0b9..b767890c4 100644 --- a/src/core/matrix.cpp +++ b/src/core/matrix.cpp @@ -2201,7 +2201,7 @@ _Matrix::_Matrix (_String& s, bool isNumeric, _VariableContainer const* theP) { _Formula* theTerm = new _Formula (lterm, theP); isAConstant = isAConstant && theTerm->IsAConstant() && theTerm->ObjectClass() == NUMBER; - + ((_Formula**)theData)[vDim*hPos+vPos] = theTerm; } @@ -9471,6 +9471,8 @@ _PMathObj _AssociativeList::MIterator (_PMathObj p, _PMathObj p2) actionFormula.GetList().AppendNewInstance(new _Operation()); actionFormula.GetList().AppendNewInstance(new _Operation(emptyString,-fID-1)); + //fprintf (stderr, 
"--->Action formula = %s\n", _String ( (_String*) actionFormula.toStr()).sData); + if (fID2 >= 0) { testFormula.GetList().AppendNewInstance(new _Operation()); testFormula.GetList().AppendNewInstance(new _Operation(emptyString,-fID2-1)); @@ -9485,7 +9487,8 @@ _PMathObj _AssociativeList::MIterator (_PMathObj p, _PMathObj p2) while (cn >= 0) { _String* aKey = ((_String**)avl.dataList->lData)[cn]; if (aKey) { - DeleteObject (fKey->theString); + //fprintf (stderr, "[loop start] fKey instance check %ld (at key %s)\n", fKey->nInstances, aKey->sData); + DeleteObject (fKey->theString); fKey->theString = (_String*)aKey->toStr(); if (fID2 >= 0) { ((_Operation**)testFormula.GetList().lData)[0]->SetNumber(fKey); @@ -9499,6 +9502,7 @@ _PMathObj _AssociativeList::MIterator (_PMathObj p, _PMathObj p2) actionFormula.Compute(); done ++; } + //fprintf (stderr, "[loop end] fKey instance check %ld (at key %s)\n", fKey->nInstances, aKey ? fKey->theString->sData : "Empty"); cn = avl.Traverser (hist,ls); } DeleteObject (fKey); diff --git a/src/core/operation.cpp b/src/core/operation.cpp index 236ae4412..68ee9b200 100644 --- a/src/core/operation.cpp +++ b/src/core/operation.cpp @@ -440,7 +440,7 @@ bool _Operation::Execute (_Stack& theScrap, _VariableContainer const* nam } else { _Variable *newV = new _Variable (*argument_k); newV->SetValue(nthterm,false); - nthterm->AddAReference(); + //nthterm->AddAReference(); existingDVars<GetAVariable(); displacedVars<AddAReference(); // 3 references @@ -474,29 +474,29 @@ bool _Operation::Execute (_Stack& theScrap, _VariableContainer const* nam } _PMathObj ret; - + if (currentExecutionList && currentExecutionList->stdinRedirect) { // 20180620: SLKP, need to split this off because if Execute fails // then there will be a double free on stdinRedirect - + auto stash1 = currentExecutionList->stdinRedirect; auto stash2 = currentExecutionList->stdinRedirectAux;; // for recursive calls, both function_body and currentExecutionList can be reset to null - + 
function_body -> stdinRedirect = currentExecutionList->stdinRedirect; function_body -> stdinRedirectAux = currentExecutionList->stdinRedirectAux; - + currentExecutionList -> stdinRedirect -> AddAReference(); currentExecutionList -> stdinRedirectAux -> AddAReference(); - + ret = function_body->Execute(); - + stash1 -> RemoveAReference(); stash2-> RemoveAReference(); } else { ret = function_body->Execute(); } - + function_body -> stdinRedirect = nil; function_body -> stdinRedirectAux = nil; diff --git a/src/core/site.cpp b/src/core/site.cpp index 7dc44b157..8b90b471b 100644 --- a/src/core/site.cpp +++ b/src/core/site.cpp @@ -1205,7 +1205,7 @@ void _DataSet::ConvertRepresentations (void) _List horStrings; if (lLength == 0) { - AppendNewInstance (new _Site); + AppendNewInstance (new _String (128UL, true)); } else { _Site * aSite = (_Site*)lData[0]; From f7dbd0e8d284b4ef941c79d84dd4c21993ab66e2 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 26 Jul 2018 11:52:16 -0400 Subject: [PATCH 44/53] Remove ASAN from CMAKE --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21350572c..4dab1a456 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,8 +141,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g -fsanitize=address -fno-omit-frame-pointer ") - set(DEFAULT_LINK_FLAGS " -g -fsanitize=address -fno-omit-frame-pointer ") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") PCL_CHECK_FOR_AVX() if(${HAVE_AVX_EXTENSIONS}) set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=corei7-avx -mtune=corei7-avx ") From 8c06d2907bc192ae305a91366030f3da16d4357d Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Thu, 26 Jul 2018 13:19:16 -0400 Subject: [PATCH 45/53] Fixing _CalcNode::ReplaceModel leak in theProbs --- 
src/core/calcnode.cpp | 42 +++++++++++----------------- src/core/include/calcnode.h | 3 ++ src/core/include/variablecontainer.h | 2 +- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/src/core/calcnode.cpp b/src/core/calcnode.cpp index 316d06788..010d4c53c 100644 --- a/src/core/calcnode.cpp +++ b/src/core/calcnode.cpp @@ -427,21 +427,8 @@ void _CalcNode::SetCompMatrix (long categID) //_______________________________________________________________________________________________ -_CalcNode::~_CalcNode (void) -{ - -#ifndef __HYALTIVEC__ - if (theProbs) { - delete [] theProbs; - } -#else - if (theProbs) { - vec_free(theProbs); - } -#endif - if (compExp && referenceNode < 0) { - DeleteObject (compExp); - } +_CalcNode::~_CalcNode (void) { + Clear(); } //_______________________________________________________________________________________________ @@ -457,23 +444,26 @@ long _CalcNode::FreeUpMemory (long) return res; } +//_______________________________________________________________________________________________ + +void _CalcNode::Clear (void) { + if (compExp && referenceNode < 0) { + DeleteAndZeroObject(compExp); + } + if (theProbs) { + delete [] theProbs; + theProbs = nil; + } + _VariableContainer::Clear(); +} + //__________________________________________________________________________________ -void _CalcNode::RemoveModel (void) -{ - - if (compExp && referenceNode < 0) { - DeleteAndZeroObject(compExp); - compExp = nil; - } - - if (matrixCache) { - } +void _CalcNode::RemoveModel (void) { categoryVariables.Clear(); categoryIndexVars.Clear(); remapMyCategories.Clear(); - Clear(); } diff --git a/src/core/include/calcnode.h b/src/core/include/calcnode.h index 01b38d0f9..4957b0bd2 100644 --- a/src/core/include/calcnode.h +++ b/src/core/include/calcnode.h @@ -167,6 +167,9 @@ class _CalcNode: public _VariableContainer void SetFlag (void) { theProbs[0]=-3.1415296; } + + virtual void Clear (void); + void SetSummedFlag (void) { if (theProbs[0]>=0) { diff 
--git a/src/core/include/variablecontainer.h b/src/core/include/variablecontainer.h index 85406d180..2129f49a8 100644 --- a/src/core/include/variablecontainer.h +++ b/src/core/include/variablecontainer.h @@ -100,7 +100,7 @@ class _VariableContainer: public _Variable virtual long SetDependance (long); bool SetMDependance (_SimpleList&); - void Clear (void); + virtual void Clear (void); virtual void ClearConstraints (void); long CountIndependents (void); From dcaa26b3a293e91ca9d64a445b422c21207e5754 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 26 Jul 2018 13:37:32 -0400 Subject: [PATCH 46/53] Fixing a memory leak in GetString (., LIST_OF_LOADED_LIBRARIES, -1) --- src/core/batchlanruntime.cpp | 306 +++++++++++++++++------------------ 1 file changed, 153 insertions(+), 153 deletions(-) diff --git a/src/core/batchlanruntime.cpp b/src/core/batchlanruntime.cpp index a1cdd019b..da7ef116c 100644 --- a/src/core/batchlanruntime.cpp +++ b/src/core/batchlanruntime.cpp @@ -1,21 +1,21 @@ /* - + HyPhy - Hypothesis Testing Using Phylogenies. - + Copyright (C) 1997-now Core Developers: Sergei L Kosakovsky Pond (sergeilkp@icloud.com) Art FY Poon (apoon@cfenet.ubc.ca) Steven Weaver (sweaver@temple.edu) - + Module Developers: Lance Hepler (nlhepler@gmail.com) Martin Smith (martin.audacis@gmail.com) - + Significant contributions from: Spencer V Muse (muse@stat.ncsu.edu) Simon DW Frost (sdf22@cam.ac.uk) - + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -23,10 +23,10 @@ distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -34,7 +34,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - + */ #include "baseobj.h" @@ -68,47 +68,47 @@ _AVLListX openFileHandles (&openFileHandlesBackend); bool _ElementaryCommand::HandleHarvestFrequencies (_ExecutionList& currentProgram) { currentProgram.currentCommand++; - + _String freqStorageID = *(_String*)parameters(0), - dataID = currentProgram.AddNameSpaceToID(*(_String*)parameters(1)), + dataID = currentProgram.AddNameSpaceToID(*(_String*)parameters(1)), errMsg ; - + _Variable * theReceptacle = CheckReceptacleCommandID (&AppendContainerName(freqStorageID,currentProgram.nameSpacePrefix),HY_HBL_COMMAND_HARVEST_FREQUENCIES, true, false, ¤tProgram); if (!theReceptacle) { return false; - } + } SetStatusLine ("Gathering Frequencies"); long objectType = HY_BL_DATASET|HY_BL_DATASET_FILTER; BaseRefConst sourceObject = _HYRetrieveBLObjectByName (dataID, objectType,nil,false); - + long unit = ProcessNumericArgument((_String*)parameters(2),currentProgram.nameSpacePrefix), posspec = ProcessNumericArgument((_String*)parameters(4),currentProgram.nameSpacePrefix), atom = ProcessNumericArgument((_String*)parameters(3),currentProgram.nameSpacePrefix); - + _Matrix* receptacle = nil; - + _Parameter cghf = 1.0; checkParameter (hfCountGap,cghf,1.0, currentProgram.nameSpacePrefix); - + if (objectType == HY_BL_DATASET) { // harvest directly from a DataSet _String vSpecs, hSpecs; - + if (parameters.lLength>5) { vSpecs = *(_String*)parameters(5); } if (parameters.lLength>6) { hSpecs = *(_String*)parameters(6); } - + _DataSet const * dataset = (_DataSet const*)sourceObject; _SimpleList hL, vL; dataset->ProcessPartition 
(hSpecs,hL,false); dataset->ProcessPartition (vSpecs,vL,true); - + receptacle = dataset->HarvestFrequencies(unit,atom,posspec,hL, vL,cghf>0.5); } else { // harvest from a DataSetFilter if (objectType == HY_BL_DATASET_FILTER) { @@ -117,18 +117,18 @@ bool _ElementaryCommand::HandleHarvestFrequencies (_ExecutionList& currentP errMsg = _String ("'") & dataID & "' is neither a DataSet nor a DataSetFilter"; } } - + SetStatusLine (emptyString); - + if (errMsg.sLength || receptacle == nil) { DeleteObject (receptacle); - currentProgram.ReportAnExecutionError (errMsg); + currentProgram.ReportAnExecutionError (errMsg); theReceptacle->SetValue (new _MathObject, false); return false; - } + } theReceptacle->SetValue (receptacle, false); return true; - + //CheckReceptacleCommandIDAndStore (&freqStorageID,HY_HBL_COMMAND_HARVEST_FREQUENCIES,true, receptacle, false); } @@ -137,10 +137,10 @@ bool _ElementaryCommand::HandleHarvestFrequencies (_ExecutionList& currentP bool _ElementaryCommand::HandleOptimizeCovarianceMatrix (_ExecutionList& currentProgram, bool doOptimize) { currentProgram.currentCommand++; // construct the receptacle matrix - + _String lfResName (currentProgram.AddNameSpaceToID(*(_String*)parameters(0))), lfNameID (currentProgram.AddNameSpaceToID(*(_String*)parameters(1))); - + _Variable* result = CheckReceptacleCommandID (&lfResName, doOptimize?HY_HBL_COMMAND_OPTIMIZE:HY_HBL_COMMAND_COVARIANCE_MATRIX,true); // Handle string variables passed as likefunc IDs? 
@@ -151,20 +151,20 @@ bool _ElementaryCommand::HandleOptimizeCovarianceMatrix (_ExecutionList& cu long objectType = HY_BL_LIKELIHOOD_FUNCTION|HY_BL_SCFG|HY_BL_BGM; _LikelihoodFunction *lkf = (_LikelihoodFunction*)_HYRetrieveBLObjectByName (lfNameID, objectType,nil,doOptimize==false); - - if (lkf == nil) { // will only happen if the object is a custom function + + if (lkf == nil) { // will only happen if the object is a custom function lkf = (_LikelihoodFunction*)checkPointer(new _CustomFunction (&lfNameID)); } - - if (!doOptimize) { - // COVARIANCE_MATRIX - + + if (!doOptimize) { + // COVARIANCE_MATRIX + SetStatusLine (_String("Finding the cov. matrix/profile CI for ")&lfNameID); _String cpl = currentProgram.AddNameSpaceToID(covarianceParameterList); _Variable * restrictVariable = FetchVar (LocateVarByName(cpl)); _SimpleList * restrictor = nil; - - if (objectType == HY_BL_LIKELIHOOD_FUNCTION || objectType == HY_BL_SCFG){ + + if (objectType == HY_BL_LIKELIHOOD_FUNCTION || objectType == HY_BL_SCFG){ // not a BGM if (restrictVariable) { // only consider some variables _SimpleList variableIDs; @@ -194,7 +194,7 @@ bool _ElementaryCommand::HandleOptimizeCovarianceMatrix (_ExecutionList& cu restrictor = nil; } } - } + } result->SetValue( (_Matrix*)lkf->CovarianceMatrix(restrictor),false); DeleteObject (restrictor); } else { @@ -209,7 +209,7 @@ bool _ElementaryCommand::HandleOptimizeCovarianceMatrix (_ExecutionList& cu if (objectType != HY_BL_NOT_DEFINED) { SetStatusLine (_String("Optimizing ") & _HYHBLTypeToText (objectType) & ' ' &lfNameID); } else { - SetStatusLine (_String("Optimizing user function ") &lfNameID); + SetStatusLine (_String("Optimizing user function ") &lfNameID); } result -> SetValue(lkf->Optimize(),false); } @@ -217,8 +217,8 @@ bool _ElementaryCommand::HandleOptimizeCovarianceMatrix (_ExecutionList& cu if (objectType == HY_BL_NOT_DEFINED) { DeleteObject (lkf); // delete the custom function object } - - + + SetStatusLine ("Finished with the 
optimization"); return true; @@ -227,13 +227,13 @@ bool _ElementaryCommand::HandleOptimizeCovarianceMatrix (_ExecutionList& cu //____________________________________________________________________________________ bool _ElementaryCommand::HandleComputeLFFunction (_ExecutionList& currentProgram) { - + currentProgram.currentCommand++; _String *arg1 = (_String*)parameters(0), *arg2 = (_String*)parameters(1), name2Find = AppendContainerName(*arg1,currentProgram.nameSpacePrefix); - + // bool isSCFG = false; long objectType = HY_BL_LIKELIHOOD_FUNCTION|HY_BL_SCFG|HY_BL_BGM; @@ -252,18 +252,18 @@ bool _ElementaryCommand::HandleComputeLFFunction (_ExecutionList& currentPr return false; } else { return CheckReceptacleCommandIDAndStore(&AppendContainerName(*arg2,currentProgram.nameSpacePrefix), HY_HBL_COMMAND_LFCOMPUTE, HY_HBL_COMMAND_LFCOMPUTE, new _Constant (lf->Compute()), false); - + } } return true; - + } //____________________________________________________________________________________ bool _ElementaryCommand::HandleSelectTemplateModel (_ExecutionList& currentProgram) { - + currentProgram.currentCommand++; SetStatusLine ("Waiting for model selection"); @@ -291,7 +291,7 @@ bool _ElementaryCommand::HandleSelectTemplateModel (_ExecutionList& current unitLength = thisDF->GetUnitLength(); _TranslationTable const* thisTT = thisDF->GetData()->GetTT(); - + if (unitLength==1) { if (thisTT->IsStandardNucleotide()) { dataType = "nucleotide"; @@ -318,7 +318,7 @@ bool _ElementaryCommand::HandleSelectTemplateModel (_ExecutionList& current for (unsigned long model_index = 0; model_index < templateModelList.lLength; model_index++) { _List *model_components = (_List*)templateModelList(model_index); - + if (dataType.Equal((_String*)model_components->GetItem(3))) { _String * dim = (_String*)model_components->GetItem(2); if (*dim==_String("*")|| dataDimension == dim->toNum()) { @@ -354,7 +354,7 @@ bool _ElementaryCommand::HandleSelectTemplateModel (_ExecutionList& current printf ("\n\n 
+--------------------------+\n"); printf (" | Select a standard model. |\n"); printf (" +--------------------------+\n\n\n"); - + for (model_id = 0; model_idgetStr(), ((_String*)(*(_List*)templateModelList(matchingModels(model_id)))(1))->getStr()); @@ -399,16 +399,16 @@ bool _ElementaryCommand::HandleSelectTemplateModel (_ExecutionList& current stdModel.stdinRedirect = currentProgram.stdinRedirect; stdModel.Execute(); stdModel.stdinRedirectAux = nil; - stdModel.stdinRedirect = nil; + stdModel.stdinRedirect = nil; return true; - + } //____________________________________________________________________________________ bool _ElementaryCommand::HandleUseModel (_ExecutionList& currentProgram) { - + currentProgram.currentCommand++; _String namedspacedMM (currentProgram.AddNameSpaceToID(*(_String*)parameters(0))); long mID = FindModelName(namedspacedMM); @@ -430,7 +430,7 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram /* first check to see if matrix parameters here are valid */ - + _String *currentArgument = (_String*)parameters(0), nmspc = AppendContainerName(*currentArgument,currentProgram.nameSpacePrefix), errMsg, @@ -449,7 +449,7 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram setParameter (randomSeed, ((long)globalRandSeed)); return true; } - + if (currentArgument->Equal (&deferConstrainAssignment)) { bool on = ProcessNumericArgument ((_String*)parameters(1), currentProgram.nameSpacePrefix); if (on) { @@ -475,12 +475,12 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram if (currentArgument->Equal (&statusBarUpdateString)) { _String sbar_value = ProcessLiteralArgument ((_String*)parameters(1), currentProgram.nameSpacePrefix); -#if defined __UNIX__ +#if defined __UNIX__ #if not defined __HYPHY_GTK__ && not defined __HEADLESS__ SetStatusLineUser (sbar_value); #else SetStatusLine (sbar_value); - #endif + #endif #else SetStatusLine (empty,sbar_value, empty, 0, HY_SL_TASK); #endif @@ 
-490,9 +490,9 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram long objectIndex, typeFlag = HY_BL_ANY; - + BaseRef theObject = _HYRetrieveBLObjectByNameMutable (nmspc, typeFlag, &objectIndex); - + switch (typeFlag) { case HY_BL_BGM: { // BGM Branch @@ -609,7 +609,7 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram } } // end BGM break; - + case HY_BL_SCFG: case HY_BL_LIKELIHOOD_FUNCTION: { @@ -630,12 +630,12 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram } break; // end SCFG and LF - + case HY_BL_DATASET: case HY_BL_DATASET_FILTER: { _DataSet * ds = nil; - - + + long sequence_index = ProcessNumericArgument ((_String*)parameters(1),currentProgram.nameSpacePrefix); if (typeFlag == HY_BL_DATASET) { ds = (_DataSet*) theObject; @@ -645,28 +645,28 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram ds = dsf->GetData (); sequence_index = dsf->theNodeMap.Map (sequence_index); } - + if (typeFlag == HY_BL_DATASET_FILTER) { ReleaseDataFilterLock(objectIndex); } - - + + _String * sequence_name = new _String(ProcessLiteralArgument ((_String*)parameters(2),currentProgram.nameSpacePrefix)); - + if (! 
ds->SetSequenceName (sequence_index, sequence_name)) { delete sequence_name; currentProgram.ReportAnExecutionError (*((_String*)parameters(1)) & " (=" & sequence_index & ") is not a valid sequence index"); return false; - + } } // end data set and data set filter - break; + break; // Dataset and Datasetfilter - + default: // check to see if this is a calcnode _CalcNode* treeNode = (_CalcNode*)FetchObjectFromVariableByType(&nmspc, TREE_NODE); - if (treeNode) { + if (treeNode) { if (*((_String*)parameters(1)) == _String("MODEL")) { _String modelName = AppendContainerName(*((_String*)parameters(2)),currentProgram.nameSpacePrefix); long modelType = HY_BL_MODEL, modelIndex; @@ -678,9 +678,9 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram return false; } long pID, lfID = ((_TheTree*)parentTree->Compute())->IsLinkedToALF(pID); - + treeNode->ReplaceModel (modelName, parentTree); - + if (lfID>=0){ ((_LikelihoodFunction*)likeFuncList(lfID))->Rebuild(true); //currentProgram.ReportAnExecutionError ((*parentTree->GetName()) & " is linked to a likelihood function (" & *GetObjectNameByType (HY_BL_LIKELIHOOD_FUNCTION, lfID) &") and cannot be modified "); @@ -697,11 +697,11 @@ bool _ElementaryCommand::HandleSetParameter (_ExecutionList& currentProgram return false; } } - + currentProgram.ReportAnExecutionError (*currentArgument & " is not a valid likelihood function/data set filter/tree topology/tree node"); return false; - + } // end cases return true; } @@ -743,7 +743,7 @@ bool _ElementaryCommand::HandleAssert (_ExecutionList& currentProgram) { } } currentProgram.ReportAnExecutionError(_String("Assertion statement '") & *(_String*)parameters(0) & "' could not be computed or was not numeric."); - + return false; } @@ -755,16 +755,16 @@ bool _ElementaryCommand::HandleRequireVersion(_ExecutionList& currentProgra _List local_version = __HYPHY__VERSION__.Tokenize ("."), required_version = theVersion.Tokenize("."); - + try { - + unsigned long const 
upper_bound = MIN (local_version.countitems(), required_version.countitems()); - - + + for (unsigned long i = 0UL; i < upper_bound; i++) { _Parameter local_number = ((_String*)local_version.GetItem(i))->toNum(); _Parameter required_number = ((_String*)required_version.GetItem(i))->toNum(); - + if (local_number > required_number) { return true; } @@ -772,7 +772,7 @@ bool _ElementaryCommand::HandleRequireVersion(_ExecutionList& currentProgra throw (0); } } - + if (required_version.countitems() > upper_bound) { return true; } @@ -809,7 +809,7 @@ bool _ElementaryCommand::HandleClearConstraints(_ExecutionList& currentProg if (cID>=0) { // variable exists FetchVar(cID)->ClearConstraints(); } - } + } return true; } @@ -817,24 +817,24 @@ bool _ElementaryCommand::HandleClearConstraints(_ExecutionList& currentProg bool _ElementaryCommand::HandleMolecularClock(_ExecutionList& currentProgram){ currentProgram.currentCommand++; - + _String theBaseNode (currentProgram.AddNameSpaceToID(*(_String*)parameters(0))), treeName; - + _Variable* theObject = FetchVar (LocateVarByName(theBaseNode)); - + if (!theObject || (theObject->ObjectClass()!=TREE && theObject->ObjectClass()!=TREE_NODE)) { WarnError (_String("Not a defined tree/tree node object '") & theBaseNode & "' in call to " & _HY_ValidHBLExpressions.RetrieveKeyByPayload(HY_HBL_COMMAND_MOLECULAR_CLOCK)); return false; } - + _TheTree *theTree = nil; if (theObject->ObjectClass() == TREE_NODE) { theTree = (_TheTree*)((_VariableContainer*)theObject)->GetTheParent(); if (!theTree) { WarnError (_String("Internal error - orphaned tree node '") & theBaseNode & "' in call to "& _HY_ValidHBLExpressions.RetrieveKeyByPayload(HY_HBL_COMMAND_MOLECULAR_CLOCK)); return false; - + } treeName = *theTree->GetName(); theBaseNode = theObject->GetName()->Cut(treeName.sLength+1,-1); @@ -843,7 +843,7 @@ bool _ElementaryCommand::HandleMolecularClock(_ExecutionList& currentProgra theTree = (_TheTree*)theObject; theBaseNode = emptyString; } - + 
theTree->MolecularClock(theBaseNode,parameters); return true; } @@ -852,7 +852,7 @@ bool _ElementaryCommand::HandleMolecularClock(_ExecutionList& currentProgra bool _ElementaryCommand::HandleGetURL(_ExecutionList& currentProgram){ currentProgram.currentCommand++; - + _String url (ProcessLiteralArgument((_String*)parameters(1),currentProgram.nameSpacePrefix)), *arg1 = (_String*)parameters(0), *act = parameters.lLength>2?(_String*)parameters(2):nil, @@ -882,7 +882,7 @@ bool _ElementaryCommand::HandleGetURL(_ExecutionList& currentProgram){ } } if (errMsg.sLength) { - currentProgram.ReportAnExecutionError (errMsg); + currentProgram.ReportAnExecutionError (errMsg); return false; } @@ -893,7 +893,7 @@ bool _ElementaryCommand::HandleGetURL(_ExecutionList& currentProgram){ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ currentProgram.currentCommand++; - + _String errMsg, *result = nil; @@ -916,7 +916,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ if (f >=0 ) { f = _HY_GetStringGlobalTypes.GetXtra (f); } - + switch (f) { case HY_BL_LIKELIHOOD_FUNCTION: // LikelihoodFunction @@ -931,7 +931,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ } break; } - + case HY_BL_HBL_FUNCTION: // UserFunction result = (_String*)GetObjectNameByType(HY_BL_HBL_FUNCTION,sID); if (result) { @@ -940,9 +940,9 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ resAVL->MStore ("Arguments", new _Matrix(GetBFFunctionArgumentList(sID)), false); theReceptacle->SetValue (resAVL,false); return true; - } + } break; - + case HY_BL_TREE: { // Tree // 20110608 SLKP: REFACTOR into a separate function // I am sure this is used somewhere else (perhaps for other types) @@ -955,14 +955,14 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ default: { // everything else... 
// decide what kind of object current argument represents - - - + + + _String *currentArgument = (_String*)parameters(1), nmspaced = AppendContainerName(*currentArgument,currentProgram.nameSpacePrefix); long typeFlag = HY_BL_ANY, index = -1; - + BaseRefConst theObject = _HYRetrieveBLObjectByName (nmspaced, typeFlag, &index); if (theObject) { @@ -988,7 +988,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ if (sID < 0) { _List filterSeqNames; _List const * originalNames = &dataSetFilterObject->GetData()->GetNames(); - + for (long seqID=0; seqIDNumberSpecies(); seqID++) { filterSeqNames << originalNames->GetItem (dataSetFilterObject->theNodeMap.Element (seqID)); } @@ -1030,7 +1030,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ } case HY_BL_LIKELIHOOD_FUNCTION: case HY_BL_SCFG: { - + _LikelihoodFunction *lf = (_LikelihoodFunction*)theObject; if (sID>=0) { if (sIDGetIndependentVars().lLength) { @@ -1050,10 +1050,10 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ } break; } - + case HY_BL_MODEL: { if (sID>=0) { - // check to make see if the + // check to make see if the if (sID2 < 0) { // get the sID's parameter name _SimpleList modelP; _AVLList modelPA (&modelP); @@ -1061,7 +1061,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ modelPA.ReorderList(); if (sIDGetName()->makeDynamic(); - } + } } else { // get the formula for cell (sID, sID2) if (!IsModelOfExplicitForm (index)) { @@ -1072,7 +1072,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ } } } - + } else { _Variable * tV, * tV2; bool mByF; @@ -1108,7 +1108,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ theReceptacle->SetValue (resAVL,false); return true; } - } // end of "switch" + } // end of "switch" } else { if (currentArgument->Equal(&versionString)) { @@ -1134,7 +1134,7 @@ bool _ElementaryCommand::HandleGetString 
(_ExecutionList& currentProgram){ } else if (currentArgument->Equal(&timeStamp)) { result = new _String(GetTimeStamp(sID < 0.5)); } else if (currentArgument->Equal(&listLoadedLibraries)) { - theReceptacle->SetValue (new _Matrix (loadedLibraryPaths.Keys())); + theReceptacle->SetValue (new _Matrix (loadedLibraryPaths.Keys()), false); return true; } else { _Variable* theVar = FetchVar(LocateVarByName (nmspaced)); @@ -1143,7 +1143,7 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ result = (_String*)theVar->toStr(); } else { if (sID == -1){ - + _SimpleList vL; _AVLList vAVL (&vL); theVar->ScanForVariables (vAVL, true); @@ -1153,13 +1153,13 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ SplitVariableIDsIntoLocalAndGlobal (vL, splitVars); InsertVarIDsInList (resL, "Global", *(_SimpleList*)splitVars(0)); InsertVarIDsInList (resL, "Local", *(_SimpleList*)splitVars(1)); - + theReceptacle->SetValue (resL,false); return true; } - + else { // formula string - + if (sID == -3) { _String local, global; _SimpleList var_index; @@ -1196,18 +1196,18 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ } if (errMsg.sLength) { - currentProgram.ReportAnExecutionError (errMsg); + currentProgram.ReportAnExecutionError (errMsg); DeleteObject (result); - result = nil; + result = nil; } - + if (result) { theReceptacle->SetValue (new _FString (result),false); return true; } theReceptacle->SetValue (new _MathObject(), false); - + return false; } @@ -1218,21 +1218,21 @@ bool _ElementaryCommand::HandleGetString (_ExecutionList& currentProgram){ bool _ElementaryCommand::HandleExport(_ExecutionList& currentProgram){ currentProgram.currentCommand++; - + _String objectID (currentProgram.AddNameSpaceToID(*(_String*)parameters(1))), arg1 (currentProgram.AddNameSpaceToID(*(_String*)parameters(0))), errMsg; - + _Variable * theReceptacle = CheckReceptacleCommandID 
(&AppendContainerName(arg1,currentProgram.nameSpacePrefix),HY_HBL_COMMAND_EXPORT, true, false, ¤tProgram); if (!theReceptacle) { return false; - } - + } + _FString * outLF = new _FString (new _String (8192UL,1)); - + long typeFlag = HY_BL_MODEL | HY_BL_LIKELIHOOD_FUNCTION | HY_BL_DATASET_FILTER | HY_BL_HBL_FUNCTION, index; - + BaseRef objectToExport = _HYRetrieveBLObjectByNameMutable (objectID, typeFlag, &index); if (! objectToExport) { errMsg = _String ("'") & objectID & "' is not a supported type"; @@ -1260,18 +1260,18 @@ bool _ElementaryCommand::HandleExport(_ExecutionList& currentProgram){ outLF->theString->Finalize(); break; } - + } } if (errMsg.sLength) { outLF->theString->Finalize(); DeleteObject (outLF); - currentProgram.ReportAnExecutionError (errMsg); + currentProgram.ReportAnExecutionError (errMsg); theReceptacle->SetValue (new _MathObject, false); return false; } - + theReceptacle->SetValue (outLF,false); return true; @@ -1282,21 +1282,21 @@ bool _ElementaryCommand::HandleExport(_ExecutionList& currentProgram){ bool _ElementaryCommand::HandleDifferentiate(_ExecutionList& currentProgram){ currentProgram.currentCommand++; - + _String arg1 (currentProgram.AddNameSpaceToID(*(_String*)parameters(0))), errMsg, expressionToParse = *(_String*)parameters(1); - + _Formula *theResult = nil; _Variable * theReceptacle = CheckReceptacleCommandID (&AppendContainerName(arg1,currentProgram.nameSpacePrefix),HY_HBL_COMMAND_DIFFERENTIATE, true, false, ¤tProgram); if (!theReceptacle) { return false; - } + } _Formula theExpression (expressionToParse,currentProgram.nameSpacePrefix, &errMsg); - + if (!theExpression.IsEmpty() && errMsg.sLength == 0) { long times = 1; if (parameters.lLength==4) { @@ -1318,16 +1318,16 @@ bool _ElementaryCommand::HandleDifferentiate(_ExecutionList& currentProgram } if (errMsg.sLength || theResult == nil) { - if (theResult) { - delete (theResult); + if (theResult) { + delete (theResult); } else { errMsg = _String("Differentiation of '") & 
*(_String*)parameters(1) & "' failed"; } - currentProgram.ReportAnExecutionError (errMsg); + currentProgram.ReportAnExecutionError (errMsg); theReceptacle->SetValue (new _MathObject, false); return false; } - + theReceptacle->SetFormula (*theResult); if (theResult) delete (theResult); @@ -1341,15 +1341,15 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) currentProgram.currentCommand++; _String* targetName = (_String*)parameters(0), fnm; - + bool doClose = true, print_to_stdout = false; - + FILE* dest = nil; - + try { bool skipFilePathEval = false; - + if (targetName->Equal(&stdoutDestination)) { _FString * redirect = (_FString*)FetchObjectFromVariableByType (&blFprintfRedirect, STRING); if (redirect && redirect->theString->sLength) { @@ -1358,15 +1358,15 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) } else { skipFilePathEval = true; targetName = redirect->theString; - } + } } else { print_to_stdout = true; } } - + checkParameter (printDigitsSpec,printDigits,0L); - + if (!print_to_stdout) { fnm = *targetName; if (fnm.Equal(&messageLogDestination)) { @@ -1378,16 +1378,16 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) if (skipFilePathEval == false && !fnm.IsALiteralArgument()) { fnm = GetStringFromFormula (&fnm,currentProgram.nameSpacePrefix); } - + if (!fnm.ProcessFileName(true,false,(Ptr)currentProgram.nameSpacePrefix, false, ¤tProgram)) { return false; } - - + + long k = openFileHandles.Find (&fnm); - + doClose = k<0; - + if (!doClose) { dest = (FILE*)openFileHandles.GetXtra (k); } else { @@ -1396,13 +1396,13 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) } } } - + for (long i = 1; iEqual(&clearFile)) { if (!print_to_stdout && dest) { fclose (dest); @@ -1426,10 +1426,10 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) thePrintObject=¤tProgram; } else { // check for possible string reference - + _String temp = ProcessStringArgument 
(varname), nmspace; - + if (temp.sLength > 0) { nmspace = AppendContainerName(temp,currentProgram.nameSpacePrefix); if (nmspace.IsValidIdentifier()) { @@ -1438,14 +1438,14 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) } else { nmspace = AppendContainerName(*varname,currentProgram.nameSpacePrefix); } - - + + if (thePrintObject == nil) { long typeFlag = HY_BL_ANY, index; - + thePrintObject = _HYRetrieveBLObjectByNameMutable (nmspace, typeFlag, &index); - + if (!thePrintObject) { _String argCopy = *varname, errMsg; @@ -1466,7 +1466,7 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) } } } - + if (thePrintObject) { if (!print_to_stdout) { thePrintObject->toFileStr (dest); @@ -1480,7 +1480,7 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) catch (_String errMsg) { currentProgram.ReportAnExecutionError (errMsg); } - + #if !defined __UNIX__ || defined __HEADLESS__ || defined __HYPHYQT__ || defined __HYPHY_GTK__ if (print_to_stdout) { yieldCPUTime(); @@ -1489,7 +1489,7 @@ bool _ElementaryCommand::HandleFprintf (_ExecutionList& currentProgram) if (dest && dest!=globalMessageFile && doClose) { fclose (dest); } - + return !currentProgram.IsErrorState(); } From 68139646b342bc8c9b2e977ec4a640c3fe1eb55b Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Fri, 27 Jul 2018 19:33:20 -0400 Subject: [PATCH 47/53] RELAX fixes; adding BGM libv3 implementation --- res/TemplateBatchFiles/BGM.bf | 218 +++++++++++++--- .../SelectionAnalyses/RELAX.bf | 239 +++++++++++------- res/TemplateBatchFiles/files.lst | 4 + res/TemplateBatchFiles/libv3/IOFunctions.bf | 8 +- res/TemplateBatchFiles/libv3/all-terms.bf | 2 +- .../libv3/models/parameters.bf | 20 +- .../libv3/tasks/estimators.bf | 3 +- src/core/constant.cpp | 3 + src/core/likefunc.cpp | 12 +- tests/hbltests/data/CD2.prot | 33 +++ tests/hbltests/libv3/BGM.wbf | 12 + 11 files changed, 423 insertions(+), 131 deletions(-) create mode 100644 
tests/hbltests/data/CD2.prot create mode 100644 tests/hbltests/libv3/BGM.wbf diff --git a/res/TemplateBatchFiles/BGM.bf b/res/TemplateBatchFiles/BGM.bf index 7d01e2016..202a7dc4b 100644 --- a/res/TemplateBatchFiles/BGM.bf +++ b/res/TemplateBatchFiles/BGM.bf @@ -46,12 +46,17 @@ namespace bgm { LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf"); } +bgm.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 12, terms.table_options.align : "center"}; + bgm.json = { terms.json.analysis: bgm.analysis_description, terms.json.fits: {}, terms.json.timers: {}, }; +selection.io.startTimer (bgm.json [terms.json.timers], "Overall", 0); + + bgm.data_types = {terms.nucleotide : "Nucleotide multiple sequence alignment", terms.amino_acid : "Protein multiple sequence alignment", terms.codon : "Codon multiple sequence alignment"}; @@ -61,6 +66,17 @@ bgm.run_type = io.SelectAnOption (bgm.data_types, "Data type"); SetDialogPrompt ("Specify a `bgm.run_type` multiple sequence alignment file"); bgm.fit_options = {terms.run_options.retain_lf_object : TRUE}; +bgm.reporting_thershold = 0.5; + +bgm.run_settings = { + "steps" : 1e5, + "burn-in" : 1e4, + "samples" : 100, + "max-parents" : 1, + "min-subs" : 1, + "data-type" : bgm.run_type, + "threshold" : bgm.reporting_thershold +}; if (bgm.run_type == "nucleotide") { @@ -68,9 +84,21 @@ if (bgm.run_type == "nucleotide") { bgm.baseline_model = "models.DNA.GTR.ModelDescription"; } else { if (bgm.run_type == "amino-acid") { - + bgm.alignment_info = alignments.ReadProteinDataSet ("bgm.dataset", None); + LoadFunctionLibrary ("libv3/models/protein.bf"); + LoadFunctionLibrary ("libv3/models/protein/empirical.bf"); + LoadFunctionLibrary ("libv3/models/protein/REV.bf"); + utility.Extend (models.protein.empirical_models, {"GTR" : "General time reversible model (189 estimated parameters)."}); + bgm.run_settings ["model"] = io.SelectAnOption (models.protein.empirical_models, "Baseline 
substitution model"); + bgm.baseline_model = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[bgm.run_settings ["model"]]; } else { // codon + bgm.alignment_info = alignments.PromptForGeneticCodeAndAlignment("bgm.dataset","bgm.codon.filter"); LoadFunctionLibrary("libv3/models/codon/MG_REV.bf"); + bgm.baseline_model = "models.codon.MG_REV.ModelDescription"; + function bgm.MG94_REV (options) { + return Call ("models.codon.MG_REV.ModelDescription", options, bgm.alignment_info[terms.code]); + } + } } @@ -112,15 +140,18 @@ console.log ( "\n> BGM will write result file to `bgm.alignment_info[terms.json. bgm.selected_branches = selection.io.defineBranchSets ( bgm.partitions_and_trees ); -bgm.nsteps = io.PromptUser("\n>Select the number of MCMC steps to sample [default 100000]", 1e5, 0, 1e9, TRUE); -bgm.burnin = io.PromptUser("\n>Select the number of MCMC steps to discard as burn-in [default 10000]", 1e4, 0, 1e9, TRUE); -bgm.nsamples = io.PromptUser("\n>Select the number of steps to extract from the chain sample [default 100]", 100, 0, bgm.nsteps, TRUE); -bgm.max_parents = io.PromptUser ("\n>Select the maximum number of parents allowed per node [default 1]", 1, 1, 3, TRUE); -bgm.min_subs = io.PromptUser ("\n>Select the minium number of substitutions per site to include it in the analysis", 1, 1, 1e5, TRUE); + +bgm.run_settings["steps"] = io.PromptUser("\n>Select the number of MCMC steps to sample", bgm.run_settings["steps"] , 0, 1e9, TRUE); +bgm.run_settings["burn-in"] = io.PromptUser("\n>Select the number of MCMC steps to discard as burn-in", bgm.run_settings["burn-in"], 0, 1e9, TRUE); +bgm.run_settings["samples"] = io.PromptUser("\n>Select the number of steps to extract from the chain sample", 100, 0, bgm.run_settings["samples"], TRUE); +bgm.run_settings["max-parents"] = io.PromptUser ("\n>Select the maximum number of parents allowed per node", bgm.run_settings["max-parents"], 1, 3, TRUE); 
+bgm.run_settings["min-subs"] = io.PromptUser ("\n>Select the minium number of substitutions per site to include it in the analysis", bgm.run_settings["min-subs"], 1, 1e5, TRUE); // FIT THE BASELINE MODEL +selection.io.startTimer (bgm.json [terms.json.timers], "Baseline fit", 1); + io.ReportProgressMessageMD("bgm", "phylo", "Performing initial model fit to obtain branch lengths and rate parameters"); if (bgm.run_type == "nucleotide") { @@ -134,15 +165,56 @@ if (bgm.run_type == "nucleotide") { } }); + } else { + if (bgm.run_type == "codon") { + bgm.initial_values = utility.Extend (bgm.initial_values, + { + utility.getGlobalValue ("terms.global") : { + terms.nucleotideRate ("A","C") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.nucleotideRate ("A","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.nucleotideRate ("C","G") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.nucleotideRate ("G","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} , + terms.omega_ratio : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} + } + }); + + io.ReportProgressMessageMD("bgm", "phylo", "Fitting nucleotide GTR to obtain branch length estimates"); + + bgm.initial_values = estimators.FitSingleModel_Ext ( + bgm.filter_names, + bgm.trees, + "models.DNA.GTR.ModelDescription" , + bgm.initial_values, + bgm.fit_options + ); + + io.ReportProgressMessageMD ("bgm", "phylo", ">Fitted an alignment-wide GTR model. 
" + selection.io.report_fit (bgm.initial_values, 0, bgm.sample_size )); + + bgm.filter_names = { "0" : "bgm.codon.filter" }; + bgm.code = bgm.alignment_info [terms.code]; + } } -bgm.baseline_fit = estimators.FitSingleModel_Ext ( - bgm.filter_names, - bgm.trees, - bgm.baseline_model , - bgm.initial_values, - bgm.fit_options +if (bgm.run_type == "codon") { + //codon_data, tree, generator, genetic_code, option, initial_values + bgm.baseline_fit = estimators.FitCodonModel( + bgm.filter_names, + bgm.trees, + bgm.baseline_model , + bgm.code, + bgm.fit_options, + bgm.initial_values, + ); +} else { + bgm.baseline_fit = estimators.FitSingleModel_Ext ( + bgm.filter_names, + bgm.trees, + bgm.baseline_model , + bgm.initial_values, + bgm.fit_options + ); +} io.ReportProgressMessageMD ("bgm", "phylo", ">Fitted an alignment-wide model. " + selection.io.report_fit (bgm.baseline_fit, 0, bgm.sample_size ) + "\n\nTotal tree lengths by partition\n"); @@ -161,17 +233,20 @@ selection.io.json_store_lf(bgm.json, bgm.baseline_model,bgm.baseline_fit[terms.f bgm.baseline_fit[terms.parameters], bgm.sample_size, None, 0); +selection.io.stopTimer (bgm.json [terms.json.timers], "Baseline fit"); + utility.ForEachPair (bgm.filter_specification, "_key_", "_value_", 'selection.io.json_store_branch_attribute(bgm.json, bgm.baseline_model, terms.branch_length, 0, _key_, selection.io.extract_branch_info((bgm.baseline_fit[terms.branch_length])[_key_], "selection.io.branch.length"));'); + io.ReportProgressMessageMD("bgm", "ancestral", "Performing joint ancestral state reconstruction and mapping substitutions"); bgm.ancestral_cache = ancestral.build (bgm.baseline_fit[terms.likelihood_function], 0, None); bgm.branch_filter = utility.Filter (bgm.selected_branches[0], "_class_", "_class_ == terms.tree_attributes.test"); - +DeleteObject (^bgm.baseline_fit[terms.likelihood_function]); if (bgm.run_type != "codon") { bgm.counts = ancestral.ComputeSubstitutionCounts( @@ -180,26 +255,104 @@ if (bgm.run_type != 
"codon") { None, // substitution filter "bgm.min_sub_filter" // site filter (e.g., MinCount) ); -} else { +} else { + bgm.counts = ancestral.ComputeSubstitutionCounts( + bgm.ancestral_cache, + bgm.branch_filter, // selected branches + "bgm.nsfilter", // substitution filter + "bgm.min_sub_filter" // site filter (e.g., MinCount) + ); } -if (Abs (bgm.counts["Sites"]) <= 2) { +selection.io.startTimer (bgm.json [terms.json.timers], "Network inference", 2); + +bgm.site_count = utility.Array1D (bgm.counts["Sites"]); + +if (bgm.site_count <= 2) { console.log ("###ERROR: NOT ENOUGH SUBSTITUTIONS###"); console.log ("\n>BGM requires at least three sites to have accumulated sufficient substitutions to run network inference"); } else { - bgm.site_count (bgm.counts); + io.ReportProgressMessageMD("bgm", "inference", "Inferring a BGM on `bgm.site_count` nodes [sites]"); + + namespace bgm { + + raw_results = bgm.run (counts, run_settings["burn-in"], run_settings["steps"], run_settings["samples"], run_settings["max-parents"]); + mcmc_trace = utility.Map ({1, run_settings["samples"]}["_MATRIX_ELEMENT_COLUMN_"], "_index_", "bgm.raw_results[_index_][0]"); + + // unpack site results + + table_headers = { + {"Site 1", "Index of site 1"} + {"Site 2", "Index of site 2"} + {"P [Site 1 –> Site 2]", "Probability that site 2 is conditionally dependent on site 1"} + {"P [Site 2 –> Site 1]", "Probability that site 1 is conditionally dependent on site 2"} + {"P [Site 1 <–> Site 2]", "Probability that sites 1 and 2 are not conditionally independent"} + {"Site 1 subs", "Substitution counts inferred for Site 1"} + {"Site 2 subs", "Substitution counts inferred for Site 2"} + {"Shared subs", "Substitutions shared by both sites"} + }; + + processed_results = {site_count * (site_count - 1) / 2, 8}; + row_index = 0; + + table_screen_output = {{"Site 1", "Site 2", "P [Site 1 <-> Site 2]", "Subs (1,2,shared)"}}; - bgm.raw_results = bgm.run (bgm.counts, bgm.burnin, bgm.nsteps, bgm.nsamples, 
bgm.max_parents); - bgm.mcmc_trace = utility.Map ({1, bgm.nsamples}["_MATRIX_ELEMENT_COLUMN_"], "_index_", "bgm.raw_results[_index_][0]"); + report.coupled_pair = {{Format(processed_results[row_index][0],5,0), + Format(processed_results[row_index][1],5,0), + Format(processed_results[row_index][4],6,3), + "" + processed_results[row_index][5] + ", " + processed_results[row_index][6] + ", " + processed_results[row_index][7]}}; + report.pairs_found = {}; + for (site1 = 0; site1 < site_count; site1 += 1) { + for (site2 = site1 + 1; site2 < site_count; site2 += 1) { + processed_results[row_index][0] = 0 + (counts["Sites"])[site1] + 1; + processed_results[row_index][1] = 0 + (counts["Sites"])[site2] + 1; + processed_results[row_index][2] = raw_results[site1 * site_count + site2][1]; + processed_results[row_index][3] = raw_results[site2 * site_count + site1][1]; + processed_results[row_index][4] = processed_results[row_index][2] + processed_results[row_index][3]; + processed_results[row_index][5] = +(counts["Counts"])[-1][site1]; + processed_results[row_index][6] = +(counts["Counts"])[-1][site2]; + processed_results[row_index][7] = +((counts["Counts"])[-1][site2]$(counts["Counts"])[-1][site1]); + if (processed_results[row_index][4] >= reporting_thershold) { + if (Abs(report.pairs_found) == 0 && table_output_options[^"terms.table_options.header"]) { + fprintf (stdout, "\n", io.FormatTableRow (table_screen_output,table_output_options)); + table_output_options[^"terms.table_options.header"] = FALSE; + } + fprintf (stdout, io.FormatTableRow (report.coupled_pair,table_output_options)); + report.pairs_found + (processed_results[row_index][-1]); + } - //bgm.processed_results = { + row_index+=1; + } + } + + json [^"terms.fit.MLE"] = {^"terms.json.headers" : table_headers, + ^"terms.json.content" : processed_results }; + + + console.log ("----\n## BGM analysis summary on `site_count` sites each with at least `run_settings['min-subs']` substitutions. 
Evidence for conditional dependence was reported at posterior probability of " + reporting_thershold); + + _count_ = Abs(report.pairs_found); + + if (_count_ == 0) { + console.log ("* No pairs of conditionally independent sites found"); + } else { + console.log ("* " + _count_ + " " + io.SingularOrPlural (_count_, "pair ", "pairs ") + " of conditionally dependent sites found"); + } + + + } } +selection.io.stopTimer (bgm.json [terms.json.timers], "Network inference"); +selection.io.stopTimer (bgm.json [terms.json.timers], "Overall"); + +bgm.json [terms.settings] = bgm.run_settings; + +io.SpoolJSON (bgm.json, bgm.alignment_info[terms.json.json]); -return 0; // --- BGM analysis ------------------------------- @@ -212,19 +365,21 @@ lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { 1. node name, must be a string 2. maximum number of parents 3. prior sample size - always uninformative (count split evenly across levels) - - if we were truly Bayesian, we would let the user set informative priors.. + - if we were truly Bayesian, we would let the user set informative priors. 4. 
number of levels - always binary in this case (substitution mapped to branch) */ node_name = ""+ ((_bgm_data["Sites"])[k] + 1); nodes + bgm.add_discrete_node (node_name, max_parents, 0, 2); } + utility.ToggleEnvVariable ("VERBOSITY_LEVEL",0); BayesianGraphicalModel gen_bgm = (nodes); bgm.attach_data (&gen_bgm, _bgm_data["Counts"], 0, 0, 0); bgm_result = bgm.order_MCMC(&gen_bgm, nsteps, burnin, nsamples); - console.log (Rows(bgm_result)); + utility.ToggleEnvVariable ("VERBOSITY_LEVEL",None); + return bgm_result; } @@ -232,18 +387,21 @@ lfunction bgm.run (_bgm_data, burnin, nsteps, nsamples, max_parents) { // ==== HELPER FUNCTIONS ==== -lfunction bgm.nsfilter(state1, state2, ancestral_data) { - if (bgm.code[state1] != bgm.code[state2] && - bgm.code[state1] != genetic_code.stop_code && - bgm.code[state2] != genetic_code.stop_code) { - return TRUE; - } else { - return FALSE; +function bgm.nsfilter(state1, state2, ancestral_data) { + + if (state1 >= 0 && state2 >= 0) { + if (bgm.code[state1] != bgm.code[state2] && + bgm.code[state1] != genetic_code.stop_code && + bgm.code[state2] != genetic_code.stop_code) { + + return TRUE; + } } + return FALSE; } function bgm.min_sub_filter (counts) { - return (+counts) > bgm.min_subs; + return (+counts) >= bgm.run_settings["min-subs"]; } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 20641ee33..70ace4dcd 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -33,6 +33,7 @@ utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE); //utility.SetEnvVariable ("LF_SMOOTHING_SCALER", 0.01); //utility.SetEnvVariable ("LF_SMOOTHING_REDUCTION", 1/2); + /*------------------------------------------------------------------------------*/ relax.analysis_description = { @@ -44,7 +45,7 @@ relax.analysis_description = { terms.io.contact : "spond@temple.edu", terms.io.requirements : "in-frame codon alignment and 
a phylogenetic tree, with at least two groups of branches defined using the {} notation (one group can be defined as all unlabeled branches)" }; - + relax.json = { terms.json.analysis: relax.analysis_description, terms.json.input: {}, terms.json.fits : {}, @@ -54,6 +55,8 @@ relax.json = { terms.json.analysis: relax.analysis_description, relax.relaxation_parameter = "relax.K"; relax.rate_classes = 3; + + relax.MG94_name = terms.json.mg94xrev_sep_rates; relax.general_descriptive_name = "General descriptive"; relax.alternative_name = "RELAX alternative"; @@ -66,11 +69,11 @@ terms.relax.k_range = { terms.lower_bound: "0", terms.upper_bound: "50" }; - + terms.relax.k_range1 = { terms.lower_bound: "1", terms.upper_bound: "50" - }; + }; relax.p_threshold = 0.05; @@ -147,12 +150,13 @@ namespace relax { io.ReportProgressMessageMD ("RELAX", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); + + relax.final_partitioned_mg_results = estimators.FitMGREV (relax.filter_names, relax.trees, relax.codon_data_info [terms.code], { terms.run_options.model_type: terms.local, terms.run_options.partitioned_omega: relax.selected_branches, }, relax.partitioned_mg_results); - io.ReportProgressMessageMD("RELAX", "codon-refit", "* " + selection.io.report_fit (relax.final_partitioned_mg_results, 0, relax.codon_data_info[terms.data.sample_size])); relax.global_dnds = selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio); @@ -190,84 +194,125 @@ parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50); if (relax.model_set == "All") { // run all the models - relax.ge.bsrel_model = model.generic.DefineMixtureModel("relax.BS_REL.ModelDescription", - "relax.ge", { - "0": parameters.Quote(terms.local), - "1": relax.codon_data_info[terms.code], - "2": parameters.Quote (relax.rate_classes) // the number of rate classes - }, - relax.filter_names, - None); - - 
for (relax.i = 1; relax.i < relax.rate_classes; relax.i += 1) { - parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)), terms.range_almost_01); - } - parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.rate_classes)), terms.range_gte1); + relax.ge_guess = None; - relax.model_object_map = { "relax.ge" : relax.ge.bsrel_model }; + while (1) { - io.ReportProgressMessageMD ("RELAX", "gd", "Fitting the general descriptive (separate k per branch) model"); - selection.io.startTimer (relax.json [terms.json.timers], "General descriptive model fitting", 2); + relax.ge.bsrel_model = model.generic.DefineMixtureModel("relax.BS_REL.ModelDescription", + "relax.ge", { + "0": parameters.Quote(terms.local), + "1": relax.codon_data_info[terms.code], + "2": parameters.Quote (relax.rate_classes) // the number of rate classes + }, + relax.filter_names, + None); - relax.ge_guess = relax.DistributionGuess(utility.Map (selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"), "_value_", - "_value_[terms.fit.MLE]")); + for (relax.i = 1; relax.i < relax.rate_classes; relax.i += 1) { + parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)), terms.range_almost_01); + } + parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.rate_classes)), terms.range_gte1); + /* + for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) { + console.log (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i))); + console.log ( + parameters.GetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i))) + ); 
+ } + */ - relax.distribution = models.codon.BS_REL.ExtractMixtureDistribution(relax.ge.bsrel_model); - - parameters.SetStickBreakingDistribution (relax.distribution, relax.ge_guess); + relax.model_object_map = { "relax.ge" : relax.ge.bsrel_model }; - - relax.general_descriptive.fit = estimators.FitLF (relax.filter_names, - relax.trees, - { "0" : {"DEFAULT" : "relax.ge"}}, - relax.final_partitioned_mg_results, - relax.model_object_map, - { - terms.run_options.apply_user_constraints: "relax.init.k", - terms.run_options.retain_lf_object : TRUE - - }); - - - estimators.TraverseLocalParameters (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map, "relax.set.k2"); - - relax.general_descriptive.fit = estimators.FitExistingLF (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map); - - selection.io.stopTimer (relax.json [terms.json.timers], "General descriptive model fitting"); - - io.ReportProgressMessageMD("RELAX", "ge", "* " + selection.io.report_fit (relax.general_descriptive.fit, 9, relax.codon_data_info[terms.data.sample_size])); - io.ReportProgressMessageMD("RELAX", "ge", "* The following baseline rate distribution for branch-site combinations was inferred"); - relax.inferred_ge_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistributionFromFit (relax.ge.bsrel_model, relax.general_descriptive.fit)) % 0; - selection.io.report_dnds (relax.inferred_ge_distribution); - relax.distribution_for_json = {'Shared' : utility.Map (utility.Range (relax.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : relax.inferred_ge_distribution [_index_][0], - terms.json.proportion : relax.inferred_ge_distribution [_index_][1]}") - }; - selection.io.json_store_lf (relax.json, - relax.general_descriptive_name, - relax.general_descriptive.fit[terms.fit.log_likelihood], - relax.general_descriptive.fit[terms.parameters] + 9 , // +9 comes from CF3x4 - 
relax.codon_data_info[terms.data.sample_size], - relax.distribution_for_json, - relax.display_orders[relax.general_descriptive_name] - ); + io.ReportProgressMessageMD ("RELAX", "gd", "Fitting the general descriptive (separate k per branch) model"); + selection.io.startTimer (relax.json [terms.json.timers], "General descriptive model fitting", 2); - selection.io.json_store_branch_attribute(relax.json, relax.general_descriptive_name, terms.branch_length, relax.display_orders[relax.general_descriptive_name], - 0, - selection.io.extract_branch_info((relax.general_descriptive.fit[terms.branch_length])[0], "selection.io.branch.length")); + if (Type (relax.ge_guess) != "Matrix") { + relax.ge_guess = relax.DistributionGuess(utility.Map (selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"), "_value_", + "_value_[terms.fit.MLE]")); + } + + relax.distribution = models.codon.BS_REL.ExtractMixtureDistribution(relax.ge.bsrel_model); + + parameters.SetStickBreakingDistribution (relax.distribution, relax.ge_guess); + + relax.general_descriptive.fit = estimators.FitLF (relax.filter_names, + relax.trees, + { "0" : {"DEFAULT" : "relax.ge"}}, + relax.final_partitioned_mg_results, + relax.model_object_map, + { + terms.run_options.apply_user_constraints: "relax.init.k", + terms.run_options.retain_lf_object : TRUE + + }); + + //Export (lfe, ^relax.general_descriptive.fit [terms.likelihood_function]); + //console.log (lfe); + + estimators.TraverseLocalParameters (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map, "relax.set.k2"); + + relax.general_descriptive.fit = estimators.FitExistingLF (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map); + + selection.io.stopTimer (relax.json [terms.json.timers], "General descriptive model fitting"); + + io.ReportProgressMessageMD("RELAX", "ge", "* " + selection.io.report_fit (relax.general_descriptive.fit, 9, 
relax.codon_data_info[terms.data.sample_size])); + io.ReportProgressMessageMD("RELAX", "ge", "* The following baseline rate distribution for branch-site combinations was inferred"); + relax.inferred_ge_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistributionFromFit (relax.ge.bsrel_model, relax.general_descriptive.fit)) % 0; + + selection.io.report_dnds (relax.inferred_ge_distribution); + + if (relax.rate_classes > 2) { + if (relax.inferred_ge_distribution[0][1] < 1e-5 || relax.inferred_ge_distribution[1][1] < 1e-5) { + io.ReportProgressMessageMD("RELAX", "ge", "\n ### Because some of the rate classes were collapsed to 0, the model is likely overparameterized. RELAX will reduce the number of site rate classes by one and repeat the fit now.\n----\n"); + relax.rate_classes = relax.rate_classes - 1; + relax.ge_guess = {relax.rate_classes, 2}; + relax.shift = 0; + //console.log (relax.inferred_ge_distribution); + for (relax.i = 0; relax.i < relax.rate_classes; relax.i += 1) { + if (relax.inferred_ge_distribution[relax.i][1] < 1e-5 && relax.shift == 0) { + relax.shift += 1; + continue; + } + relax.ge_guess[relax.i][0] = relax.inferred_ge_distribution[relax.i + relax.shift][0]; + relax.ge_guess[relax.i][1] = relax.inferred_ge_distribution[relax.i + relax.shift][1]; + } + //console.log (relax.ge_guess); + continue; + } + } - relax.k_estimates = selection.io.extract_branch_info((relax.general_descriptive.fit[terms.branch_length])[0], "relax.extract.k"); - relax.k_stats = math.GatherDescriptiveStats (utility.Map (utility.Values (relax.k_estimates), "_value_", "0+_value_")); + relax.distribution_for_json = {'Shared' : utility.Map (utility.Range (relax.rate_classes, 0, 1), + "_index_", + "{terms.json.omega_ratio : relax.inferred_ge_distribution [_index_][0], + terms.json.proportion : relax.inferred_ge_distribution [_index_][1]}") + }; + selection.io.json_store_lf (relax.json, + relax.general_descriptive_name, + 
relax.general_descriptive.fit[terms.fit.log_likelihood], + relax.general_descriptive.fit[terms.parameters] + 9 , // +9 comes from CF3x4 + relax.codon_data_info[terms.data.sample_size], + relax.distribution_for_json, + relax.display_orders[relax.general_descriptive_name] + ); - io.ReportProgressMessageMD("RELAX", "ge", "* Branch-level `terms.relax.k` distribution has mean " + Format (relax.k_stats[terms.math.mean], 5,2) + ", median " + - Format (relax.k_stats[terms.math.median], 5,2) + ", and 95% of the weight in " + Format (relax.k_stats[terms.math._2.5], 5,2) + " - " + Format (relax.k_stats[terms.math._97.5], 5,2)); + selection.io.json_store_branch_attribute(relax.json, relax.general_descriptive_name, terms.branch_length, relax.display_orders[relax.general_descriptive_name], + 0, + selection.io.extract_branch_info((relax.general_descriptive.fit[terms.branch_length])[0], "selection.io.branch.length")); + relax.k_estimates = selection.io.extract_branch_info((relax.general_descriptive.fit[terms.branch_length])[0], "relax.extract.k"); - selection.io.json_store_branch_attribute(relax.json, "k (general descriptive)", terms.json.branch_label, relax.display_orders[relax.general_descriptive_name], - 0, - relax.k_estimates); + relax.k_stats = math.GatherDescriptiveStats (utility.Map (utility.Values (relax.k_estimates), "_value_", "0+_value_")); + + io.ReportProgressMessageMD("RELAX", "ge", "* Branch-level `terms.relax.k` distribution has mean " + Format (relax.k_stats[terms.math.mean], 5,2) + ", median " + + Format (relax.k_stats[terms.math.median], 5,2) + ", and 95% of the weight in " + Format (relax.k_stats[terms.math._2.5], 5,2) + " - " + Format (relax.k_stats[terms.math._97.5], 5,2)); + + + selection.io.json_store_branch_attribute(relax.json, "k (general descriptive)", terms.json.branch_label, relax.display_orders[relax.general_descriptive_name], + 0, + relax.k_estimates); + + break; + } } else { relax.general_descriptive.fit = relax.final_partitioned_mg_results; @@ 
-382,38 +427,37 @@ relax.lf.raw = relax.ComputeOnGrid ( relax.alternative_model.fit[terms.likeliho relax.grid.MatrixToDict ({200,1}["_MATRIX_ELEMENT_ROW_*0.025"]), "relax.pass1.evaluator", "relax.pass1.result_handler"); - + // FIND the difference between K < 1 and K > 1 relax.best_samples = {{-1e100,-1e100}}; for (relax.k = 0; relax.k < 40; relax.k += 1) { relax.best_samples[0] = Max (relax.best_samples[0], relax.lf.raw[relax.k]); -} +} for (relax.k = 40; relax.k < 200; relax.k += 1) { relax.best_samples[1] = Max (relax.best_samples[1], relax.lf.raw[relax.k]); -} +} -//console.log (relax.best_samples); if (Abs (relax.best_samples[1] - relax.best_samples[0]) < 5.) { // could be diagnostic of convergence problems io.ReportProgressMessageMD("RELAX", "alt-2", "* Potential convergence issues due to flat likelihood surfaces; checking to see whether K > 1 or K < 1 is robustly inferred"); if (relax.fitted.K > 1) { parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.range01); } else { - parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range1); + parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range1); } - relax.alternative_model.fit.take2 = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map}, - relax.alternative_model.fit , - relax.model_object_map, + relax.alternative_model.fit.take2 = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map}, + relax.alternative_model.fit , + relax.model_object_map, {terms.run_options.retain_lf_object: TRUE} ); - + if (relax.alternative_model.fit.take2 [terms.fit.log_likelihood] > relax.alternative_model.fit[terms.fit.log_likelihood]) { - + io.ReportProgressMessageMD("RELAX", "alt-2", "\n### Potential for highly unreliable K inference due to multiple local maxima in the likelihood function, treat results with caution "); 
io.ReportProgressMessageMD("RELAX", "alt-2", "> Relaxation parameter reset to opposite mode of evolution from that obtained in the initial optimization."); io.ReportProgressMessageMD("RELAX", "alt-2", "* " + selection.io.report_fit (relax.alternative_model.fit.take2, 9, relax.codon_data_info[terms.data.sample_size])); @@ -427,14 +471,14 @@ if (Abs (relax.best_samples[1] - relax.best_samples[0]) < 5.) { // could be diag io.ReportProgressMessageMD("RELAX", "alt-2", "* The following rate distribution was inferred for **reference** branches"); relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.reference.bsrel_model)) % 0; selection.io.report_dnds (relax.inferred_distribution_ref); - + relax.alternative_model.fit = relax.alternative_model.fit.take2; } - - + + parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range); - - + + } relax.distribution_for_json = {relax.test_branches_name : utility.Map (utility.Range (relax.rate_classes, 0, 1), @@ -620,7 +664,10 @@ lfunction relax.set.k2 (tree_name, node_name, model_description) { lfunction relax.init.k (lf_id, components, data_filter, tree, model_map, initial_values, model_objects) { parameter_set = estimators.TraverseLocalParameters (lf_id, model_objects, "relax.set.k"); - parameters.SetConstraint (model.generic.GetGlobalParameter (utility.getGlobalValue("relax.ge.bsrel_model") , terms.AddCategory (utility.getGlobalValue("terms.parameters.omega_ratio"),2)), utility.getGlobalValue("terms.parameters.one"), utility.getGlobalValue("terms.global")); + rc = utility.getGlobalValue ("relax.rate_classes"); + /*if (rc > 2) { + parameters.SetConstraint (model.generic.GetGlobalParameter (utility.getGlobalValue("relax.ge.bsrel_model") , terms.AddCategory (utility.getGlobalValue("terms.parameters.omega_ratio"),rc-1)), utility.getGlobalValue("terms.parameters.one"), 
utility.getGlobalValue("terms.global")); + }*/ /*parameters.SetConstraint (model.generic.GetGlobalParameter (utility.getGlobalValue("relax.ge.bsrel_model") , terms.AddCategory (utility.getGlobalValue("terms.parameters.omega_ratio"),utility.getGlobalValue ("relax.rate_classes"))), "1/(" + Join ("*", utility.Map ( @@ -646,7 +693,17 @@ lfunction relax.BS_REL.ModelDescription (type, code, components) { lfunction relax.DistributionGuess (mean) { - guess = {{0.05,0.7}{0.25,0.2}{10,0.1}}; + rc = utility.getGlobalValue ("relax.rate_classes"); + + guess = {rc,2}; + + guess[rc-1][0] = 5; + guess[rc-1][1] = 0.1; + + for (k = 0; k < rc - 1; k += 1) { + guess[k][0] = 0.1 ^ (1 / (1 + k)); + guess[k][1] = (0.9) / (rc-1) ; + } norm = + guess[-1][1]; guess_mean = 1/(+(guess [-1][0] $ guess [-1][1]))/norm; @@ -715,7 +772,7 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) { key = "component_" + component; ExecuteCommands (" function rate_generator (fromChar, toChar, namespace, model_type, model) { - return relax.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')], + return relax.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')], 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'), 'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component), 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component)); @@ -726,7 +783,7 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) { model.generic.AddGlobal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component )); parameters.DeclareGlobalWithRanges (_aux[component-1], 0.5, 0, 1); } else { - + } models.codon.generic.DefineQMatrix(bs_rel, namespace); rate_matrices [key] = 
bs_rel[utility.getGlobalValue("terms.model.rate_matrix")]; @@ -817,6 +874,6 @@ lfunction relax.grid.MatrixToDict (grid) { terms.id : relax.relaxation_parameter, terms.fit.MLE : _value_[1] } - + }'); } diff --git a/res/TemplateBatchFiles/files.lst b/res/TemplateBatchFiles/files.lst index e699be5dc..1f971dde9 100644 --- a/res/TemplateBatchFiles/files.lst +++ b/res/TemplateBatchFiles/files.lst @@ -13,6 +13,10 @@ "","Evolutionary rates on non-coding data.","!Relative evolutionary rate inference"; "LEISR","[LEISR] Infer relative evolutionary rates on a nucleotide or protein alignment, in a spirit similar to Rate4Site (PMID: 12169533).","LEISR.bf"; +"","Identify pairs or networks of sites that co-evolve.","!Coevolutionary analysis"; +"BGM","[BGM] Apply Bayesian Graphical Model inference to substitution histories at individual sites.","BGM.bf"; + + "","Perform a maximum likelihood analysis on a single file given a single tree.","!Basic Analyses"; "ACD","Analyse codon data with a variery of standard models using given tree.","AnalyzeCodonData.bf"; "AD","Analyse nucleotide or aminoacid data with a variery of standard models using given tree.","AnalyzeNucProtData.bf"; diff --git a/res/TemplateBatchFiles/libv3/IOFunctions.bf b/res/TemplateBatchFiles/libv3/IOFunctions.bf index fc2d9623f..88d6e3677 100644 --- a/res/TemplateBatchFiles/libv3/IOFunctions.bf +++ b/res/TemplateBatchFiles/libv3/IOFunctions.bf @@ -709,13 +709,17 @@ lfunction io.SelectAnOption (options, description) { option_set [k][1] = options[keys[k]]; } } + ChoiceList (selection,description,1,NO_SKIP,option_set); + if (selection >= 0) { return option_set[selection][0]; + } else { + selection = None; } } - assert (None != selection, "Selection canceled"); - return None; + assert (None != selection, "Selection canceled"); + return None; } /** diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 8708d86d0..23157d6ca 100644 --- 
a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -74,7 +74,7 @@ namespace terms{ range_gte1 = { lower_bound: "1", - upper_bound: "1e25" + upper_bound: "1e10" }; range_any = { diff --git a/res/TemplateBatchFiles/libv3/models/parameters.bf b/res/TemplateBatchFiles/libv3/models/parameters.bf index 082b545ba..147a5aa81 100644 --- a/res/TemplateBatchFiles/libv3/models/parameters.bf +++ b/res/TemplateBatchFiles/libv3/models/parameters.bf @@ -362,6 +362,25 @@ lfunction parameters.GenerateSequentialNames(prefix, count, delimiter) { return holder; } +/** + * @name parameters.GetRange + * @param id + * @returns variable range + */ +lfunction parameters.GetRange(id) { + + if (Type(id) == "String") { + GetInformation (range, ^id, 0); + return { + ^"terms.lower_bound" : range[1], + ^"terms.upper_bound" : range[2] + }; + } + io.ReportAnExecutionError ("An invalid combination of parameters was passed to parameters.GetRange. ID = " + id); + return None; +} + + /** * @name parameters.SetRange * @param id @@ -543,7 +562,6 @@ lfunction parameters.SetStickBreakingDistribution (parameters, values) { rate_count = Rows (values); left_over = 1; - for (i = 0; i < rate_count; i += 1) { parameters.SetValue ((parameters["rates"])[i], values[i][0]); diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index dcef14aaf..7041c9197 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -965,8 +965,7 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op None); - - //utility.ToggleEnvVariable("VERBOSITY_LEVEL", 1); + //utility.ToggleEnvVariable("VERBOSITY_LEVEL", 10); df = 0; model_assignment = { diff --git a/src/core/constant.cpp b/src/core/constant.cpp index 29cfc6fa0..20eb6112f 100644 --- a/src/core/constant.cpp +++ b/src/core/constant.cpp @@ -392,6 +392,9 @@ _PMathObj _Constant::longDiv (_PMathObj 
theObj) { _PMathObj _Constant::Raise (_PMathObj theObj) { return _check_type_and_compute (theObj, [] (_Parameter base, _Parameter expon) -> _Parameter { if (base>0.0) { + if (expon == 1.) { + return base; + } return exp (log(base)*(expon)); } else { if (base<0.0) { diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index f465a6039..8a73c8726 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -1786,7 +1786,9 @@ bool _LikelihoodFunction::PreCompute (void) for (; i < arrayToCheck->lLength; i++) { _Variable* cornholio = LocateVar(arrayToCheck->lData[i]); - if (!cornholio->IsValueInBounds(((_Constant*) cornholio->Compute())->Value())){ + _Parameter tp = cornholio->Compute()->Value(); + if (!cornholio->IsValueInBounds(tp)){ + ReportWarning (_String ("Failing bound checks on ") & *cornholio->GetName() & " = " & _String (tp, "%25.16g")); break; } } @@ -2691,9 +2693,10 @@ void _LikelihoodFunction::CheckDependentBounds (void) { subNumericValues = 0; DeleteObject (cStr); + + _TerminateAndDump(_String("Constrained optimization failed, since a starting point within the domain specified for the variables couldn't be found.\nSet it by hand, or check your constraints for compatibility.\nFailed constraint:") + & badX); - WarnError(_String("Constrained optimization failed, since a starting point within the domain specified for the variables couldn't be found.\nSet it by hand, or check your constraints for compatibility.\nFailed constraint:") - & badX); } } @@ -5923,7 +5926,8 @@ _Parameter _LikelihoodFunction::ConjugateGradientDescent (_Parameter precisio if (optimizatonHistory) { ReportWarning (_String ((_String*)optimizatonHistory->toStr())); } - WarnError (errorStr); + _TerminateAndDump (errorStr); + //WarnError (errorStr); return check_value; } //return; diff --git a/tests/hbltests/data/CD2.prot b/tests/hbltests/data/CD2.prot new file mode 100644 index 000000000..236bd7dcf --- /dev/null +++ b/tests/hbltests/data/CD2.prot @@ -0,0 +1,33 @@ +#NEXUS + +BEGIN 
TAXA; + DIMENSIONS NTAX = 10; + TAXLABELS + 'HUMAN' 'CHIMP' 'BABOON' 'RHMONKEY' 'COW' 'PIG' 'HORSE' 'CAT' 'MOUSE' 'RAT' ; +END; + +BEGIN CHARACTERS; + DIMENSIONS NCHAR = 187; + FORMAT + DATATYPE = PROTEIN + GAP=- + MISSING=? + NOLABELS + ; + +MATRIX + KEITNALETWGALGQDINLDIPSFQMSDDIDDIKWEKTSDKKKIAQFRKEKETFKEKDTYKLFKNGTLKIKH?LKTDDQDIYKVSIYDTKGKNVLEKIFDLKIQERVSKPKISWTCINTTLTCEVMNGTDPELNLYQDGKHLK?LSQRVITHKWTTSLSAKFKCTAGNKVSKESSVEPVSCPEKGLD + EEITNALETWGALGQDINLDIPSFQMSDDIDDIKWEKTSDKKKIAQFRKEKETFKEKDTYKLFKNGTLKIKH?LKTDDQDIYKVSIYDTKGKNVLEKIFDLKIQERVSKPKISWTCINTTLTCEVMNGTDPELNLYQDGKHLK?LSQRVITHKWTTSLSAKFKCTAGNKVSKESSVEPVSCPEKGLD + KEIRNALETWGALGQDIDLDIPSFQMSDDIDDIKWEKTSDKKKIAQFRKEKETFEEKDAYKLFKNGTLKIKH?LKIHDQDSYKVSIYDTKGKNVLEKTFDLKIQERVSEPKISWTCINTTLTCEVMNGTDPELNLYQDGKHLK?LSQRVITHKWTTSLSAKFKCTAGNKVSKESRMETVSCPEKGLD + KEIRNALETWGALGQDIDLDIPSFQMSDDIDDIRWEKTSDKKKIAQFRKEKETFEEKDAYKLFKNGTLK?KH?LKIHDQDSYKVSIYDTKGKNVLEKTFDLKIQERVSEPKISWTCINTTLTCEVMNGT?PELNLYQDGKHVK?LSQRVITHKWTTSLSAKFKCTAGNKVSKESRMETVSCPEKGLD + ----ESIVVWGALDHDLNLDIPGFPRSDIVADIKWNR??NKNKIARIKKDMPLHNEMDKYDMFTNGTLKIKT?LMRNDSGLYEVEVYDSNGVNLLSKKFDLKIQEMLSGPEINWICTNRTVSCKVENGSNPKLQLFLNTTRVKQDHGKLITYTWNTRWNKTFKCVASNHVDSKVSIEIAVCPDEGLD + -----TEVVWGIVDQDINLDIPELSKHDNVDHIRWQK??NENKIAEFKKNKETHPVKDTYMMLPNGTLRIKD?LKRDDEGIYKVTVYATDGKHMLERKFDLPILDGVSKPVISWSCADKTVTCEVAEGSDPKLKLYVNKSTAREGRQKVILWKWNTKWSTLFKCVASNNASEQISMVTISCTGQGLD + ----KNITILGALERDINLDIPAFQMSEHVEDIQWSK??GKTKIAKFKNGSMTFQKDKTYEVLKNGTLKIKH?LERIHEGTYKVDAYDSDGKNVLEETFHLSLLEMVSKPNISWSCTNTTLTCEVTKGTDFELKLYLNGRMIQKSPRKVIVYKRASNQIASFKCTANNTVSEESSSVVIRCTEKGLD + ---ANDDIVWGTLGQDINLDIPDSQ?GINIDDIHWEK??GKKKVARFQISNKPKNPDEKYNVSMNGTLKIKH?LMLEDCDTYKVVIYDKDGKNVLDKTFQLKIQEKVSTPNIDWNCINKTLVCKVSNGTDPELKLYVNGTSIKPVSSKFSTYRFINKQKILVNCTAENKVSKESDVKMITCSEKGLD + ---RDNETIWGVLGHGITLNIPNFQMTDDIDEVRWVR??RGTLVAEFKRKKPPFLISETYEVLANGSLKIKKPMMRNDSGTYNVMVYGTNGMTRLEKDLDVRILERVSKPMIHWECPNTTLTCAVLQGTDFELKLYQGETLLNSLPQKNMSYQW?TNLNAPFKCEAINPVSKESKMEVVNCPEKGLS + 
---RDSGTVWGALGHGINLNIPNFQMTDDIDEVRWER??GSTLVAEFKRKMKPFLKSGAFEILANGDLKIKN?LTRDDSGTYNVTVYSTNGTRILDKALDLRILEMVSKPMIYWECSNATLTCEVLEGTDVELKLYQGKEHLRSLRQKTMSYQW?TNLRAPFKCKAVNRVSQESEMEVVNCPEKGLP; +END; + +BEGIN TREES; + TREE tree = ((((PIG:0.147969,COW:0.213430):0.085099,HORSE:0.165787,CAT:0.264806):0.058611,((RHMONKEY{PR}:0.002015,BABOON{PR}:0.003108){PR}:0.022733,(HUMAN{PR}:0.004349,CHIMP{PR}:0.000799){PR}:0.011873){PR}:0.101856){PR}:0.340802,RAT:0.050958,MOUSE:0.097950); +END; diff --git a/tests/hbltests/libv3/BGM.wbf b/tests/hbltests/libv3/BGM.wbf new file mode 100644 index 000000000..c102a493a --- /dev/null +++ b/tests/hbltests/libv3/BGM.wbf @@ -0,0 +1,12 @@ + +LoadFunctionLibrary ("BGM.bf", {"0" : "nucleotide", "1" : PATH_TO_CURRENT_BF + "data/CD2.nex", "2" : "All", "3": "1e5", "4": "1e4", "5": "100", "6" : "1", "7" : "4"}); +assert (Abs (bgm.report.pairs_found) == 3, "Expected to find three interacting pairs for nucleotide data"); + +LoadFunctionLibrary ("BGM.bf", {"0" : "nucleotide", "1" : PATH_TO_CURRENT_BF + "data/CD2.nex", "2" : "All", "3": "1e5", "4": "1e4", "5": "100", "6" : "1", "7" : "5"}); +assert (utility.Has (bgm.json,terms.fit.MLE,"AssociativeArray") == FALSE, "Expected to find no results when filtering threshold is too high"); + +LoadFunctionLibrary ("BGM.bf", {"0" : "amino-acid", "1" : PATH_TO_CURRENT_BF + "data/CD2.prot", "2" : "JTT", "3" : "All", "4": "1e5", "5": "1e4", "6": "100", "7" : "1", "8" : "4"}); +assert (Abs (bgm.report.pairs_found) >= 5, "Expected to find five or more interacting pairs for protein data"); + +LoadFunctionLibrary ("BGM.bf", {"0" : "codon", "2" : PATH_TO_CURRENT_BF + "data/CD2.nex", "1" : "Universal", "3" : "All", "4": "1e5", "5": "1e4", "6": "100", "7" : "1", "8" : "5"}); +assert (Abs (bgm.report.pairs_found) >= 4, "Expected to find four or more interacting pairs for codon data"); From 819e6974a039e31b8ae85fbf6f9baf9651b1a904 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Mon, 30 Jul 2018 14:01:34 -0400 Subject: [PATCH 
48/53] Error reporting refinements; multiple fixes to _LikelihoodFunction::CheckDependentBounds --- .../SelectionAnalyses/FEL-contrast.bf | 153 ++++++++++++------ .../libv3/UtilityFunctions.bf | 1 + src/core/include/constant.h | 11 +- src/core/likefunc.cpp | 103 ++++++++---- 4 files changed, 190 insertions(+), 78 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf index 8aa4c0dce..0e2a8600b 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf @@ -26,10 +26,10 @@ LoadFunctionLibrary("modules/io_functions.ibf"); */ fel.analysis_description = { - terms.io.info: "FEL-contrast (Fixed Effects Likelihood) investigates whether or not selective pressures differ between two sets of + terms.io.info: "FEL-contrast (Fixed Effects Likelihood) investigates whether or not selective pressures differ between two or more sets of branches at a site. Site-specific synonymous (alpha) and non-synonymous (beta, one per branch set) substitution rates are estimated and then beta rates are tested for equality at each site. LRT (one degree of freedom) is used to assess significance.", - terms.io.version: "0.1", + terms.io.version: "0.2", terms.io.reference: "Kosakovsky Pond SL, Frost SDW, Grossman Z, Gravenor MB, Richman DD, Leigh Brown AJ (2006) Adaptation to Different Human Populations by HIV-1 Revealed by Codon-Based Analyses. 
PLoS Comput Biol 2(6): e62.", terms.io.authors: "Sergei L Kosakovsky Pond", terms.io.contact: "spond@temple.edu", @@ -52,9 +52,10 @@ utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); -fel.site_alpha = "Site relative synonymous rate"; -fel.site_beta_test = "Site relative non-synonymous rate (tested branches)"; +fel.site_alpha = "Site relative synonymous rate"; fel.site_beta_reference = "Site relative non-synonymous rate (reference branches)"; +fel.site_tested_classes = {}; +fel.alpha.scaler = "fel.alpha_scaler"; // default cutoff for printing to screen @@ -120,25 +121,45 @@ fel.pvalue = io.PromptUser ("\n>Select the p-value threshold to use when testin io.ReportProgressMessageMD('FEL', 'selector', 'Branches to use as the test set in the FEL-contrast analysis'); - - -utility.ForEachPair (fel.selected_branches, "_partition_", "_selection_", - "_selection_ = utility.Filter (_selection_, '_value_', '_value_ == terms.tree_attributes.test'); - io.ReportProgressMessageMD('FEL', 'selector', 'Selected ' + Abs(_selection_) + ' branches to include in FEL calculations: \\\`' + Join (', ',utility.Keys(_selection_)) + '\\\`')"); - +fel.branch_sets = {}; + +utility.ForEachPair (fel.selected_branches[0], "_branch_", "_model_", +" + utility.EnsureKey (fel.branch_sets, _model_); + fel.branch_sets[_model_] + _branch_; +"); + +fel.branch_class_count = utility.Array1D (fel.branch_sets); +fel.scaler_parameter_names = {}; + +io.ReportProgressMessageMD('FEL', 'selector', "Selected `fel.branch_class_count` sets of branches to test\n"); + +fel.branch_class_counter = 0; + +utility.ForEachPair (fel.branch_sets, "_group_", "_branches_", + " + if (_group_ != terms.tree_attributes.background) { + fel.site_tested_classes [_group_] = 'Site relative non-synonymous rate (' + _group_ + ' branches)'; + fel.branch_class_counter += 1; + fel.scaler_parameter_names [_group_] = 'fel.beta_scaler_group_' + fel.branch_class_counter; + io.ReportProgressMessageMD('FEL', 'selector', '* Selected ' + 
Abs(_branches_) + ' branches in group _' + _group_ + '_ : \\\`' + Join (', ',_branches_) + '\\\`') + } else { + fel.scaler_parameter_names [_group_] = 'fel.beta_scaler_background'; + fel.site_tested_classes [_group_] = 'Site relative non-synonymous rate (reference branches)' + } + " +); selection.io.startTimer (fel.json [terms.json.timers], "Model fitting",1); -namespace fel { - doGTR ("fel"); -} - estimators.fixSubsetOfEstimates(fel.gtr_results, fel.gtr_results[terms.global]); namespace fel { doPartitionedMG ("fel", FALSE); } +fel.final_partitioned_mg_results = fel.partitioned_mg_results; +/* io.ReportProgressMessageMD ("fel", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); @@ -156,8 +177,6 @@ utility.ForEach (fel.global_dnds, "_value_", 'io.ReportProgressMessageMD ("fel", -estimators.fixSubsetOfEstimates(fel.final_partitioned_mg_results, fel.final_partitioned_mg_results[terms.global]); - //Store MG94 to JSON selection.io.json_store_lf_GTR_MG94 (fel.json, @@ -168,6 +187,9 @@ selection.io.json_store_lf_GTR_MG94 (fel.json, utility.ArrayToDict (utility.Map (fel.global_dnds, "_value_", "{'key': _value_[terms.description], 'value' : Eval({{_value_ [terms.fit.MLE],1}})}")), (fel.final_partitioned_mg_results[terms.efv_estimate])["VALUEINDEXORDER"][0], fel.display_orders[terms.json.global_mg94xrev]); +*/ + +estimators.fixSubsetOfEstimates(fel.final_partitioned_mg_results, fel.final_partitioned_mg_results[terms.global]); utility.ForEachPair (fel.filter_specification, "_key_", "_value_", 'selection.io.json_store_branch_attribute(fel.json, terms.json.global_mg94xrev, terms.branch_length, fel.display_orders[terms.json.global_mg94xrev], @@ -202,6 +224,7 @@ selection.io.startTimer (fel.json [terms.json.timers], "FEL analysis", 2); //---------------------------------------------------------------------------------------- function fel.apply_proportional_site_constraint (tree_name, node_name, 
alpha_parameter, beta_parameter, alpha_factor, beta_factor, branch_length) { + console.log (node_name + " -> " + alpha_factor); fel.branch_length = (branch_length[terms.parameters.synonymous_rate])[terms.fit.MLE]; @@ -214,12 +237,17 @@ function fel.apply_proportional_site_constraint (tree_name, node_name, alpha_par } //---------------------------------------------------------------------------------------- -fel.scalers = {{"fel.alpha_scaler", "fel.beta_scaler_test", "fel.beta_scaler_reference"}}; +model.generic.AddGlobal (fel.site.mg_rev, fel.alpha.scaler, fel.site_alpha); + + +parameters.DeclareGlobal (fel.alpha.scaler, {}); +parameters.DeclareGlobal (fel.scaler_parameter_names, {}); + +utility.ForEachPair (fel.scaler_parameter_names, "_group_", "_name_", +' + model.generic.AddGlobal (fel.site.mg_rev, _name_ , fel.site_tested_classes [_group_]); +'); -model.generic.AddGlobal (fel.site.mg_rev, "fel.alpha_scaler", fel.site_alpha); -model.generic.AddGlobal (fel.site.mg_rev, "fel.beta_scaler_test", fel.site_beta_test); -model.generic.AddGlobal (fel.site.mg_rev, "fel.beta_scaler_reference", fel.site_beta_reference); -parameters.DeclareGlobal (fel.scalers, {}); @@ -238,25 +266,57 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod { ^"fel.alpha_scaler" := 1; } - ^"fel.beta_scaler_test" = 1; - ^"fel.beta_scaler_reference" = 1; - Optimize (results, ^lf); + // all rates free + utility.ForEach (^"fel.scaler_parameter_names", "_pname_", + ' + ^_pname_ = 1; + ' + ); + utility.SetEnvVariable ("VERBOSITY_LEVEL", 10); + + Optimize (results, ^lf); alternative = estimators.ExtractMLEs (lf, model_mapping); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - ^"fel.alpha_scaler" = (^"fel.alpha_scaler" + 3*^"fel.beta_scaler_test")/4; - parameters.SetConstraint ("fel.beta_scaler_test","fel.beta_scaler_reference", ""); + + sum = + utility.Map (^"fel.scaler_parameter_names", "_pname_", + '^_pname_' + ); + + + + // baseline 
NULL (everything = same rate) + + ^(^"fel.alpha.scaler") = (^(^"fel.alpha.scaler")+ ^("fel.branch_class_count")*sum)/(^"fel.branch_class_count" + 1); + + ref_parameter = (^"fel.scaler_parameter_names")["VALUEINDEXORDER"][0]; + + utility.ForEach (^"fel.scaler_parameter_names", "_pname_", + ' + if (_pname_ != ^`&ref_parameter`) { + parameters.SetConstraint (_pname_,^`&ref_parameter`, ""); + } + ' + ); + Optimize (results, ^lf); null = estimators.ExtractMLEs (lf, model_mapping); - null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.null"): null}; + console.log (alternative); + console.log (null); + + assert (0); + + return { + utility.getGlobalValue("terms.alternative") : alternative, + utility.getGlobalValue("terms.null"): null + }; } /* echo to screen calls */ @@ -378,12 +438,13 @@ fel.site_patterns = alignments.Extract_site_patterns ((fel.filter_specification[ utility.ForEach (fel.case_respecting_node_names, "_node_", '_node_class_ = (fel.selected_branches[fel.partition_index])[_node_]; - if (_node_class_ == terms.tree_attributes.test) { + /*if (_node_class_ == terms.tree_attributes.test) { _beta_scaler = fel.scalers[1]; } else { _beta_scaler = fel.scalers[2]; - } - fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.scalers[0], _beta_scaler, (( fel.final_partitioned_mg_results[terms.branch_length])[fel.partition_index])[_node_]); + }*/ + _beta_scaler = fel.scaler_parameter_names[_node_class_]; + fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.alpha.scaler, _beta_scaler, (( fel.final_partitioned_mg_results[terms.branch_length])[fel.partition_index])[_node_]); '); @@ -406,7 +467,7 @@ estimators.ApplyExistingEstimates ("fel.site_likelihood", fel.site_model_mapping fel.queue = mpi.CreateQueue ({"LikelihoodFunctions": {{"fel.site_likelihood"}}, "Models" : 
{{"fel.site.mg_rev"}}, "Headers" : {{"libv3/all-terms.bf"}}, - "Variables" : {{"fel.srv"}} + "Variables" : {{"fel.srv","fel.site_tested_classes","fel.scaler_parameter_names","fel.branch_class_count"}} }); @@ -489,26 +550,28 @@ lfunction fel.select_branches(partition_info) { } } - ChoiceList(testSet, "Choose the branches to use as the _test_ set", 1, NO_SKIP, selectTheseForTesting); - io.CheckAssertion ("`&testSet` >= 0", "User cancelled branch selection; analysis terminating"); + ChoiceList(testSet, "Choose sets of branches to compare. If more than one set is chosen, pairwise comparisons will be carried out in addition to a group-level difference test.", 0, NO_SKIP, selectTheseForTesting); + io.CheckAssertion ("`&testSet[0]` >= 0", "User cancelled branch selection; analysis terminating"); return_set = {}; tree_configuration = {}; - tree_for_analysis = (partition_info[0])[utility.getGlobalValue("terms.data.tree")]; - - tag_test = selectTheseForTesting [testSet][0]; - if (tag_test == "Unlabeled branches") { - tag_test = ""; - } - tag_reference = selectTheseForTesting [referenceSet][0]; - if (tag_reference == "Unlabeled branches") { - tag_reference = ""; + tree_for_analysis = (partition_info[0])[utility.getGlobalValue("terms.data.tree")]; + branch_set_count = utility.Array1D (testSet); + test_sets = {}; + + for (k = 0; k < branch_set_count; k+=1) { + tag_test = selectTheseForTesting [testSet[k]][0]; + if (tag_test == "Unlabeled branches") { + tag_test = ""; + } + test_sets[tag_test] = TRUE; } + utility.ForEachPair (tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")], "_key_", "_value_", " - if (`&tag_test` == _value_ ) { - `&tree_configuration`[_key_] = utility.getGlobalValue('terms.tree_attributes.test'); + if (`&test_sets`[_value_]) { + `&tree_configuration`[_key_] = _value_; } else { `&tree_configuration`[_key_] = utility.getGlobalValue('terms.tree_attributes.background'); } diff --git a/res/TemplateBatchFiles/libv3/UtilityFunctions.bf 
b/res/TemplateBatchFiles/libv3/UtilityFunctions.bf index 78682c03c..df53a3686 100644 --- a/res/TemplateBatchFiles/libv3/UtilityFunctions.bf +++ b/res/TemplateBatchFiles/libv3/UtilityFunctions.bf @@ -576,6 +576,7 @@ function utility.ForEachPair(object, key_name, value_name, transform) { Eval ("`key_name` = None"); Eval ("`value_name` = None"); + if (Type (object) == "AssociativeList") { utility.ForEachPair.keys = Rows (object); ^(key_name) := utility.ForEachPair.keys[utility.ForEachPair.k]; diff --git a/src/core/include/constant.h b/src/core/include/constant.h index 6130bff73..20cc3ff57 100644 --- a/src/core/include/constant.h +++ b/src/core/include/constant.h @@ -47,10 +47,15 @@ class _Constant : public _MathObject { private: template _PMathObj _check_type_and_compute (_PMathObj operand, T functor) { - if (operand && operand->ObjectClass() == NUMBER) { - return new _Constant (functor (Value (), ((_Constant*)operand)->Value())); + if (operand) { + if (operand->ObjectClass() == NUMBER) { + return new _Constant (functor (Value (), ((_Constant*)operand)->Value())); + } else { + WarnError (_String("<'constant' operation 'X'>, where 'X' is not a number. 
\nconstant = ") & (_String((_String*)toStr())) & "\n'X' = " & (_String((_String*)operand->toStr()))); + } + } else { + WarnError (_String("<'constant' operation 'null'>, where constant = ") & (_String((_String*)toStr()))); } - WarnError ("Not a numeric 'X' type in a <'constant' operation 'X'> call"); return new _MathObject; } diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 8a73c8726..9ff0b2a3d 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -2502,6 +2502,7 @@ void _LikelihoodFunction::CheckDependentBounds (void) { _SimpleList badIndices; //indices of dependent variables which are out of bounds nonConstantDep = new _SimpleList; + _SimpleList nonConstantIndices; // for error reporting for (index = 0; indexCompute()->Value(); lowerBounds.theData[index] = cornholio->GetLowerBound(); upperBounds.theData[index] = cornholio->GetUpperBound(); + + //fprintf (stderr, "_LikelihoodFunction::CheckDependentBounds variable %s (%d), current value %g, range %g to %g\n", cornholio->theName->sData, index, currentValues.theData[index], lowerBounds.theData[index], upperBounds.theData[index]); bool badApple = currentValues.theData[index]upperBounds.theData[index]; if (badApple) { badIndices< Constraint violated\n"); } if (cornholio->IsConstant()) { @@ -2523,13 +2527,14 @@ void _LikelihoodFunction::CheckDependentBounds (void) { j = index; // for error reporting at the bottom } else { (*nonConstantDep) << indexDep.lData[index]; + nonConstantIndices << index; } } if (badIndices.lLength && !ohWell) // one of the variables has left its prescribed bounds // build a table of dependancies { - _Matrix dependancies (indexDep.lLength,indexInd.lLength,true,true); + _Matrix dependancies (MAX(3,indexDep.lLength),indexInd.lLength,true,true); // element (i,j) represents the dependance of i-th dep var on the j-th ind var // 0 - no dep, @@ -2540,7 +2545,7 @@ void _LikelihoodFunction::CheckDependentBounds (void) { _Parameter temp = GetIthIndependent(index); 
SetIthIndependent (index,temp*1.000000000001); - for (j=indexDep.lLength-1; j>-1; j--) { + for (j=0; j < indexDep.lLength; j++) { _Parameter temp1 = GetIthDependent(j); if (temp1>currentValues[j]) { dependancies.Store(j,index,1.0); @@ -2550,6 +2555,8 @@ void _LikelihoodFunction::CheckDependentBounds (void) { } SetIthIndependent (index,temp); } + + //fprintf (stderr, "\n%s\n", _String((_String*)dependancies.toStr()).sData); // now we can go through the dependant variables which are out of bounds one at a time // and attempt to move them back in. @@ -2566,26 +2573,31 @@ void _LikelihoodFunction::CheckDependentBounds (void) { // this is equivalent to searching for the matrix of dependancies for a column w/o negative // entries,such that row "index" has "1" in it - _Parameter correlation = 0.; + _Parameter correlation = 0.0; - for (j = indexInd.lLength-1; j>-1; j--) { - if (correlation == dependancies(badVarIndex,j)) { + for (j = 0; j < indexInd.lLength; j++) { + if ((correlation = dependancies(badVarIndex,j)) != 0.) 
{ + // fprintf (stderr, "## %d -> %g\n", j, correlation); for (i=0; isData, j); // now nudge the independent variable (indexed by "j") upward (or downward), // until var badVarIndex is within limits again @@ -2657,37 +2669,68 @@ void _LikelihoodFunction::CheckDependentBounds (void) { // reuse the first two rows of "dependancies" to store // the lower and upper bounds for the dependent vars + //fprintf (stderr, "Trying random permutations...\n"); + + // compile the list of variables that are tied in with bad constraints + _SimpleList _aux; + _AVLList tagged ( &_aux); + + for (long k = 0; k < badIndices.lLength; k++) { + for (long k2 = 0; k2 < indexInd.lLength; k2++) { + if (dependancies(badIndices.Get(k), k2) != 0.0) { + tagged.Insert((BaseRef)k2); + } + } + } + + tagged.ReorderList(); + + // fprintf (stderr, "Tagged the following variables %s\n", _String((_String*)_aux.toStr()).sData); + + for (index = 0; index10?10:GetIthIndependentBound (index,true))-dependancies(0,index)); + dependancies.Store (2,index,GetIthIndependent (index)); } - - for (i=0; i<10000; i++) { - for (index = 0; index < indexInd.lLength; index++) { - SetIthIndependent (index,dependancies(0,index)+genrand_real2()*dependancies(1,index)); - for (j = 0; j < nonConstantDep->lLength; j++) - // check whether any of the dependent variables are out of bounds - { - currentValues.theData[j] = LocateVar(nonConstantDep->lData[j])->Compute()->Value(); - if (currentValues.theData[j]upperBounds.theData[j]) { - badConstraint = nonConstantDep->lData[j]; - break; - } - } - if (j == nonConstantDep->lLength) { - break; - } - } - if(index < indexInd.lLength) { - break; + + // fprintf (stderr, "\n%s\n", _String((_String*)dependancies.toStr()).sData); + + + for (i = 0L; i < 10000L; i++) { + // choose random values for the variables that are involved with bad constraints + for (long v = 0L; v < _aux.lLength; v++) { + index = _aux.Get(v); + SetIthIndependent 
(index,dependancies(0,index)+genrand_real2()*dependancies(1,index)); + //fprintf (stderr, "[%d] %s => %g\n", index, GetIthIndependentName(index)->sData, GetIthIndependent(index)); + } + for (j = 0; j < nonConstantDep->lLength; j++) { + // check whether any of the dependent variables are out of bounds + long j_corrected = nonConstantIndices.Get(j); + currentValues.theData[j_corrected] = LocateVar(nonConstantDep->lData[j])->Compute()->Value(); + //fprintf (stderr, "[%d] %g (%g, %g)\n", j, j_corrected, currentValues.theData[j_corrected], lowerBounds.theData[j_corrected], upperBounds[j_corrected]); + if (currentValues.theData[j_corrected]upperBounds.theData[j_corrected]) { + //fprintf (stderr, "===| CHECK FAILED\n"); + badConstraint = nonConstantDep->lData[j]; + break; } + } + if (j == nonConstantDep->lLength) { + break; + } } + ohWell = i==10000; } if (ohWell) { cornholio = LocateVar(badConstraint); subNumericValues = 3; + + //fprintf (stderr, "%d\n", j); + + j = nonConstantIndices.Get(j); + _String * cStr = (_String*)cornholio->GetFormulaString(), badX = (*cornholio->GetName()) & ":=" & *cStr & " must be in [" & lowerBounds[j] & "," & upperBounds[j] &"]. 
Current value = " & currentValues[j] & "."; @@ -4892,7 +4935,7 @@ void _LikelihoodFunction::_TerminateAndDump(const _String &error) { fwrite ((void*)sLF.getStr(), 1, sLF.Length(), out); fclose (out); - WarnError (_String("Internal error, likelihood function calculation on the same parameter value returned different scores, dumping the offending likelihood function to /tmp/hyphy.dump") & error ); + WarnError (_String("Internal error, dumping the offending likelihood function to /tmp/hyphy.dump") & error ); } From c3ebf72f700ff6bb2c967f63aafec0a2b6e2e7f2 Mon Sep 17 00:00:00 2001 From: Sergei Pond Date: Mon, 30 Jul 2018 14:46:59 -0400 Subject: [PATCH 49/53] More permissive error checks for LEISR and similar LFs --- .../SelectionAnalyses/FEL-contrast.bf | 51 +++++++++++++------ src/core/likefunc.cpp | 19 +++---- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf index 0e2a8600b..11a73d52b 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf @@ -135,17 +135,21 @@ fel.scaler_parameter_names = {}; io.ReportProgressMessageMD('FEL', 'selector', "Selected `fel.branch_class_count` sets of branches to test\n"); fel.branch_class_counter = 0; +fel.branches.testable = {}; +fel.branches.has_background = FALSE; utility.ForEachPair (fel.branch_sets, "_group_", "_branches_", " if (_group_ != terms.tree_attributes.background) { fel.site_tested_classes [_group_] = 'Site relative non-synonymous rate (' + _group_ + ' branches)'; fel.branch_class_counter += 1; + fel.branches.testable + _group_; fel.scaler_parameter_names [_group_] = 'fel.beta_scaler_group_' + fel.branch_class_counter; io.ReportProgressMessageMD('FEL', 'selector', '* Selected ' + Abs(_branches_) + ' branches in group _' + _group_ + '_ : \\\`' + Join (', ',_branches_) + '\\\`') } else { fel.scaler_parameter_names 
[_group_] = 'fel.beta_scaler_background'; - fel.site_tested_classes [_group_] = 'Site relative non-synonymous rate (reference branches)' + fel.site_tested_classes [_group_] = 'Site relative non-synonymous rate (reference branches)'; + fel.branches.has_background = TRUE; } " ); @@ -224,7 +228,7 @@ selection.io.startTimer (fel.json [terms.json.timers], "FEL analysis", 2); //---------------------------------------------------------------------------------------- function fel.apply_proportional_site_constraint (tree_name, node_name, alpha_parameter, beta_parameter, alpha_factor, beta_factor, branch_length) { - console.log (node_name + " -> " + alpha_factor); + //console.log (node_name + " -> " + alpha_factor); fel.branch_length = (branch_length[terms.parameters.synonymous_rate])[terms.fit.MLE]; @@ -261,10 +265,10 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE); if (^"fel.srv"){ - ^"fel.alpha_scaler" = 1; + ^(^"fel.alpha.scaler") = 1; } else { - ^"fel.alpha_scaler" := 1; + ^(^"fel.alpha.scaler") := 1; } // all rates free @@ -278,26 +282,37 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod Optimize (results, ^lf); + snapshot = estimators.TakeLFStateSnapshot (lf); + alternative = estimators.ExtractMLEs (lf, model_mapping); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; sum = + utility.Map (^"fel.scaler_parameter_names", "_pname_", - '^_pname_' + '^_pname_' ); - + testable = utility.Array1D (^"fel.branches.testable"); + denominator = testable; + if (^"fel.branches.has_background") { + denominator = testable + 1; + } // baseline NULL (everything = same rate) - ^(^"fel.alpha.scaler") = (^(^"fel.alpha.scaler")+ ^("fel.branch_class_count")*sum)/(^"fel.branch_class_count" + 1); + if (^"fel.srv") { + ^(^"fel.alpha.scaler") = (^(^"fel.alpha.scaler")+denominator*sum)/denominator; + } ref_parameter = 
(^"fel.scaler_parameter_names")["VALUEINDEXORDER"][0]; + ^ref_parameter = sum /denominator; + utility.ForEach (^"fel.scaler_parameter_names", "_pname_", ' - if (_pname_ != ^`&ref_parameter`) { - parameters.SetConstraint (_pname_,^`&ref_parameter`, ""); + console.log (`&ref_parameter`); + if (_pname_ != `&ref_parameter`) { + parameters.SetConstraint (_pname_,`&ref_parameter`, ""); } ' ); @@ -305,14 +320,20 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod Optimize (results, ^lf); - null = estimators.ExtractMLEs (lf, model_mapping); - null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - - console.log (alternative); - console.log (null); + if (denominator > 2) { + for (v = 0; v < testable; v+=1) { + for (v2 = v + 1; v2 < testable; v2+=1) { + console.log ("Testing " + (^"fel.branches.testable")[v] + " vs " + (^"fel.branches.testable")[v2]); + estimators.RestoreLFStateFromSnapshot (lf_id, snapshot); + } + } + } assert (0); + null = estimators.ExtractMLEs (lf, model_mapping); + null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; + return { utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.null"): null @@ -467,7 +488,7 @@ estimators.ApplyExistingEstimates ("fel.site_likelihood", fel.site_model_mapping fel.queue = mpi.CreateQueue ({"LikelihoodFunctions": {{"fel.site_likelihood"}}, "Models" : {{"fel.site.mg_rev"}}, "Headers" : {{"libv3/all-terms.bf"}}, - "Variables" : {{"fel.srv","fel.site_tested_classes","fel.scaler_parameter_names","fel.branch_class_count"}} + "Variables" : {{"fel.srv","fel.site_tested_classes","fel.scaler_parameter_names","fel.branches.testable","fel.branches.has_background","fel.alpha.scaler"}} }); diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 9ff0b2a3d..3f9c9ba74 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -5239,19 +5239,20 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, 
_Parameter& } - if (successful && !(rightValue<=middleValue && leftValue<=middleValue)) { + if (successful ) { /** SLKP 20180709 need to have a more permissive check, because sometimes if the change is too small (or involves a paremeter that has very little effect on the LF), recomputation could be within numerical error **/ - - char buf[512], buf2[512]; - snprintf (buf, 512, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%20.16g) = %%20.16g [@%%20.16g -> %%20.16g:@%%20.16g -> %%20.16g]]", index, middle, middleValue, left, leftValue,right, rightValue); - snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %20.16g <= %20.16g >= %20.16g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); - - _TerminateAndDump (_String (buf) & "\n" & buf2 & "\nParameter name " & (index >= 0 ? *GetIthIndependentName(index) : "line optimization")); - + if (rightValue - middleValue > 1e-12 || leftValue - middleValue > 1e-12) { + char buf[512], buf2[512]; + snprintf (buf, 512, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%20.16g) = %%20.16g [@%%20.16g -> %%20.16g:@%%20.16g -> %%20.16g]]", index, middle, middleValue, left, leftValue,right, rightValue); + snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %20.16g <= %20.16g >= %20.16g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue); + _TerminateAndDump (_String (buf) & "\n" & buf2 & "\nParameter name " & (index >= 0 ? 
*GetIthIndependentName(index) : "line optimization")); + } + + successful = rightValue<=middleValue && leftValue<=middleValue; } if (verbosityLevel > 100) { @@ -5266,7 +5267,7 @@ long _LikelihoodFunction::Bracket (long index, _Parameter& left, _Parameter& bracketFCount+=likeFuncEvalCallCount-funcCounts; bracketCount++; - return (rightValue<=middleValue && leftValue<=middleValue) ? 0 : -1; + return successful ? 0 : -1; } //_______________________________________________________________________________________ From 8ffcc8f26fd153d15fc655a84daa12f1ff888221 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Mon, 6 Aug 2018 22:31:31 -0400 Subject: [PATCH 50/53] Revised FEL contrast to work on more than two groups of branches --- .../SelectionAnalyses/FEL-contrast.bf | 277 ++++++++++++------ .../libv3/tasks/estimators.bf | 17 +- 2 files changed, 195 insertions(+), 99 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf index 11a73d52b..6808c4f8f 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf @@ -28,7 +28,7 @@ LoadFunctionLibrary("modules/io_functions.ibf"); fel.analysis_description = { terms.io.info: "FEL-contrast (Fixed Effects Likelihood) investigates whether or not selective pressures differ between two or more sets of branches at a site. Site-specific synonymous (alpha) and non-synonymous (beta, one per branch set) substitution rates are estimated - and then beta rates are tested for equality at each site. LRT (one degree of freedom) is used to assess significance.", + and then beta rates are tested for equality at each site. LRT is used to assess significance.", terms.io.version: "0.2", terms.io.reference: "Kosakovsky Pond SL, Frost SDW, Grossman Z, Gravenor MB, Richman DD, Leigh Brown AJ (2006) Adaptation to Different Human Populations by HIV-1 Revealed by Codon-Based Analyses. 
PLoS Comput Biol 2(6): e62.", terms.io.authors: "Sergei L Kosakovsky Pond", @@ -51,7 +51,7 @@ utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE); */ - +terms.fel.pairwise = "pairwise"; fel.site_alpha = "Site relative synonymous rate"; fel.site_beta_reference = "Site relative non-synonymous rate (reference branches)"; fel.site_tested_classes = {}; @@ -80,23 +80,6 @@ fel.display_orders = {terms.original_name: -1, selection.io.startTimer (fel.json [terms.json.timers], "Total time", 0); -fel.table_headers = {{"alpha", "Synonymous substitution rate at a site"} - {"beta_r", "Non-synonymous substitution rate at a site for reference branches"} - {"beta_t", "Non-synonymous substitution rate at a site for test branches"} - {"beta_r=beta_t", "The rate estimate under the constant selective pressure model"} - {"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"} - {"p-value", "Likelihood ration test statistic for beta_r = beta_t, versus beta_r &neq; beta_t"} - {"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}}; - - -/** -This table is meant for HTML rendering in the results web-app; can use HTML characters, the second column -is 'pop-over' explanation of terms. This is ONLY saved to the JSON file. For Markdown screen output see -the next set of variables. 
-*/ - -fel.table_screen_output = {{"Codon", "alpha", "beta-reference", "beta-test", "LRT", "Difference detected?"}}; -fel.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 16, terms.table_options.align : "center"}; namespace fel { @@ -154,16 +137,82 @@ utility.ForEachPair (fel.branch_sets, "_group_", "_branches_", " ); -selection.io.startTimer (fel.json [terms.json.timers], "Model fitting",1); +fel.test_count = 1; +if (fel.branch_class_counter > 2) { + fel.test_count += fel.branch_class_counter * (fel.branch_class_counter-1) / 2; +} + +fel.table_headers = {2 + utility.Array1D (fel.scaler_parameter_names) + fel.test_count, 2}; +fel.table_headers [0][0] = "alpha"; fel.table_headers [0][1] = "Synonymous substitution rate at a site"; + +fel.k = 1; +fel.rate.names = utility.Keys (fel.scaler_parameter_names); +utility.ForEach (fel.rate.names, "_n_", , +' + fel.table_headers [fel.k][0] = "beta (" + _n_ + ")"; + fel.table_headers [fel.k][1] = "Non-synonymous substitution rate at a site for " + _n_ + " branches"; + fel.k += 1; +'); + +fel.report.rate_count = fel.k; + +fel.table_headers [fel.k][0] = "p-value (overall)"; +if (fel.branch_class_counter == 1) { + fel.table_headers [fel.k][1] = "P-value for the test that " + fel.branches.testable[0] + " branches have different non-synonymous rates than background branches"; +} else { + fel.table_headers [fel.k][1] = "P-value for the test that non-synonymous rates differ between any of the selected groups: " + Join (",", fel.branches.testable); +} + +fel.k += 1; + +fel.report.test_count = 1 + fel.branch_class_counter * (fel.branch_class_counter-1) / 2; +fel.tests.key = {fel.report.test_count, 4}; +fel.tests.key [0][0] = "overall"; +fel.tests.key [0][1] = -1; +fel.tests.key [0][3] = fel.report.rate_count; + +fel.test.index = 1; + +for (fel.v = 0; fel.v < fel.branch_class_counter; fel.v += 1) { + for (fel.v2 = fel.v + 1; fel.v2 < fel.branch_class_counter; fel.v2 += 1) { + 
fel.table_headers [fel.k][0] = "P-value for " + fel.branches.testable[fel.v] + " vs " + fel.branches.testable[fel.v2]; + fel.tests.key [fel.test.index][0] = fel.branches.testable[fel.v] + " vs " + fel.branches.testable[fel.v2]; + fel.tests.key [fel.test.index][1] = 1 + fel.v; + fel.tests.key [fel.test.index][2] = 1 + fel.v2; + fel.tests.key [fel.test.index][3] = fel.report.rate_count + fel.test.index; + + fel.table_headers [fel.k][1] = "P-value for the test that non-synonymous rates differ between " + fel.branches.testable[fel.v] + " and " + fel.branches.testable[fel.v2] + " branches"; + fel.k += 1; + fel.test.index += 1; + } +} + +fel.report.counts = {fel.report.test_count,1}; + +fel.table_headers [fel.k][0] = "Total branch length"; +fel.table_headers [fel.k][1] = "The total length of branches contributing to inference at this site, and used to scale beta-alpha"; + + +/** +This table is meant for HTML rendering in the results web-app; can use HTML characters, the second column +is 'pop-over' explanation of terms. This is ONLY saved to the JSON file. For Markdown screen output see +the next set of variables. 
+*/ + +fel.table_screen_output = {{"Codon", "alpha", "beta", "test", "p-value"}}; +fel.table_output_options = {terms.table_options.header : TRUE, terms.table_options.align : "center", + terms.table_options.column_widths : { "0" : 8, "1" : 16, "2" : 30, "3" : 40, "4" : 10}}; +selection.io.startTimer (fel.json [terms.json.timers], "Model fitting",1); estimators.fixSubsetOfEstimates(fel.gtr_results, fel.gtr_results[terms.global]); namespace fel { doPartitionedMG ("fel", FALSE); } -fel.final_partitioned_mg_results = fel.partitioned_mg_results; -/* + +//fel.final_partitioned_mg_results = fel.partitioned_mg_results; + io.ReportProgressMessageMD ("fel", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model"); @@ -191,7 +240,7 @@ selection.io.json_store_lf_GTR_MG94 (fel.json, utility.ArrayToDict (utility.Map (fel.global_dnds, "_value_", "{'key': _value_[terms.description], 'value' : Eval({{_value_ [terms.fit.MLE],1}})}")), (fel.final_partitioned_mg_results[terms.efv_estimate])["VALUEINDEXORDER"][0], fel.display_orders[terms.json.global_mg94xrev]); -*/ + estimators.fixSubsetOfEstimates(fel.final_partitioned_mg_results, fel.final_partitioned_mg_results[terms.global]); @@ -201,8 +250,6 @@ utility.ForEachPair (fel.filter_specification, "_key_", "_value_", selection.io.extract_branch_info((fel.final_partitioned_mg_results[terms.branch_length])[_key_], "selection.io.branch.length"));'); - - selection.io.stopTimer (fel.json [terms.json.timers], "Model fitting"); // define the site-level likelihood function @@ -278,12 +325,9 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod ' ); - utility.SetEnvVariable ("VERBOSITY_LEVEL", 10); - Optimize (results, ^lf); snapshot = estimators.TakeLFStateSnapshot (lf); - alternative = estimators.ExtractMLEs (lf, model_mapping); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; @@ -304,76 +348,92 @@ lfunction 
fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod ^(^"fel.alpha.scaler") = (^(^"fel.alpha.scaler")+denominator*sum)/denominator; } - ref_parameter = (^"fel.scaler_parameter_names")["VALUEINDEXORDER"][0]; - ^ref_parameter = sum /denominator; + ref_parameter = (^"fel.scaler_parameter_names")[(^"fel.branches.testable")["VALUEINDEXORDER"][0]]; - utility.ForEach (^"fel.scaler_parameter_names", "_pname_", - ' - console.log (`&ref_parameter`); - if (_pname_ != `&ref_parameter`) { - parameters.SetConstraint (_pname_,`&ref_parameter`, ""); - } - ' - ); + ^ref_parameter = sum /denominator; + if (testable == 1) { + parameters.SetConstraint ((^"fel.scaler_parameter_names")[^"terms.tree_attributes.background"],ref_parameter, ""); + } else { + utility.ForEach (^"fel.scaler_parameter_names", "_pname_", + ' + //console.log ("REF " + `&ref_parameter`); + if (_pname_ != `&ref_parameter`) { + parameters.SetConstraint (_pname_,`&ref_parameter`, ""); + } + ' + ); + } Optimize (results, ^lf); + null = estimators.ExtractMLEs (lf, model_mapping); + null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - if (denominator > 2) { + if (testable > 2) { + pairwise = {}; for (v = 0; v < testable; v+=1) { for (v2 = v + 1; v2 < testable; v2+=1) { - console.log ("Testing " + (^"fel.branches.testable")[v] + " vs " + (^"fel.branches.testable")[v2]); + v1n = (^"fel.branches.testable")[v]; + v2n = (^"fel.branches.testable")[v2]; + estimators.RestoreLFStateFromSnapshot (lf_id, snapshot); - } + parameters.SetConstraint ((^"fel.scaler_parameter_names")[v1n],(^"fel.scaler_parameter_names")[v2n], ""); + //GetString (lfi, ^lf, -1); + //console.log (lfi); + Optimize (results, ^lf); + pairwise[v1n + "|" + v2n] = estimators.ExtractMLEs (lf, model_mapping); + (pairwise[v1n + "|" + v2n])[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; + } } + } else { + pairwise = None; } - assert (0); - - null = estimators.ExtractMLEs (lf, model_mapping); - null 
[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - return { utility.getGlobalValue("terms.alternative") : alternative, - utility.getGlobalValue("terms.null"): null + utility.getGlobalValue("terms.null"): null, + utility.getGlobalValue("terms.fel.pairwise"): pairwise }; } /* echo to screen calls */ -fel.report.counts = {{0,0}}; - - -fel.report.more_selection = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]), +fel.report.pairwise = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]), Format(fel.report.row[0],10,3), - Format(fel.report.row[1],10,3), - Format(fel.report.row[2],10,3), - Format(fel.report.row[4],10,3), - "Incr. p = " + Format(fel.report.row[5],6,4)}}; + Format(fel.report.row[fel.report.rate1_index],10,3) + " : " + Format(fel.report.row[fel.report.rate2_index],10,3), + fel.report.test , + Format(fel.report.row[fel.report.pvalue_index],6,4)}}; -fel.report.less_selection = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]), +fel.report.overall = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]), Format(fel.report.row[0],10,3), - Format(fel.report.row[1],10,3), - Format(fel.report.row[2],10,3), - Format(fel.report.row[4],10,3), - "Decr. 
p = " + Format(fel.report.row[5],6,4)}}; + Format(fel.report.rates[0],10,3) + " - " + Format(fel.report.rates[fel.report.rate_count-2],10,3), + fel.report.test, + Format(fel.report.row[fel.report.pvalue_index],6,4)}}; + function fel.report.echo (fel.report.site, fel.report.partition, fel.report.row) { - fel.print_row = None; - if (fel.report.row [5] < fel.pvalue) { - if (fel.report.row[1] < fel.report.row[2]) { - fel.print_row = fel.report.more_selection; - fel.report.counts[0] += 1; - } else { - fel.print_row = fel.report.less_selection; - fel.report.counts [1] += 1; + fel.k.bound = Rows (fel.tests.key); + + for (fel.k = 0; fel.k < fel.k.bound; fel.k += 1) { + fel.print_row = None; + if (fel.report.row[fel.tests.key[fel.k][3]] <= fel.p_value) { + fel.report.test = fel.tests.key[fel.k][0]; + fel.report.rate1_index = fel.tests.key[fel.k][1]; + fel.report.pvalue_index = fel.tests.key[fel.k][3]; + if (fel.report.rate1_index >= 0) { + fel.report.rate2_index = fel.tests.key[fel.k][2]; + fel.print_row = fel.report.pairwise; + } else { + fel.print_row = fel.report.overall; + fel.report.rates = Transpose (fel.report.row[{{0,1}}][{{0,fel.report.rate_count-1}}]) % 0; + } + fel.report.counts [fel.k] += 1; } - } - if (None != fel.print_row) { + if (None != fel.print_row) { if (!fel.report.header_done) { io.ReportProgressMessageMD("FEL", "" + fel.report.partition, "For partition " + (fel.report.partition+1) + " these sites are significant at p <=" + fel.pvalue + "\n"); fprintf (stdout, @@ -381,55 +441,81 @@ function fel.report.echo (fel.report.site, fel.report.partition, fel.report.row) fel.report.header_done = TRUE; fel.table_output_options[terms.table_options.header] = FALSE; } - fprintf (stdout, io.FormatTableRow (fel.print_row,fel.table_output_options)); - } + } + + + } lfunction fel.store_results (node, result, arguments) { + //console.log (^"fel.table_headers"); partition_index = arguments [2]; pattern_info = arguments [3]; - result_row = { { 0, // alpha - 0, // beta_r - 
0, // beta_t - 0, // beta_r==beta_t - 0, // LRT - 1, // p-value, - 0 // total branch length of tested branches - } }; + array_size = utility.getGlobalValue ("fel.report.rate_count") + utility.getGlobalValue ("fel.report.test_count") + 1; + + result_row = { 1, array_size } ["_MATRIX_ELEMENT_COLUMN_>=^'fel.report.rate_count'&&_MATRIX_ELEMENT_COLUMN_ Date: Tue, 7 Aug 2018 15:04:33 -0400 Subject: [PATCH 51/53] removing remnants of debugging --- CMakeLists.txt | 8 ++++---- .../SelectionAnalyses/FEL-contrast.bf | 5 ----- .../SelectionAnalyses/modules/grid_compute.ibf | 10 ---------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dab1a456..3f230d477 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,9 +106,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) endif() if(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g -std=gnu++14") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -std=gnu++14") else(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3") endif(${GCC_VERSION} VERSION_LESS 6.0) if(NOAVX) @@ -141,7 +141,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -g") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3") PCL_CHECK_FOR_AVX() if(${HAVE_AVX_EXTENSIONS}) set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=corei7-avx -mtune=corei7-avx ") @@ -150,7 +150,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") PCL_CHECK_FOR_SSE3() if(${HAVE_SSE3_EXTENSIONS}) add_definitions (-D_SLKP_USE_SSE_INTRINSICS) - set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3 -g") + set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3") endif(${HAVE_SSE3_EXTENSIONS}) endif (${HAVE_AVX_EXTENSIONS}) diff --git 
a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf index 11a73d52b..78741a167 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL-contrast.bf @@ -459,11 +459,6 @@ fel.site_patterns = alignments.Extract_site_patterns ((fel.filter_specification[ utility.ForEach (fel.case_respecting_node_names, "_node_", '_node_class_ = (fel.selected_branches[fel.partition_index])[_node_]; - /*if (_node_class_ == terms.tree_attributes.test) { - _beta_scaler = fel.scalers[1]; - } else { - _beta_scaler = fel.scalers[2]; - }*/ _beta_scaler = fel.scaler_parameter_names[_node_class_]; fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.alpha.scaler, _beta_scaler, (( fel.final_partitioned_mg_results[terms.branch_length])[fel.partition_index])[_node_]); '); diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf b/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf index 2e9f81398..bb42b6240 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf +++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/grid_compute.ibf @@ -40,20 +40,10 @@ lfunction pass1.evaluator (lf_id, tasks, scores) { task_count = Abs (tasks); for (i = 0; i < task_count; i+=1) { - //console.log (tasks[task_ids[i]]); - parameters.SetValues (tasks[task_ids[i]]); LFCompute (^lf_id, ll); results [task_ids[i]] = ll; - /*if (ll < -1e10) { - LFCompute (^lf_id, LF_DONE_COMPUTE); - io.SpoolLF (lf_id, "/tmp/RELAX", "alt.lf"); - assert (0); - } - - console.log (ll + "\n\n");*/ - } LFCompute (^lf_id, LF_DONE_COMPUTE); From 2f0c5013a092f0df5c834c3df9e24df777204f8f Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Tue, 7 Aug 2018 15:19:09 -0400 Subject: [PATCH 52/53] adding clang and BGM to integration script. 
--- .gitignore | 1 + .travis.yml | 12 ++++++++---- run_tests.sh | 0 3 files changed, 9 insertions(+), 4 deletions(-) mode change 100644 => 100755 run_tests.sh diff --git a/.gitignore b/.gitignore index 45092eb12..9bdd2419a 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ gmon.out messages.log errors.log tests/hbltests/libv3/data/CD2.nex.*.json +tests/hbltests/libv3/data/CD2.prot.BGM.json diff --git a/.travis.yml b/.travis.yml index 0d139781a..4c74259cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,10 +27,13 @@ env: - METHOD_TEST=tests/hbltests/libv3/aBSREL.wbf - METHOD_TEST=tests/hbltests/libv3/BUSTED.wbf - METHOD_TEST=tests/hbltests/libv3/LEISR.wbf + - METHOD_TEST=tests/hbltests/libv3/BGM.wbf language: c++ + compiler: - gcc + - clang addons: apt: @@ -39,17 +42,18 @@ addons: - ubuntu-toolchain-r-test - george-edison55-precise-backports packages: - - g++-6 - - gcc-6 + - g++-7 + - gcc-7 install: - - if [ "$CXX" = "g++" ]; then export CXX="g++-6" CC="gcc-6"; fi + - if [ "$CXX" = "g++" ]; then export CXX="g++-7" CC="gcc-7"; fi - cmake . 
- make HYPHYMP - make HYPHYGTEST script: - - export OMP_NUM_THREADS=4 + - export OMP_NUM_THREADS=8 + - export LD_LIBRARY_PATH=/usr/local/clang/lib:$LD_LIBRARY_PATH - ./HYPHYGTEST - ./HYPHYMP LIBPATH=`pwd`/res/ tests/hbltests/libv3/math.bf - ./HYPHYMP LIBPATH=`pwd`/res/ tests/hbltests/libv3/iofunctions.bf diff --git a/run_tests.sh b/run_tests.sh old mode 100644 new mode 100755 From 2ab3f38077bc1890bb0db014489103c6afced999 Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Tue, 7 Aug 2018 21:12:45 -0400 Subject: [PATCH 53/53] bumping version --- src/core/strings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/strings.cpp b/src/core/strings.cpp index b6a152b06..a786a0388 100644 --- a/src/core/strings.cpp +++ b/src/core/strings.cpp @@ -69,7 +69,7 @@ #define MOD_ADLER 65521 _String compileDate = __DATE__, - __HYPHY__VERSION__ = _String ("2.3.12.") & compileDate.Cut (7,10) & compileDate.Cut (0,2).Replace("Jan", "01", true). + __HYPHY__VERSION__ = _String ("2.3.14.") & compileDate.Cut (7,10) & compileDate.Cut (0,2).Replace("Jan", "01", true). Replace("Feb", "02", true). Replace("Mar", "03", true). Replace("Apr", "04", true).