diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2a6cfd0ed..ecff8c241 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -685,7 +685,7 @@ makeLink(HYPHYMP hyphy hyphy)
 
 add_test (NAME UNIT-TESTS COMMAND bash run_unit_tests.sh)
 add_test (CODON HYPHYMP tests/hbltests/SimpleOptimizations/SmallCodon.bf)
-add_test (PROTEIN HYPHYMP CPU=1 tests/hbltests/SimpleOptimizations/IntermediateProtein.bf)
+add_test (PROTEIN HYPHYMP tests/hbltests/SimpleOptimizations/IntermediateProtein.bf)
 add_test (MTCODON HYPHYMP tests/hbltests/libv3/mtDNA-code.wbf)
 add_test (ALGAE HYPHYMP tests/hbltests/libv3/algae-mtDNA.wbf)
 add_test (CILIATES HYPHYMP tests/hbltests/libv3/ciliate-code.wbf)
@@ -701,7 +701,7 @@ add_test (RELAX HYPHYMP tests/hbltests/libv3/RELAX.wbf)
 add_test (FUBAR HYPHYMP tests/hbltests/libv3/FUBAR.wbf)
 add_test (BGM HYPHYMP tests/hbltests/libv3/BGM.wbf)
 add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf)
-add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf)
+add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf ENV="TOLERATE_NUMERICAL_ERRORS=1;")
 add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf)
 add_test (ABSREL HYPHYMP tests/hbltests/libv3/ABSREL.wbf)
 #add_test (FMM HYPHYMP tests/hbltests/libv3/FMM.wbf)
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
index 166eee840..20db267fb 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
@@ -276,7 +276,7 @@ relax.do_srv = io.SelectAnOption (
 if (relax.do_srv == "Branch-site") {
     relax.do_bs_srv = TRUE;
     relax.do_srv = TRUE;
-    (relax.json[terms.json.analysis])[terms.settings] = "Branch-site synonymous rate variation";
+    selection.io.json_store_setting (relax.json, "SRV type", "Branch-site synonymous rate variation");
 } else {
     if (relax.do_srv == "Yes") {
         relax.do_bs_srv = FALSE;
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
index 10dd461f0..29fca5e93 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
@@ -453,7 +453,8 @@ function doGTR (prefix) {
                 terms.nucleotideRate ("A","C") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} ,
                 terms.nucleotideRate ("A","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} ,
                 terms.nucleotideRate ("C","G") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} ,
-                terms.nucleotideRate ("G","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25}
+                terms.nucleotideRate ("G","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25} ,
+                terms.nucleotideRate ("C","T") : { utility.getGlobalValue ("terms.fit.MLE") : 1.00}
             }
         });
 
diff --git a/res/TemplateBatchFiles/libv3/models/codon/MSS.bf b/res/TemplateBatchFiles/libv3/models/codon/MSS.bf
index f4100afe8..0037157e3 100644
--- a/res/TemplateBatchFiles/libv3/models/codon/MSS.bf
+++ b/res/TemplateBatchFiles/libv3/models/codon/MSS.bf
@@ -14,7 +14,9 @@ terms.model.MSS.syn_rate_within = " within codon class ";
 terms.model.MSS.syn_rate_between = " between codon classes ";
 terms.model.MSS.between = "synonymous rate between codon classes";
 terms.model.MSS.neutral = "neutral reference";
+terms.model.MSS.empirical = "empirical";
 terms.model.MSS.codon_classes = "codon classes";
+terms.model.MSS.codon_pairs = "codon pairs";
 terms.model.MSS.normalize = "normalize rates";
 
 //----------------------------------------------------------------------------------------------------------------
@@ -32,7 +34,9 @@ lfunction model.codon.MSS.prompt_and_define_freq (type, code, freq) {
                 {"SynREV2g", "Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (Valine == neutral). All between-class synonymous substitutions share a rate."}
                 {"SynREVCodon", "Each codon pair that is exchangeable gets its own substitution rate (fully estimated, mean = 1)"}
                 {"Random", "Random partition (specify how many classes; largest class = neutral)"}
+                {"Empirical", "Load a TSV file with an empirical rate estimate for each codon pair"}
                 {"File", "Load a TSV partition from file (prompted for neutral class)"}
+                {"Codon-file", "Load a TSV partition for pairs of codons from a file (prompted for neutral class)"}
             },
             "Synonymous Codon Class Definitions");
 
@@ -182,6 +186,18 @@
         return models.codon.MSS.ModelDescription(type, code, models.codon.MSS.LoadClasses (null));
     }
 
+    if (partitioning_option == "Codon-file") {
+        KeywordArgument ("mss-file", "File defining the model partition");
+        KeywordArgument ("mss-neutral", "Designation for the neutral substitution rate");
+        return models.codon.MSS.ModelDescription(type, code, models.codon.MSS.LoadClassesCodon (null));
+    }
+
+    if (partitioning_option == "Empirical") {
+        KeywordArgument ("mss-file", "File defining empirical rates for each pair of codons");
+        return models.codon.MSS.ModelDescription(type, code, models.codon.MSS.LoadEmpiricalRates (null));
+    }
+
+    return {};
 }
 
 
@@ -209,6 +225,10 @@ lfunction models.codon.MSS.ModelDescription(type, code, codon_classes) {
     m[utility.getGlobalValue("terms.model.q_ij")] = "models.codon.MSS._GenerateRate";
     m[utility.getGlobalValue("terms.model.MSS.codon_classes")] = codon_classes [^"terms.model.MSS.codon_classes"];
     m[utility.getGlobalValue("terms.model.MSS.neutral")] = codon_classes [^"terms.model.MSS.neutral"];
+    m[utility.getGlobalValue("terms.model.MSS.empirical")] = codon_classes [^"terms.model.MSS.empirical"];
+    m[utility.getGlobalValue("terms.model.MSS.codon_pairs")] = codon_classes [^"terms.model.MSS.codon_pairs"];
+
+
     if (codon_classes/utility.getGlobalValue("terms.model.MSS.between")) {
         m[^"terms.model.MSS.between"] = codon_classes [^"terms.model.MSS.between"];
     }
@@ -257,6 +277,8 @@ lfunction models.codon.MSS._GenerateRate (fromChar, toChar, namespace, model_typ
     alpha_term = utility.getGlobalValue ("terms.parameters.synonymous_rate");
     beta_term = utility.getGlobalValue ("terms.parameters.nonsynonymous_rate");
     nr = model[utility.getGlobalValue("terms.model.MSS.neutral")];
+    empirical = model[utility.getGlobalValue("terms.model.MSS.empirical")];
+    codon_pairs = model[utility.getGlobalValue("terms.model.MSS.codon_pairs")];
     omega = "omega";
     alpha = "alpha";
     beta = "beta";
@@ -298,63 +320,86 @@ lfunction models.codon.MSS._GenerateRate (fromChar, toChar, namespace, model_typ
             rate_entry += "*" + aa_rate;
         } else {
-            class_from = (model[^"terms.model.MSS.codon_classes"])[fromChar];
-            class_to = (model[^"terms.model.MSS.codon_classes"])[toChar];
-
-
-            if ((Abs (class_from) && Abs (class_to)) == FALSE) {
-                class_from = (model[^"terms.model.MSS.codon_classes"])[fromChar+toChar];
-                class_to = class_from;
-            }
-
-            assert (Abs (class_from) && Abs (class_to), "The neutral class for `fromChar` to `toChar` is not specified in the model definition");
-
-            if (class_from == class_to) {
-                if (class_from == nr) {
-                    if (model_type == utility.getGlobalValue("terms.local")) {
-                        codon_rate = alpha + "_" + class_from;
-                        (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_within" + class_from] = codon_rate;
-                        rate_entry += "*" + codon_rate;
-                    } else {
-                        rate_entry = nuc_rate;
-                    }
+            if (empirical) {
+                if (fromChar < toChar) {
+                    key = fromChar + "|" + toChar;
                 } else {
-                    if (model_type == utility.getGlobalValue("terms.local")) {
-                        codon_rate = alpha + "_" + class_from;
+                    key = toChar + "|" + fromChar;
+                }
+
+                assert (model[^"terms.model.MSS.codon_classes"] / key, "Rate for codon pair `key` was missing from the empirical file definition");
+                rate_entry += "*" + (model[^"terms.model.MSS.codon_classes"])[key];
+
+            } else {
+                if (codon_pairs) {
+                    if (fromChar < toChar) {
+                        key = fromChar + "|" + toChar;
                     } else {
-                        codon_rate = parameters.ApplyNameSpace(alpha + "_" + class_from, namespace);
+                        key = toChar + "|" + fromChar;
                     }
-                    (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_within" + class_from] = codon_rate;
-                    rate_entry += "*" + codon_rate;
+                    assert (model[^"terms.model.MSS.codon_classes"] / key, "Rate for codon pair `key` was missing from the empirical file definition");
+                    class_from = (model[^"terms.model.MSS.codon_classes"])[key];
+                    class_to = class_from;
+                } else {
+                    class_from = (model[^"terms.model.MSS.codon_classes"])[fromChar];
+                    class_to = (model[^"terms.model.MSS.codon_classes"])[toChar];
                 }
-            } else {
-                if (class_from > class_to) {
-                    codon_rate = class_to;
+
+
+                if ((Abs (class_from) && Abs (class_to)) == FALSE) {
+                    class_from = (model[^"terms.model.MSS.codon_classes"])[fromChar+toChar];
                     class_to = class_from;
-                    class_from = codon_rate;
                 }
-                if (Abs (between_rate)) {
-                    if (model_type == utility.getGlobalValue("terms.local")) {
-                        codon_rate = between_rate;
+
+                assert (Abs (class_from) && Abs (class_to), "The neutral class for `fromChar` to `toChar` is not specified in the model definition");
+
+                if (class_from == class_to) {
+                    if (class_from == nr) {
+                        if (model_type == utility.getGlobalValue("terms.local")) {
+                            codon_rate = alpha + "_" + class_from;
+                            (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_within" + class_from] = codon_rate;
+                            rate_entry += "*" + codon_rate;
+                        } else {
+                            rate_entry = nuc_rate;
+                        }
                     } else {
-                        codon_rate = parameters.ApplyNameSpace(between_rate, namespace);
+                        if (model_type == utility.getGlobalValue("terms.local")) {
+                            codon_rate = alpha + "_" + class_from;
+                        } else {
+                            codon_rate = parameters.ApplyNameSpace(alpha + "_" + class_from, namespace);
+                        }
+                        (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_within" + class_from] = codon_rate;
+                        rate_entry += "*" + codon_rate;
                     }
-                    (_GenerateRate.p[model_type])[^"terms.model.MSS.between"] = codon_rate;
-                } else {
-                    if (class_from + class_to == nr) {
-                        //console.log ("NEUTRAL");
-                        codon_rate = 1;
-                    } else {
+                    if (class_from > class_to) {
+                        codon_rate = class_to;
+                        class_to = class_from;
+                        class_from = codon_rate;
+                    }
+                    if (Abs (between_rate)) {
                         if (model_type == utility.getGlobalValue("terms.local")) {
-                            codon_rate = alpha + "_" + class_from + "_" + class_to;
+                            codon_rate = between_rate;
+                        } else {
+                            codon_rate = parameters.ApplyNameSpace(between_rate, namespace);
+                        }
+                        (_GenerateRate.p[model_type])[^"terms.model.MSS.between"] = codon_rate;
+
+                    } else {
+                        if (class_from + class_to == nr) {
+                            //console.log ("NEUTRAL");
+                            codon_rate = 1;
                         } else {
-                            codon_rate = parameters.ApplyNameSpace(alpha + "_" + class_from + "_" + class_to, namespace);
+                            if (model_type == utility.getGlobalValue("terms.local")) {
+                                codon_rate = alpha + "_" + class_from + "_" + class_to;
+                            } else {
+                                codon_rate = parameters.ApplyNameSpace(alpha + "_" + class_from + "_" + class_to, namespace);
+                            }
+                            (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_between" + class_from + " and " + class_to] = codon_rate;
                         }
-                        (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_between" + class_from + " and " + class_to] = codon_rate;
                     }
+                    rate_entry += "*" + codon_rate;
                 }
-                rate_entry += "*" + codon_rate;
             }
         }
@@ -363,6 +408,27 @@
     return _GenerateRate.p;
 }
+
+//----------------------------------------------------------------------------------------------------------------
+
+lfunction models.codon.MSS.LoadEmpiricalRates (file) {
+
+    SetDialogPrompt ("A TSV file with three columns (Codon1, Codon2, Empirical Rate) which is used to define relative synonymous substitution rates");
+    classes = io.ReadDelimitedFile (file, "\t", TRUE);
+    headers = utility.Array1D(classes[^'terms.io.header']);
+    io.CheckAssertion("`&headers`>=3", "Expected a TSV file with at least 3 columns; 2nd column is the codon, 3rd is the class for this codon");
+    codon_pairs = {};
+    for (_record_; in; classes [^"terms.io.rows"]) {
+        if (_record_[0] < _record_[1]) {
+            key = _record_[0] + "|" + _record_[1];
+        } else {
+            key = _record_[1] + "|" + _record_[0];
+        }
+        codon_pairs [key] = Eval (_record_[2]);
+    }
+
+    return {^"terms.model.MSS.codon_classes" : codon_pairs, ^"terms.model.MSS.empirical" : TRUE};
+}
 
 
 //----------------------------------------------------------------------------------------------------------------
 
@@ -391,3 +457,37 @@
     return {^"terms.model.MSS.codon_classes" : codons_by_class, ^"terms.model.MSS.neutral" : nr};
 }
+
+//----------------------------------------------------------------------------------------------------------------
+
+lfunction models.codon.MSS.LoadClassesCodon (file) {
+
+    SetDialogPrompt ("A TSV file with three columns (Codon1, Codon2, Class) which is used to partition synonymous substitutions into groups");
+    classes = io.ReadDelimitedFile (file, "\t", TRUE);
+    headers = utility.Array1D(classes[^'terms.io.header']);
+    io.CheckAssertion("`&headers`>=3", "Expected a TSV file with at least 3 columns; 2nd column is the codon, 3rd is the class for this codon");
+    codon_pairs = {};
+    for (_record_; in; classes [^"terms.io.rows"]) {
+        if (_record_[0] < _record_[1]) {
+            key = _record_[0] + "|" + _record_[1];
+        } else {
+            key = _record_[1] + "|" + _record_[0];
+        }
+        codon_pairs [key] = _record_[2];
+    }
+
+    classes = utility.UniqueValues(codon_pairs);
+    class_count = utility.Array1D(classes);
+    io.CheckAssertion("`&class_count`>=2", "Expected at least 2 codon classes");
+
+    choices = {class_count,2};
+    for (i = 0; i < class_count; i += 1) {
+        choices[i][0] = classes[i];
+        choices[i][1] = "Codon class " + classes[i];
+    }
+
+    nr= io.SelectAnOption (choices, "Select the codon class which will serve as the neutral rate reference (relative rate = 1)");
+
+
+    return {^"terms.model.MSS.codon_classes" : codon_pairs, ^"terms.model.MSS.neutral" : nr, ^"terms.model.MSS.codon_pairs" : TRUE};
+}
 
diff --git a/res/TemplateBatchFiles/libv3/models/parameters.bf b/res/TemplateBatchFiles/libv3/models/parameters.bf
index 3556f448d..fd96212ee 100644
--- a/res/TemplateBatchFiles/libv3/models/parameters.bf
+++ b/res/TemplateBatchFiles/libv3/models/parameters.bf
@@ -853,18 +853,23 @@ lfunction parameters.helper.tree_lengths_to_initial_values(dict, type) {
 
     for (i = 0; i < components; i += 1) {
         this_component = {};
+        for (_branch_name_ , _branch_length_; in; (dict[i])[ utility.getGlobalValue("terms.branch_length")]) {
+            this_component [_branch_name_] = {
+                utility.getGlobalValue('terms.fit.MLE') :
+                factor*_branch_length_
+            };
+        }
-
-        utility.ForEachPair((dict[i])[ utility.getGlobalValue("terms.branch_length")], "_branch_name_", "_branch_length_",
-        "
-            `&this_component`[_branch_name_] = {utility.getGlobalValue('terms.fit.MLE') : `&factor`*_branch_length_}
-        ");
-        result[i] = this_component;
+        if (Abs (this_component)) {
+            result[i] = this_component;
+        }
     }
 
-    return { utility.getGlobalValue("terms.branch_length"): result
+    return {
+        utility.getGlobalValue("terms.branch_length"): result
     };
+
 }
 
 /**
diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp
index 0ded18de0..714daeccf 100644
--- a/src/core/global_things.cpp
+++ b/src/core/global_things.cpp
@@ -122,7 +122,7 @@ namespace hy_global {
     kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"),
     kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"),
     kErrorNumerical ("To treat numerical errors as warnings, please specify \"ENV=TOLERATE_NUMERICAL_ERRORS=1;\" as the command line argument. This often resolves the issue, which is indicative of numerical instability."),
-    kHyPhyVersion = _String ("2.5.64"),
+    kHyPhyVersion = _String ("2.5.65"),
     kNoneToken = "None",
     kNullToken = "null",
 
diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp
index da672e958..f9684ac9e 100644
--- a/src/core/likefunc.cpp
+++ b/src/core/likefunc.cpp
@@ -3105,7 +3105,7 @@ inline hyFloat sqr (hyFloat x)
 
         _Matrix * eq_freqs = GetIthFrequencies(partition_index);
         unsigned long tip_count = filter->NumberSpecies();
 
-        if (filter->GetData()->GetTT()->IsStandardNucleotide() && filter->IsNormalFilter() && tip_count<150 && eq_freqs->IsIndependent()) {
+        if (filter->GetData()->GetTT()->IsStandardNucleotide() && filter->IsNormalFilter() && tip_count < 300 && eq_freqs->IsIndependent()) {
             // if not - use distance estimates
             if (tree->IsDegenerate()) {
@@ -4341,15 +4341,6 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
     _variables_changed_during_last_compute = new _SimpleList ();
     variables_changed_during_last_compute = new _AVLList (_variables_changed_during_last_compute);
 
-#ifdef __HYPHYMPI__
-    if (hy_mpi_node_rank == 0) {
-#endif
-        BenchmarkThreads (this);
-#ifdef __HYPHYMPI__
-    }
-
-
-#endif
     //ObjectToConsole(variables_changed_during_last_compute);
     //NLToConsole();
 
@@ -4489,7 +4480,14 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
 
     }
 
-
+#ifdef __HYPHYMPI__
+    if (hy_mpi_node_rank == 0) {
+#endif
+        BenchmarkThreads (this);
+#ifdef __HYPHYMPI__
+    }
+#endif
+
     _OptimiztionProgress progress_tracker;
 
     //checkParameter (optimizationMethod,optMethodP,4.0);
@@ -4604,11 +4602,12 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
 
                 hyFloat grad_precision;
 
                 if (maxSoFar > -INFINITY) {
-                    grad_precision = MIN (1000., -maxSoFar / 500.);
+                    grad_precision = MIN (1000., -maxSoFar * 0.002);
                 } else {
-                    grad_precision = -0.005;
+                    grad_precision = -0.0025;
                 }
+
                 if (gradientBlocks.nonempty()) {
                     for (long b = 0; b < gradientBlocks.lLength; b++) {
                         _SimpleList * this_block = (_SimpleList*)gradientBlocks(b);
@@ -4930,7 +4929,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
                     _Matrix bestMSoFar;
                     GetAllIndependent (bestMSoFar);
                     hyFloat prec = Minimum (diffs[0], diffs[1]),
-                            grad_precision = Maximum(diffs[0],diffs[1]);
+                            grad_precision = Maximum (precision, Maximum(diffs[0],diffs[1]));
 
                     prec = Minimum (Maximum (prec, precision), 1.);
 
@@ -5119,9 +5118,9 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
                     //verbosity_level = 101;
                     if (use_adaptive_step) {
                         if (convergenceMode < 2) {
-                            LocateTheBump (current_index,precisionStep, maxSoFar, bestVal, go2Bound, precisionStep);
+                            LocateTheBump (current_index,precisionStep, maxSoFar, bestVal, go2Bound, precisionStep * 0.5);
                         } else {
-                            LocateTheBump (current_index,precisionStep, maxSoFar, bestVal, go2Bound, precisionStep);//convergenceMode == 2? precisionStep*0.25: precisionStep*0.0625);
+                            LocateTheBump (current_index,precisionStep, maxSoFar, bestVal, go2Bound, convergenceMode == 2? precisionStep*0.25: precisionStep*0.0625);
                         }
                     } else {
                         LocateTheBump (current_index,brackStep, maxSoFar, bestVal, go2Bound);
@@ -6516,7 +6515,7 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
             hyFloat currentValue = GetIthIndependent(index),
                     ub = GetIthIndependentBound(index,false)-currentValue,
                     lb = currentValue-GetIthIndependentBound(index,true),
-                    testStep = MAX(currentValue * gradientStep,gradientStep);
+                    testStep = MAX(fabs(currentValue) * gradientStep,gradientStep);
 
             //printf ("%ld %s %20.18g\n", index, GetIthIndependentName (index)->get_str(), currentValue);
 
@@ -6691,6 +6690,10 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
 
     //printf ("\n\n_LikelihoodFunction::ConjugateGradientDescent ==> %d (%lg)\n", usedCachedResults, maxSoFar);
 
+    if (maxSoFar == -INFINITY) {
+        return maxSoFar;
+    }
+
     if (min_improvement_to_contuinue < 0.) {
         min_improvement_to_contuinue = initial_value * min_improvement_to_contuinue;
     }
diff --git a/src/core/likefunc2.cpp b/src/core/likefunc2.cpp
index d5aa49978..4710838e4 100644
--- a/src/core/likefunc2.cpp
+++ b/src/core/likefunc2.cpp
@@ -1254,8 +1254,11 @@ hyFloat mapParameterToInverval (hyFloat in, char type, bool inverse) {
             break;
         case _hyphyIntervalMapSqueeze:
             if (inverse) {
-                return in/(1.-in);
+                in = in/(1.-in);
+                //return in;
+                return in * in;
             } else {
+                in = sqrt (in);
                 return in/(1.+in);
             }
             break;
diff --git a/tests/hbltests/SimpleOptimizations/IntermediateProtein.bf b/tests/hbltests/SimpleOptimizations/IntermediateProtein.bf
index 7375d46e9..3391257ba 100644
--- a/tests/hbltests/SimpleOptimizations/IntermediateProtein.bf
+++ b/tests/hbltests/SimpleOptimizations/IntermediateProtein.bf
@@ -1498,7 +1498,7 @@ return 0;
 */
 
 OPTIMIZATION_PRECISION = 0.001;
-VERBOSITY_LEVEL = 1;
+VERBOSITY_LEVEL = 10;
 USE_ADAPTIVE_VARIABLE_STEP = 1;
 OPTIMIZATION_METHOD = 4;