From d1b4eeb7e716389871fd57409c44a5faa589ce6f Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 14 Dec 2017 16:00:30 -0500 Subject: [PATCH 1/8] update LEISR version to reflect JSON change --- res/TemplateBatchFiles/LEISR.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/LEISR.bf b/res/TemplateBatchFiles/LEISR.bf index faf7b15fd..b3786fa6d 100644 --- a/res/TemplateBatchFiles/LEISR.bf +++ b/res/TemplateBatchFiles/LEISR.bf @@ -31,7 +31,7 @@ utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); leisr.analysis_description = { terms.io.info: "LEISR (Likelihood Estimation of Individual Site Rates) infer relative amino-acid or nucleotide rates from a fixed nucleotide or amino-acid alignment and tree, with possibility for partitions. Relative site-specific substitution rates are inferred by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler.", - terms.io.version: "0.3", + terms.io.version: "0.4", terms.io.reference: "Spielman, S.J. and Kosakovsky Pond, S.L. Relative evolutionary rate inference in HyPhy with LEISR. bioRxiv. https://doi.org/10.1101/206011. (2017); Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).", terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", terms.io.contact: "{spond,stephanie.spielman}@temple.edu" From cf90d052482b4cb0d628607bbea770b43d687e7a Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Sat, 16 Dec 2017 14:25:30 -0500 Subject: [PATCH 2/8] Minor bug in frequency specification addressed --- res/TemplateBatchFiles/LEISR.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/LEISR.bf b/res/TemplateBatchFiles/LEISR.bf index b3786fa6d..cd0062307 100644 --- a/res/TemplateBatchFiles/LEISR.bf +++ b/res/TemplateBatchFiles/LEISR.bf @@ -77,7 +77,7 @@ leisr.analysis_type = io.SelectAnOption ({{leisr.protein_type , "Infer relative if (leisr.analysis_type == leisr.protein_type) { leisr.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Select a protein model:"); - leisr.generators = models.protein.empirical.default_generators; + leisr.generators = models.protein.empirical.plusF_generators; } else { From 621da1d525f91791e262082cd2dd1762eb8f4523 Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Sat, 30 Dec 2017 15:16:54 -0500 Subject: [PATCH 3/8] Closes issue 709 --- .../SelectionAnalyses/MEME.bf | 15 ++++++--- res/TemplateBatchFiles/libv3/IOFunctions.bf | 2 ++ .../libv3/tasks/alignments.bf | 32 +++++++++++++++++++ src/core/category.cpp | 1 - src/core/include/likefunc.h | 1 - src/core/include/parser.h | 5 ++- src/core/likefunc.cpp | 11 +++---- src/mains/unix.cpp | 2 +- 8 files changed, 55 insertions(+), 14 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index 417e8f0b8..cb27ad0fe 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -149,6 +149,7 @@ meme.final_partitioned_mg_results = estimators.FitMGREV (meme.filter_names, meme }, meme.partitioned_mg_results); +//meme.final_partitioned_mg_results = meme.partitioned_mg_results; io.ReportProgressMessageMD("MEME", "codon-refit", "* Log(L) = " + Format(meme.final_partitioned_mg_results[terms.fit.log_likelihood],8,2)); @@ -298,21 +299,20 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme )); __make_filter ("meme.site_filter"); - LikelihoodFunction meme.site_likelihood = (meme.site_filter, meme.site_tree_fel); - __make_filter ("meme.site_filter_bsrel"); estimators.ApplyExistingEstimates ("meme.site_likelihood", meme.site_model_mapping, meme.final_partitioned_mg_results, terms.globals_only); + - + __make_filter ("meme.site_filter_bsrel"); LikelihoodFunction meme.site_likelihood_bsrel = (meme.site_filter_bsrel, meme.site_tree_bsrel); estimators.ApplyExistingEstimates ("meme.site_likelihood_bsrel", meme.site_model_mapping, meme.final_partitioned_mg_results, - "globals only"); + terms.globals_only); meme.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"meme.site_likelihood","meme.site_likelihood_bsrel"}}, terms.mpi.Models : {{"meme.site.background_fel","meme.site.bsrel"}}, @@ -458,11 +458,15 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa bsrel_tree_id = (lfInfo["Trees"])[0]; utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE); + utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE); + utility.SetEnvVariable ("VERBOSITY_LEVEL", 500); ^"meme.site_alpha" = 1; ^"meme.site_beta_plus" = 1; ^"meme.site_beta_nuisance" = 1; + console.log ("Optimizing FEL for pattern " + pattern_info); + io.SpoolLF (lf_fel, "/tmp/meme.debug", "FEL"); Optimize (results, ^lf_fel); fel = estimators.ExtractMLEs (lf_fel, model_mapping); @@ -477,6 +481,8 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa /* avoid 0/0 by making the denominator non-zero*/ } + console.log ("Optimizing MEME for pattern " + pattern_info); + io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME"); Optimize (results, ^lf_bsrel); alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping); @@ -554,6 +560,7 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa //---------------------------------------------------------------------------------------- function meme.report.echo (meme.report.site, meme.report.partition, meme.report.row) { + meme.print_row = None; if (meme.report.row [6] <= meme.pvalue) { meme.print_row = meme.report.positive_site; diff --git a/res/TemplateBatchFiles/libv3/IOFunctions.bf b/res/TemplateBatchFiles/libv3/IOFunctions.bf index 4f3c65f18..36f347c8e 100644 --- a/res/TemplateBatchFiles/libv3/IOFunctions.bf +++ b/res/TemplateBatchFiles/libv3/IOFunctions.bf @@ -663,6 +663,7 @@ lfunction io.ReadDelimitedFile (path, separator, has_header) { fscanf (path, REWIND, "Lines", data); } else { fscanf (PROMPT_FOR_FILE, REWIND, "Lines", data); + path = utility.getGlobalValue("LAST_FILE_PATH"); } result = {utility.getGlobalValue("terms.io.rows") : {}}; index = 0; @@ -674,6 +675,7 @@ lfunction io.ReadDelimitedFile (path, separator, has_header) { for (k = index; k < row_count; k+=1) { result [utility.getGlobalValue("terms.io.rows")] + regexp.Split (data[k], separator); } + result[utility.getGlobalValue("terms.json.file")] = path; return result; } diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 5d56c09a2..631e66a60 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -248,6 +248,38 @@ function alignments.ReadNucleotideAlignment(file_name, dataset_name, datafilter_ return data_info; } +/** + * Take an input filter, replace all identical sequences with a single copy + * Optionally, rename the sequences to indicate copy # by adding ':copies' + * @name alignments.CompressDuplicateSequences + * @param {String} filter_in - The name of an existing filter + * @param {String} filter_out - the name (to be created) of the filter where the compressed sequences will go) + * @param {Bool} rename - if true, rename the sequences + * @returns the number of unique sequences + */ +lfunction alignments.CompressDuplicateSequences (filter_in, filter_out, rename) { + GetDataInfo (duplicate_info, ^filter_in, -2); + + DataSetFilter ^filter_out = CreateFilter (^filter_in, 1, "", Join (",", duplicate_info["UNIQUE_INDICES"])); + + if (rename) { + utility.ForEachPair (duplicate_info["UNIQUE_INDICES"], + "_idx_", + "_seq_idx_", + ' + GetString (_seq_name_, ^`&filter_in`, _seq_idx_); + _seq_name_ += ":" + ((`&duplicate_info`)["UNIQUE_COUNTS"])[_idx_[1]]; + SetParameter (^`&filter_in`,_seq_idx_,_seq_name_); + '); + + } + + + + return duplicate_info["UNIQUE_SEQUENCES"]; + //DataSetFilter ^filter_out = CreateFilter (filter_in); +} + /** * Defines filters for multiple partitions * @name alignments.DefineFiltersForPartitions diff --git a/src/core/category.cpp b/src/core/category.cpp index ffe483193..bd047235e 100644 --- a/src/core/category.cpp +++ b/src/core/category.cpp @@ -68,7 +68,6 @@ extern _List modelNames; extern _SimpleList modelMatrixIndices, modelFrequenciesIndices; -bool CheckEqual (_Parameter, _Parameter); //___________________________________________________________________________________________ diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index 0a02eae6c..34b86d115 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -777,7 +777,6 @@ reduceLFSmoothing ; -bool CheckEqual (_Parameter, _Parameter); void StateCounterResultHandler (_Formula&, _SimpleList*,long&,long&,long,_Matrix&,_Matrix&); _LikelihoodFunction* diff --git a/src/core/include/parser.h b/src/core/include/parser.h index 475bd1fe8..0dea13446 100644 --- a/src/core/include/parser.h +++ b/src/core/include/parser.h @@ -157,7 +157,10 @@ void ExportCatVariables void SplitVariablesIntoClasses (_SimpleList&, _SimpleList&, _SimpleList&, _SimpleList&); -bool CheckEqual (_Parameter,_Parameter); + + +extern _Parameter machineEps; +bool CheckEqual (_Parameter,_Parameter, _Parameter = machineEps); extern _AVLListX _hyApplicationGlobals; diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 7dc3921d8..aeff9f76b 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -289,7 +289,6 @@ dataSetList, likeFuncList; bool CheckOneDStep (_Parameter&,_Parameter,_Parameter); -bool CheckEqual (_Parameter, _Parameter); _Variable* siteWiseVar = nil, * blockWiseVar = nil; @@ -4728,12 +4727,12 @@ inline bool CheckOneDStep (_Parameter& val, _Parameter lB, _Parameter uB) } //_______________________________________________________________________________________ -bool CheckEqual (_Parameter a, _Parameter b) { +bool CheckEqual (_Parameter a, _Parameter b, _Parameter tolerance) { if (a!=0.0) { a = (a>b)?(a-b)/a:(b-a)/a; - return a>0.0 ? a<=machineEps : a>=-machineEps; + return a>0.0 ? a<=tolerance : a>=-tolerance; } - return (b<=machineEps)&&(b>=-machineEps); + return (b<=tolerance)&&(b>=-tolerance); } //_______________________________________________________________________________________ @@ -6225,8 +6224,8 @@ void _LikelihoodFunction::GradientDescent (_Parameter& gPrecision, _Matrix& b brentHistory.Store (-FX-initialValue);*/ } - if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue)) { - WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: in the Brent loop iteration ") & long(outcome) & ". " & maxSoFar & " / " & initialValue & ".\n");// & _String ((_String*)brentHistory.toStr())); + if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue, 10. * machineEps)) { + WarnError (_String ("Internal error in _LikelihoodFunction::GradientLocateTheBump: in the Brent loop iteration ") & long(outcome) & ". " & _String (maxSoFar, "%15.12g") & " / " & _String (initialValue,"%15.12g") & ".\n");// & _String ((_String*)brentHistory.toStr())); return; } diff --git a/src/mains/unix.cpp b/src/mains/unix.cpp index 88132bf0d..1863aca36 100644 --- a/src/mains/unix.cpp +++ b/src/mains/unix.cpp @@ -243,7 +243,7 @@ _String getLibraryPath() { _String tryLocal = baseDir & "res" & dirSlash; struct stat sb; - + if (stat((const char*)tryLocal, &sb) == 0 && S_ISDIR(sb.st_mode)) { libDir = tryLocal; } From 14e84c4e6c353f15681b99e9cb89dfddd203ef16 Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Wed, 3 Jan 2018 13:53:50 -0500 Subject: [PATCH 4/8] Adding the ability to fix branch lengths in libv3; random number generation module; some convenicene functions --- .../libv3/convenience/random.bf | 369 ++++++++++++++++++ .../libv3/models/DNA/GTR.bf | 2 +- .../libv3/models/DNA/HKY85.bf | 2 +- .../libv3/models/DNA/JC69.bf | 2 +- .../libv3/models/codon/MG_REV.bf | 2 +- .../libv3/models/model_functions.bf | 33 ++ .../libv3/tasks/alignments.bf | 19 + .../libv3/tasks/estimators.bf | 16 + 8 files changed, 441 insertions(+), 4 deletions(-) create mode 100644 res/TemplateBatchFiles/libv3/convenience/random.bf diff --git a/res/TemplateBatchFiles/libv3/convenience/random.bf b/res/TemplateBatchFiles/libv3/convenience/random.bf new file mode 100644 index 000000000..5abd882c1 --- /dev/null +++ b/res/TemplateBatchFiles/libv3/convenience/random.bf @@ -0,0 +1,369 @@ +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); + +/** @module random */ + +random.PI = 4*Arctan (1); + +random.LogFactorial.precomp = {{ 0.000000000000000, + 0.000000000000000, + 0.693147180559945, + 1.791759469228055, + 3.178053830347946, + 4.787491742782046, + 6.579251212010101, + 8.525161361065415, + 10.604602902745251, + 12.801827480081469, + 15.104412573075516, + 17.502307845873887, + 19.987214495661885, + 22.552163853123421, + 25.191221182738683, + 27.899271383840894, + 30.671860106080675, + 33.505073450136891, + 36.395445208033053, + 39.339884187199495, + 42.335616460753485, + 45.380138898476908, + 48.471181351835227, + 51.606675567764377, + 54.784729398112319, + 58.003605222980518, + 61.261701761002001, + 64.557538627006323, + 67.889743137181526, + 71.257038967168000, + 74.658236348830158, + 78.092223553315307, + 81.557959456115029, + 85.054467017581516, + 88.580827542197682, + 92.136175603687079, + 95.719694542143202, + 99.330612454787428, + 102.968198614513810, + 106.631760260643450, + 110.320639714757390, + 114.034211781461690, + 117.771881399745060, + 121.533081515438640, + 125.317271149356880, + 129.123933639127240, + 132.952575035616290, + 136.802722637326350, + 140.673923648234250, + 144.565743946344900, + 148.477766951773020, + 152.409592584497350, + 156.360836303078800, + 160.331128216630930, + 164.320112263195170, + 168.327445448427650, + 172.352797139162820, + 176.395848406997370, + 180.456291417543780, + 184.533828861449510, + 188.628173423671600, + 192.739047287844900, + 196.866181672889980, + 201.009316399281570, + 205.168199482641200, + 209.342586752536820, + 213.532241494563270, + 217.736934113954250, + 221.956441819130360, + 226.190548323727570, + 230.439043565776930, + 234.701723442818260, + 238.978389561834350, + 243.268849002982730, + 247.572914096186910, + 251.890402209723190, + 256.221135550009480, + 260.564940971863220, + 264.921649798552780, + 269.291097651019810, + 273.673124285693690, + 278.067573440366120, + 282.474292687630400, + 286.893133295426990, + 291.323950094270290, + 295.766601350760600, + 300.220948647014100, + 304.686856765668720, + 309.164193580146900, + 313.652829949878990, + 318.152639620209300, + 322.663499126726210, + 327.185287703775200, + 331.717887196928470, + 336.261181979198450, + 340.815058870798960, + 345.379407062266860, + 349.954118040770250, + 354.539085519440790, + 359.134205369575340, + 363.739375555563470, + 368.354496072404690, + 372.979468885689020, + 377.614197873918670, + 382.258588773060010, + 386.912549123217560, + 391.575988217329610, + 396.248817051791490, + 400.930948278915760, + 405.622296161144900, + 410.322776526937280, + 415.032306728249580, + 419.750805599544780, + 424.478193418257090, + 429.214391866651570, + 433.959323995014870, + 438.712914186121170, + 443.475088120918940, + 448.245772745384610, + 453.024896238496130, + 457.812387981278110, + 462.608178526874890, + 467.412199571608080, + 472.224383926980520, + 477.044665492585580, + 481.872979229887900, + 486.709261136839360, + 491.553448223298010, + 496.405478487217580, + 501.265290891579240, + 506.132825342034830, + 511.008022665236070, + 515.890824587822520, + 520.781173716044240, + 525.679013515995050, + 530.584288294433580, + 535.496943180169520, + 540.416924105997740, + 545.344177791154950, + 550.278651724285620, + 555.220294146894960, + 560.169054037273100, + 565.124881094874350, + 570.087725725134190, + 575.057539024710200, + 580.034272767130800, + 585.017879388839220, + 590.008311975617860, + 595.005524249382010, + 600.009470555327430, + 605.020105849423770, + 610.037385686238740, + 615.061266207084940, + 620.091704128477430, + 625.128656730891070, + 630.172081847810200, + 635.221937855059760, + 640.278183660408100, + 645.340778693435030, + 650.409682895655240, + 655.484856710889060, + 660.566261075873510, + 665.653857411105950, + 670.747607611912710, + 675.847474039736880, + 680.953419513637530, + 686.065407301994010, + 691.183401114410800, + 696.307365093814040, + 701.437263808737160, + 706.573062245787470, + 711.714725802289990, + 716.862220279103440, + 722.015511873601330, + 727.174567172815840, + 732.339353146739310, + 737.509837141777440, + 742.685986874351220, + 747.867770424643370, + 753.055156230484160, + 758.248113081374300, + 763.446610112640200, + 768.650616799717000, + 773.860102952558460, + 779.075038710167410, + 784.295394535245690, + 789.521141208958970, + 794.752249825813460, + 799.988691788643450, + 805.230438803703120, + 810.477462875863580, + 815.729736303910160, + 820.987231675937890, + 826.249921864842800, + 831.517780023906310, + 836.790779582469900, + 842.068894241700490, + 847.352097970438420, + 852.640365001133090, + 857.933669825857460, + 863.231987192405430, + 868.535292100464630, + 873.843559797865740, + 879.156765776907600, + 884.474885770751830, + 889.797895749890240, + 895.125771918679900, + 900.458490711945270, + 905.796028791646340, + 911.138363043611210, + 916.485470574328820, + 921.837328707804890, + 927.193914982476710, + 932.555207148186240, + 937.921183163208070, + 943.291821191335660, + 948.667099599019820, + 954.046996952560450, + 959.431492015349480, + 964.820563745165940, + 970.214191291518320, + 975.612353993036210, + 981.015031374908400, + 986.422203146368590, + 991.833849198223450, + 997.249949600427840, + 1002.670484599700300, + 1008.095434617181700, + 1013.524780246136200, + 1018.958502249690200, + 1024.396581558613400, + 1029.838999269135500, + 1035.285736640801600, + 1040.736775094367400, + 1046.192096209724900, + 1051.651681723869200, + 1057.115513528895000, + 1062.583573670030100, + 1068.055844343701400, + 1073.532307895632800, + 1079.012946818975000, + 1084.497743752465600, + 1089.986681478622400, + 1095.479742921962700, + 1100.976911147256000, + 1106.478169357800900, + 1111.983500893733000, + 1117.492889230361000, + 1123.006317976526100, + 1128.523770872990800, + 1134.045231790853000, + 1139.570684729984800, + 1145.100113817496100, + 1150.633503306223700, + 1156.170837573242400}}; + +lfunction random.LogFactorial(n) { + if (n > 254) { + x = n + 1; + return (x - 0.5) * Log (x) - x + 0.5 * Log (2 * ^"random.PI") + 1.0 / (12.0 * x); + } else { + return (^"random.LogFactorial.precomp")[n]; + } + } + +lfunction random.number (from, to) { + return Random (from,to); +} + +lfunction random.poisson (lambda) { + if (lambda < 30) { + L = Exp(-lambda); + k = 0; + p = 1.; + + do { + k += 1; + p = p * Random(0,1); + + } while (p > L); + + return k - 1; + } else { + c = 0.767 - 3.36 / lambda; + beta = ^"random.PI" / Sqrt (3.0 * lambda); + alpha = beta * lambda; + k = Log(c) - lambda - Log(beta); + + while (1) { + + var u = Random (0.1); + if (u > 0.0) { + var x = (alpha - Log ((1.0 - u) / u)) / beta; + var n = (x + 0.5)$1; + if (n < 0) { + continue; + } + var v = Random (0.1); + if (v > 0.0) { + var y = alpha - beta * x; + var t = (1.0 + Exp(y)); + var lhs = y + Log (v / (t*t)); + var rhs = k + n * Log (lambda) - LogFactorial (n); + if (lhs <= rhs) + return n; + } + } + } + } +} + +/*-------------------------------------------------*/ + +function random.gamma (alpha,beta) { + _sampledValue = Random (0,1); + FindRoot (_k,CGammaDist(_x_,alpha,beta)-_sampledValue,_x_,0,2500); + return _k; +} + +/*-------------------------------------------------*/ + +function random.beta (p,q) { + _sampledValue = Random (0,1); + FindRoot (_k,IBeta(_x_,p,q)-_sampledValue,_x_,0,1); + return _k; +} + +/*-------------------------------------------------*/ + +lfunction random.normal.standard () { + /* Box-Muller */ + return Sqrt (-2*Log (Random (1e-26,1)))*Cos (2 * ^"random.PI" * Random (1e-26,1)); +} + +/*-------------------------------------------------*/ + +lfunction random.binomial (p, N) { + + if (N == 0) { + return 0; + } + + if (p == 1) { + return N; + } + q = -Log (1-p); + x = 0; + sum = 0; + + do { + U = Random (0,1); + if (U > 0) { + if (N == x) { + x += 1; + break; + } + sum += - Log (U) / (N-x); + x += 1; + } + } while (sum < q); + + return x - 1; + } \ No newline at end of file diff --git a/res/TemplateBatchFiles/libv3/models/DNA/GTR.bf b/res/TemplateBatchFiles/libv3/models/DNA/GTR.bf index f0cc62a41..fb65bee95 100644 --- a/res/TemplateBatchFiles/libv3/models/DNA/GTR.bf +++ b/res/TemplateBatchFiles/libv3/models/DNA/GTR.bf @@ -26,7 +26,7 @@ lfunction models.DNA.GTR.ModelDescription(type) { utility.getGlobalValue("terms.model.type"): type, utility.getGlobalValue("terms.model.get_branch_length"): "", utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength", - utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length", + utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.ConstrainBranchLength", utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.nucleotide", utility.getGlobalValue("terms.model.q_ij"): "models.DNA.GTR._generateRate", utility.getGlobalValue("terms.model.time"): "models.DNA.generic.Time", diff --git a/res/TemplateBatchFiles/libv3/models/DNA/HKY85.bf b/res/TemplateBatchFiles/libv3/models/DNA/HKY85.bf index e4e22a31e..91b6d28f2 100644 --- a/res/TemplateBatchFiles/libv3/models/DNA/HKY85.bf +++ b/res/TemplateBatchFiles/libv3/models/DNA/HKY85.bf @@ -25,7 +25,7 @@ lfunction models.DNA.HKY85.ModelDescription(type) { utility.getGlobalValue("terms.model.type"): type, utility.getGlobalValue("terms.model.get_branch_length"): "", utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength", - utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length", + utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.ConstrainBranchLength", utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.nucleotide", utility.getGlobalValue("terms.model.q_ij"): "models.DNA.HKY85._GenerateRate", utility.getGlobalValue("terms.model.time"): "models.DNA.generic.Time", diff --git a/res/TemplateBatchFiles/libv3/models/DNA/JC69.bf b/res/TemplateBatchFiles/libv3/models/DNA/JC69.bf index dadb8fb01..f08d2abb4 100644 --- a/res/TemplateBatchFiles/libv3/models/DNA/JC69.bf +++ b/res/TemplateBatchFiles/libv3/models/DNA/JC69.bf @@ -26,7 +26,7 @@ lfunction models.DNA.JC69.ModelDescription(type) { utility.getGlobalValue("terms.model.type"): type, utility.getGlobalValue("terms.model.get_branch_length"): "", utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength", - utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length", + utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.ConstrainBranchLength", utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.equal", utility.getGlobalValue("terms.model.q_ij"): "models.DNA.JC69._generateRate", utility.getGlobalValue("terms.model.time"): "models.DNA.generic.Time", diff --git a/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf b/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf index 2e57df7e3..73bc46a69 100644 --- a/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf +++ b/res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf @@ -30,7 +30,7 @@ lfunction models.codon.MG_REV.ModelDescription(type, code) { }, utility.getGlobalValue("terms.model.get_branch_length"): "", utility.getGlobalValue("terms.model.set_branch_length"): "models.codon.MG_REV.set_branch_length", - utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length", + utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.ConstrainBranchLength", utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.corrected.CF3x4", utility.getGlobalValue("terms.model.q_ij"): "models.codon.MG_REV._GenerateRate", utility.getGlobalValue("terms.model.time"): "models.DNA.generic.Time", diff --git a/res/TemplateBatchFiles/libv3/models/model_functions.bf b/res/TemplateBatchFiles/libv3/models/model_functions.bf index 7a3e2a872..e7370145d 100644 --- a/res/TemplateBatchFiles/libv3/models/model_functions.bf +++ b/res/TemplateBatchFiles/libv3/models/model_functions.bf @@ -328,6 +328,39 @@ function models.generic.post.definition (model) { return model; } +/** + * @name models.generic.ConstrainBranchLength + * @param {Model} model + * @param {AssociativeList} or {Number} value + * @param {String} parameter + * @returns the number of constraints generated (0 or 1) + */ +function models.generic.ConstrainBranchLength (model, value, parameter) { + + if (Type (value) == "Number") { + if (Abs((model[terms.parameters])[terms.local]) == 1) { + if (Type (model [terms.model.branch_length_string]) == "String") { + models.generic.ConstrainBranchLength.expression = model [terms.model.branch_length_string]; + models.generic.ConstrainBranchLength.bl = (Columns ((model[terms.parameters])[terms.local]))[0]; + models.generic.ConstrainBranchLength.bl.p = parameter + "." + models.generic.SetBranchLength.bl; + models.generic.ConstrainBranchLength.substitution = {models.generic.ConstrainBranchLength.bl : 1}; + models.generic.ConstrainBranchLength.expression = "(" + Simplify (models.generic.ConstrainBranchLength.expression, models.generic.ConstrainBranchLength.substitution) + ")"; + Eval (models.generic.ConstrainBranchLength.bl.p + ":=" + value + "/" + models.generic.ConstrainBranchLength.expression); + messages.log ("models.generic.ConstrainBranchLength: " + models.generic.ConstrainBranchLength.bl.p + ":=" + value + "/" + models.generic.ConstrainBranchLength.expression); + } else { + messages.log ("models.generic.ConstrainBranchLength: model branch length expression not available"); + } + + } + else { + messages.log ("models.generic.ConstrainBranchLength: not exactly one local model parameter"); + } + } else { + messages.log ("models.generic.ConstrainBranchLength: unsupported value type " + Type (value) + "\n" + value); + } + return 0; +} + /** * @name models.generic.SetBranchLength * @param {Model} model diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 631e66a60..6e925be0a 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -178,6 +178,25 @@ lfunction alignments.ReadNucleotideDataSet(dataset_name, file_name) { return result; } +/** + * Ensure that name mapping is not None by creating a f(x)=x map if needed + * @name alignments.EnsureMapping + * @param dataset_name - the name of the dataset you wish to use + * @param {Dictionary} r - metadata pertaining to the dataset + * @param file_name - path to file + * @returns {Dictionary} r - metadata pertaining to the dataset + */ + +lfunction alignments.EnsureMapping(dataset_name, data) { + name_mapping = data[utility.getGlobalValue("terms.data.name_mapping")]; + if (None == name_mapping) { /** create a 1-1 mapping if nothing was done */ + name_mapping = {}; + utility.ForEach (alignments.GetSequenceNames (dataset_name), "_value_", "`&name_mapping`[_value_] = _value_"); + data[utility.getGlobalValue("terms.data.name_mapping")] = name_mapping; + } + return data; +} + /** * Read dataset from data * @name alignments.ReadNucleotideDataSetString diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 05842380b..b8b1728e5 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -238,6 +238,18 @@ function estimators.applyBranchLength(tree, node, model, length) { return Call(model[terms.model.set_branch_length], model, length, tree + "." + node); } +/** + * @name estimators.constrainBranchLength + * @private + * @param {String} tree + * @param {String} node + * @param {String} model + * @param {String} length + */ +function estimators.constrainBranchLength(tree, node, model, length) { + return Call(model[terms.model.constrain_branch_length], model, length, tree + "." + node); +} + /** * @name estimators.fixSubsetOfEstimates.helper * @private @@ -432,6 +444,10 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip _application_type = branch_length_conditions[estimators.ApplyExistingEstimates.i]; if (Type(_application_type) == "String") { + if (_application_type == terms.model.branch_length_constrain ) { + estimators.ApplyExistingEstimates.df_correction += estimators.constrainBranchLength(_tree_name, _branch_name, model_descriptions[estimators.ApplyExistingEstimates.map[_branch_name]], _set_branch_length_to); + continue; + } _set_branch_length_to = {}; _set_branch_length_to[terms.branch_length] = _existing_estimate[terms.fit.MLE]; _set_branch_length_to[terms.model.branch_length_scaler] = _application_type; From 540739e316c35757da7910e2914fca5a370e5786 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 4 Jan 2018 12:44:16 -0500 Subject: [PATCH 5/8] Updated protein model description functions with correct syntax for branch length constraint --- res/TemplateBatchFiles/libv3/models/protein/REV.bf | 2 +- res/TemplateBatchFiles/libv3/models/protein/empirical.bf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/libv3/models/protein/REV.bf b/res/TemplateBatchFiles/libv3/models/protein/REV.bf index 3c668da84..22e8dc660 100644 --- a/res/TemplateBatchFiles/libv3/models/protein/REV.bf +++ b/res/TemplateBatchFiles/libv3/models/protein/REV.bf @@ -25,7 +25,7 @@ lfunction models.protein.REV.ModelDescription(type) { utility.getGlobalValue("terms.model.type"): type, utility.getGlobalValue("terms.model.get_branch_length"): "", utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength", - utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length", + utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.ConstrainBranchLength", utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.protein", utility.getGlobalValue("terms.model.q_ij"): "models.protein.REV._GenerateRate", utility.getGlobalValue("terms.model.time"): "models.protein.generic.Time", diff --git a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf index 65c3968a6..a9a42ba68 100644 --- a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf +++ b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf @@ -28,7 +28,7 @@ lfunction models.protein.empirical.ModelDescription(type) { utility.getGlobalValue("terms.model.type"): type, utility.getGlobalValue("terms.model.get_branch_length"): "", utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength", - utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length", + utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.ConstrainBranchLength", utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.protein", utility.getGlobalValue("terms.model.q_ij"): "models.protein.empirical._GenerateRate", utility.getGlobalValue("terms.model.time"): "models.protein.generic.Time", From f8bf21318159a1444fd6473db1a253556f15df0c Mon Sep 17 00:00:00 2001 From: Sergei Kosakovsky Pond Date: Fri, 5 Jan 2018 10:44:30 -0500 Subject: [PATCH 6/8] Fixing typo in models.generic.ConstrainBranchLength --- res/TemplateBatchFiles/libv3/models/model_functions.bf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/libv3/models/model_functions.bf b/res/TemplateBatchFiles/libv3/models/model_functions.bf index e7370145d..6a337b680 100644 --- a/res/TemplateBatchFiles/libv3/models/model_functions.bf +++ b/res/TemplateBatchFiles/libv3/models/model_functions.bf @@ -336,13 +336,12 @@ function models.generic.post.definition (model) { * @returns the number of constraints generated (0 or 1) */ function models.generic.ConstrainBranchLength (model, value, parameter) { - if (Type (value) == "Number") { if (Abs((model[terms.parameters])[terms.local]) == 1) { if (Type (model [terms.model.branch_length_string]) == "String") { models.generic.ConstrainBranchLength.expression = model [terms.model.branch_length_string]; models.generic.ConstrainBranchLength.bl = (Columns ((model[terms.parameters])[terms.local]))[0]; - models.generic.ConstrainBranchLength.bl.p = parameter + "." + models.generic.SetBranchLength.bl; + models.generic.ConstrainBranchLength.bl.p = parameter + "." + models.generic.ConstrainBranchLength.bl; models.generic.ConstrainBranchLength.substitution = {models.generic.ConstrainBranchLength.bl : 1}; models.generic.ConstrainBranchLength.expression = "(" + Simplify (models.generic.ConstrainBranchLength.expression, models.generic.ConstrainBranchLength.substitution) + ")"; Eval (models.generic.ConstrainBranchLength.bl.p + ":=" + value + "/" + models.generic.ConstrainBranchLength.expression); From 7810cc165e09ac94d625cdc569dd5891fb7f997a Mon Sep 17 00:00:00 2001 From: Stephen Shank Date: Wed, 10 Jan 2018 15:07:28 -0500 Subject: [PATCH 7/8] Sorts branch annotations in RELAX for use with Datamonkey. --- .../SelectionAnalyses/RELAX.bf | 3 +-- .../libv3/UtilityFunctions.bf | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf index 3a4be057a..0ba54d947 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf @@ -641,8 +641,7 @@ lfunction relax.select_branches(partition_info) { tree_for_analysis = (partition_info[0])[utility.getGlobalValue("terms.data.tree")]; utility.ForEach (tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")], "_value_", "`&available_models`[_value_] += 1"); - list_models = utility.Keys (available_models); // get keys - branch_counts = utility.Values (available_models); + list_models = utility.sortStrings(utility.Keys(available_models)); // get keys option_count = Abs (available_models); io.CheckAssertion("`&option_count` >= 2", "RELAX requires at least one designated set of branches in the tree."); diff --git a/res/TemplateBatchFiles/libv3/UtilityFunctions.bf b/res/TemplateBatchFiles/libv3/UtilityFunctions.bf index 0d14f3795..79398a8c3 100644 --- a/res/TemplateBatchFiles/libv3/UtilityFunctions.bf +++ b/res/TemplateBatchFiles/libv3/UtilityFunctions.bf @@ -935,4 +935,28 @@ lfunction utility.CatersianProduct (arguments) { return product; } +function _sortStringsAux (theKey, theValue) +{ + for (_gb_idx2 = 0; _gb_idx2 < theValue; _gb_idx2=_gb_idx2+1) + { + _gb_sortedStrings [_gb_idx] = theKey; + _gb_idx = _gb_idx + 1; + } + return 0; +} + +function utility.sortStrings (_theList) +{ + _gb_dim = Rows (_theList)*Columns (_theList); + _toSort = {}; + for (_gb_idx = 0; _gb_idx < _gb_dim; _gb_idx = _gb_idx + 1) + { + _toSort[_theList[_gb_idx]] = _toSort[_theList[_gb_idx]]+1; + } + _gb_sortedStrings = {_gb_dim,1}; + _gb_idx = 0; + _toSort["_sortStringsAux"][""]; + return _gb_sortedStrings; +} + From ec4877d49842714bc6347d0f63b42d784a4535f7 Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Wed, 10 Jan 2018 17:02:41 -0800 Subject: [PATCH 8/8] Update strings.cpp --- src/core/strings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/strings.cpp b/src/core/strings.cpp index 2c44143d0..f83b34288 100644 --- a/src/core/strings.cpp +++ b/src/core/strings.cpp @@ -69,7 +69,7 @@ #define MOD_ADLER 65521 _String compileDate = __DATE__, - __HYPHY__VERSION__ = _String ("2.3.7") & compileDate.Cut (7,10) & compileDate.Cut (0,2).Replace("Jan", "01", true). + __HYPHY__VERSION__ = _String ("2.3.9") & compileDate.Cut (7,10) & compileDate.Cut (0,2).Replace("Jan", "01", true). Replace("Feb", "02", true). Replace("Mar", "03", true). Replace("Apr", "04", true).