Skip to content

Commit

Permalink
Merge pull request #1197 from veg/develop
Browse files Browse the repository at this point in the history
2.5.16 RC
  • Loading branch information
spond authored Aug 13, 2020
2 parents 8bb66c8 + aed0826 commit 50926ae
Show file tree
Hide file tree
Showing 45 changed files with 1,363 additions and 1,033 deletions.
24 changes: 21 additions & 3 deletions res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
Original file line number Diff line number Diff line change
Expand Up @@ -283,21 +283,35 @@ if (relax.model_set == "All") { // run all the models
relax.filter_names,
None);

relax.distribution = models.codon.BS_REL.ExtractMixtureDistribution(relax.ge.bsrel_model);
relax.weight_multipliers = parameters.helper.stick_breaking (utility.SwapKeysAndValues(utility.MatrixToDict(relax.distribution["weights"])),None);
relax.constrain_parameters = parameters.ConstrainMeanOfSet(relax.distribution["rates"],relax.weight_multipliers,1,"relax");

for (key, value; in; relax.constrain_parameters[terms.global]){
model.generic.AddGlobal (relax.ge.bsrel_model, value, key);
parameters.SetRange (value, terms.range_almost_01);
}

relax.distribution["rates"] = Transpose (utility.Values (relax.constrain_parameters[terms.global]));

for (relax.i = 1; relax.i < relax.rate_classes; relax.i += 1) {
parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)), terms.range_almost_01);
}
parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.rate_classes)), terms.range_gte1);

// constrain the mean of this distribution to 1


relax.model_object_map = { "relax.ge" : relax.ge.bsrel_model };

io.ReportProgressMessageMD ("RELAX", "gd", "Fitting the general descriptive (separate k per branch) model");
selection.io.startTimer (relax.json [terms.json.timers], "General descriptive model fitting", 2);

relax.distribution = models.codon.BS_REL.ExtractMixtureDistribution(relax.ge.bsrel_model);
PARAMETER_GROUPING = {};
PARAMETER_GROUPING + relax.distribution["rates"];
PARAMETER_GROUPING + relax.distribution["weights"];



if (Type (relax.ge_guess) != "Matrix") {
// first time in
Expand All @@ -315,8 +329,6 @@ if (relax.model_set == "All") { // run all the models


parameters.DeclareGlobalWithRanges ("relax.bl.scaler", 1, 0, 1000);

//VERBOSITY_LEVEL = 10;

relax.grid_search.results = estimators.FitLF (relax.filter_names, relax.trees,{ "0" : {"DEFAULT" : "relax.ge"}},
relax.final_partitioned_mg_results,
Expand Down Expand Up @@ -430,6 +442,12 @@ if (relax.model_set == "All") { // run all the models
0,
relax.k_estimates);


for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) {
//console.log (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)));
parameters.RemoveConstraint (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)));
}

break;
}

Expand Down
9 changes: 5 additions & 4 deletions res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ lfunction slac.compute_the_counts (matrix, tree, lookup, selected_branches, coun


selected_branch_total_length = +selected_branches_lengths;

io.CheckAssertion ("`&selected_branch_total_length`>0", "SLAC cannot be applied to a branch selection with total zero branch length (i.e. no variation)");

/* columns
Expand Down Expand Up @@ -593,7 +594,7 @@ lfunction slac.compute_the_counts (matrix, tree, lookup, selected_branches, coun
relative_branch_length = 1;
fr = Eval (fully_resolved);
relative_branch_length = selected_branches_lengths[i] / selected_branch_total_length;

for (k = 0; k < 4; k += 1) {
report_resolved_by_branch[i*column_count + k] += fr[k];
report_averaged_by_branch[i*column_count + k] += fr[k];
Expand All @@ -605,12 +606,12 @@ lfunction slac.compute_the_counts (matrix, tree, lookup, selected_branches, coun
by_site_scaler [s] += (-selected_branches_lengths[i]);
psi = parent_state*state_count+parent_state;


fr = Eval (fully_resolved);
/*fr = Eval (fully_resolved);

for (k = 0; k < 2; k += 1) {
report_averaged[s*column_count + k] += fr[k];
report_resolved[s*column_count + k] += fr[k];
}
}*/

relative_branch_length = 1;
fr = Eval (fully_resolved);
Expand Down
3 changes: 2 additions & 1 deletion res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ selection.io.startTimer (fel.json [terms.json.timers], "Total time", 0);

// Within the `fel` namespace: load the shared file-loading module, then call
// `load_file` with a dictionary holding the prefix "fel" and a settings
// sub-dictionary whose branch-selector entry names "fel.select_branches".
// The dictionary keys are looked up by name through utility.getGlobalValue.
namespace fel {
LoadFunctionLibrary ("modules/shared-load-file.bf");
// NOTE(review): the `load_file` call below appears twice with identical
// arguments — once on a single line and once wrapped over two lines.
// Presumably these are the before/after sides of a diff hunk rendered without
// +/- markers; confirm against the actual file that only one call is present.
load_file ({utility.getGlobalValue("terms.prefix"): "fel", utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "fel.select_branches"}});
load_file ({utility.getGlobalValue("terms.prefix"): "fel",
utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "fel.select_branches"}});
}

KeywordArgument ("srv", "Include synonymous rate variation in the model", "Yes");
Expand Down
33 changes: 21 additions & 12 deletions res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ lfunction selection.io.defineBranchSets(partition_info) {
tree_for_analysis = (partition_info[k])[utility.getGlobalValue("terms.data.tree")];
available_models = {};
utility.ForEach (tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")], "_value_", "`&available_models`[_value_] += 1");

for (k2; in; utility.Keys (available_models)) {
key_counts[k2] = key_counts[k2] + 1;
}
}



list_models = {};
for (k,v; in; key_counts) {
if (v == Abs (partition_info)) {
Expand All @@ -39,11 +39,11 @@ lfunction selection.io.defineBranchSets(partition_info) {
list_models = utility.sortStrings(utility.Keys (list_models));
}


selectTheseForTesting = {
option_count + 3, 2
};


selectTheseForTesting[0][0] = "All";
selectTheseForTesting[0][1] = "Include all branches in the analysis";
Expand All @@ -61,7 +61,7 @@ lfunction selection.io.defineBranchSets(partition_info) {
selectTheseForTesting[3 + k][1] = "Set of " + available_models[list_models[k]] + " unlabeled branches";
}
}

ChoiceList(testSet, "Choose the set of branches to test for selection", 1, NO_SKIP, selectTheseForTesting);

io.CheckAssertion ("`&testSet` >= 0", "User cancelled branch selection; analysis terminating");
Expand Down Expand Up @@ -195,17 +195,17 @@ lfunction selection.io.report_viterbi_path (path) {
rates = Rows (distribution);
settings ["header"] = FALSE;
last_state = path[0];


row_matrix = {{index, last_state, rate}};

for (index, rate; in; path) {
if (rate != last_state) {
fprintf (stdout, io.FormatTableRow (row_matrix, settings));
last_state = rate;
}
}

fprintf (stdout, "\n");
return row_matrix;
}
Expand Down Expand Up @@ -414,6 +414,15 @@ function selection.io.json_store_branch_attribute(json, attribute_name, attribut
((json[terms.json.branch_attributes])[terms.json.attribute])[attribute_name] = {terms.json.attribute_type : attribute_type,
terms.json.display_order: display_order};



for (selection.io.json_store_branch_attribute.branch_name,selection.io.json_store_branch_attribute.branch_tag; in; values) {
utility.EnsureKey ((json[terms.json.branch_attributes])[partition], selection.io.json_store_branch_attribute.branch_name);
(((json[terms.json.branch_attributes])[partition])[selection.io.json_store_branch_attribute.branch_name])[attribute_name] = selection.io.json_store_branch_attribute.branch_tag;
}


/*
utility.ForEach (utility.Keys (values), "selection.io.json_store_branch_attribute.branch_name",
"utility.EnsureKey ((json[terms.json.branch_attributes])[partition], selection.io.json_store_branch_attribute.branch_name)");

Expand All @@ -422,6 +431,7 @@ function selection.io.json_store_branch_attribute(json, attribute_name, attribut
(((json[terms.json.branch_attributes])[partition])[selection.io.json_store_branch_attribute.branch_name])[attribute_name] = values[selection.io.json_store_branch_attribute.branch_name];
}
");
*/
}


Expand All @@ -436,4 +446,3 @@ function selection.io.extract_branch_info(branch_spec, callback) {
branch_spec["selection.io._aux.extract_branch_info.callback"][""];
return selection.io.extract_branch_info_result;
}

Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ function load_file (prefix) {

sample_size=codon_data_info[utility.getGlobalValue("terms.data.sites")]*codon_data_info[utility.getGlobalValue("terms.data.sequences")];


codon_data_info[utility.getGlobalValue("terms.data.sample_size")] = sample_size;
upper_prefix = prefix && 1; //uppercase the prefix for json name
codon_data_info[utility.getGlobalValue("terms.json.json")] = codon_data_info[utility.getGlobalValue("terms.data.file")] + "."+upper_prefix+".json";
Expand All @@ -86,8 +87,19 @@ function load_file (prefix) {
name_mapping = {};
utility.ForEach (alignments.GetSequenceNames (prefix+".codon_data"), "_value_", "`&name_mapping`[_value_] = _value_");
}



// check for duplicates
duplicate_sequences = codon_data_info[^"terms.data.sequences"] - alignments.HasDuplicateSequences (codon_data_info[^"terms.data.datafilter"],-1);
if (duplicate_sequences > 0) {
fprintf(stdout, "\n-------\n", io.FormatLongStringToWidth(
">[WARNING] This dataset contains " + duplicate_sequences + " duplicate " + io.SingularOrPlural (duplicate_sequences, 'sequence', 'sequences') + ".
Identical sequences do not contribute any information to the analysis and only slow down computation.
Please consider removing duplicate or 'nearly' duplicate sequences,
e.g. using https://github.com/veg/hyphy-analyses/tree/master/remove-duplicates
prior to running selection analyses", 72),
"\n-------\n");
}

utility.SetEnvVariable(utility.getGlobalValue ("terms.trees.data_for_neighbor_joining"),
codon_data_info[utility.getGlobalValue("terms.data.datafilter")]);

Expand Down Expand Up @@ -122,7 +134,7 @@ function load_file (prefix) {
},
...
*/


partition_count = Abs (partitions_and_trees);

Expand Down Expand Up @@ -203,7 +215,7 @@ function load_file (prefix) {

function store_tree_information () {
// Place in own attribute called `tested`

selection.io.json_store_key_value_pair (json, None, utility.getGlobalValue("terms.json.tested"), selected_branches);

/** this will return a dictionary of selected branches; one set per partition, like in
Expand Down Expand Up @@ -277,30 +289,30 @@ function doGTR (prefix) {
terms.nucleotideRate ("G","T") : { utility.getGlobalValue ("terms.fit.MLE") : 0.25}
}
});


//utility.ToggleEnvVariable("VERBOSITY_LEVEL", 10);

gtr_results = estimators.FitGTR(filter_names,
trees,
gtr_results);


KeywordArgument ("kill-zero-lengths", "Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", "Yes");

kill0 = io.SelectAnOption (
{
"Yes":"Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)",
"No":"Keep all branches"
},
},
"The set of properties to use in the model") == "Yes";
if (kill0) {


if (kill0) {
for (index, tree; in; trees) {
deleted = {};
if (^(prefix + ".selected_branches") / index) {
trees[index] = trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], (^(prefix + ".selected_branches"))[index], deleted);
trees[index] = trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], (^(prefix + ".selected_branches"))[index], deleted);
} else {
trees[index] = trees.KillZeroBranches (tree, (gtr_results[^"terms.branch_length"])[index], null, deleted);
}
Expand All @@ -314,11 +326,11 @@ function doGTR (prefix) {
}
for (i = 0; i < partition_count; i+=1) {
(partitions_and_trees[i])[^"terms.data.tree"] = trees[i];
}
}
store_tree_information ();
}


io.ReportProgressMessageMD (prefix, "nuc-fit", "* " +
selection.io.report_fit (gtr_results, 0, 3*(^"`prefix`.sample_size")));

Expand Down Expand Up @@ -393,7 +405,7 @@ function doPartitionedMG (prefix, keep_lf) {

if (partition_count > 1) {
//partition_scalers = selection.io.extract_global_MLE_re (partitioned_mg_results, "^" + utility.getGlobalValue("terms.parameters.omega_ratio"));

}

/** extract and report dN/dS estimates */
Expand Down
11 changes: 2 additions & 9 deletions res/TemplateBatchFiles/TemplateModels/chooseGeneticCode.def
Original file line number Diff line number Diff line change
Expand Up @@ -540,21 +540,14 @@ lfunction CompareCodonProperties(codon1, codon2, code)
/*----------------------------------------------------------------------------------------------------------*/

// Build and return a dictionary keyed by three-letter codon strings (letters
// drawn from "ACGT") whose values are taken from _hyphyAAOrdering, indexed by
// the corresponding entry of _Genetic_Code.
// NOTE(review): as rendered here the body contains both an inline loop ending
// in `return codonToAAMap;` and a second `return` that delegates to
// defineCodonToAAGivenCode(_Genetic_Code); the second return is unreachable as
// written. Presumably these are the removed and added sides of a diff hunk
// shown without +/- markers — confirm against the actual file.
function defineCodonToAA() {
codonToAAMap = {};
nucChars = "ACGT";

// Visit all 64 codon indices; the key is assembled from three characters of nucChars.
// NOTE(review): `$` is presumably integer division, making the three indices
// the base-4 digits of p1 — confirm against the language reference.
for (p1 = 0; p1 < 64; p1 = p1 + 1) {
codonToAAMap[nucChars[p1$16] + nucChars[p1 % 16 $4] + nucChars[p1 % 4]] = _hyphyAAOrdering[_Genetic_Code[p1]];
}

return codonToAAMap;
// Delegating form: same mapping, computed by the parameterized helper using _Genetic_Code.
return defineCodonToAAGivenCode(_Genetic_Code);
}

/*----------------------------------------------------------------------------------------------------------*/

function defineCodonToAAGivenCode(code) {
codonToAAMap = {};
nucChars = "ACGT";
nucChars = "ACGT";

for (p1 = 0; p1 < 64; p1 += 1) {
codonToAAMap[nucChars[p1$16] + nucChars[p1 % 16 $4] + nucChars[p1 % 4]] = _hyphyAAOrdering[code[p1]];
Expand Down
1 change: 0 additions & 1 deletion res/TemplateBatchFiles/files.lst
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@

"","Test for recombination.","!Recombination";
"GARD","[GARD] Screen an alignment using GARD (requires an MPI environment).","GARD.bf";
"GRDR","Process GARD results.","GARDProcessor.bf";
"LHT","A Likelihood Ratio Test to detect conflicting phylogenetic signal Huelsenbeck and Bull, 1996. [Contributed by Olivier Fedrigo].","LHT.bf";
"SBL","Search an alignment for a single breakpoint.","SingleBreakpointRecomb.bf";
"SPL","Plot genetic distances (similarity) of one sequence against all others in an alignment, using a sliding window. Optionally, determine NJ-based clustering and bootstrap support in every window. This is a HyPhy adaptation of the excellent (but Windows only tool) SimPlot (and/or VarPlot) written by Stuart Ray (http://sray.med.som.jhmi.edu/SCRoftware/simplot/)","SimilarityPlot.bf";
Expand Down
2 changes: 1 addition & 1 deletion res/TemplateBatchFiles/libv3/UtilityFunctions.bf
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ function utility.MapWithKey (object, key_name, lambda_name, transform) {
utility.MapWithKey.return_object = {utility.MapWithKey.rows, utility.MapWithKey.columns};

^(lambda_name) := object [utility.MapWithKey.r][utility.MapWithKey.c];
^(key_name) := {{utility.MapWithKey.r,utility.MapWithKey.c}}
^(key_name) := {{utility.MapWithKey.r,utility.MapWithKey.c}};
for (utility.MapWithKey.r = 0; utility.MapWithKey.r < utility.MapWithKey.rows; utility.MapWithKey.r += 1) {
for (utility.MapWithKey.c = 0; utility.MapWithKey.c < utility.MapWithKey.columns; utility.MapWithKey.c += 1) {
utility.MapWithKey.temp = Eval (transform);
Expand Down
Loading

1 comment on commit 50926ae

@kjlevitz
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

A possible performance regression was detected for the benchmark 'Benchmark.js Benchmark'.
The benchmark result of this commit is worse than the previous benchmark result, exceeding the threshold of 2.

| Benchmark suite | Current: 50926ae | Previous: aed0826 | Ratio |
|---|---|---|---|
| BGM.wbf | Infinity secs/op (±0.000000%) | null secs/op (±0.000000%) | Infinity |

This comment was automatically generated by workflow using github-action-benchmark.

CC: @klevitz

Please sign in to comment.