Skip to content

Commit

Permalink
Merge pull request #1133 from veg/develop
Browse files Browse the repository at this point in the history
2.5.9
  • Loading branch information
spond authored Apr 21, 2020
2 parents c7cc769 + 2ef403f commit 466f29f
Show file tree
Hide file tree
Showing 20 changed files with 1,007 additions and 156 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -617,4 +617,6 @@ add_test (BGM HYPHYMP tests/hbltests/libv3/BGM.wbf)
add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf)
add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf)
add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf)
add_test (ABSREL HYPHYMP tests/hbltests/libv3/aBSREL.wbf)
add_test (ABSREL-MH HYPHYMP tests/hbltests/libv3/aBSREL-MH.wbf)

48 changes: 27 additions & 21 deletions res/TemplateBatchFiles/GARD.bf
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ gard.analysisDescription = {terms.io.info : "GARD : Genetic Algorithms for Recom
approach to screening alignments of sequences for recombination, by using the CHC genetic algorithm to search for phylogenetic
incongruence among different partitions of the data. The number of partitions is determined using a step-up procedure, while the
placement of breakpoints is searched for with the GA. The best fitting model (based on c-AIC) is returned; and additional post-hoc
tests run to distinguish topological incongruence from rate-variation.",
terms.io.version : "0.1",
tests run to distinguish topological incongruence from rate-variation. v0.2 adds and spooling results to JSON after each breakpoint search conclusion",
terms.io.version : "0.2",
terms.io.reference : "**Automated Phylogenetic Detection of Recombination Using a Genetic Algorithm**, _Mol Biol Evol 23(10), 1891–1901",
terms.io.authors : "Sergei L Kosakovsky Pond",
terms.io.contact : "[email protected]",
Expand Down Expand Up @@ -301,7 +301,7 @@ namespace gard {
if (singleBreakPointBest_cAIC < baseline_cAIC) {
io.ReportProgressBar ("GARD", "Breakpoint " + Format (1+breakPointIndex, 10, 0) + " of " + (variableSites-1) + ". Best cAIC = " + Format (singleBreakPointBest_cAIC, 12, 4) + " [delta = " + Format (baseline_cAIC - singleBreakPointBest_cAIC, 12, 4) + "] with breakpoint at site " + Format (singleBreakPointBestLocation, 10, 0));
} else {
io.ReportProgressBar ("GARD", "Breakpoint " + Format (1+breakPointIndex, 10, 0) + " of " + (variableSites-1) + ". Best cAIC = " + Format (baseline_cAIC, 12, 4) + " with no breakpoints");
io.ReportProgressBar ("GARD", "Breakpoint " + Format (1+breakPointIndex, 10, 0) + " of " + (variableSites-1) + ". Best cAIC = " + Format (baseline_cAIC, 12, 4) + " with no breakpoints." );
}


Expand Down Expand Up @@ -338,6 +338,8 @@ if (gard.singleBreakPointBest_cAIC < gard.bestOverall_cAIC_soFar) {
gard.bestOverallModelSoFar = null;
}

gard.concludeAnalysis(gard.bestOverallModelSoFar);


/* 2b. Evaluation of multiple break points with genetic algorithm
------------------------------------------------------------------------------*/
Expand All @@ -349,12 +351,12 @@ namespace gard {
if(populationSize < mpi.NodeCount() -1 ) {
populationSize = mpi.NodeCount() + 1;
}
mutationRate = 0.8; // the GARD paper said "15% of randomly selected bits were toggled"...
rateOfMutationsTharAreSmallShifts = 0.5; // some mutations are a new random break point; some are small shifts of the break point to an adjacent location.
mutationRate = 0.2; // the GARD paper said "15% of randomly selected bits were toggled"...
rateOfMutationsTharAreSmallShifts = 0.8; // some mutations are a new random break point; some are small shifts of the break point to an adjacent location.
maxFailedAttemptsToMakeNewModel = 7;
cAIC_diversityThreshold = 0.001;
cAIC_diversityThreshold = 0.01;
cAIC_improvementThreshold = 0.01; // I think this was basically 0 in the gard paper
maxGenerationsAllowedWithNoNewModelsAdded = 5; // TODO: Not in the GARD paper. use 10?
maxGenerationsAllowedWithNoNewModelsAdded = 2; // TODO: Not in the GARD paper. use 10?
maxGenerationsAllowedAtStagnant_cAIC = 100; // TODO: this is set to 100 in the GARD paper

// GA.2: Loop over increasing number of break points
Expand Down Expand Up @@ -401,7 +403,6 @@ namespace gard {
currentBest_model = Eval(currentBest_individual["key"]);

if ( (math.minNormalizedRange(selectedModels) < cAIC_diversityThreshold) || (generationsNoNewModelsAdded > maxGenerationsAllowedWithNoNewModelsAdded) ) {
//console.log ("UPDATING PARENT MODELS");

parentModels = gard.GA.generateNewGenerationOfModelsByMutatingModelSet(selectedModels, numberOfPotentialBreakPoints, mutationRate, rateOfMutationsTharAreSmallShifts);
if (Abs(parentModels) == 1) {
Expand All @@ -423,11 +424,10 @@ namespace gard {
terminationCondition = TRUE;
}
} else {
//console.log ("RESETTING generationsAtCurrentBest_cAIC" + previousBest_cAIC + ", " + currentBest_cAIC + ", " + cAIC_improvementThreshold + "\n" + currentBest_model + "\n");
generationsAtCurrentBest_cAIC = 0;
}

//console.log ("END OF LOOP");
//console.log (parentModels);

if (previousBest_cAIC > 0 && previousBest_cAIC < currentBest_cAIC) {
io.CheckAssertion("gard.previousBest_cAIC >= gard.currentBest_cAIC", "Internal error in GARD -- c-AIC INCREASED between two consecutive generations");
Expand All @@ -438,13 +438,16 @@ namespace gard {
generation += 1;

if (bestOverall_cAIC_soFar-currentBest_cAIC > 0) {
io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", " + Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged]"
io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", "
+ Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged, " + Format (Abs (masterList)/(Time(1)-startTime),5,2) + "/sec]"
+ ". Min (c-AIC) = " + Format (currentBest_cAIC, 12,4) + " [ delta = " + Format (bestOverall_cAIC_soFar-currentBest_cAIC, 8, 2) + "], breakpoints at " + Join (", ", currentBest_model));

} else {
io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", " + Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged]"
+ ". Min (c-AIC) = " + Format (currentBest_cAIC, 12,4) + " [no improvement], breakpoints at " + Join (", ", bestOverallModelSoFar));
io.ReportProgressBar ("GARD", Format (numberOfBreakPointsBeingEvaluated,3,0) + ' breakpoints [ generation ' + Format (generation, 6, 0) + ", total models " + Format (Abs (masterList), 8, 0) + ", "
+ Format (generationsAtCurrentBest_cAIC/ maxGenerationsAllowedAtStagnant_cAIC*100, 4, 0) +"% converged, " + Format (Abs (masterList)/(Time(1)-startTime),5,2) + "/sec]"
+ ". Min (c-AIC) = " + Format (currentBest_cAIC, 12,4) + " [no improvement], breakpoints at " + Join (", ", currentBest_model));
}

}

io.ClearProgressBar();
Expand All @@ -464,8 +467,8 @@ namespace gard {
bestOverallModelSoFar = bestModel;
} else {
addingBreakPointsImproves_cAIC = FALSE;
gard.concludeAnalysis(bestOverallModelSoFar);
}
gard.concludeAnalysis(bestOverallModelSoFar);
}

}
Expand Down Expand Up @@ -497,6 +500,7 @@ namespace gard {

lfunction gard.fitPartitionedModel (breakPoints, model, initialValues, saveToFile, constrainToOneTopology) {


currentIndex = 0;
currentStart = 0;
breakPointsCount = utility.Array1D (breakPoints);
Expand Down Expand Up @@ -915,30 +919,32 @@ lfunction gard.GA.generateNewGenerationOfModelsByMutatingModelSet(parentModels,
numberOfBreakPoints = utility.Array1D(firstModel);

// Generate a new set of models
local_mutation_rate = 1 - (1-mutationRate) ^ (1/numberOfBreakPoints); // keep bp the same
nextGenOfModels = {};
for(i=0; i<populationSize-1; i += 1) {

modelIsValid = FALSE;
failedAttempts = 0;
modelIsValid = FALSE;
failedAttempts = 0;
while(modelIsValid == FALSE && failedAttempts < ^"gard.maxFailedAttemptsToMakeNewModel") {
parentModel = gard.Helper.convertMatrixStringToMatrix(modelIds[i]);
breakPoints = {numberOfBreakPoints, 1};


for(breakPointIndex=0; breakPointIndex<numberOfBreakPoints; breakPointIndex += 1) {

if(Random(0,1) < mutationRate) { // keep the break point the same
if(Random(0,1) < local_mutation_rate) { // keep the break point the same
breakPoints[breakPointIndex] = parentModel[breakPointIndex];
} else {
if(Random(0,1) < rateOfMutationsThatAreSmallShifts) { // move the break point by a random small amount
notValid = TRUE;
while (notValid) {
while (1) {
distanceOfStep = Max (1,random.poisson(1));
if (Random (0,1) <= 0.5) { // randomly decide if the break point moves right or left
distanceOfStep = -distanceOfStep;
}
//console.log (distanceOfStep);
variableSiteMapIndexOfParentBreakPoint = (^"gard.inverseVariableSiteMap")[parentModel[breakPointIndex]] + distanceOfStep;
if (variableSiteMapIndexOfParentBreakPoint >= 0 && variableSiteMapIndexOfParentBreakPoint < (^"gard.variableSites")) {
newBreakPoint = (^"gard.variableSiteMap")[variableSiteMapIndexOfParentBreakPoint];
notValid = FALSE;
break;
}
}
breakPoints[breakPointIndex] = newBreakPoint;
Expand Down
10 changes: 7 additions & 3 deletions res/TemplateBatchFiles/SelectionAnalyses/FADE.bf
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ KeywordArgument ("tree", "A rooted phylogenetic tree", null, "Please select
fade.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (fade.alignment_info[utility.getGlobalValue("terms.data.partitions")],
fade.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]
);


fade.partition_count = Abs (fade.partitions_and_trees);

selection.io.json_store_key_value_pair (fade.json, terms.json.input, terms.json.partition_count,fade.partition_count);
Expand All @@ -192,6 +194,7 @@ assert (((fade.partitions_and_trees[index])[terms.data.tree])[terms.trees.rooted
fade.name_mapping = fade.alignment_info[utility.getGlobalValue("terms.data.name_mapping")];



if (utility.Has (fade.cache, terms.fade.cache.settings, "AssociativeList")) {
fade.run_settings = fade.cache [terms.fade.cache.settings];
fade.prompts["grid"] = FALSE;
Expand Down Expand Up @@ -247,6 +250,7 @@ if (utility.Has (fade.cache, terms.fade.cache.baseline, "AssociativeList")) {
model [utility.getGlobalValue("terms.model.frequency_estimator")] = "frequencies.mle";
return model;
}


fade.baseline_fit = estimators.FitSingleModel_Ext (
fade.filter_names,
Expand All @@ -272,8 +276,6 @@ if (utility.Has (fade.run_settings, "method", "String")) {
// =========== FIT BASELINE MODEL




io.ReportProgressMessageMD ("FADE", "baseline", ">Fitted an alignment-wide model. " + selection.io.report_fit (fade.baseline_fit, 0, fade.alignment_sample_size ) + "\n\nTotal tree lengths by partition\n");
utility.ForEachPair (fade.baseline_fit[terms.branch_length], "_part_", "_value_",
'
Expand Down Expand Up @@ -326,7 +328,7 @@ if (utility.Has (fade.cache, terms.fade.cache.substitutions, "AssociativeList")
fade.baseline_fit,
{terms.run_options.retain_lf_object: TRUE},
None
)
);

fade.baseline_fit[terms.likelihood_function] = "fade.ancestral.rebuild.likelihoodFunction";
}
Expand Down Expand Up @@ -522,6 +524,8 @@ for (fade.residue = 0; fade.residue < 20; fade.residue += 1) {

fade.trees.names = utility.Map (utility.Range (fade.partition_count, 1, 1), "_index_", "'fade.grid_tree_' + _index_");
fade.lf.components = {fade.partition_count * 2, 1};


utility.ForEachPair (fade.filter_names, "_index_", "_filter_",
'
fade.model_assignment = {
Expand Down
Loading

0 comments on commit 466f29f

Please sign in to comment.