Skip to content

Commit

Permalink
Merge pull request #1072 from veg/develop
Browse files Browse the repository at this point in the history
2.5.2 release
  • Loading branch information
spond authored Feb 6, 2020
2 parents 047266e + 0d598ad commit fac1356
Show file tree
Hide file tree
Showing 92 changed files with 2,218 additions and 4,974 deletions.
59 changes: 35 additions & 24 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,23 +188,34 @@ endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)

if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3")
PCL_CHECK_FOR_AVX()

# Pick SIMD flags/macros for Clang builds.
#   * AVX detected and NOAVX not set -> build with AVX (and FMA3 when detected).
#   * otherwise                      -> fall back to SSE3 when detected.
# The PCL_CHECK_FOR_* probes are defined elsewhere; this block only reads the
# HAVE_* result variables they are expected to set.
#
# Variables are tested by name rather than through ${...}: with the expanded
# form an unset/empty NOAVX or HAVE_AVX_EXTENSIONS leaves a dangling NOT/AND
# operator in the condition and aborts configuration.
if(HAVE_AVX_EXTENSIONS AND NOT NOAVX)
  # -march/-mtune=native target the CPU of the build host.
  set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=native -mtune=native -mavx")
  add_definitions(-D_SLKP_USE_AVX_INTRINSICS)

  # FMA3 is only probed once AVX has been selected.
  PCL_CHECK_FOR_FMA3()
  if(HAVE_FMA3)
    set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mfma")
    add_definitions(-D_SLKP_USE_FMA3_INTRINSICS)
  endif()
else()
  PCL_CHECK_FOR_SSE3()
  if(HAVE_SSE3_EXTENSIONS)
    add_definitions(-D_SLKP_USE_SSE_INTRINSICS)
    # Trailing space kept so the flag string is unchanged from the original.
    set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3 ")
  endif()
endif()
# Select SIMD compile flags and intrinsics macros from the NOAVX / NOSSE3 switches.
#   NOAVX set, NOSSE3 set   -> add -mno-sse3, define no intrinsics macro.
#   NOAVX set, NOSSE3 unset -> probe SSE3 and enable it when available.
#   NOAVX unset             -> probe AVX (then FMA3); if AVX is unavailable,
#                              probe SSE3 instead. NOSSE3 is not consulted here.
# The PCL_CHECK_FOR_* probes are defined elsewhere; this block only reads the
# HAVE_* result variables they are expected to set.
#
# else()/endif() are written without the legacy condition argument: the
# original `else(NOSSE)` did not repeat its opening `if(NOSSE3)`.
if(NOAVX)
  if(NOSSE3)
    set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mno-sse3")
  else()
    PCL_CHECK_FOR_SSE3()
    if(HAVE_SSE3_EXTENSIONS)
      add_definitions(-D_SLKP_USE_SSE_INTRINSICS)
      # Trailing space kept so the flag string is unchanged from the original.
      set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3 ")
    endif()
  endif()
else()
  PCL_CHECK_FOR_AVX()
  if(HAVE_AVX_EXTENSIONS)
    # -march/-mtune=native target the CPU of the build host.
    set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=native -mtune=native -mavx")
    add_definitions(-D_SLKP_USE_AVX_INTRINSICS)

    # FMA3 is only probed once AVX has been selected.
    PCL_CHECK_FOR_FMA3()
    if(HAVE_FMA3)
      set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mfma")
      add_definitions(-D_SLKP_USE_FMA3_INTRINSICS)
    endif()
  else()
    PCL_CHECK_FOR_SSE3()
    if(HAVE_SSE3_EXTENSIONS)
      add_definitions(-D_SLKP_USE_SSE_INTRINSICS)
      set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -msse3 ")
    endif()
  endif()
endif()

set_property(
SOURCE ${SRC_CORE} ${SRC_NEW} ${SRC_UTILS} ${SRC_UNIXMAIN}
Expand Down Expand Up @@ -406,10 +417,10 @@ if(${MPI_FOUND})
HYPHYMPI
PROPERTIES
if(${OPENMP_FOUND})
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_WARNING_FLAGS} ${MPI_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS}"
LINK_FLAGS "${DEFAULT_LINK_FLAGS} ${MPI_LINK_FLAGS} ${OpenMP_CXX_FLAGS}"
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_WARNING_FLAGS} ${MPI_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} "
LINK_FLAGS "${DEFAULT_LINK_FLAGS} ${MPI_LINK_FLAGS} ${OpenMP_CXX_FLAGS} -g "
else(${OPENMP_FOUND})
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_WARNING_FLAGS} ${MPI_COMPILE_FLAGS}"
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_WARNING_FLAGS} ${MPI_COMPILE_FLAGS} "
LINK_FLAGS "${DEFAULT_LINK_FLAGS} ${MPI_LINK_FLAGS}"
endif(${OPENMP_FOUND})
)
Expand Down Expand Up @@ -437,11 +448,11 @@ set_target_properties(
HYPHY-PROFILE
PROPERTIES
if(${OPENMP_FOUND})
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} -g -fprofile-instr-generate -fcoverage-mapping "
LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} -g -fprofile-instr-generate "
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} -g -fprofile-generate "
LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} -g -fprofile-generate "
else(${OPENMP_FOUND})
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS}-g -fprofile-instr-generate -fcoverage-mapping "
LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} -g -fprofile-instr-generate "
COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS}-g -fprofile-generate "
LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} -g -fprofile-generate "
endif(${OPENMP_FOUND})
)

Expand Down
33 changes: 19 additions & 14 deletions res/TemplateBatchFiles/BGM.bf
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ selection.io.startTimer (bgm.json [terms.json.timers], "Overall", 0);
bgm.data_types = {terms.nucleotide : "Nucleotide multiple sequence alignment",
terms.amino_acid : "Protein multiple sequence alignment",
terms.codon : "Codon multiple sequence alignment"};
KeywordArgument ("run_type", "nucleotide, amino-acid or codon", "codon");
bgm.run_type = io.SelectAnOption (bgm.data_types, "Data type");
KeywordArgument ("type", "nucleotide, amino-acid or codon", "codon");
bgm.type = io.SelectAnOption (bgm.data_types, "Data type");

SetDialogPrompt ("Specify a `bgm.run_type` multiple sequence alignment file");
SetDialogPrompt ("Specify a `bgm.type` multiple sequence alignment file");

bgm.fit_options = {terms.run_options.retain_lf_object : TRUE};
bgm.reporting_thershold = 0.5;
Expand All @@ -74,28 +74,33 @@ bgm.run_settings = {
"samples" : 100,
"max-parents" : 1,
"min-subs" : 1,
"data-type" : bgm.run_type,
"data-type" : bgm.type,
"threshold" : bgm.reporting_thershold
};

KeywordArgument ("code", "Which genetic code should be used", "Universal");
KeywordArgument ("alignment", "An in-frame codon alignment in one of the formats supported by HyPhy");
KeywordArgument ("tree", "A phylogenetic tree (optionally annotated with {})", null, "Please select a tree file for the data:");
KeywordArgument ("branches", "Branches to test", "All");



if (bgm.run_type == "nucleotide") {
KeywordArgument ("branches", "Branches to test", "All");
bgm.alignment_info = alignments.ReadNucleotideDataSet ("bgm.dataset", None);
bgm.baseline_model = "models.DNA.GTR.ModelDescription";
} else {
if (bgm.run_type == "amino-acid") {
if (bgm.type == "amino-acid") {
bgm.alignment_info = alignments.ReadProteinDataSet ("bgm.dataset", None);
LoadFunctionLibrary ("libv3/models/protein.bf");
LoadFunctionLibrary ("libv3/models/protein/empirical.bf");
LoadFunctionLibrary ("libv3/models/protein/REV.bf");
utility.Extend (models.protein.empirical_models, {"GTR" : "General time reversible model (189 estimated parameters)."});
bgm.run_settings ["model"] = io.SelectAnOption (models.protein.empirical_models, "Baseline substitution model");
bgm.baseline_model = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[bgm.run_settings ["model"]];
KeywordArgument ("baseline_model", "Which amino acid substitution model should be used", "LG");
bgm.run_settings ["model"] = io.SelectAnOption (models.protein.empirical_models, "Baseline substitution model");
bgm.baseline_model = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[bgm.run_settings ["model"]];
KeywordArgument ("branches", "Branches to test", "All");
} else { // codon
KeywordArgument ("branches", "Branches to test", "All");
bgm.alignment_info = alignments.PromptForGeneticCodeAndAlignment("bgm.dataset","bgm.codon.filter");
LoadFunctionLibrary("libv3/models/codon/MG_REV.bf");
bgm.baseline_model = "models.codon.MG_REV.ModelDescription";
Expand All @@ -112,7 +117,7 @@ bgm.name_mapping = bgm.alignment_info[utility.getGlobalValue("terms.data.name_ma
selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.json.file, bgm.alignment_info [terms.data.file]);
selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.json.sequences, bgm.alignment_info [terms.data.sequences]);
selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.json.sites, bgm.alignment_info [terms.data.sites]);
selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.data_type, bgm.run_type);
selection.io.json_store_key_value_pair (bgm.json, terms.json.input, terms.data_type, bgm.type);

bgm.alignment_info[terms.json.json] = bgm.alignment_info[terms.data.file] + ".BGM.json";

Expand All @@ -133,7 +138,7 @@ bgm.filter_specification = alignments.DefineFiltersForPartitions (bgm.partitions
bgm.store_tree_information();

io.ReportProgressMessageMD ("BGM", "Data", "Loaded **" +
bgm.alignment_info [terms.data.sequences] + "** `bgm.run_type` sequences, **" +
bgm.alignment_info [terms.data.sequences] + "** `bgm.type` sequences, **" +
bgm.alignment_info [terms.data.sites] + "** sites, from \`" + bgm.alignment_info [terms.data.file] + "\`");

bgm.initial_values = parameters.helper.tree_lengths_to_initial_values (bgm.trees, None);
Expand Down Expand Up @@ -165,7 +170,7 @@ selection.io.startTimer (bgm.json [terms.json.timers], "Baseline fit", 1);

io.ReportProgressMessageMD("bgm", "phylo", "Performing initial model fit to obtain branch lengths and rate parameters");

if (bgm.run_type == "nucleotide") {
if (bgm.type == "nucleotide") {
bgm.initial_values = utility.Extend (bgm.initial_values,
{
utility.getGlobalValue ("terms.global") : {
Expand All @@ -177,7 +182,7 @@ if (bgm.run_type == "nucleotide") {
});

} else {
if (bgm.run_type == "codon") {
if (bgm.type == "codon") {
bgm.initial_values = utility.Extend (bgm.initial_values,
{
utility.getGlobalValue ("terms.global") : {
Expand Down Expand Up @@ -206,7 +211,7 @@ if (bgm.run_type == "nucleotide") {
}
}

if (bgm.run_type == "codon") {
if (bgm.type == "codon") {
//codon_data, tree, generator, genetic_code, option, initial_values
bgm.baseline_fit = estimators.FitCodonModel(
bgm.filter_names,
Expand Down Expand Up @@ -259,7 +264,7 @@ bgm.ancestral_cache = ancestral.build (bgm.baseline_fit[terms.likelihood_functio
bgm.branch_filter = utility.Filter (bgm.selected_branches[0], "_class_", "_class_ == terms.tree_attributes.test");
DeleteObject (^bgm.baseline_fit[terms.likelihood_function]);

if (bgm.run_type != "codon") {
if (bgm.type != "codon") {
bgm.counts = ancestral.ComputeSubstitutionCounts(
bgm.ancestral_cache,
bgm.branch_filter, // selected branches
Expand Down
45 changes: 26 additions & 19 deletions res/TemplateBatchFiles/GARD.bf
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ gard.analysisDescription = {terms.io.info : "GARD : Genetic Algorithms for Recom
};

// Data-type labels used by GARD; gard.dataType is compared against
// terms.gard.nucleotide / terms.gard.protein / terms.gard.codon elsewhere in this file.
// NOTE(review): every name below is assigned twice (capitalised form first,
// lower-case form second). This looks like the removed and added lines of a
// diff shown together; if both run, the later lower-case values are the ones
// left in place. Confirm against the actual GARD.bf before relying on either.
namespace terms.gard {
nucleotide = "Nucleotide";
protein = "Protein";
codon = "Codon";
nucleotide = "nucleotide";
protein = "amino-acid";
codon = "codon";
};

gard.json = { terms.json.analysis: gard.analysisDescription,
Expand All @@ -86,7 +86,7 @@ gard.json = { terms.json.analysis: gard.analysisDescription,
------------------------------------------------------------------------------*/
io.DisplayAnalysisBanner (gard.analysisDescription);

KeywordArgument ("type", "The type of data to perform screening on", "Nucleotide");
KeywordArgument ("type", "The type of data to perform screening on", "nucleotide");
KeywordArgument ("code", "Genetic code to use (for codon alignments)", "Universal", "Choose Genetic Code");
KeywordArgument ("alignment", "Sequence alignment to screen for recombination");

Expand All @@ -104,6 +104,7 @@ if (gard.dataType == terms.gard.nucleotide) {
gard.model.generator = "models.DNA.GTR.ModelDescription";
gard.alignment = alignments.ReadNucleotideDataSet ("gard.sequences", null);
DataSetFilter gard.filter = CreateFilter (gard.sequences, 1);

} else {
// TODO: implement these branches
if (gard.dataType == terms.gard.protein) {
Expand Down Expand Up @@ -341,17 +342,17 @@ io.ReportProgressMessageMD('GARD', 'multi-breakpoint', 'Performing multi breakpo

namespace gard {
// GA.1: Setup global parameters
populationSize = 30; // the GARD paper used: (numberOfMpiNodes*2 - 2) with 17 mpi nodes
if(populationSize < mpi.NodeCount()) {
populationSize = mpi.NodeCount();
populationSize = 32; // the GARD paper used: (numberOfMpiNodes*2 - 2) with 17 mpi nodes
if(populationSize < mpi.NodeCount() -1 ) {
populationSize = mpi.NodeCount() + 1;
}
mutationRate = 0.8; // the GARD paper said "15% of randomly selected bits were toggled"...
rateOfMutationsTharAreSmallShifts = 0.5; // some mutations are a new random break point; some are small shifts of the break point to an adjacent location.
maxFailedAttemptsToMakeNewModel = 7;
cAIC_diversityThreshold = 0.001;
cAIC_improvementThreshold = 0.01; // I think this was basically 0 in the gard paper
maxGenerationsAllowedWithNoNewModelsAdded = 15; // TODO: Not in the GARD paper. use 10?
maxGenerationsAllowedAtStagnent_cAIC = 100; // TODO: this is set to 100 in the GARD paper
maxGenerationsAllowedWithNoNewModelsAdded = 10; // TODO: Not in the GARD paper. use 10?
maxGenerationsAllowedAtStagnant_cAIC = 100; // TODO: this is set to 100 in the GARD paper

// GA.2: Loop over increasing number of break points
addingBreakPointsImproves_cAIC = TRUE;
Expand Down Expand Up @@ -401,7 +402,7 @@ namespace gard {

if (previousBest_cAIC - currentBest_cAIC < cAIC_improvementThreshold) {
generationsAtCurrentBest_cAIC += 1;
if (generationsAtCurrentBest_cAIC >= maxGenerationsAllowedAtStagnent_cAIC) {
if (generationsAtCurrentBest_cAIC >= maxGenerationsAllowedAtStagnant_cAIC) {
terminationCondition = TRUE;
}
} else {
Expand Down Expand Up @@ -531,7 +532,8 @@ lfunction gard.fitPartitionedModel (breakPoints, model, initialValues, saveToFil
res = estimators.FitExistingLF (&likelihoodFunction, modelObjects);

if (Type (saveToFile) == "String") {
io.SpoolLF (&likelihoodFunction, saveToFile, "");
alignment.ExportPartitionedNEXUS ("gard.filter",breakPoints,utility.Map (trees,"_t_","_t_[^'terms.trees.newick_with_lengths']"),saveToFile,^"gard.dataType" == ^"terms.gard.codon");
io.SpoolLF (&likelihoodFunction, saveToFile, "fit");
}

DeleteObject (likelihoodFunction, :shallow);
Expand Down Expand Up @@ -822,9 +824,8 @@ function gard.GA.evaluateModels (models) {
"2" : gard.baseLikelihoodInfo},
"gard.GA.storeMultiBreakPointModelResults");
}
mpi.QueueComplete (gard.queue);

}
mpi.QueueComplete (gard.queue);

}

Expand Down Expand Up @@ -904,14 +905,20 @@ lfunction gard.GA.generateNewGenerationOfModelsByMutatingModelSet(parentModels,
if(Random(0,1) < mutationRate) { // keep the break point the same
breakPoints[breakPointIndex] = parentModel[breakPointIndex];
} else {

if(Random(0,1) < rateOfMutationsThatAreSmallShifts) { // move the break point by a random small amount
distanceOfStep = random.poisson(2);
if (random.TRUE_or_FALSE()) { // randomly decide if the break point moves right or left
distanceOfStep = - distanceOfStep;
notValid = TRUE;
while (notValid) {
distanceOfStep = Min (1,random.poisson(2));
if (random.TRUE_or_FALSE()) { // randomly decide if the break point moves right or left
distanceOfStep = - distanceOfStep;
}
variableSiteMapIndexOfParentBreakPoint = utility.Find(^"gard.variableSiteMap", parentModel[breakPointIndex]);
variableSiteMapIndexOfParentBreakPoint += distanceOfStep;
if (variableSiteMapIndexOfParentBreakPoint >= 0 && variableSiteMapIndexOfParentBreakPoint < (^"gard.variableSites")) {
newBreakPoint = (^"gard.variableSiteMap")[variableSiteMapIndexOfParentBreakPoint];
notValid = FALSE;
}
}
variableSiteMapIndexOfParentBreakPoint = utility.Find(^"gard.variableSiteMap", parentModel[breakPointIndex]);
newBreakPoint = (^"gard.variableSiteMap")[variableSiteMapIndexOfParentBreakPoint + distanceOfStep];
breakPoints[breakPointIndex] = newBreakPoint;
} else { // select a completely new random break point
breakPoints[breakPointIndex] = (^"gard.variableSiteMap")[Random(0,numberOfPotentialBreakPoints)$1];
Expand Down
Loading

0 comments on commit fac1356

Please sign in to comment.