Skip to content

Commit

Permalink
Merge pull request #1704 from veg/develop
Browse files Browse the repository at this point in the history
2.5.61
  • Loading branch information
spond authored May 1, 2024
2 parents 6282dbb + b4f0263 commit c668448
Show file tree
Hide file tree
Showing 34 changed files with 730 additions and 253 deletions.
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ option(NOAVX OFF)
option(NOSSE4 OFF)
option(NONEON OFF)
option(NOZLIB OFF)
option(NOBLAS OFF)

if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(NOAVX ON)
Expand Down Expand Up @@ -427,7 +428,7 @@ include_directories(
contrib/SQLite-3.8.2 # SQLite
)

if (${BLAS_FOUND})
if (${BLAS_FOUND} AND NOT NOBLAS)
add_definitions (-D_SLKP_USE_APPLE_BLAS)
set(DEFAULT_LIBRARIES "${DEFAULT_LIBRARIES}")
endif()
Expand All @@ -451,7 +452,7 @@ else(${OPENMP_FOUND})
target_link_libraries(hyphy PRIVATE ${DEFAULT_LIBRARIES})
endif(${OPENMP_FOUND})

if(${BLAS_FOUND})
if(${BLAS_FOUND} AND NOT NOBLAS)
target_link_libraries(hyphy PRIVATE ${BLAS_LIBRARIES})
endif ()

Expand Down Expand Up @@ -490,7 +491,7 @@ if(${MPI_FOUND})
target_link_libraries(HYPHYMPI ${DEFAULT_LIBRARIES} ${MPI_LIBRARIES})
endif(${OPENMP_FOUND})

if(${BLAS_FOUND})
if(${BLAS_FOUND} AND NOT NOBLAS)
target_link_libraries(HYPHYMPI ${BLAS_LIBRARIES})
endif ()

Expand Down Expand Up @@ -598,6 +599,9 @@ target_link_libraries(HYPHYDEBUG ${DEFAULT_LIBRARIES})

add_custom_target(DEBUG DEPENDS HYPHYDEBUG)

if(${BLAS_FOUND})
target_link_libraries(HYPHYDEBUG ${BLAS_LIBRARIES})
endif ()

set_target_properties(
HYPHYDEBUG
Expand Down
52 changes: 44 additions & 8 deletions res/TemplateBatchFiles/MSS-joint-fitter.bf
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ io.ReportProgressMessageMD("mss", "data" , "* Loaded a list with **" + mss_selec
KeywordArgument ("code", "Which genetic code should be used", "Universal");
mss.genetic_code = alignments.LoadGeneticCode (None);

KeywordArgument ("omega", "How should alignment-level omega be treated?", "Fix");

mss.omega_option = io.SelectAnOption ({
{"Fix", "[Default] Fix omega estimates at those obtained with the standard MG94xREV model"}
{"Fit", "Fit omega (per alignment) together with the MSS model"}
}, "How should alignment-level omega be treated?");


mss.file_records = {};
mss.file_info = {};
Expand Down Expand Up @@ -160,10 +167,18 @@ mss_selector.header = {
{"TreeLength"},
{"Log(L)"}
};

KeywordArgument ("model", "Substitution model to use","SynREV");

mss.model_option = io.SelectAnOption ({
{"SynREV", "[Default] SynREV model (one rate per aa)"}
{"SynREVCodon", "SynREVCodon (one rate per codon pair)"}
}, "Which model?");



ExecuteCommands ( "mss.codon_classes = model.codon.MSS.prompt_and_define (terms.global, mss.genetic_code[terms.code])",
{"--mss-type" : "SynREV"}
{"--mss-type" : mss.model_option}
);

io.ReportProgressMessageMD("mss", "fit0" , "Individual file statistics and simple model fits\n");
Expand Down Expand Up @@ -219,6 +234,7 @@ io.ReportProgressMessageMD("mss", "fit0done", "**LogL = " + mss.baselineLL + "**
mss.json ["baseline"] = {terms.json.log_likelihood : mss.baselineLL, terms.json.AICc : mss.baseline_AIC};

function mss.MSS_generator (type) {
mss.codon_classes [utility.getGlobalValue("terms.model.MSS.normalize")] = TRUE;
model = Call ("models.codon.MSS.ModelDescription",type, mss.genetic_code[terms.code], mss.codon_classes);
return model;
}
Expand All @@ -234,10 +250,13 @@ mss.lf_id = "MSS.COMPOSITE.LF";
mss.model_map_overall = {};


//#profile START;
mss.constraints = {};

for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
mss.prefix = mss.file_prefix [mss_selector.path];

console.log (">" + mss.counter + " / " + mss_selector.path);

mss.model_name = "`mss.prefix`.model";
mss.tree_name = "`mss.prefix`.tree";
Expand Down Expand Up @@ -271,14 +290,17 @@ for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
(initial_values [terms.branch_length])[0],
terms.model.branch_length_constrain,
TRUE);

models.FixParameterSetRegExp (terms.parameters.omega_ratio, ^(mss.model_name));

if (mss.omega_option == "Fix") {
models.FixParameterSetRegExp (terms.parameters.omega_ratio, ^(mss.model_name));
mss.constraint_count += 1;
}
models.FixParameterSetRegExp (terms.nucleotideRatePrefix, ^(mss.model_name));


if (mss.counter == 0) {
mss.reference_set = ^(mss.model_name);
mss.mss_rate_list = model.GetParameters_RegExp( ^(mss.model_name),"^" + terms.parameters.synonymous_rate + "");
mss.mss_rate_list = model.GetParameters_RegExp( ^(mss.model_name),"^" + terms.MeanScaler(""));
mss.model_dimension = utility.Array1D (mss.mss_rate_list);
mss.scaler_prefix = 'mss.scaler_parameter_';
mss.scaler_mapping = {};
Expand All @@ -290,15 +312,18 @@ for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
for (mss.i2 = 0; mss.i2 < mss.model_dimension; mss.i2 += 1) {
parameters.DeclareGlobal (mss.scaler_prefix + mss.i2, None);
}

mss.json ["mapping"] = mss.scaler_index;
} else {
//utility.getGlobalValue ("terms.parameters.synonymous_rate");
models.BindGlobalParameters ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, terms.parameters.synonymous_rate + terms.model.MSS.syn_rate_between);
models.BindGlobalParameters ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, terms.parameters.synonymous_rate + terms.model.MSS.syn_rate_within);
}
mss.constraints * models.BindGlobalParametersDeferred ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, "^" + terms.MeanScaler(""));
}
}

parameters.BatchApplyConstraints (mss.constraints);

//#profile _hyphy_profile_dump;
//utility.FinishAndPrintProfile (_hyphy_profile_dump);
utility.SetEnvVariable ("AUTO_PARALLELIZE_OPTIMIZE", 1);

utility.ExecuteInGlobalNamespace ("LikelihoodFunction `mss.lf_id` = (`&mss.lf_components`)");

Expand All @@ -317,6 +342,17 @@ io.ReportProgressMessageMD("mss", "fitAlldone", "**LogL = " + res[1][0] + "**"
mss.json["joint-model"] = estimators.ExtractMLEsOptions (mss.lf_id, mss.model_map_overall, {terms.globals_only : TRUE});
//estimators.RemoveBranchLengthConstraints (mss.json["joint-model"]);

function pfilter (key, value) {
if (key[0] != "[" || (key $ "ratio")[0] >= 0) {
mss.filtered [key] = value;
}
}

mss.filtered = {};
((mss.json["joint-model"])[terms.global])["pfilter"][""];
(mss.json ["joint-model"])[terms.global] = mss.filtered;


io.SpoolJSON (mss.json, mss.json[terms.data.file]);

KeywordArgument ("save-fit", "Write the resulting model fit file to this (large!) file", "/dev/null");
Expand Down
85 changes: 51 additions & 34 deletions res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ Version 4.5 adds an 'error absorption' component [experimental]
};


console.log (busted.analysis_description);
io.DisplayAnalysisBanner (busted.analysis_description);

busted.FG = "Test";
Expand Down Expand Up @@ -536,8 +535,7 @@ if (busted.do_srv) {
//PARAMETER_GROUPING + busted.srv_distribution["weights"];
PARAMETER_GROUPING + utility.Concat (busted.srv_distribution["rates"],busted.srv_distribution["weights"]);

busted.init_grid_setup (busted.srv_distribution, FALSE);

busted.init_grid_setup (busted.srv_distribution, FALSE);
}

if (busted.do_srv_hmm) {
Expand All @@ -553,33 +551,29 @@ busted.global_scaler_list = {};

for (busted.partition_index = 0; busted.partition_index < busted.partition_count; busted.partition_index += 1) {
busted.global_scaler_list [busted.partition_index] = "busted.bl.scaler_" + busted.partition_index;
parameters.DeclareGlobalWithRanges (busted.global_scaler_list [busted.partition_index], 3, 0, 1000);
busted.initial_ranges [busted.global_scaler_list [busted.partition_index]] = {
terms.lower_bound : 2.0,
terms.upper_bound : 4.0
};
parameters.DeclareGlobalWithRanges (busted.global_scaler_list [busted.partition_index], 1, 0, 1000);
// busted.initial_ranges [busted.global_scaler_list [busted.partition_index]] = {
// terms.lower_bound : 1.0,
// terms.upper_bound : 3.0
// };
}




busted.initial.test_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"))["0"])[terms.fit.MLE];
busted.initial_grid = estimators.LHC (busted.initial_ranges,busted.initial_grid.N);

//estimators.CreateInitialGrid (busted.initial_grid, busted.initial_grid.N, busted.initial_grid_presets);

busted.initial_grid = utility.Map (busted.initial_grid, "_v_",
'busted._renormalize_with_weights (_v_, "busted.distribution", busted.initial.test_mean, busted.error_sink)'
);

if (busted.has_background) { //GDD rate category
if (busted.has_background) { //has background
busted.initial.background_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+background.+"))["0"])[terms.fit.MLE];

busted.initial_grid = utility.Map (busted.initial_grid, "_v_",
'busted._renormalize (_v_, "busted.background_distribution", busted.initial.background_mean, busted.error_sink)'
'busted._renormalize_with_weights (_v_, "busted.background_distribution", busted.initial.background_mean, busted.error_sink)'
);
}


busted.model_map = {};

for (busted.partition_index = 0; busted.partition_index < busted.partition_count; busted.partition_index += 1) {
Expand All @@ -604,7 +598,7 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al
*/


busted.nm.precision = -0.00025*busted.final_partitioned_mg_results[terms.fit.log_likelihood];
busted.nm.precision = Max (-0.00001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5);

debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");

Expand All @@ -615,7 +609,8 @@ if (Type (debug.checkpoint) != "String") {


busted.tmp_fixed = models.FixParameterSetRegExpFromReference (terms.nucleotideRatePrefix,busted.test.bsrel_model, busted.final_partitioned_mg_results[terms.global]);



busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map,
{
terms.run_options.retain_lf_object: TRUE,
Expand All @@ -634,16 +629,20 @@ if (Type (debug.checkpoint) != "String") {
}
);




parameters.RemoveConstraint (busted.tmp_fixed );
//console.log (busted.tmp_fixed);
PARAMETER_GROUPING + Columns (busted.tmp_fixed);

//PRODUCE_OPTIMIZATION_LOG = 1;

busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, {
"retain-lf-object": TRUE,
terms.run_options.optimization_settings :
{
"OPTIMIZATION_METHOD" : "coordinate-wise",
"OPTIMIZATION_METHOD" : "hybrid",
//"OPTIMIZATION_PRECISION" : 1.
}

Expand Down Expand Up @@ -1260,57 +1259,75 @@ lfunction busted._renormalize (v, distro, mean, skip_first) {
//------------------------------------------------------------------------------

lfunction busted._renormalize_with_weights (v, distro, mean, skip_first) {

parameters.SetValues (v);
m = parameters.GetStickBreakingDistribution (^distro);
d = Rows (m);


mean = Max (mean, 1e-3);

if (skip_first) {
m0 = m[0][0]*m[0][1];
} else {
m0 = 0;
}


over_one = m[d-1][0] * m[d-1][1];

if (over_one >= mean*0.95) {
//console.log ("OVERAGE");
new_weight = mean * Random (0.9, 0.95) / m[d-1][0];
diff = (m[d-1][1] - new_weight)/(d-1);
diff = (m[d-1][1] - new_weight)/(d-1-(skip_first != 0));
for (k = (skip_first != 0); k < d-1; k += 1) {
m[k][1] += diff;
}
m[d-1][1] = new_weight;
}


over_one = m[d-1][0] * m[d-1][1];
under_one = (+(m[-1][0] $ m[-1][1])) / (1-m[d-1][1]); // current mean

under_one = (+(m[-1][0] $ m[-1][1]) - m0) / (1-m[d-1][1]); // current mean

for (i = (skip_first != 0); i < d-1; i+=1) {
m[i][0] = m[i][0] * mean / under_one;
}

under_one = +(m[-1][0] $ m[-1][1]);

if (skip_first) {
m_rest = m [{{1,0}}][{{d-1,1}}];
under_one = +(m_rest[-1][0] $ m_rest[-1][1]);
} else {
under_one = +(m[-1][0] $ m[-1][1]);
}


for (i = (skip_first != 0); i < d; i+=1) {
m[i][0] = m[i][0] * mean / under_one;
}

m = m%0;
wts = parameters.SetStickBreakingDistributionWeigths (m[-1][1]);


if (skip_first) {
m_rest = m [{{1,0}}][{{d-1,1}}];
m_rest = m_rest % 0;
for (i = 1; i < d; i+=1) {
m[i][0] = m_rest[i-1][0];
m[i][1] = m_rest[i-1][1];
}
} else {
m = m%0;
}

//console.log (v);

wts = parameters.SetStickBreakingDistributionWeigths (m[-1][1]);


for (i = (skip_first != 0); i < d; i+=1) {
(v[((^distro)["rates"])[i]])[^"terms.fit.MLE"] = m[i][0];
}
for (i = (skip_first != 0); i < d-1; i+=1) {
(v[((^distro)["weights"])[i]])[^"terms.fit.MLE"] = wts[i];
}

//console.log (v);

//assert (0);
return v;

}
Expand Down Expand Up @@ -1383,7 +1400,7 @@ function busted.init_grid_setup (omega_distro, error_sink) {
busted.initial_grid [_name_] = {{100,500,1000,5000}};
busted.initial_ranges [_name_] = {
terms.lower_bound : 100,
terms.upper_bound : 10000
terms.upper_bound : 1000
};
} else {
busted.initial_ranges [_name_] = {
Expand Down Expand Up @@ -1416,12 +1433,12 @@ function busted.init_grid_setup (omega_distro, error_sink) {
if (error_sink && _index_ == 0) {
busted.initial_grid [_name_] = {
{
0, 0.001, 0.005, 0.1
0, 0.001, 0.0025, 0.025
}
};
busted.initial_ranges [_name_] = {
terms.lower_bound : 0,
terms.upper_bound : 0.01,
terms.upper_bound : 0.005,
};
} else {
busted.initial_ranges [_name_] = {
Expand Down
Loading

0 comments on commit c668448

Please sign in to comment.