diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ecbba0a1..3dc813751 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,7 +236,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) PCL_CHECK_FOR_NEON() if(${HAVE_NEON_EXTENSIONS}) add_definitions (-D_SLKP_USE_ARM_NEON) - + set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mcpu=native -mtune=native ") endif(${HAVE_NEON_EXTENSIONS}) endif(NOT NONEON) @@ -284,6 +284,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") PCL_CHECK_FOR_NEON() if(${HAVE_NEON_EXTENSIONS}) add_definitions (-D_SLKP_USE_ARM_NEON) + set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mcpu=native -mtune=native ") + endif(${HAVE_NEON_EXTENSIONS}) endif(NOT NONEON) diff --git a/res/TemplateBatchFiles/CleanStopCodons.bf b/res/TemplateBatchFiles/CleanStopCodons.bf index 092a3364b..e4a7ad95d 100644 --- a/res/TemplateBatchFiles/CleanStopCodons.bf +++ b/res/TemplateBatchFiles/CleanStopCodons.bf @@ -94,12 +94,13 @@ for (k=0; k0 && siteInfo2[0] == 0) { - sitesWithDeletions[siteIndex] = 1; - stopCodonCount += 1; - } - if (filteringOption % 2) { - if (haveInfoAtSites[siteIndex] == 0) { - if (siteInfo1[0]+siteInfo2[0] < +stopCodonTemplate) { - haveInfoAtSites[siteIndex] = 1; + stopCodonCount = 0; + sitesWithDeletions = {1,all64.unique_sites}; + + COUNT_GAPS_IN_FREQUENCIES = 0; + + for (siteIndex = 0; siteIndex < all64.unique_sites; siteIndex += 1) { + + GetDataInfo (siteInfo, all64, sequenceIndex, siteIndex); + + + siteInfo1 = stopCodonTemplate*siteInfo; + siteInfo2 = nonStopCodonTemplate*siteInfo; + + + if (siteInfo1[0]>0 && siteInfo2[0] == 0) { + sitesWithDeletions[siteIndex] = 1; + stopCodonCount += 1; + } + + if (filteringOption % 2) { + if (haveInfoAtSites[siteIndex] == 0) { + if (siteInfo1[0]+siteInfo2[0] > 0) { + haveInfoAtSites[siteIndex] = 1; + } } } } -} - -if (stopCodonCount > 0) { - if (filterinOption == 4) { - continue; + + if (stopCodonCount > 0) { + if (filterinOption == 4) { + continue; + } + fprintf (stdout, "\nSequence ", sequenceNames[sequenceIndex], ":"); + fprintf (stdout, "\n\t", Format(stopCodonCount,8,0), " stop codons found."); + + doSomething = 1; + cleanedString = ""; + seqString = sequenceData[sequenceIndex]; + cleanedString * (Abs(seqString)+1); + + for (siteIndex = 0; siteIndex < all64.sites; siteIndex += 1) { + stopCodonCount = duplicateMapper[siteIndex]; + if (sitesWithDeletions[stopCodonCount]) { + cleanedString * replacementString; + } else { + cleanedString * seqString[3*siteIndex][3*siteIndex+2]; + } + } + cleanedString * 0; + sequenceData[sequenceIndex] = cleanedString; } - fprintf (stdout, "\nSequence ", sequenceNames[sequenceIndex], ":"); - fprintf (stdout, "\n\t", Format(stopCodonCount,8,0), " stop codons found."); - - doSomething = 1; - cleanedString = ""; - seqString = sequenceData[sequenceIndex]; - cleanedString * (Abs(seqString)+1); - - for (siteIndex = 0; siteIndex < all64.sites; siteIndex += 1) { - stopCodonCount = duplicateMapper[siteIndex]; - if (sitesWithDeletions[stopCodonCount]) { - cleanedString * replacementString; + + + + if (filteringOption >= 2) { + if (duplicateChecker[sequenceData[sequenceIndex]] == 0) { + duplicateChecker[sequenceData[sequenceIndex]] = 1; + notDuplicate[sequenceIndex] = 1; } else { - cleanedString * seqString[3*siteIndex][3*siteIndex+2]; + doSomething = 1; } - } - cleanedString * 0; - sequenceData[sequenceIndex] = cleanedString; -} - -if (filteringOption >= 2) { - if (duplicateChecker[sequenceData[sequenceIndex]] == 0) { - duplicateChecker[sequenceData[sequenceIndex]] = 1; - notDuplicate[sequenceIndex] = 1; } else { - doSomething = 1; + notDuplicate[sequenceIndex] = 1; } -} else { - notDuplicate[sequenceIndex] = 1; } } +filterSites = 0; + if (filteringOption%2) { - doSomething = doSomething || (Abs(haveInfoAtSites)", sequenceNames[sequenceIndex], "\n", sequenceData[sequenceIndex], "\n\n"); + if (filterSites) { + fprintf (LAST_FILE_PATH, ">", sequenceNames[sequenceIndex], "\n"); + for (s = 0; s < seqLen; s+=3) { + if (haveInfoAtSites[duplicateMapper[s$3]]) { + fprintf (LAST_FILE_PATH, (sequenceData[sequenceIndex])[s][s+2]); + } + } + fprintf (LAST_FILE_PATH, "\n\n"); + } else { + fprintf (LAST_FILE_PATH, ">", sequenceNames[sequenceIndex], "\n", sequenceData[sequenceIndex], "\n\n"); + } } } fprintf (LAST_FILE_PATH, CLOSE_FILE); diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index c73a0680a..606d62972 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -752,7 +752,6 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme }, "meme.store_results"); } - pattern_count_all ' ); @@ -1724,7 +1723,7 @@ lfunction meme.store_results (node, result, arguments) { if (has_psi) { result_row[has_psi] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")],^"terms.meme.fg_param_prefix"+ ^"terms.parameters.triple_hit_rate"); if (None == result_row[has_psi]) { - result_row[has_delta] = ((scalers['BG'])[^"terms.parameters.triple_hit_rate"])["scaler"]; + result_row[has_psi] = ((scalers['BG'])[^"terms.parameters.triple_hit_rate"])["scaler"]; } ^"meme.site_multihit_string" += "/" + Format (result_row[has_psi],0,2); } diff --git a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf index 5731d27bc..d9ea199da 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf @@ -545,7 +545,10 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p /* run the main loop over all unique site pattern combinations */ prime.pattern_count = 1; + + for (_pattern_, _pattern_info_; in; prime.site_patterns) { + io.ReportProgressBar("", "Working on site pattern " + (prime.pattern_count) + "/" + Abs (prime.site_patterns)); if (_pattern_info_[utility.getGlobalValue("terms.data.is_constant")]) { prime.store_results (-1,None,{"0" : "prime.site_likelihood", @@ -556,6 +559,7 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p "5" : prime.site_model_mapping }); } else { + mpi.QueueJob (prime.queue, "prime.handle_a_site", {"0" : "prime.site_likelihood", "1" : "prime.site_likelihood_property", "2" : alignments.serialize_site_filter @@ -703,6 +707,8 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa //Export (lfe, ^lf_prop); // fprintf ("/tmp/PRIME-site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe); + character_map = None; + if (^"prime.site_beta" > 0) { // fit the universal alternative @@ -781,8 +787,8 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa "OPTIMIZATION_PRECISION": 1e-3 }); - // Export (lfe, ^lf_prop); - // fprintf ("/tmp/PRIME-site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe); + //Export (lfe, ^lf_prop); + //fprintf ("/tmp/PRIME-site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe); //console.log ("\n" + ^"LF_INITIAL_GRID_MAXIMUM_VALUE" + "\nGrid best"+ ^"LF_INITIAL_GRID_MAXIMUM" + " / optimized " + results[1][0] + "\n"); Optimize (results, ^lf_prop); @@ -878,25 +884,41 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa //console.log ("\nPRIME = " + altL); //console.log ("alpha = " + ^"prime.site_alpha"); //console.log ("beta = " + ^"prime.site_beta"); - - character_map = None; + + if (^"prime.impute_states") { DataSet anc = ReconstructAncestors ( ^lf_prop, {{0}}, MARGINAL, DOLEAVES); GetString (names, anc, -1); GetDataInfo (codon_chars, ^((lfInfo["Datafilters"])[0]) , "CHARACTERS"); + GetString (names_obs, ^((lfInfo["Datafilters"])[0]), -1); + names_obs = utility.MatrixToDict (names_obs); + character_map = {}; for (seq_id, seq_name; in; names) { - character_map [seq_name] = {}; + GetDataInfo (site_map, ^((lfInfo["Datafilters"])[0]), names_obs[seq_name], 0); + character_map [seq_name] = {'observed' :{} , 'imputed' : {}, 'support' : 0}; + + + for (char, char_support; in; site_map) { + if (char_support > 1e-6) { + (((character_map [seq_name]))['observed'])[codon_chars[char]] = char_support; + } + } + for (char, char_support; in; (anc.marginal_support_matrix)[seq_id][-1]) { if (char_support > 1e-6) { - (character_map [seq_name])[codon_chars[char]] = char_support; + (((character_map [seq_name]))['imputed'])[codon_chars[char]] = char_support; + if ( (((character_map [seq_name]))['observed'])[codon_chars[char]]) { + (character_map [seq_name])['support'] += char_support; + } } } + } - } + utility.ToggleEnvVariable ("TOLERATE_CONSTRAINT_VIOLATION", None); ancestral_info = ancestral.build (lf_prop,0,FALSE); @@ -1138,7 +1160,9 @@ lfunction prime.store_results (node, result, arguments) { //console.log (result_row); utility.EnsureKey (^"prime.site_results", partition_index); - utility.EnsureKey (^"prime.imputed_leaf_states", partition_index); + if (^"prime.impute_states") { + utility.EnsureKey (^"prime.imputed_leaf_states", partition_index); + } utility.EnsureKey (^"prime.sub_mapping", partition_index); sites_mapping_to_pattern = pattern_info[utility.getGlobalValue("terms.data.sites")]; @@ -1148,7 +1172,9 @@ lfunction prime.store_results (node, result, arguments) { site_index = sites_mapping_to_pattern[i]; ((^"prime.site_results")[partition_index])[site_index] = result_row; ((^"prime.sub_mapping")[partition_index])[site_index] = sub_map; - ((^"prime.imputed_leaf_states")[partition_index])[site_index] = result[^"terms.prime_imputed_states"]; + if (^"prime.impute_states") { + ((^"prime.imputed_leaf_states")[partition_index])[site_index] = result[^"terms.prime_imputed_states"]; + } prime.report.echo (site_index, partition_index, result_row); } } diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index 035f40348..d816e2e6e 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -507,11 +507,18 @@ hyBLFunctionType GetBFFunctionType (long idx) { } //____________________________________________________________________________________ -_String const ExportBFFunction (long idx, bool recursive) { +_String const ExportBFFunction (long idx, bool recursive, _AVLList* tracker) { _StringBuffer bf (8192UL); if (IsBFFunctionIndexValid(idx)) { + + if (tracker) { + if (tracker->FindLong (idx) >= 0) { + return bf; + } + tracker->InsertNumber(idx); + } _String hbf_name = GetBFFunctionNameByIndex (idx); _ExecutionList * body = &GetBFFunctionBody(idx); @@ -567,7 +574,7 @@ _String const ExportBFFunction (long idx, bool recursive) { bf << "\n/*----- Called function '" << *a_name << "' ------*/\n" - << ExportBFFunction (FindBFFunctionName(*a_name), false) + << ExportBFFunction (FindBFFunctionName(*a_name), tracker ? recursive : false, tracker) << "\n\n"; } } diff --git a/src/core/batchlanruntime.cpp b/src/core/batchlanruntime.cpp index c14acbb68..10c537b2e 100644 --- a/src/core/batchlanruntime.cpp +++ b/src/core/batchlanruntime.cpp @@ -887,7 +887,8 @@ bool _ElementaryCommand::HandleConstructCategoryMatrix (_ExecutionList& cur _String ("WEIGHTS"),_hyphyLFConstructCategoryMatrixWeights, _String ("SITE_LOG_LIKELIHOODS"), _hyphyLFConstructCategoryMatrixSiteProbabilities, _String ("CLASSES"), _hyphyLFConstructCategoryMatrixClasses, - _String ("SHORT"), _hyphyLFConstructCategoryMatrixClasses + _String ("SHORT"), _hyphyLFConstructCategoryMatrixClasses, + _String ("PARTITIONS"), _hyphyLFConstructCategoryMatrixPartitions ); diff --git a/src/core/formula.cpp b/src/core/formula.cpp index 7be2617e6..895c91708 100644 --- a/src/core/formula.cpp +++ b/src/core/formula.cpp @@ -289,7 +289,7 @@ _StringBuffer const _Formula::toRPN (_hyFormulaStringConversionMode mode, _List* return r; } //__________________________________________________________________________________ -BaseRef _Formula::toStr (_hyFormulaStringConversionMode mode, _List* matched_names, bool drop_tree) { +BaseRef _Formula::toStr (_hyFormulaStringConversionMode mode, _List* matched_names, bool drop_tree, _StringBuffer * hbl_dependencies) { ConvertToTree(false); _StringBuffer * result = new _StringBuffer (64UL); @@ -297,8 +297,17 @@ BaseRef _Formula::toStr (_hyFormulaStringConversionMode mode, _List* matched_nam long savepd = print_digit_specification; print_digit_specification = 0L; + + if (theTree) { // there is something to do - SubtreeToString (*result, theTree, -1, matched_names, nil, mode); + if (hbl_dependencies) { + _SimpleList _tracker; + _AVLList tracker (&_tracker); + SubtreeToString (*result, theTree, -1, matched_names, nil, mode, hbl_dependencies,&tracker); + + } else { + SubtreeToString (*result, theTree, -1, matched_names, nil, mode); + } } else { if (theFormula.countitems()) { (*result) << "RPN:" << toRPN (mode, matched_names); @@ -990,7 +999,7 @@ bool _Formula::InternalSimplify (node* top_node) { //__________________________________________________________________________________ -void _Formula::SubtreeToString (_StringBuffer & result, node* top_node, int op_level, _List* match_names, _Operation* this_node_op, _hyFormulaStringConversionMode mode) { +void _Formula::SubtreeToString (_StringBuffer & result, node* top_node, int op_level, _List* match_names, _Operation* this_node_op, _hyFormulaStringConversionMode mode, _StringBuffer * hbl_dependencies, _AVLList* hbl_tracker) { if (!this_node_op) { if (!top_node) { @@ -1142,6 +1151,9 @@ void _Formula::SubtreeToString (_StringBuffer & result, node* top_node, in if (node_op_count < 0L) { // a user-defined function long func_id = this_node_op->UserFunctionID(); + if (hbl_dependencies && hbl_tracker) { + (*hbl_dependencies) << ExportBFFunction(func_id, true, hbl_tracker); + } result<< & GetBFFunctionNameByIndex(func_id); if (top_node) { result<<'('; diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp index eb0dda628..8cc9a0716 100644 --- a/src/core/global_things.cpp +++ b/src/core/global_things.cpp @@ -122,7 +122,7 @@ namespace hy_global { kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"), kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"), kErrorNumerical ("To treat numerical errors as warnings, please specify \"ENV=TOLERATE_NUMERICAL_ERRORS=1;\" as the command line argument. This often resolves the issue, which is indicative of numerical instability."), - kHyPhyVersion = _String ("2.5.62"), + kHyPhyVersion = _String ("2.5.63"), kNoneToken = "None", kNullToken = "null", diff --git a/src/core/include/batchlan.h b/src/core/include/batchlan.h index 2e4761201..c71b8098d 100644 --- a/src/core/include/batchlan.h +++ b/src/core/include/batchlan.h @@ -644,7 +644,7 @@ _ExecutionList& GetBFFunctionBody (long); _String const - ExportBFFunction (long, bool = true); + ExportBFFunction (long, bool = true, _AVLList * = nil); void ClearBFFunctionLists (long = -1L); diff --git a/src/core/include/formula.h b/src/core/include/formula.h index fd3c51177..333523351 100644 --- a/src/core/include/formula.h +++ b/src/core/include/formula.h @@ -149,7 +149,7 @@ class _Formula { void Duplicate (_Formula const *, bool deep_copy = false); void DuplicateReference (const _Formula*); BaseRef makeDynamic (void) const; - BaseRef toStr (_hyFormulaStringConversionMode mode, _List* matchNames = nil, bool = false); + BaseRef toStr (_hyFormulaStringConversionMode mode, _List* matchNames = nil, bool = false, _StringBuffer* hbl_dependencies = nil); _StringBuffer const toRPN (_hyFormulaStringConversionMode mode, _List* matchNames = nil); long ObjectClass (void); @@ -288,7 +288,7 @@ class _Formula { protected: - void SubtreeToString (_StringBuffer & result, node* top_node, int op_level, _List* match_names, _Operation* this_node_op, _hyFormulaStringConversionMode mode = kFormulaStringConversionNormal); + void SubtreeToString (_StringBuffer & result, node* top_node, int op_level, _List* match_names, _Operation* this_node_op, _hyFormulaStringConversionMode mode = kFormulaStringConversionNormal, _StringBuffer * hbl_dependencies = nil, _AVLList* hbl_tracker = nil); void ConvertToTree (bool err_msg = true); void ConvertFromTree (void); bool CheckSimpleTerm (HBLObjectRef); diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index f8cda16b8..d8ba97c6f 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -74,6 +74,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define _hyphyLFConstructCategoryMatrixWeights 2 #define _hyphyLFConstructCategoryMatrixPosteriors 3 #define _hyphyLFConstructCategoryMatrixSiteProbabilities 4 +#define _hyphyLFConstructCategoryMatrixPartitions 5 + /* likelihood seialization model */ diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index c7166d778..da672e958 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -1563,6 +1563,15 @@ _Matrix* _LikelihoodFunction::ConstructCategoryMatrix (const _SimpleList& whi PrepareToCompute(); + if (runMode == _hyphyLFConstructCategoryMatrixPartitions) { + _Matrix * block_likelihoods = new _Matrix (1, whichParts.lLength, false, true); + whichParts.Each([this, block_likelihoods] (long item, unsigned long i) -> void { + block_likelihoods->Store (0, i, ComputeBlock(item)); + }); + DoneComputing(); + return block_likelihoods; + } + if (runMode == _hyphyLFConstructCategoryMatrixConditionals || runMode == _hyphyLFConstructCategoryMatrixWeights) // just return the matrix with class weights { @@ -1608,9 +1617,8 @@ _Matrix* _LikelihoodFunction::ConstructCategoryMatrix (const _SimpleList& whi - if (runMode == _hyphyLFConstructCategoryMatrixClasses || runMode == _hyphyLFConstructCategoryMatrixSiteProbabilities) + if (runMode == _hyphyLFConstructCategoryMatrixClasses || runMode == _hyphyLFConstructCategoryMatrixSiteProbabilities) { // just return the maximum conditional likelihood category - { _Matrix *result = new _Matrix (hDim,vDim,false,true), *cache = nil; _SimpleList *scalerCache = nil; @@ -10110,6 +10118,22 @@ void _LikelihoodFunction::SerializeLF(_StringBuffer & rec, char opt, rec.AppendAnAssignmentToBuffer(&hy_env::assume_reversible, new _String(hy_env::EnvVariableGetNumber(hy_env::assume_reversible, 0.))); rec.AppendAnAssignmentToBuffer(&kUseLastResults, new _String(hy_env::EnvVariableGetNumber(kUseLastResults, 0.))); + /** 20241022 SLKP + if there's a computing template defined, we need to export HBL functions that in may be referencing + */ + + _StringBuffer exported_compute_template, template_code; + + + if (computingTemplate && templateKind != _hyphyLFComputationalTemplateNone) { + + template_code << ",\""; + template_code << (_String *)computingTemplate->toStr(kFormulaStringConversionNormal, nil, false, &exported_compute_template); + template_code << '"'; + } + + rec << exported_compute_template; + if (frequencyVectors.empty()) { rec << "LikelihoodFunction " << *lfName << " = ("; @@ -10124,6 +10148,7 @@ void _LikelihoodFunction::SerializeLF(_StringBuffer & rec, char opt, rec << *GetFilterName(redirector->get(idx)) << ',' << *LocateVar(redirectorT->list_data[idx])->GetName(); } } else { + rec << "LikelihoodFunction3 " << *lfName << " = ("; long dsID = 0; @@ -10137,11 +10162,7 @@ void _LikelihoodFunction::SerializeLF(_StringBuffer & rec, char opt, } } - if (computingTemplate && templateKind == 1) { - rec << ",\""; - rec << (_String *)computingTemplate->toStr(kFormulaStringConversionNormal); - rec << '"'; - } + rec << template_code; if (opt == _hyphyLFSerializeModeOptimize) { rec << ");\n"; @@ -10201,11 +10222,12 @@ void _LikelihoodFunction::SerializeLF(_StringBuffer & rec, char opt, //_______________________________________________________________________________________ BaseRef _LikelihoodFunction::toStr (unsigned long) { - hyFloat longOrShort, + + hyFloat longOrShort = hy_env::EnvVariableGetNumber (likefuncOutput, 2.0), value = 0.0; - checkParameter(likefuncOutput,longOrShort,2.0); - + + if (longOrShort < 4.0) { PrepareToCompute(); value = Compute(); @@ -10403,7 +10425,7 @@ BaseRef _LikelihoodFunction::toStr (unsigned long) { _Variable* v = GetIthDependentVar(i); _String value ((_String*)v->GetFormulaString(kFormulaStringConversionNormal), kAppendAnAssignmentToBufferPlain); value = value & " = " & _String ((_String*)v->toStr()); - res->AppendAnAssignmentToBuffer(GetIthDependentName(i), &value); + res->AppendAnAssignmentToBuffer(GetIthDependentName(i), &value, kAppendAnAssignmentToBufferPlain); } } else { * res << _String (value, "%15.15g");