diff --git a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
index e71e0a2f6..3b67bbced 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
@@ -10,6 +10,7 @@ LoadFunctionLibrary("libv3/tasks/estimators.bf");
 LoadFunctionLibrary("libv3/tasks/alignments.bf");
 LoadFunctionLibrary("libv3/tasks/mpi.bf");
 LoadFunctionLibrary("libv3/tasks/trees.bf");
+LoadFunctionLibrary("libv3/models/rate_variation.bf");
 LoadFunctionLibrary("modules/io_functions.ibf");
 LoadFunctionLibrary("modules/selection_lib.ibf");
@@ -38,6 +39,10 @@ absrel.rate_classes = "Rate classes";
 absrel.per_branch_omega = "Per-branch omega";
 absrel.per_branch_delta = "Per-branch delta";
 absrel.per_branch_psi = "Per-branch psi";
+absrel.synonymous_rate_classes = 3;
+absrel.SRV = "Synonymous site-to-site rates";
+absrel.json.srv_posteriors = "Synonymous site-posteriors";
+
 absrel.display_orders = {terms.original_name: -1,
                          terms.json.nucleotide_gtr: 0,
@@ -64,8 +69,8 @@ absrel.analysis_description = {terms.io.info : "aBSREL (Adaptive branch-site ran
                                uses an adaptive random effects branch-site model framework to test whether each branch has evolved under positive selection, using a procedure which infers an optimal number of rate categories per branch.",
-                               terms.io.version : "2.2",
-                               terms.io.reference : "Less Is More: An Adaptive Branch-Site Random Effects Model for Efficient Detection of Episodic Diversifying Selection (2015). Mol Biol Evol 32 (5): 1342-1353. v2.2 adds support for multiple-hit models",
+                               terms.io.version : "2.3",
+                               terms.io.reference : "Less Is More: An Adaptive Branch-Site Random Effects Model for Efficient Detection of Episodic Diversifying Selection (2015). Mol Biol Evol 32 (5): 1342-1353. v2.2 adds support for multiple-hit models. v2.3 adds support for SRV",
                                terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
                                terms.io.contact : "spond@temple.edu",
                                terms.io.requirements : "in-frame codon alignment and a phylogenetic tree"
@@ -114,6 +119,29 @@ absrel.multi_hit = io.SelectAnOption ({
                                        }, "Include support for multiple nucleotide substitutions");
+KeywordArgument ("srv", "Include synonymous rate variation", "No");
+
+absrel.srv = io.SelectAnOption (
+    {
+        "Yes" : "Allow synonymous substitution rates to vary from site to site (but not from branch to branch)",
+        "No"  : "Synonymous substitution rates are constant across sites. This is the 'classic' behavior, i.e. the original published test"
+    },
+    "Synonymous rate variation"
+);
+
+absrel.do_srv = FALSE;
+
+if (absrel.srv == "Yes") {
+    absrel.do_srv = TRUE;
+}
+
+if (absrel.do_srv) {
+    KeywordArgument ("syn-rates", "The number of alpha rate classes to include in the model [1-10, default 3]", absrel.synonymous_rate_classes);
+    absrel.synonymous_rate_classes = io.PromptUser ("The number of synonymous (alpha) rate classes to include in the model", absrel.synonymous_rate_classes, 1, 10, TRUE);
+}
+
+
+
 KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'ABSREL.json')", absrel.codon_data_info [terms.json.json]);
 absrel.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");
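Note on the options added above: when srv is set to Yes, the GDD wired in below (via rate_variation.types.GDD.factory) is a general discrete distribution over synonymous rates — K rate classes with freely estimated rates and weights, conventionally rescaled so the mixture mean is 1, so that site-to-site synonymous variation does not absorb branch lengths. A minimal illustrative sketch of that structure, with hypothetical names rather than the rate_variation.bf implementation:

    // Sketch only: a K-class discrete rate distribution with a mean-one constraint.
    #include <array>
    #include <cstddef>

    struct DiscreteRateDistribution {
        std::array<double, 3> rates;    // one synonymous rate per class (syn-rates = 3 by default)
        std::array<double, 3> weights;  // class probabilities; assumed to sum to 1

        // Rescale the rates so the weighted mean is 1, keeping branch lengths identifiable.
        void normalize_mean_to_one () {
            double mean = 0.0;
            for (std::size_t k = 0; k < rates.size(); k++) mean += rates[k] * weights[k];
            for (std::size_t k = 0; k < rates.size(); k++) rates[k] /= mean;
        }
    };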
@@ -170,6 +198,23 @@ if (absrel.multi_hit == "Double") {
     }
 }
+if (absrel.do_srv) {
+    absrel.model_generator_base = absrel.model_generator ;
+
+    lfunction absrel.model.with.GDD (type, code) {
+        def = Call (^"absrel.model_generator_base", type, code);
+        options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("absrel.synonymous_rate_classes"),
+                   utility.getGlobalValue("terms._namespace") : "absrel._shared_srv"};
+
+        def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options);
+        return def;
+    }
+
+    absrel.model_generator = "absrel.model.with.GDD";
+}
+
+absrel.distribution_for_json = {};
+
 absrel.base.results = estimators.FitCodonModel (absrel.filter_names, absrel.trees, absrel.model_generator, absrel.codon_data_info [terms.code], {
     terms.run_options.model_type: terms.local,
     terms.run_options.retain_lf_object: TRUE,
@@ -177,8 +222,16 @@ absrel.base.results = estimators.FitCodonModel (absrel.filter_names, absrel.tree
 }, absrel.gtr_results);
-io.ReportProgressMessageMD("absrel", "base", "* " + selection.io.report_fit (absrel.base.results, 0, absrel.codon_data_info[terms.data.sample_size]));
+absrel.MG94.model = (absrel.base.results[terms.model])["VALUEINDEXORDER"][0];
+absrel.MG94.id = absrel.MG94.model [terms.id];
+
+absrel.model_object_map = {
+    absrel.MG94.id : absrel.MG94.model
+};
+
+io.ReportProgressMessageMD("absrel", "base", "* " + selection.io.report_fit (absrel.base.results, 0, absrel.codon_data_info[terms.data.sample_size]));
+absrel._report_srv (absrel.base.results, "base", None);
 absrel.baseline.branch_lengths = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "selection.io.branch.length");
 absrel.baseline.omegas = selection.io.extract_branch_info((absrel.base.results[terms.branch_length])[0], "absrel.local.omega");
@@ -208,7 +261,6 @@ if (absrel.multi_hit == "Double+Triple") {
 }
-
 selection.io.stopTimer (absrel.json [terms.json.timers], "Baseline model fitting");
 // TODO -- there's gotta be a better way to do this
@@ -227,12 +279,11 @@ for (absrel.i = absrel.branch_count - 1; absrel.i >= 0; absrel.i = absrel.i - 1
     absrel.names_sorted_by_length [absrel.branch_count - 1 - absrel.i] = absrel.bnames [absrel.sorted_branch_lengths[absrel.i][1]];
 }
-absrel.distribution_for_json = {absrel.per_branch_omega :
+absrel.distribution_for_json[absrel.per_branch_omega] = {terms.math.mean : absrel.omega_stats[terms.math.mean],
                                 terms.math.median : absrel.omega_stats[terms.math.median],
                                 terms.math._2.5 : absrel.omega_stats[terms.math._2.5],
-                                terms.math._97.5 : absrel.omega_stats[terms.math._97.5]}
-                                };
+                                terms.math._97.5 : absrel.omega_stats[terms.math._97.5]};
 if (absrel.multi_hit == "Double" || absrel.multi_hit == "Double+Triple") {
@@ -282,7 +333,9 @@ selection.io.json_store_branch_attribute(absrel.json, absrel.baseline_omega_rati
 absrel.model_defintions = {};
 absrel.likelihood_function_id = absrel.base.results [terms.likelihood_function];
-absrel.constrain_everything (absrel.likelihood_function_id);
+
+absrel.stash = estimators.TakeLFStateSnapshot (absrel.likelihood_function_id);
+
 absrel.tree_id = absrel.get_tree_name (absrel.likelihood_function_id);
 absrel.model_id = absrel.get_model_id (absrel.likelihood_function_id);
 absrel.MG94.model = (absrel.base.results[terms.model])[(utility.Keys (absrel.base.results[terms.model]))[0]];
@@ -292,6 +345,15 @@ absrel.temp - terms.nucleotideRate("A","G");
 absrel.full_model_parameters = {};
 utility.AddToSet (absrel.full_model_parameters, absrel.temp);
+if (absrel.do_srv) {
+    absrel.temp = model.GetParameters_RegExp (absrel.MG94.model, "GDD rate category ");
+    utility.AddToSet (absrel.full_model_parameters, absrel.temp);
+    absrel.temp = model.GetParameters_RegExp (absrel.MG94.model, terms.mixture.mixture_aux_weight + " for GDD category ");
+    utility.AddToSet (absrel.full_model_parameters, absrel.temp);
+    //console.log (absrel.full_model_parameters);
+}
+
+
 selection.io.startTimer (absrel.json [terms.json.timers], "Complexity analysis", 3);
 absrel.model_object_map = {
@@ -313,6 +375,9 @@ for (absrel.i = 2; absrel.i <= absrel.max_rate_classes; absrel.i += 1) {
     models.BindGlobalParameters ({"1" : absrel.model_defintions [absrel.i], "0" : absrel.MG94.model}, terms.nucleotideRate("[ACGT]","[ACGT]"));
 }
+estimators.RestoreLFStateFromSnapshot (absrel.likelihood_function_id, absrel.stash);
+absrel.constrain_everything (absrel.likelihood_function_id);
+
 io.ReportProgressMessageMD ("absrel", "complexity", "Determining the optimal number of rate classes per branch using a step up procedure");
@@ -393,6 +458,7 @@ for (absrel.branch_id = 0; absrel.branch_id < absrel.branch_count; absrel.branch
     //absrel.initial_guess = absrel.SetBranchConstraints (absrel.model_defintions [absrel.current_rate_count], absrel.tree_id, absrel.current_branch);
+
     Optimize (absrel.stepup.mles, ^absrel.likelihood_function_id
               , {
                   "OPTIMIZATION_METHOD" : "nedler-mead",
@@ -488,6 +554,9 @@ io.ReportProgressMessageMD("absrel", "Full adaptive model", "* " + selection.io.
 selection.io.stopTimer (absrel.json [terms.json.timers], "Full adaptive model fitting");
+absrel._report_srv (absrel.full_model.fit, "Full adaptive model", absrel.likelihood_function_id);
+
+
 selection.io.json_store_branch_attribute(absrel.json, absrel.full_adaptive_model, terms.branch_length, absrel.display_orders[absrel.full_adaptive_model],
                                          0,
                                          selection.io.extract_branch_info((absrel.full_model.fit[terms.branch_length])[0], "selection.io.branch.length"));
@@ -894,13 +963,16 @@ lfunction absrel.get_model_id (lf_id) {
     return (info["Models"])[0];
 }
-function absrel.constrain_everything (lf_id) {
+lfunction absrel.constrain_everything (lf_id) {
     GetString (absrel.constrain_everything.info, ^lf_id, -1);
-    utility.ForEach (absrel.constrain_everything.info [utility.getGlobalValue("terms.parameters.global_independent")], "_value_",
-                     "parameters.SetConstraint (_value_, Eval (_value_), terms.global)");
-    utility.ForEach (absrel.constrain_everything.info [utility.getGlobalValue("terms.parameters.local_independent")], "_value_",
-                     "parameters.SetConstraint (_value_, Eval (_value_), '')");
+    for (k; in; absrel.constrain_everything.info [utility.getGlobalValue("terms.parameters.global_independent")]) {
+        //console.log (^k);
+        parameters.SetConstraint (k, ^k, ^"terms.global");
+    }
+    for (k; in; absrel.constrain_everything.info [utility.getGlobalValue("terms.parameters.local_independent")]) {
+        parameters.SetConstraint (k, ^k, "");
+    }
 }
 lfunction absrel.local.omega(branch_info) {
@@ -928,9 +1000,16 @@ lfunction absrel.BS_REL.ModelDescription (type, code, components) {
         model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.codon.BS_REL._DefineQ.TH";
     } else {
         model [utility.getGlobalValue("terms.model.defineQ")] = "absrel.BS_REL._DefineQ";
-        }
     }
+
+    if (^"absrel.do_srv") {
+        options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("absrel.synonymous_rate_classes"),
+                   utility.getGlobalValue("terms._namespace") : "absrel._shared_srv"};
+
+        model [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options);
+    }
+
     return model;
 }
@@ -1106,3 +1185,31 @@ lfunction absrel.BS_REL._DefineQ(bs_rel, namespace) {
     parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", "");
     return bs_rel;
 }
+
+//------------------------------------
+
+function absrel._report_srv (absrel_model_fit, tag, lf_id) {
+
+
+    if (absrel.do_srv) {
+        if (None == lf_id) {
+            lf_id = absrel_model_fit[terms.likelihood_function];
+        }
+        absrel.srv_info = Transpose((rate_variation.extract_category_information((absrel.model_object_map[ absrel.MG94.model [terms.id]])))["VALUEINDEXORDER"][0])%0;
+
+        io.ReportProgressMessageMD("absrel", tag, "* The following rate distribution for site-to-site **synonymous** rate variation was inferred");
+        selection.io.report_distribution (absrel.srv_info);
+
+        absrel.distribution_for_json [absrel.SRV] = (utility.Map (utility.Range (absrel.synonymous_rate_classes, 0, 1),
+                                                                  "_index_",
+                                                                  "{terms.json.rate : absrel.srv_info [_index_][0],
+                                                                    terms.json.proportion : absrel.srv_info [_index_][1]}"));
+
+        ConstructCategoryMatrix (absrel.cmx, ^lf_id);
+        ConstructCategoryMatrix (absrel.cmx_weights, ^lf_id, WEIGHTS);
+        absrel.cmx_weighted = (absrel.cmx_weights[-1][0]) $ absrel.cmx; // taking the 1st column fixes a bug with multiple partitions
+        absrel.column_weights = {1, Rows (absrel.cmx_weights)}["1"] * absrel.cmx_weighted;
+        absrel.column_weights = absrel.column_weights["1/_MATRIX_ELEMENT_VALUE_"];
+        (absrel.json [absrel.json.srv_posteriors]) = absrel.cmx_weighted $ absrel.column_weights;
+    }
+}
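The closing lines of absrel._report_srv turn the category-conditioned site likelihoods from ConstructCategoryMatrix into per-site posterior probabilities of each synonymous rate class: weight each class's conditional likelihood by its prior weight, then normalize each site's column to sum to one. A C++ sketch of the same arithmetic, under the assumption that cond_like[k][s] holds the conditional likelihood of site s under class k (all names are hypothetical):

    #include <cstddef>
    #include <vector>

    // posterior[k][s] = w_k * L(s | k) / sum_j w_j * L(s | j)
    std::vector<std::vector<double>> category_posteriors (
            const std::vector<std::vector<double>>& cond_like,   // [class][site]
            const std::vector<double>&              weights) {   // [class], summing to 1
        const std::size_t K = cond_like.size (), S = cond_like[0].size ();
        std::vector<std::vector<double>> posterior (K, std::vector<double> (S, 0.0));
        for (std::size_t s = 0; s < S; s++) {
            double norm = 0.0;
            for (std::size_t k = 0; k < K; k++) norm += weights[k] * cond_like[k][s];
            for (std::size_t k = 0; k < K; k++) posterior[k][s] = weights[k] * cond_like[k][s] / norm;
        }
        return posterior;
    }

This is also the role of taking only the first column of the WEIGHTS matrix above: per the in-line comment, with multiple partitions the weight matrix presumably carries one column per partition, and using column 0 keeps the dimensions of the weighted product consistent.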
diff --git a/res/TemplateBatchFiles/libv3/models/rate_variation.bf b/res/TemplateBatchFiles/libv3/models/rate_variation.bf
index acc6dcee4..aa7ca96a7 100644
--- a/res/TemplateBatchFiles/libv3/models/rate_variation.bf
+++ b/res/TemplateBatchFiles/libv3/models/rate_variation.bf
@@ -249,12 +249,11 @@ lfunction rate_variation_define_gamma_inv (options, namespace) {
 lfunction rate_variation.extract_category_information (model) {
     if (utility.Has (model[^"terms.parameters"], ^"terms.category", "AssociativeList")) {
         info = {};
-        utility.ForEachPair ((model[^"terms.parameters"])[^"terms.category"], "_k_", "_v_",
-        "
-            GetInformation (`&v_info`, ^_k_);
-            (`&info`)[_k_] = `&v_info`;
+        for (_k_, _v_; in; (model[^"terms.parameters"])[^"terms.category"]) {
+            GetInformation (v_info, ^_k_);
+            info[_k_] = v_info;
-        ");
+        }
         return info;
     }
     return None;
diff --git a/src/core/include/matrix.h b/src/core/include/matrix.h
index b5b380018..2c6891852 100644
--- a/src/core/include/matrix.h
+++ b/src/core/include/matrix.h
@@ -738,4 +738,10 @@ HBLObjectRef _returnMatrixOrUseCache (long nrow, long ncol, long type, bool is_s
 }
 #endif
+#ifdef _SLKP_USE_ARM_NEON
+    inline double _neon_sum_2 (float64x2_t const & x) {
+        return vgetq_lane_f64 (x,0) + vgetq_lane_f64 (x,1);
+    }
+#endif
+
 #endif
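_neon_sum_2 moves into matrix.h so that tree.cpp (below) and tree_evaluator.cpp (which previously held the only copy, deleted further down) can share a single horizontal-add helper. On AArch64 the same reduction can also be written with the across-vector add intrinsic; shown here only as an equivalent spelling, not as a proposed change to the patch:

    #include <arm_neon.h>   // AArch64 only

    // Adds the two double lanes of a float64x2_t; same result as _neon_sum_2.
    inline double neon_sum_2_alt (float64x2_t x) {
        return vaddvq_f64 (x);
    }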
diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp
index 68bdf2a21..8ff0869bd 100644
--- a/src/core/likefunc.cpp
+++ b/src/core/likefunc.cpp
@@ -8797,7 +8797,7 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c
                         inc,
                         conditionalTerminalNodeStateFlag[index],
                         ssf,
-                        (_GrowingVector*)conditionalTerminalNodeLikelihoodCaches(index),
+                        (_Vector*)conditionalTerminalNodeLikelihoodCaches(index),
                         overallScalingFactors.list_data[index],
                         0,
                         df->GetPatternCount(),
diff --git a/src/core/tree.cpp b/src/core/tree.cpp
index a409e9842..8afd29ecc 100644
--- a/src/core/tree.cpp
+++ b/src/core/tree.cpp
@@ -3059,11 +3059,15 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
             storageVec [direct_index] = accumulator;
         } else {
             if (accumulator <= 0.0) {
-                //fprintf (stderr, "ZERO TERM AT SITE %ld (direct %ld) EVAL %ld\n",siteID,direct_index, likeFuncEvalCallCount);
-                /*for (long s = 0; s < theFilter->NumberSpecies(); s++) {
+
+
+                /*fprintf (stderr, "ZERO TERM AT SITE %ld (direct %ld) EVAL %ld\n",siteID,direct_index, likeFuncEvalCallCount);
+                for (long s = 0; s < theFilter->NumberSpecies(); s++) {
                     fprintf (stderr, "%s", theFilter->RetrieveState(direct_index, s).get_str());
                 }
                 fprintf (stderr, "\n");*/
+
+
                 throw (1L+direct_index);
             }
@@ -3075,8 +3079,8 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
                 term = log(accumulator) - correction;
             }
-            /*if (likeFuncEvalCallCount == 15098) {
-                fprintf (stderr, "CACHE, %ld, %ld, %20.15lg, %20.15lg, %20.15lg\n", likeFuncEvalCallCount, siteID, accumulator, correction, term);
+            /*if (likeFuncEvalCallCount == 643) {
+                fprintf (stderr, "CACHE, %ld, %ld, %20.15lg, %20.15lg, %20.15lg, %20.15lg\n", likeFuncEvalCallCount, siteID, accumulator, correction, term, result);
             }*/
             hyFloat temp_sum = result + term;
@@ -3131,6 +3135,15 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
             };
 #endif
+#ifdef _SLKP_USE_ARM_NEON
+            float64x2x2_t tmatrix_transpose [4] = {
+                (float64x2x2_t) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]},
+                (float64x2x2_t) {transitionMatrix[1],transitionMatrix[5],transitionMatrix[9],transitionMatrix[13]},
+                (float64x2x2_t) {transitionMatrix[2],transitionMatrix[6],transitionMatrix[10],transitionMatrix[14]},
+                (float64x2x2_t) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]}
+            };
+#endif
+
             for (unsigned long siteID = siteFrom; siteID < siteTo; siteID++) {
                 hyFloat accumulator = 0.;
 #ifdef _SLKP_USE_AVX_INTRINSICS
@@ -3144,6 +3157,45 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
                            s23 = _mm256_add_pd ( _mm256_mul_pd (b_cond2, tmatrix_transpose[2]), _mm256_mul_pd (b_cond3, tmatrix_transpose[3]));
                 accumulator = _avx_sum_4(_mm256_mul_pd (_mm256_mul_pd (root_c, probs), _mm256_add_pd (s01,s23)));
+#elif defined _SLKP_USE_ARM_NEON
+                //float64x2x2_t root_c = vld1q_f64_x2 (rootConditionals),
+                //              probs  = vld1q_f64_x2 (theProbs);
+
+                //printf ("NEON\n");
+
+                float64x2_t rp0 = vmulq_f64 (vld1q_f64 (rootConditionals),   vld1q_f64 (theProbs)),
+                            rp1 = vmulq_f64 (vld1q_f64 (rootConditionals+2), vld1q_f64 (theProbs+2));
+
+                float64x2_t b_cond0 = vdupq_n_f64 (branchConditionals[0]),
+                            b_cond1 = vdupq_n_f64 (branchConditionals[1]),
+                            b_cond2 = vdupq_n_f64 (branchConditionals[2]),
+                            b_cond3 = vdupq_n_f64 (branchConditionals[3]);
+
+
+                float64x2_t t00 = vmulq_f64 (b_cond0, tmatrix_transpose[0].val[0]),
+                            // branchConditionals[0] * transitionMatrix[0] , branchConditionals[0] * transitionMatrix[4]
+                            t01 = vmulq_f64 (b_cond0, tmatrix_transpose[0].val[1]),
+                            // branchConditionals[0] * transitionMatrix[8] , branchConditionals[0] * transitionMatrix[12]
+                            t10 = vmulq_f64 (b_cond1, tmatrix_transpose[1].val[0]),
+                            // branchConditionals[1] * transitionMatrix[1] , branchConditionals[1] * transitionMatrix[5]
+                            t11 = vmulq_f64 (b_cond1, tmatrix_transpose[1].val[1]),
+                            t20 = vmulq_f64 (b_cond2, tmatrix_transpose[2].val[0]),
+                            t21 = vmulq_f64 (b_cond2, tmatrix_transpose[2].val[1]),
+                            t30 = vmulq_f64 (b_cond3, tmatrix_transpose[3].val[0]),
+                            t31 = vmulq_f64 (b_cond3, tmatrix_transpose[3].val[1]);
+
+
+
+                t00 = vfmaq_f64 (vmulq_f64 (t00, rp0), t01, rp1);
+                t10 = vfmaq_f64 (vmulq_f64 (t10, rp0), t11, rp1);
+                t20 = vfmaq_f64 (vmulq_f64 (t20, rp0), t21, rp1);
+                t30 = vfmaq_f64 (vmulq_f64 (t30, rp0), t31, rp1);
+
+                t00 = vaddq_f64 (t00,t10);
+                t20 = vaddq_f64 (t20,t30);
+                accumulator = _neon_sum_2 (vaddq_f64 (t00,t20));
+
+
 #else
                 accumulator =      rootConditionals[0] * theProbs[0] * (branchConditionals[0] * transitionMatrix[0] + branchConditionals[1] * transitionMatrix[1] + branchConditionals[2] * transitionMatrix[2] + branchConditionals[3] * transitionMatrix[3]) +
@@ -3154,10 +3206,10 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
                                    rootConditionals[3] * theProbs[3] * (branchConditionals[0] * transitionMatrix[12] + branchConditionals[1] * transitionMatrix[13] + branchConditionals[2] * transitionMatrix[14] + branchConditionals[3] * transitionMatrix[15]);
 #endif
+                bookkeeping (siteID, accumulator, correction, result);
                 rootConditionals += 4UL;
                 branchConditionals += 4UL;
-                bookkeeping (siteID, accumulator, correction, result);
-            } // siteID
+            } // siteID
         }
         break;
     /****
@@ -3201,6 +3253,40 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
                     accumulator += *rootConditionals * theProbs[p] * _avx_sum_4(_mm256_add_pd (t_matrix[3],t_matrix[4]));
                 }
+
+#elif defined _SLKP_USE_ARM_NEON
+
+                float64x2_t bc_vector[10] = {vld1q_f64(branchConditionals),
+                                             vld1q_f64(branchConditionals+2UL),
+                                             vld1q_f64(branchConditionals+4UL),
+                                             vld1q_f64(branchConditionals+6UL),
+                                             vld1q_f64(branchConditionals+8UL),
+                                             vld1q_f64(branchConditionals+10UL),
+                                             vld1q_f64(branchConditionals+12UL),
+                                             vld1q_f64(branchConditionals+14UL),
+                                             vld1q_f64(branchConditionals+16UL),
+                                             vld1q_f64(branchConditionals+18UL)
+                                            };
+
+                hyFloat const * tm = transitionMatrix;
+
+                for (unsigned long p = 0UL; p < 20UL; p++, rootConditionals++) {
+
+                    float64x2_t S1 = vmulq_f64 ( bc_vector[0], vld1q_f64(tm)),
+                                S2 = vmulq_f64 ( bc_vector[1], vld1q_f64(tm+2UL));
+
+                    S1 = vfmaq_f64 (S1, bc_vector[2], vld1q_f64(tm+4UL));
+                    S2 = vfmaq_f64 (S2, bc_vector[3], vld1q_f64(tm+6UL));
+                    S1 = vfmaq_f64 (S1, bc_vector[4], vld1q_f64(tm+8UL));
+                    S2 = vfmaq_f64 (S2, bc_vector[5], vld1q_f64(tm+10UL));
+                    S1 = vfmaq_f64 (S1, bc_vector[6], vld1q_f64(tm+12UL));
+                    S2 = vfmaq_f64 (S2, bc_vector[7], vld1q_f64(tm+14UL));
+                    S1 = vfmaq_f64 (S1, bc_vector[8], vld1q_f64(tm+16UL));
+                    S2 = vfmaq_f64 (S2, bc_vector[9], vld1q_f64(tm+18UL));
+
+                    accumulator += *rootConditionals * theProbs[p] * _neon_sum_2(vaddq_f64 (S1,S2));
+                    tm += 20UL;
+                }
 #else // _SLKP_USE_AVX_INTRINSICS
                 unsigned long rmx = 0UL;
                 for (unsigned long p = 0UL; p < 20UL; p++,rootConditionals++) {
@@ -3262,6 +3348,23 @@ hyFloat _TheTree::ComputeLLWithBranchCache (
                     }
                     r2 = _avx_sum_4(sum256);
+#elif defined _SLKP_USE_ARM_NEON
+
+                    float64x2_t sum128 = vdupq_n_f64 (0.);
+                    for (; c < 60UL; c+=6UL, rmx +=6UL) {
+
+                        float64x2_t branches0 = vld1q_f64 (branchConditionals+c),
+                                    branches1 = vld1q_f64 (branchConditionals+c+2),
+                                    branches2 = vld1q_f64 (branchConditionals+c+4),
+                                    matrix0   = vld1q_f64 (transitionMatrix+rmx),
+                                    matrix1   = vld1q_f64 (transitionMatrix+rmx+2),
+                                    matrix2   = vld1q_f64 (transitionMatrix+rmx+4);
+
+                        branches0 = vfmaq_f64 (vmulq_f64 (branches0, matrix0), branches1, matrix1);
+                        branches1 = vfmaq_f64 (sum128, branches2, matrix2);
+                        sum128 = vaddq_f64 (branches1, branches0);
+                    }
+                    r2 = _neon_sum_2(sum128);
 #else // _SLKP_USE_AVX_INTRINSICS
                     for (; c < 60UL; c+=4UL, rmx +=4UL) {
diff --git a/src/core/tree_evaluator.cpp b/src/core/tree_evaluator.cpp
index 5831a6cfc..ecb298163 100644
--- a/src/core/tree_evaluator.cpp
+++ b/src/core/tree_evaluator.cpp
@@ -122,11 +122,7 @@ inline double _sse_sum_2 (__m128d const & x) {
 }
 #endif
-#ifdef _SLKP_USE_ARM_NEON
-inline double _neon_sum_2 (float64x2_t const & x) {
-    return vgetq_lane_f64 (x,0) + vgetq_lane_f64 (x,1);
-}
-#endif
+
 template inline void __ll_handle_matrix_transpose (hyFloat const * __restrict transitionMatrix, hyFloat * __restrict tMatrixT) {
     long i = 0L;
@@ -1774,13 +1770,15 @@ void _TheTree::ComputeBranchCache (
                         siteRes[siteOrdering.list_data[siteID]] *= _lfScalingFactorThreshold;
                     } else {
                         if (didScale > 0) {
-                            for (long k = 0; k < didScale; k++) {
+                            siteRes[siteOrdering.list_data[siteID]] *= ComputePower (_lfScalerUpwards, didScale);
+                            /*for (long k = 0; k < didScale; k++) {
                                 siteRes[siteOrdering.list_data[siteID]] *= _lfScalerUpwards;
-                            }
+                            }*/
                         } else{
-                            for (long k = 0; k < -didScale; k++) {
+                            siteRes[siteOrdering.list_data[siteID]] *= ComputePower (_lfScalingFactorThreshold, -didScale);
+                            /*for (long k = 0; k < -didScale; k++) {
                                 siteRes[siteOrdering.list_data[siteID]] *= _lfScalingFactorThreshold;
-                            }
+                            }*/
                         }
                     }
                 }
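The scaling change above swaps the removed per-step loops for a single ComputePower call. Assuming ComputePower (base, n) returns base raised to the integer power n (whatever it does internally, a cached table or repeated squaring), the factor applied to siteRes is unchanged while the O(didScale) loop of multiplications disappears. An illustrative stand-in using exponentiation by squaring:

    // Sketch of the assumed semantics of ComputePower; not HyPhy's implementation.
    inline double power_by_squaring (double base, long n) {
        double result = 1.0;
        while (n > 0) {
            if (n & 1L) result *= base;   // fold in the current bit's contribution
            base *= base;                 // square for the next bit
            n >>= 1;
        }
        return result;
    }
    // e.g. siteRes[i] *= power_by_squaring (_lfScalerUpwards, didScale);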
@@ -2036,8 +2034,8 @@ void _TheTree::ComputeBranchCache (
                     __ll_loop_handle_scaling<4L, false> (sum, parentConditionals, scalingAdjustments, didScale, nodeCode, siteCount, siteID, localScalerChange, theFilter->theFrequencies.get (siteOrdering.list_data[siteID]));
                 }
-                /*if (likeFuncEvalCallCount == 15098 && siteID == 91) {
-                    fprintf (stderr, "NODE = %ld, PARENT = %ld (%ld), P(G) = %lg, P(T) = %lg, scale = %ld\n", nodeCode, parentCode, canScale, parentConditionals[2], parentConditionals[3], didScale);
+                /*if (likeFuncEvalCallCount == 647 && siteID == 158) {
+                    fprintf (stderr, "NODE = %ld, PARENT = %ld (%ld), P(A) = %lg, P(C) = %lg, P(G) = %lg, P(T) = %lg, scale = %ld\n", nodeCode, parentCode, canScale, parentConditionals[0], parentConditionals[1], parentConditionals[2], parentConditionals[3], didScale);
                 }*/
                 childVector += 4L;
                 __handle_site_corrections(didScale, siteID);
@@ -2498,7 +2496,7 @@ void _TheTree::ComputeBranchCache (
     const unsigned long site_bound = alphabetDimension*siteTo;
     for (unsigned long ii = siteFrom * alphabetDimension; ii < site_bound; ii++) {
         state[ii] = rootConditionals[ii];
-        /*if (likeFuncEvalCallCount == 15098 && ii / alphabetDimension == 91) {
+        /*if (likeFuncEvalCallCount == 647 && ii / alphabetDimension == 158) {
             printf ("Site %ld, Root conditional [%ld] = %g, node state [%ld] = %g\n", ii/alphabetDimension, ii, state[ii], ii, cache[ii]);
         }*/
     }