diff --git a/configure.ac b/configure.ac index 7638c89..80bcd1d 100644 --- a/configure.ac +++ b/configure.ac @@ -42,8 +42,6 @@ AC_FUNC_REALLOC AC_CHECK_FUNCS([memmove memcpy gettimeofday memchr memset pow regcomp strcasecmp strchr strcspn sysinfo]) AC_CHECK_LIB([m],[cos]) -AC_CHECK_LIB([gslcblas], [cblas_dgemm]) -AC_CHECK_LIB([gsl], [gsl_cdf_chisq_P]) # Bash completions AC_ARG_WITH([bash-completions], diff --git a/src/auto.c b/src/auto.c index 891bc7c..3f0dc14 100644 --- a/src/auto.c +++ b/src/auto.c @@ -317,9 +317,15 @@ void detect_min_bl(rtree_t * rtree) } if (minfound && n != 1) + { printf("Minimum branch length (--minbr) should be set to %.10f\n", branch_lengths[n-1]); + output_minbr(branch_lengths[n-1]); + } else + { printf("Minimum branch length (--minbr) should be set to 0\n"); + output_minbr(0); + } free(branch_lengths); diff --git a/src/dp.c b/src/dp.c index b59c95a..7cccb93 100644 --- a/src/dp.c +++ b/src/dp.c @@ -22,6 +22,7 @@ #include "mptp.h" static unsigned int species_iter = 0; +static unsigned int coal_param_count = 0; static void dp_recurse(rtree_t * node, long method) { @@ -166,6 +167,47 @@ static void backtrack(rtree_t * node, } } +long multi_coalpopedgecount(rtree_t * node) +{ + long edges = 0; + + if (node->left) + { + if (node->left->length > opt_minbr) + ++edges; + edges += multi_coalpopedgecount(node->left); + } + if (node->right) + { + if (node->right->length > opt_minbr) + ++edges; + edges += multi_coalpopedgecount(node->right); + } + + return edges; + +} +void multi_getcoalparamscount(rtree_t * node, int index) +{ + dp_vector_t * vec = node->vector; + + if ((vec[index].vec_left != -1) && (vec[index].vec_right != -1)) + { + node->event = EVENT_SPECIATION; + + multi_getcoalparamscount(node->left, vec[index].vec_left); + multi_getcoalparamscount(node->right,vec[index].vec_right); + } + else + { + node->event = EVENT_COALESCENT; + + long edges = multi_coalpopedgecount(node); + if (edges) + coal_param_count++; + } +} + void dp_ptp(rtree_t * tree, long method) { int i; @@ -234,19 +276,16 @@ void dp_ptp(rtree_t * tree, long method) /* do a Likelihood Ratio Test (lrt) and return the computed p-value */ species_count = vec[best_index].species_count; - // only do LRT for PTP, not for mPTP - lrt_pass = (method == PTP_METHOD_MULTI) ? 1 : lrt(tree->coal_logl, - vec[best_index].score_single, 1, &pvalue); + /* fills the coal_param_count variable with # coalescent pop parameters */ + coal_param_count = 0; + multi_getcoalparamscount(tree,best_index); -#ifndef HAVE_LIBGSL - fprintf(stderr, "WARNING: delimit was not compiled with libgsl. " - "Likelihood ratio test disabled.\n"); -#endif + /* likelihood ratio test */ + unsigned int df = (method == PTP_METHOD_SINGLE) ? 1 : coal_param_count; + lrt_pass = lrt(tree->coal_logl,max,df,&pvalue); -#ifdef HAVE_LIBGSL - if (!opt_quiet && method == PTP_METHOD_SINGLE) + if (!opt_quiet) fprintf(stdout,"LRT computed p-value: %.6f\n", pvalue); -#endif /* initialize file name */ FILE * out = open_file_ext("txt", opt_seed); diff --git a/src/likelihood.c b/src/likelihood.c index eb9a07b..daeb0ee 100644 --- a/src/likelihood.c +++ b/src/likelihood.c @@ -21,6 +21,70 @@ #include "mptp.h" +double IncompleteGamma(double x, double alpha, double ln_gamma_alpha) +{ + /* returns the incomplete gamma ratio I(x,alpha) where x is the upper + limit of the integration and alpha is the shape parameter. + returns (-1) if in error + ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant. + (1) series expansion, if (alpha>x || x<=1) + (2) continued fraction, otherwise + RATNEST FORTRAN by + Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, + 19: 285-287 (AS32) + */ + int i; + double p = alpha, g = ln_gamma_alpha; + double accurate = 1e-10, overflow = 1e60; + double factor, gin = 0, rn = 0, a = 0, b = 0, an = 0, dif = 0, term = 0, pn[6]; + + if (x == 0) return (0); + if (x < 0 || p <= 0) return (-1); + + factor = exp(p*log(x) - x - g); + if (x > 1 && x >= p) goto l30; + /* (1) series expansion */ + gin = 1; term = 1; rn = p; +l20: + rn++; + term *= x / rn; gin += term; + if (term > accurate) goto l20; + gin *= factor / p; + goto l50; +l30: + /* (2) continued fraction */ + a = 1 - p; b = a + x + 1; term = 0; + pn[0] = 1; pn[1] = x; pn[2] = x + 1; pn[3] = x*b; + gin = pn[2] / pn[3]; +l32: + a++; + b += 2; + term++; + an = a*term; + for (i = 0; i < 2; i++) + pn[i + 4] = b*pn[i + 2] - an*pn[i]; + if (pn[5] == 0) goto l35; + rn = pn[4] / pn[5]; + dif = fabs(gin - rn); + if (dif > accurate) goto l34; + if (dif <= accurate*rn) goto l42; +l34: + gin = rn; +l35: + for (i = 0; i < 4; i++) pn[i] = pn[i + 2]; + if (fabs(pn[4]) < overflow) goto l32; + for (i = 0; i < 4; i++) pn[i] /= overflow; + goto l32; +l42: + gin = 1 - factor*gin; + +l50: + return (gin); +} + +#define CDFGamma(x,alpha,beta) IncompleteGamma((beta)*(x),alpha,lgamma(alpha)) +#define CDFChi2(x,v) CDFGamma(x,(v)/2.0,0.5) + double loglikelihood(long edge_count, double edgelen_sum) @@ -34,15 +98,11 @@ double loglikelihood(long edge_count, double edgelen_sum) int lrt(double nullmodel_logl, double ptp_logl, unsigned int df, double * pvalue) { -#ifdef HAVE_LIBGSL double diff = 2*(ptp_logl - nullmodel_logl); - - /* http://docs.scipy.org/doc/scipy/reference/generated/scipy.special.chdtr.html */ - *pvalue = 1 - gsl_cdf_chisq_P(diff,df); + *pvalue = 1 - CDFChi2(diff,df); if ((*pvalue) > opt_pvalue) return 0; -#endif return 1; } diff --git a/src/mptp.h b/src/mptp.h index 6db42fc..77629d9 100644 --- a/src/mptp.h +++ b/src/mptp.h @@ -44,10 +44,6 @@ #include "config.h" #endif -#if (defined(HAVE_CONFIG_H) && defined(HAVE_LIBGSL)) -#include -#endif - /* constants */ #define PROG_NAME PACKAGE @@ -426,6 +422,7 @@ void output_info(FILE * out, int lrt_result, rtree_t * root, unsigned int species_count); +void output_minbr(double minbr); FILE * open_file_ext(const char * extension, long seed); diff --git a/src/output.c b/src/output.c index e7bc9da..04b1734 100644 --- a/src/output.c +++ b/src/output.c @@ -62,12 +62,25 @@ void output_info(FILE * out, (method == PTP_METHOD_SINGLE) ? "single" : "multi", logl); -#ifdef HAVE_LIBGSL - if (method == PTP_METHOD_SINGLE) - { - fprintf(out, "LRT computed p-value: %.6f\n", pvalue); - fprintf(out, "LRT: %s\n", lrt_result ? "passed" : "failed"); - } -#endif + fprintf(out, "LRT computed p-value: %.6f\n", pvalue); + fprintf(out, "LRT: %s\n", lrt_result ? "passed" : "failed"); fprintf(out, "Number of delimited species: %d\n", species_count); } + +void output_minbr(double minbr) +{ + FILE * fp_out; + char * filename = NULL; + + if (asprintf(&filename, "%s.%s", opt_outfile, "txt") == -1) + fatal("Unable to allocate enough memory."); + + fp_out = xopen(filename,"w"); + free(filename); + + fprintf(fp_out, "Command: %s\n", cmdline); + if (minbr == 0) + fprintf(fp_out, "Minimum branch length (--minbr) should be set to 0\n"); + else + fprintf(fp_out, "Minimum branch length (--minbr) should be set to %f\n",minbr); +}