Skip to content

Commit

Permalink
removed gsl dependency, added lrt for multi
Browse files Browse the repository at this point in the history
  • Loading branch information
xflouris committed Sep 11, 2023
1 parent 2a953cc commit 04a4160
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 28 deletions.
2 changes: 0 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ AC_FUNC_REALLOC
AC_CHECK_FUNCS([memmove memcpy gettimeofday memchr memset pow regcomp strcasecmp strchr strcspn sysinfo])

AC_CHECK_LIB([m],[cos])
AC_CHECK_LIB([gslcblas], [cblas_dgemm])
AC_CHECK_LIB([gsl], [gsl_cdf_chisq_P])

# Bash completions
AC_ARG_WITH([bash-completions],
Expand Down
6 changes: 6 additions & 0 deletions src/auto.c
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,15 @@ void detect_min_bl(rtree_t * rtree)
}

if (minfound && n != 1)
{
printf("Minimum branch length (--minbr) should be set to %.10f\n", branch_lengths[n-1]);
output_minbr(branch_lengths[n-1]);
}
else
{
printf("Minimum branch length (--minbr) should be set to 0\n");
output_minbr(0);
}


free(branch_lengths);
Expand Down
59 changes: 49 additions & 10 deletions src/dp.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "mptp.h"

static unsigned int species_iter = 0;
static unsigned int coal_param_count = 0;

static void dp_recurse(rtree_t * node, long method)
{
Expand Down Expand Up @@ -166,6 +167,47 @@ static void backtrack(rtree_t * node,
}
}

long multi_coalpopedgecount(rtree_t * node)
{
long edges = 0;

if (node->left)
{
if (node->left->length > opt_minbr)
++edges;
edges += multi_coalpopedgecount(node->left);
}
if (node->right)
{
if (node->right->length > opt_minbr)
++edges;
edges += multi_coalpopedgecount(node->right);
}

return edges;

}
void multi_getcoalparamscount(rtree_t * node, int index)
{
dp_vector_t * vec = node->vector;

if ((vec[index].vec_left != -1) && (vec[index].vec_right != -1))
{
node->event = EVENT_SPECIATION;

multi_getcoalparamscount(node->left, vec[index].vec_left);
multi_getcoalparamscount(node->right,vec[index].vec_right);
}
else
{
node->event = EVENT_COALESCENT;

long edges = multi_coalpopedgecount(node);
if (edges)
coal_param_count++;
}
}

void dp_ptp(rtree_t * tree, long method)
{
int i;
Expand Down Expand Up @@ -234,19 +276,16 @@ void dp_ptp(rtree_t * tree, long method)
/* do a Likelihood Ratio Test (lrt) and return the computed p-value */
species_count = vec[best_index].species_count;

// only do LRT for PTP, not for mPTP
lrt_pass = (method == PTP_METHOD_MULTI) ? 1 : lrt(tree->coal_logl,
vec[best_index].score_single, 1, &pvalue);
/* fills the coal_param_count variable with # coalescent pop parameters */
coal_param_count = 0;
multi_getcoalparamscount(tree,best_index);

#ifndef HAVE_LIBGSL
fprintf(stderr, "WARNING: delimit was not compiled with libgsl. "
"Likelihood ratio test disabled.\n");
#endif
/* likelihood ratio test */
unsigned int df = (method == PTP_METHOD_SINGLE) ? 1 : coal_param_count;
lrt_pass = lrt(tree->coal_logl,max,df,&pvalue);

#ifdef HAVE_LIBGSL
if (!opt_quiet && method == PTP_METHOD_SINGLE)
if (!opt_quiet)
fprintf(stdout,"LRT computed p-value: %.6f\n", pvalue);
#endif

/* initialize file name */
FILE * out = open_file_ext("txt", opt_seed);
Expand Down
70 changes: 65 additions & 5 deletions src/likelihood.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,70 @@

#include "mptp.h"

double IncompleteGamma(double x, double alpha, double ln_gamma_alpha)
{
/* returns the incomplete gamma ratio I(x,alpha) where x is the upper
limit of the integration and alpha is the shape parameter.
returns (-1) if in error
ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
(1) series expansion, if (alpha>x || x<=1)
(2) continued fraction, otherwise
RATNEST FORTRAN by
Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics,
19: 285-287 (AS32)
*/
int i;
double p = alpha, g = ln_gamma_alpha;
double accurate = 1e-10, overflow = 1e60;
double factor, gin = 0, rn = 0, a = 0, b = 0, an = 0, dif = 0, term = 0, pn[6];

if (x == 0) return (0);
if (x < 0 || p <= 0) return (-1);

factor = exp(p*log(x) - x - g);
if (x > 1 && x >= p) goto l30;
/* (1) series expansion */
gin = 1; term = 1; rn = p;
l20:
rn++;
term *= x / rn; gin += term;
if (term > accurate) goto l20;
gin *= factor / p;
goto l50;
l30:
/* (2) continued fraction */
a = 1 - p; b = a + x + 1; term = 0;
pn[0] = 1; pn[1] = x; pn[2] = x + 1; pn[3] = x*b;
gin = pn[2] / pn[3];
l32:
a++;
b += 2;
term++;
an = a*term;
for (i = 0; i < 2; i++)
pn[i + 4] = b*pn[i + 2] - an*pn[i];
if (pn[5] == 0) goto l35;
rn = pn[4] / pn[5];
dif = fabs(gin - rn);
if (dif > accurate) goto l34;
if (dif <= accurate*rn) goto l42;
l34:
gin = rn;
l35:
for (i = 0; i < 4; i++) pn[i] = pn[i + 2];
if (fabs(pn[4]) < overflow) goto l32;
for (i = 0; i < 4; i++) pn[i] /= overflow;
goto l32;
l42:
gin = 1 - factor*gin;

l50:
return (gin);
}

#define CDFGamma(x,alpha,beta) IncompleteGamma((beta)*(x),alpha,lgamma(alpha))
#define CDFChi2(x,v) CDFGamma(x,(v)/2.0,0.5)



double loglikelihood(long edge_count, double edgelen_sum)
Expand All @@ -34,15 +98,11 @@ double loglikelihood(long edge_count, double edgelen_sum)

int lrt(double nullmodel_logl, double ptp_logl, unsigned int df, double * pvalue)
{
#ifdef HAVE_LIBGSL
double diff = 2*(ptp_logl - nullmodel_logl);

/* http://docs.scipy.org/doc/scipy/reference/generated/scipy.special.chdtr.html */
*pvalue = 1 - gsl_cdf_chisq_P(diff,df);
*pvalue = 1 - CDFChi2(diff,df);

if ((*pvalue) > opt_pvalue)
return 0;
#endif

return 1;
}
Expand Down
5 changes: 1 addition & 4 deletions src/mptp.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@
#include "config.h"
#endif

#if (defined(HAVE_CONFIG_H) && defined(HAVE_LIBGSL))
#include <gsl/gsl_cdf.h>
#endif

/* constants */

#define PROG_NAME PACKAGE
Expand Down Expand Up @@ -426,6 +422,7 @@ void output_info(FILE * out,
int lrt_result,
rtree_t * root,
unsigned int species_count);
void output_minbr(double minbr);

FILE * open_file_ext(const char * extension, long seed);

Expand Down
27 changes: 20 additions & 7 deletions src/output.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,25 @@ void output_info(FILE * out,
(method == PTP_METHOD_SINGLE) ?
"single" : "multi",
logl);
#ifdef HAVE_LIBGSL
if (method == PTP_METHOD_SINGLE)
{
fprintf(out, "LRT computed p-value: %.6f\n", pvalue);
fprintf(out, "LRT: %s\n", lrt_result ? "passed" : "failed");
}
#endif
fprintf(out, "LRT computed p-value: %.6f\n", pvalue);
fprintf(out, "LRT: %s\n", lrt_result ? "passed" : "failed");
fprintf(out, "Number of delimited species: %d\n", species_count);
}

void output_minbr(double minbr)
{
FILE * fp_out;
char * filename = NULL;

if (asprintf(&filename, "%s.%s", opt_outfile, "txt") == -1)
fatal("Unable to allocate enough memory.");

fp_out = xopen(filename,"w");
free(filename);

fprintf(fp_out, "Command: %s\n", cmdline);
if (minbr == 0)
fprintf(fp_out, "Minimum branch length (--minbr) should be set to 0\n");
else
fprintf(fp_out, "Minimum branch length (--minbr) should be set to %f\n",minbr);
}

0 comments on commit 04a4160

Please sign in to comment.