Skip to content

Commit

Permalink
Merge pull request #2310 from jeromekelleher/local-multiple-merger
Browse files Browse the repository at this point in the history
Multiple merger polyploid scaling
  • Loading branch information
jeromekelleher authored Aug 7, 2024
2 parents 7492287 + 309ce94 commit f6627a9
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 114 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [1.3.3] - 2024-XX-XX

**Bug fixes**:

- Correct the Dirac coalescent time scaling with polyploidy and population growth.

## [1.3.2] - 2024-07-08

- Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`)
Expand Down
168 changes: 70 additions & 98 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -7949,33 +7949,18 @@ msp_dirac_get_common_ancestor_waiting_time_from_rate(
msp_t *self, population_t *pop, double lambda)
{
double ret = DBL_MAX;
double alpha = pop->growth_rate;
double alpha = 2.0 * pop->growth_rate;
double t = self->time;
double u, dt, z;
double pop_size = pop->initial_size;

if (lambda > 0.0) {
u = gsl_ran_exponential(self->rng, 1.0 / lambda);
if (alpha == 0.0) {
if (self->ploidy == 1) {
ret = pop->initial_size * pop->initial_size * u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
ret = pop->initial_size * pop->initial_size * u / 4.0;
}
ret = pop_size * pop_size * u;
} else {
dt = t - pop->start_time;
if (self->ploidy == 1) {
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u / 4.0;
}
z = 1 + alpha * pop_size * pop_size * exp(-alpha * dt) * u;
/* if z is <= 0 no coancestry can occur */
if (z > 0) {
ret = log(z) / alpha;
Expand All @@ -7994,13 +7979,7 @@ msp_dirac_get_common_ancestor_waiting_time(
{
population_t *pop = &self->populations[pop_id];
unsigned int n = (unsigned int) avl_count(&pop->ancestors[label]);
double c = self->model.params.dirac_coalescent.c;
double lambda = n * (n - 1.0) / 2.0;
if (self->ploidy == 1) {
lambda += c;
} else {
lambda += c / (2.0 * self->ploidy);
}
double lambda = gsl_sf_choose(n, 2) + self->model.params.dirac_coalescent.c;

return msp_dirac_get_common_ancestor_waiting_time_from_rate(self, pop, lambda);
}
Expand All @@ -8018,71 +7997,64 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t
double nC2, p;
double psi = self->model.params.dirac_coalescent.psi;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
nC2 = gsl_sf_choose(n, 2);
if (self->ploidy == 1) {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c));
} else {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c / (2.0 * self->ploidy)));
}
p = nC2 / (nC2 + self->model.params.dirac_coalescent.c);

if (gsl_rng_uniform(self->rng) < p) {
/* When 2 * ploidy parental chromosomes are available, Mendelian segregation
* results in a merger only 1 / (2 * ploidy) of the time. */
if (self->ploidy == 1
|| gsl_rng_uniform(self->rng) < 1.0 / (2.0 * self->ploidy)) {
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x_lin = (lineage_t *) x_node->item;
x = x_lin->head;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y_lin = (lineage_t *) y_node->item;
y = y_lin->head;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_lineage(self, x_lin);
msp_free_avl_node(self, y_node);
msp_free_lineage(self, y_lin);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
}
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x_lin = (lineage_t *) x_node->item;
x = x_lin->head;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y_lin = (lineage_t *) y_node->item;
y = y_lin->head;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_lineage(self, x_lin);
msp_free_avl_node(self, y_node);
msp_free_lineage(self, y_lin);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
} else {
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
num_participants = gsl_ran_binomial(self->rng, psi, n);
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (num_participants > 1) {
/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (ret < 0) {
goto out;
}
}
}
}
out:
Expand Down Expand Up @@ -8242,22 +8214,6 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
double truncation_point = beta_compute_truncation(self);
double beta_x, u, increment;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
beta_x = ran_inc_beta(self->rng, 2.0 - alpha, alpha, truncation_point);
Expand Down Expand Up @@ -8295,6 +8251,22 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
num_participants = 2 + gsl_ran_binomial(self->rng, beta_x, n - 2);
} while (gsl_rng_uniform(self->rng) > 1 / gsl_sf_choose(num_participants, 2));

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
Expand Down
4 changes: 2 additions & 2 deletions msprime/_msprimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1199,8 +1199,8 @@ Simulator_parse_simulation_model(Simulator *self, PyObject *py_model)
goto out;
}
c = PyFloat_AsDouble(value);
if (psi <= 0 || psi >= 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi < 1");
if (psi <= 0 || psi > 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi <= 1");
goto out;
}
if (c < 0) {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ancestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_multimerger_dirac(self):
recombination_rate=0.1,
record_full_arg=True,
sequence_length=10,
model=msprime.DiracCoalescent(psi=0.5, c=5),
model=msprime.DiracCoalescent(psi=0.5, c=100),
)
self.verify(sim, multiple_mergers=True)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ def test_dirac_simulation_model(self):
make_sim(model=model)
with pytest.raises(ValueError):
make_sim(model=get_simulation_model("dirac"))
for bad_psi in [-1, 0, -1e-6, 1, 1e6]:
for bad_psi in [-1, 0, -1e-6, 1 + 1e-6, 1e6]:
with pytest.raises(ValueError):
make_sim(
model=get_simulation_model("dirac", c=1, psi=bad_psi),
Expand All @@ -1363,7 +1363,7 @@ def test_dirac_simulation_model(self):
make_sim(
model=get_simulation_model("dirac", psi=0.5, c=bad_c),
)
for psi in [0.99, 0.2, 1e-4]:
for psi in [1, 0.2, 1e-4]:
for c in [5.0, 1e2, 1e-4]:
model = get_simulation_model("dirac", psi=psi, c=c)
sim = make_sim(model=model)
Expand Down
26 changes: 15 additions & 11 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -3431,12 +3431,14 @@ def _run(self, pop_size, alpha, growth_rate, num_replicates=10000):
logging.debug(f"running Beta growth for {pop_size} {alpha} {growth_rate}")
b = growth_rate * (alpha - 1)
model = (msprime.BetaCoalescent(alpha=alpha),)
ploidy = 2
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
for ploidy in range(2, 7):
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = (
f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
)
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
ploidy = 1
a = 1 / self.compute_beta_timescale(pop_size, alpha, ploidy)
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
Expand Down Expand Up @@ -3474,12 +3476,14 @@ def test_100000_11_001(self):
class DiracGrowth(XiGrowth):
def _run(self, pop_size, c, psi, growth_rate, num_replicates=10000):
logging.debug(f"running Dirac growth for {pop_size} {c} {psi} {growth_rate}")
b = growth_rate
b = 2 * growth_rate
model = (msprime.DiracCoalescent(psi=psi, c=c),)
p = 2
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(pop_size, growth_rate, model, num_replicates, a, b, p, name)
for p in range(2, 7):
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, p, name
)
p = 1
a = (1 + c * psi * psi) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
Expand Down

0 comments on commit f6627a9

Please sign in to comment.