Skip to content

Commit

Permalink
Internal samples in C (with minimal unit tests)
Browse files Browse the repository at this point in the history
Minimal testing in Python

C code cleanup
  • Loading branch information
jeromekelleher committed Sep 6, 2024
1 parent 3866f05 commit 4c6f29a
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 103 deletions.
105 changes: 52 additions & 53 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -1859,11 +1859,13 @@ msp_verify_overlaps(msp_t *self)
int ok = overlap_counter_alloc(&counter, self->sequence_length, 0);
tsk_bug_assert(ok == 0);

/* add in the overlaps for ancient samples */
for (j = self->next_sampling_event; j < self->num_sampling_events; j++) {
se = self->sampling_events[j];
for (u = self->root_segments[se.sample]; u != NULL; u = u->next) {
overlap_counter_increment_interval(&counter, u->left, u->right);
if (self->model.type != MSP_MODEL_WF_PED) {
/* add in the overlaps for ancient samples */
for (j = self->next_sampling_event; j < self->num_sampling_events; j++) {
se = self->sampling_events[j];
for (u = self->root_segments[se.sample]; u != NULL; u = u->next) {
overlap_counter_increment_interval(&counter, u->left, u->right);
}
}
}

Expand Down Expand Up @@ -2495,12 +2497,10 @@ msp_store_coalescence_edge(
int ret = 0;

if (self->model.type == MSP_MODEL_DTWF || self->model.type == MSP_MODEL_WF_PED) {
if (self->additional_nodes & MSP_NODE_IS_RE_EVENT) {
// parent and child can be equal
// don't store edges
if (parent == child) {
return ret;
}
// parent and child can be equal if we're storing RE events, or if we've
// got internal samples. Don't store edges
if (parent == child) {
return ret;
}
}

Expand Down Expand Up @@ -2914,13 +2914,30 @@ msp_pedigree_add_individual_common_ancestor(
}

static int MSP_WARN_UNUSED
msp_pedigree_add_sample_ancestry(msp_t *self, segment_t *segment)
msp_pedigree_add_sample_ancestry(msp_t *self, tsk_id_t node_id)
{
int ret = 0;
tsk_size_t ploid;
tsk_id_t node_id = segment->value;
tsk_id_t individual_id;
individual_t *ind;
avl_node_t *a;
node_mapping_t *counter;
segment_t *seg
= msp_alloc_segment(self, 0, self->sequence_length, node_id, 0, 0, NULL, NULL);
lineage_t *lin = msp_alloc_lineage(self, seg, seg, 0, 0);

if (seg == NULL || lin == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
ret = msp_insert_individual(self, lin);
if (ret != 0) {
goto out;
}
for (a = self->overlap_counts.head; a != self->overlap_counts.tail; a = a->next) {
counter = (node_mapping_t *) a->item;
counter->value += 1;
}

tsk_bug_assert(node_id < (tsk_id_t) self->tables->nodes.num_rows);
individual_id = self->tables->nodes.individual[node_id];
Expand All @@ -2934,14 +2951,7 @@ msp_pedigree_add_sample_ancestry(msp_t *self, segment_t *segment)
}
}
tsk_bug_assert(ploid < ind->ploidy);
if (avl_count(&ind->common_ancestors[ploid]) > 0) {
/* This is where we'd deal with the internal samples. What we'll probably
* need to do is to go through the ancestry after we've processed the
* individual and then pad out any gaps in the ancestry segments. */
ret = MSP_ERR_PEDIGREE_INTERNAL_SAMPLE;
goto out;
}
ret = msp_pedigree_add_individual_common_ancestor(self, ind->id, segment, ploid);
ret = msp_pedigree_add_individual_common_ancestor(self, ind->id, seg, ploid);
if (ret != 0) {
goto out;
}
Expand All @@ -2955,9 +2965,11 @@ msp_pedigree_initialise(msp_t *self)
int ret = 0;
population_t *pop;
lineage_t *lin;
segment_t *seg;
avl_node_t *a;
label_id_t label = 0;
tsk_size_t j;
node_mapping_t *counter;

if (self->next_demographic_event != NULL) {
ret = MSP_ERR_UNSUPPORTED_OPERATION;
Expand All @@ -2981,14 +2993,21 @@ msp_pedigree_initialise(msp_t *self)

for (j = 0; j < self->num_populations; j++) {
pop = &self->populations[j];
/* Rather than messing about with how we initialise from trees, it's
* easier to just remove the lineages here, before we add them
* back later when dealing with samples in the pedigree. */
for (a = pop->ancestors[label].head; a != NULL; a = a->next) {
lin = (lineage_t *) a->item;
ret = msp_pedigree_add_sample_ancestry(self, lin->head);
if (ret != 0) {
goto out;
for (seg = lin->head; seg != NULL; seg = seg->next) {
msp_free_segment(self, seg);
}
msp_remove_individual(self, lin);
}
}
tsk_bug_assert(avl_count(&self->overlap_counts) == 2);
counter = (node_mapping_t *) self->overlap_counts.head->item;
counter->value = 0;

self->pedigree.next_individual = 0;
out:
return ret;
Expand Down Expand Up @@ -4589,31 +4608,6 @@ msp_apply_sampling_events(msp_t *self, double time)
return ret;
}

static int MSP_WARN_UNUSED
msp_pedigree_insert_ancient_samples(msp_t *self)
{
int ret = 0;
sampling_event_t *se;
segment_t *root_seg, *new_head;

while (self->next_sampling_event < self->num_sampling_events
&& self->sampling_events[self->next_sampling_event].time <= self->time) {
se = self->sampling_events + self->next_sampling_event;
root_seg = self->root_segments[se->sample];
ret = msp_insert_root_segments(self, root_seg, &new_head);
if (ret != 0) {
goto out;
}
ret = msp_pedigree_add_sample_ancestry(self, new_head);
if (ret != 0) {
goto out;
}
self->next_sampling_event++;
}
out:
return ret;
}

static double
msp_get_next_fixed_event_time(msp_t *self)
{
Expand Down Expand Up @@ -5483,13 +5477,22 @@ msp_pedigree_process_common_ancestors(msp_t *self, individual_t *ind, tsk_size_t
{
int ret = 0;
tsk_id_t node = ind->nodes[ploid];
bool is_sample = (self->tables->nodes.flags[node] & TSK_NODE_IS_SAMPLE) != 0;
tsk_id_t parent = ind->parents[ploid];
avl_tree_t *common_ancestors = &ind->common_ancestors[ploid];
segment_t *genome, *parent_ancestry[MSP_MAX_PED_PLOIDY], *seg;
const tsk_size_t ploidy = self->ploidy;
tsk_size_t j;
tsk_id_t *parent_nodes;

/* printf("Process node=%d sample=%d\n", node, is_sample); */
if (is_sample) {
ret = msp_pedigree_add_sample_ancestry(self, node);
if (ret != 0) {
goto out;
}
}

/* FIXME - assuming 1 label here */
ret = msp_merge_n_ancestors(
self, common_ancestors, ind->population, 0, node, &genome);
Expand Down Expand Up @@ -5607,10 +5610,6 @@ msp_run_pedigree(msp_t *self, double max_time, unsigned long max_events)
}
tsk_bug_assert(ind->time >= self->time);
self->time = ind->time;
ret = msp_pedigree_insert_ancient_samples(self);
if (ret != 0) {
goto out;
}
for (ploid = 0; ploid < ind->ploidy; ploid++) {
ret = msp_pedigree_process_common_ancestors(self, ind, ploid);
if (ret != 0) {
Expand Down
32 changes: 18 additions & 14 deletions lib/tests/test_pedigrees.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,8 @@ test_replicates_ancient_samples(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);

for (j = 0; j < 20; j++) {
msp_verify(&msp, 0);
ret = msp_run(&msp, DBL_MAX, UINT32_MAX);

CU_ASSERT_EQUAL_FATAL(ret, MSP_EXIT_MODEL_COMPLETE);
msp_verify(&msp, 0);
ret = msp_finalise_tables(&msp);
Expand Down Expand Up @@ -844,24 +844,28 @@ test_errors(void)
static void
test_internal_samples(void)
{
int ret;
tsk_id_t parents[] = { -1, -1, -1, -1, 0, 1 };
double time[] = { 1.0, 1.0, 0.0 };
tsk_flags_t is_sample[] = { true, true, true };
msp_t msp;
tsk_table_collection_t tables;
gsl_rng *rng = safe_rng_alloc();

ret = build_pedigree_sim(
&msp, &tables, rng, 100, 2, 3, parents, time, is_sample, NULL);
ret = msp_initialise(&msp);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = msp_run(&msp, DBL_MAX, UINT32_MAX);
CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_PEDIGREE_INTERNAL_SAMPLE);
tsk_table_collection_free(&tables);
msp_free(&msp);
verify_pedigree(0, 1, 3, parents, time, is_sample, NULL, 0);

gsl_rng_free(rng);
/* verify_pedigree */
/* msp_t msp; */
/* tsk_table_collection_t tables; */
/* gsl_rng *rng = safe_rng_alloc(); */

/* ret = build_pedigree_sim( */
/* &msp, &tables, rng, 100, 2, 3, parents, time, is_sample, NULL); */
/* ret = msp_initialise(&msp); */
/* msp_print_state(&msp, stdout); */
/* CU_ASSERT_EQUAL_FATAL(ret, 0); */
/* ret = msp_run(&msp, DBL_MAX, UINT32_MAX); */
/* CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_PEDIGREE_INTERNAL_SAMPLE); */
/* tsk_table_collection_free(&tables); */
/* msp_free(&msp); */

/* gsl_rng_free(rng); */
}

static void
Expand Down
6 changes: 0 additions & 6 deletions lib/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,6 @@ msp_strerror_internal(int err)
ret = "All individuals in the input pedigree must be associated with "
"exactly two parents (can be TSK_NULL, if not known)";
break;
case MSP_ERR_PEDIGREE_INTERNAL_SAMPLE:
ret = "Samples that are internal nodes in the pedigree are not "
"currently supported. Please comment on this GitHub issue if you "
"would like to see this feature implemented: "
"https://github.com/tskit-dev/msprime/issues/1855 ";
break;

case MSP_ERR_BAD_PROPORTION:
ret = "Proportion values must have 0 <= x <= 1";
Expand Down
1 change: 0 additions & 1 deletion lib/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@
#define MSP_ERR_PEDIGREE_TIME_TRAVEL -88
#define MSP_ERR_PEDIGREE_IND_NOT_DIPLOID -89
#define MSP_ERR_PEDIGREE_IND_NOT_TWO_PARENTS -90
#define MSP_ERR_PEDIGREE_INTERNAL_SAMPLE -91

/* clang-format on */
/* This bit is 0 for any errors originating from tskit */
Expand Down
47 changes: 18 additions & 29 deletions tests/test_pedigree.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,19 @@ def test_just_samples(self):
parent_nodes = {0, 1, 2, 3}
assert set(ts.first().roots) == parent_nodes

@pytest.mark.parametrize("num_founders", [2, 3, 10, 20])
def test_all_samples(self, num_founders):
tables = simulate_pedigree(
num_founders=num_founders,
num_children_prob=[0, 0, 1],
num_generations=6,
sequence_length=100,
)
flags = tables.nodes.flags
flags[:] = tskit.NODE_IS_SAMPLE
tables.nodes.flags = flags
self.verify(tables)


class TestSimulateThroughPedigreeEventByEvent(TestSimulateThroughPedigree):
def verify(self, input_tables, recombination_rate=0):
Expand Down Expand Up @@ -792,11 +805,14 @@ def verify(self, input_tables, recombination_rate=0):
for node_id in tree.nodes():
if tree.num_children(node_id) == 1:
# Any unary nodes should be associated with a pedigree
# founder.
# founder or a sapmle
node = ts2.node(node_id)
assert node.individual != tskit.NULL
individual = ts2.individual(node.individual)
assert list(individual.parents) == [tskit.NULL, tskit.NULL]
assert node.is_sample() or list(individual.parents) == [
tskit.NULL,
tskit.NULL,
]
tables1 = ts1.tables
tables2 = ts2.tables
tables1.individuals.assert_equals(
Expand Down Expand Up @@ -912,19 +928,6 @@ def test_10_percent_parents_removed(self):


class TestSimulateThroughPedigreeErrors:
def test_parents_are_samples(self):
tables = get_base_tables(100)
parents = [
add_pedigree_individual(tables, time=1, is_sample=True) for _ in range(2)
]
add_pedigree_individual(tables, parents=parents, time=0)

with pytest.raises(_msprime.LibraryError, match="1855"):
msprime.sim_ancestry(
initial_state=tables,
model="fixed_pedigree",
)

@pytest.mark.parametrize("num_parents", [0, 1, 3])
def test_not_two_parents(self, num_parents):
tables = get_base_tables(100)
Expand Down Expand Up @@ -984,20 +987,6 @@ def test_pedigree_time_travel(self):
with pytest.raises(_msprime.InputError, match="time for a parent must be"):
msprime.sim_ancestry(initial_state=tables, model="fixed_pedigree")

@pytest.mark.parametrize("num_founders", [2, 3, 10, 20])
def test_all_samples(self, num_founders):
tables = simulate_pedigree(
num_founders=num_founders,
num_children_prob=[0, 0, 1],
num_generations=6,
sequence_length=100,
)
flags = tables.nodes.flags
flags[:] = tskit.NODE_IS_SAMPLE
tables.nodes.flags = flags
with pytest.raises(_msprime.LibraryError, match="1855"):
msprime.sim_ancestry(initial_state=tables, model="fixed_pedigree")


class TestSimulateThroughPedigreeMultiplePops:
def test_demography_raises_error(self):
Expand Down

0 comments on commit 4c6f29a

Please sign in to comment.