Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial steps for internal pedigree samples #2321

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,12 @@ def __init__(
# implemented using `ParametricAncestryModel()`
self.hull_offset = hull_offset

self.initialise(tables.tree_sequence())
if model == "fixed_pedigree":
self.t = 0
self.S[0] = 0
self.S[self.L] = -1
else:
self.initialise(tables.tree_sequence())

self.num_ca_events = 0
self.num_re_events = 0
Expand Down Expand Up @@ -981,6 +986,7 @@ def initialise(self, ts):
root_segments_tail = [None for _ in range(ts.num_nodes)]
root_lineages = [None for _ in range(ts.num_nodes)]
last_S = -1
start_time = np.inf
for tree in ts.trees():
left, right = tree.interval
S = 0 if tree.num_roots == 1 else tree.num_roots
Expand All @@ -991,6 +997,7 @@ def initialise(self, ts):
# any ancestral segments to the state.
if tree.num_roots > 1:
for root in tree.roots:
start_time = min(start_time, tree.time(root))
population = ts.node(root).population
if root_segments_head[root] is None:
seg = self.alloc_segment(left, right, root)
Expand All @@ -1014,7 +1021,7 @@ def initialise(self, ts):
# Insert the segment chains into the algorithm state.
for node in range(ts.num_nodes):
lineage = root_lineages[node]
if lineage is not None:
if lineage is not None and ts.nodes_time[node] == start_time:
seg = lineage.head
left_end = seg.left
while seg is not None:
Expand Down Expand Up @@ -1616,6 +1623,16 @@ def process_pedigree_common_ancestors(self, ind, ploid):
individual, then recombine and distribute the remaining ancestral
material among its parent ploids.
"""
node = ind.nodes[ploid]
is_sample = (self.tables.nodes.flags[node] & tskit.NODE_IS_SAMPLE) > 0
if is_sample:
segment = self.alloc_segment(0, self.L, node)
lineage = self.alloc_lineage(segment, population=0)
self.add_lineage(lineage)
ind.add_common_ancestor(lineage.head, ploid=ploid)
for k in list(self.S.keys())[:-1]:
self.S[k] += 1

common_ancestors = ind.common_ancestors[ploid]
if len(common_ancestors) == 0:
# No ancestral material inherited on this ploid of this individual
Expand All @@ -1632,7 +1649,6 @@ def process_pedigree_common_ancestors(self, ind, ploid):
# monoploid genome for this ploid of this individual.
# If any coalescences occur, they use the corresponding node ID.
# FIXME update the population/label here
node = ind.nodes[ploid]
genome = self.merge_ancestors(common_ancestors, 0, 0, node)
if ind.parents[ploid] == tskit.NULL:
# If this individual is a founder we need to make sure that all
Expand Down Expand Up @@ -1676,18 +1692,6 @@ def dtwf_climb_pedigree(self):
Simulates transmission of ancestral material through a pre-specified
pedigree
"""
assert self.num_populations == 1 # Single pop/pedigree for now
pop = self.P[0]

# Go through the extant lineages and gather the ancestral material
# into the corresponding pedigree individuals.
for lineage in pop.iter_ancestors():
u = lineage.head.node
node = self.tables.nodes[u]
assert node.individual != tskit.NULL
ind = self.pedigree.individuals[node.individual]
ind.add_common_ancestor(lineage.head, ploid=ind.nodes.index(u))

# Visit pedigree individuals in time order.
visit_order = sorted(self.pedigree.individuals, key=lambda x: (x.time, x.id))
for ind in visit_order:
Expand Down
104 changes: 51 additions & 53 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -1859,11 +1859,13 @@ msp_verify_overlaps(msp_t *self)
int ok = overlap_counter_alloc(&counter, self->sequence_length, 0);
tsk_bug_assert(ok == 0);

/* add in the overlaps for ancient samples */
for (j = self->next_sampling_event; j < self->num_sampling_events; j++) {
se = self->sampling_events[j];
for (u = self->root_segments[se.sample]; u != NULL; u = u->next) {
overlap_counter_increment_interval(&counter, u->left, u->right);
if (self->model.type != MSP_MODEL_WF_PED) {
/* add in the overlaps for ancient samples */
for (j = self->next_sampling_event; j < self->num_sampling_events; j++) {
se = self->sampling_events[j];
for (u = self->root_segments[se.sample]; u != NULL; u = u->next) {
overlap_counter_increment_interval(&counter, u->left, u->right);
}
}
}

Expand Down Expand Up @@ -2495,12 +2497,10 @@ msp_store_coalescence_edge(
int ret = 0;

if (self->model.type == MSP_MODEL_DTWF || self->model.type == MSP_MODEL_WF_PED) {
if (self->additional_nodes & MSP_NODE_IS_RE_EVENT) {
// parent and child can be equal
// don't store edges
if (parent == child) {
return ret;
}
// parent and child can be equal if we're storing RE events, or if we've
// got internal samples. Don't store edges
if (parent == child) {
return ret;
}
}

Expand Down Expand Up @@ -2914,13 +2914,30 @@ msp_pedigree_add_individual_common_ancestor(
}

static int MSP_WARN_UNUSED
msp_pedigree_add_sample_ancestry(msp_t *self, segment_t *segment)
msp_pedigree_add_sample_ancestry(msp_t *self, tsk_id_t node_id)
{
int ret = 0;
tsk_size_t ploid;
tsk_id_t node_id = segment->value;
tsk_id_t individual_id;
individual_t *ind;
avl_node_t *a;
node_mapping_t *counter;
segment_t *seg
= msp_alloc_segment(self, 0, self->sequence_length, node_id, 0, 0, NULL, NULL);
lineage_t *lin = msp_alloc_lineage(self, seg, seg, 0, 0);

if (seg == NULL || lin == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
ret = msp_insert_individual(self, lin);
if (ret != 0) {
goto out;
}
for (a = self->overlap_counts.head; a != self->overlap_counts.tail; a = a->next) {
counter = (node_mapping_t *) a->item;
counter->value += 1;
}

tsk_bug_assert(node_id < (tsk_id_t) self->tables->nodes.num_rows);
individual_id = self->tables->nodes.individual[node_id];
Expand All @@ -2934,14 +2951,7 @@ msp_pedigree_add_sample_ancestry(msp_t *self, segment_t *segment)
}
}
tsk_bug_assert(ploid < ind->ploidy);
if (avl_count(&ind->common_ancestors[ploid]) > 0) {
/* This is where we'd deal with the internal samples. What we'll probably
* need to do is to go through the ancestry after we've processed the
* individual and then pad out any gaps in the ancestry segments. */
ret = MSP_ERR_PEDIGREE_INTERNAL_SAMPLE;
goto out;
}
ret = msp_pedigree_add_individual_common_ancestor(self, ind->id, segment, ploid);
ret = msp_pedigree_add_individual_common_ancestor(self, ind->id, seg, ploid);
if (ret != 0) {
goto out;
}
Expand All @@ -2955,9 +2965,11 @@ msp_pedigree_initialise(msp_t *self)
int ret = 0;
population_t *pop;
lineage_t *lin;
segment_t *seg;
avl_node_t *a;
label_id_t label = 0;
tsk_size_t j;
node_mapping_t *counter;

if (self->next_demographic_event != NULL) {
ret = MSP_ERR_UNSUPPORTED_OPERATION;
Expand All @@ -2981,14 +2993,21 @@ msp_pedigree_initialise(msp_t *self)

for (j = 0; j < self->num_populations; j++) {
pop = &self->populations[j];
/* Rather than messing about with how we initialise from trees, it's
* easier to just remove the lineages here, before we add them
* back later when dealing with samples in the pedigree. */
for (a = pop->ancestors[label].head; a != NULL; a = a->next) {
lin = (lineage_t *) a->item;
ret = msp_pedigree_add_sample_ancestry(self, lin->head);
if (ret != 0) {
goto out;
for (seg = lin->head; seg != NULL; seg = seg->next) {
msp_free_segment(self, seg);
}
msp_remove_individual(self, lin);
}
}
tsk_bug_assert(avl_count(&self->overlap_counts) == 2);
counter = (node_mapping_t *) self->overlap_counts.head->item;
counter->value = 0;

self->pedigree.next_individual = 0;
out:
return ret;
Expand Down Expand Up @@ -4589,31 +4608,6 @@ msp_apply_sampling_events(msp_t *self, double time)
return ret;
}

static int MSP_WARN_UNUSED
msp_pedigree_insert_ancient_samples(msp_t *self)
{
int ret = 0;
sampling_event_t *se;
segment_t *root_seg, *new_head;

while (self->next_sampling_event < self->num_sampling_events
&& self->sampling_events[self->next_sampling_event].time <= self->time) {
se = self->sampling_events + self->next_sampling_event;
root_seg = self->root_segments[se->sample];
ret = msp_insert_root_segments(self, root_seg, &new_head);
if (ret != 0) {
goto out;
}
ret = msp_pedigree_add_sample_ancestry(self, new_head);
if (ret != 0) {
goto out;
}
self->next_sampling_event++;
}
out:
return ret;
}

static double
msp_get_next_fixed_event_time(msp_t *self)
{
Expand Down Expand Up @@ -5483,13 +5477,21 @@ msp_pedigree_process_common_ancestors(msp_t *self, individual_t *ind, tsk_size_t
{
int ret = 0;
tsk_id_t node = ind->nodes[ploid];
bool is_sample = (self->tables->nodes.flags[node] & TSK_NODE_IS_SAMPLE) != 0;
tsk_id_t parent = ind->parents[ploid];
avl_tree_t *common_ancestors = &ind->common_ancestors[ploid];
segment_t *genome, *parent_ancestry[MSP_MAX_PED_PLOIDY], *seg;
const tsk_size_t ploidy = self->ploidy;
tsk_size_t j;
tsk_id_t *parent_nodes;

if (is_sample) {
ret = msp_pedigree_add_sample_ancestry(self, node);
if (ret != 0) {
goto out;
}
}

/* FIXME - assuming 1 label here */
ret = msp_merge_n_ancestors(
self, common_ancestors, ind->population, 0, node, &genome);
Expand Down Expand Up @@ -5607,10 +5609,6 @@ msp_run_pedigree(msp_t *self, double max_time, unsigned long max_events)
}
tsk_bug_assert(ind->time >= self->time);
self->time = ind->time;
ret = msp_pedigree_insert_ancient_samples(self);
if (ret != 0) {
goto out;
}
for (ploid = 0; ploid < ind->ploidy; ploid++) {
ret = msp_pedigree_process_common_ancestors(self, ind, ploid);
if (ret != 0) {
Expand Down
18 changes: 3 additions & 15 deletions lib/tests/test_pedigrees.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,8 @@ test_replicates_ancient_samples(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);

for (j = 0; j < 20; j++) {
msp_verify(&msp, 0);
ret = msp_run(&msp, DBL_MAX, UINT32_MAX);

CU_ASSERT_EQUAL_FATAL(ret, MSP_EXIT_MODEL_COMPLETE);
msp_verify(&msp, 0);
ret = msp_finalise_tables(&msp);
Expand Down Expand Up @@ -844,24 +844,12 @@ test_errors(void)
static void
test_internal_samples(void)
{
int ret;
tsk_id_t parents[] = { -1, -1, -1, -1, 0, 1 };
double time[] = { 1.0, 1.0, 0.0 };
tsk_flags_t is_sample[] = { true, true, true };
msp_t msp;
tsk_table_collection_t tables;
gsl_rng *rng = safe_rng_alloc();

ret = build_pedigree_sim(
&msp, &tables, rng, 100, 2, 3, parents, time, is_sample, NULL);
ret = msp_initialise(&msp);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = msp_run(&msp, DBL_MAX, UINT32_MAX);
CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_PEDIGREE_INTERNAL_SAMPLE);
tsk_table_collection_free(&tables);
msp_free(&msp);

gsl_rng_free(rng);
verify_pedigree(0, 1, 3, parents, time, is_sample, NULL, 0);
verify_pedigree(0.1, 1, 3, parents, time, is_sample, NULL, 0);
}

static void
Expand Down
6 changes: 0 additions & 6 deletions lib/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,6 @@ msp_strerror_internal(int err)
ret = "All individuals in the input pedigree must be associated with "
"exactly two parents (can be TSK_NULL, if not known)";
break;
case MSP_ERR_PEDIGREE_INTERNAL_SAMPLE:
ret = "Samples that are internal nodes in the pedigree are not "
"currently supported. Please comment on this GitHub issue if you "
"would like to see this feature implemented: "
"https://github.com/tskit-dev/msprime/issues/1855 ";
break;

case MSP_ERR_BAD_PROPORTION:
ret = "Proportion values must have 0 <= x <= 1";
Expand Down
1 change: 0 additions & 1 deletion lib/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@
#define MSP_ERR_PEDIGREE_TIME_TRAVEL -88
#define MSP_ERR_PEDIGREE_IND_NOT_DIPLOID -89
#define MSP_ERR_PEDIGREE_IND_NOT_TWO_PARENTS -90
#define MSP_ERR_PEDIGREE_INTERNAL_SAMPLE -91

/* clang-format on */
/* This bit is 0 for any errors originating from tskit */
Expand Down
15 changes: 15 additions & 0 deletions tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,21 @@ def test_pedigree_trio(self, r):
input_tables.nodes.assert_equals(output_tables.nodes[: len(input_tables.nodes)])
assert len(output_tables.edges) >= 2

@pytest.mark.parametrize("r", [0, 0.1, 1])
def test_pedigree_internal_sample(self, r):
input_tables = simulate_pedigree(num_founders=4, num_generations=4)
flags = input_tables.nodes.flags
flags[8:12] = tskit.NODE_IS_SAMPLE # Make inds 4 and 5 samples
input_tables.nodes.flags = flags
with tempfile.TemporaryDirectory() as tmpdir:
ts_path = pathlib.Path(tmpdir) / "pedigree.trees"
input_tables.dump(ts_path)
ts = self.run_script(f"0 --from-ts {ts_path} -r {r} --model=fixed_pedigree")
output_tables = ts.dump_tables()
input_tables.individuals.assert_equals(output_tables.individuals)
input_tables.nodes.assert_equals(output_tables.nodes[: len(input_tables.nodes)])
assert len(output_tables.edges) >= 2

@pytest.mark.parametrize("num_founders", [1, 2, 20])
def test_one_gen_pedigree(self, num_founders):
tables = simulate_pedigree(num_founders=num_founders, num_generations=1)
Expand Down
Loading
Loading