Skip to content

Commit

Permalink
Merge pull request #237 from jeromekelleher/direct-attach-zero-mutation-samples
Browse files Browse the repository at this point in the history

Add special case for inserting exact matches
  • Loading branch information
jeromekelleher authored Aug 23, 2024
2 parents 23ef308 + 2344b99 commit 3debb78
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 3 deletions.
1 change: 1 addition & 0 deletions sc2ts/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# Single-bit values used in the tskit node ``flags`` field to tag
# sc2ts-specific node categories. Each constant occupies its own bit, so
# they can be OR-ed together with tskit.NODE_IS_SAMPLE.
NODE_IS_MUTATION_OVERLAP = 1 << 21
NODE_IS_REVERSION_PUSH = 1 << 22
NODE_IS_RECOMBINANT = 1 << 23
# Set (together with NODE_IS_SAMPLE) on samples inserted with hmm_cost == 0.
NODE_IS_EXACT_MATCH = 1 << 24


__version__ = "undefined"
Expand Down
29 changes: 28 additions & 1 deletion sc2ts/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,11 @@ def extend(
match_db.create_mask_table(base_ts)
ts = increment_time(date, base_ts)

ts = add_exact_matches(ts=ts, match_db=match_db, date=date)

logger.info(f"Update ARG with low-cost samples for {date}")
ts = add_matching_results(
f"match_date=='{date}' and hmm_cost<={max_hmm_cost}",
f"match_date=='{date}' and hmm_cost>0 and hmm_cost<={max_hmm_cost}",
ts=ts,
match_db=match_db,
date=date,
Expand Down Expand Up @@ -674,6 +676,31 @@ def match_path_ts(samples, ts, path, reversions):
# print(tables)


def add_exact_matches(match_db, ts, date):
    """
    Attach the zero-cost matches recorded for ``date`` directly to the ARG.

    Queries ``match_db`` for rows where ``match_date`` equals ``date`` and
    ``hmm_cost`` is 0. Every returned sample must have exactly one path
    entry and no mutations. Each one is added as a new node at time 0,
    flagged as both a sample and an exact match, and joined to the parent
    of its single path entry by one edge covering ``[0, ts.sequence_length)``.

    Returns ``ts`` itself when the query yields no samples; otherwise
    returns a new tree sequence built from the sorted, indexed tables.
    """
    where_clause = f"match_date=='{date}' AND hmm_cost==0"
    logger.info(f"Querying match DB WHERE: {where_clause}")
    samples = [*match_db.get(where_clause)]

    # Nothing to insert: hand back the input untouched.
    if not samples:
        logger.info(f"No exact matches on {date}")
        return ts

    logger.info(f"Update ARG with {len(samples)} exact matches for {date}")
    tables = ts.dump_tables()
    exact_match_flags = tskit.NODE_IS_SAMPLE | core.NODE_IS_EXACT_MATCH
    right_end = ts.sequence_length

    for sample in samples:
        # An exact match copies from a single parent and carries no mutations.
        assert len(sample.path) == 1
        assert len(sample.mutations) == 0
        node_id = tables.nodes.add_row(
            flags=exact_match_flags,
            time=0,
            metadata=sample.metadata,
        )
        parent = sample.path[0].parent
        logger.debug(f"ARG add exact match {sample.strain}:{node_id}->{parent}")
        tables.edges.add_row(left=0, right=right_end, parent=parent, child=node_id)

    # Rows were appended out of order, so sort and index before building
    # the tree sequence.
    tables.sort()
    tables.build_index()
    return tables.tree_sequence()


def add_matching_results(
where_clause,
match_db,
Expand Down
6 changes: 4 additions & 2 deletions sc2ts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,13 +494,14 @@ def summary(self):
mc_nodes = np.sum(self.ts.nodes_flags == sc2ts.NODE_IS_MUTATION_OVERLAP)
pr_nodes = np.sum(self.ts.nodes_flags == sc2ts.NODE_IS_REVERSION_PUSH)
re_nodes = np.sum(self.ts.nodes_flags == sc2ts.NODE_IS_RECOMBINANT)
exact_matches = np.sum((self.ts.nodes_flags & sc2ts.NODE_IS_EXACT_MATCH) > 0)

samples = self.ts.samples()
nodes_with_zero_muts = np.sum(self.nodes_num_mutations == 0)
sites_with_zero_muts = np.sum(self.sites_num_mutations == 0)
latest_sample = self.nodes_date[samples[-1]]
masked_sites_per_sample = self.nodes_num_masked_sites[samples]
non_samples = self.ts.nodes_flags != tskit.NODE_IS_SAMPLE
non_samples = (self.ts.nodes_flags & tskit.NODE_IS_SAMPLE) == 0
max_non_sample_mutations = np.max(self.nodes_num_mutations[non_samples])
insertions = np.sum(self.mutations_inherited_state == "-")
deletions = np.sum(self.mutations_derived_state == "-")
Expand All @@ -509,6 +510,7 @@ def summary(self):
("latest_sample", latest_sample),
("samples", self.ts.num_samples),
("nodes", self.ts.num_nodes),
("exact_matches", exact_matches),
("mc_nodes", mc_nodes),
("pr_nodes", pr_nodes),
("re_nodes", re_nodes),
Expand Down Expand Up @@ -584,7 +586,7 @@ def _node_summary(self, u, child_mutations=True):
qc += status
flags = self.ts.nodes_flags[u]
strain = ""
if flags == tskit.NODE_IS_SAMPLE:
if (flags & tskit.NODE_IS_SAMPLE) != 0:
strain = md["strain"]
elif flags == 1 << 21:
if "overlap" in md:
Expand Down

0 comments on commit 3debb78

Please sign in to comment.