From ef03b17d2c4b5dc905b20e4f0166b0ded11b9f22 Mon Sep 17 00:00:00 2001
From: Jerome Kelleher
Date: Tue, 24 Sep 2024 00:17:31 +0100
Subject: [PATCH] Initial debug data on groups

---
 sc2ts/info.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/sc2ts/info.py b/sc2ts/info.py
index 1b05431..f68ab24 100644
--- a/sc2ts/info.py
+++ b/sc2ts/info.py
@@ -1,6 +1,7 @@
 import collections
 import warnings
 import dataclasses
+from typing import List
 
 import numba
 import tskit
@@ -1258,3 +1259,60 @@ def plot_recombinants_per_day(self):
         ax1.set_ylabel("Number of recombinant samples")
         ax2.set_ylabel("Fraction of samples recombinant")
         ax2.set_ylim(0, 0.01)
+
+    def get_sample_group_info(self, group_id):
+        """
+        Return a SampleGroupInfo describing the sample group ``group_id``.
+
+        Gathers the sample nodes in the group, their strain names and a count
+        of their Viridian pangolin lineages, then simplifies the tree sequence
+        to those samples plus the first ancestor found outside the group.
+
+        Raises ValueError if the group contains no nodes.
+        """
+        group_nodes = self.nodes_sample_group[group_id]
+        if len(group_nodes) == 0:
+            raise ValueError(f"Sample group {group_id!r} contains no nodes")
+
+        samples = []
+        strains = []
+        lineage_counts = collections.Counter()
+        for u in group_nodes:
+            if self.ts.nodes_flags[u] & tskit.NODE_IS_SAMPLE > 0:
+                md = self.nodes_metadata[u]
+                samples.append(u)
+                lineage_counts[md["Viridian_pangolin"]] += 1
+                strains.append(md["strain"])
+
+        # Walk up from the last node in the group until we reach a node that
+        # is not in the group. NOTE: only the first tree is consulted.
+        tree = self.ts.first()
+        u = group_nodes[-1]
+        while (
+            u != tskit.NULL
+            and self.nodes_metadata[u]["sc2ts"].get("group_id") == group_id
+        ):
+            u = tree.parent(u)
+        # tree.parent() returns tskit.NULL above a root; in that case there is
+        # no ancestor outside the group to retain.
+        focal_nodes = samples if u == tskit.NULL else samples + [u]
+        ts = self.ts.simplify(focal_nodes)
+        return SampleGroupInfo(group_id, lineage_counts, group_nodes, strains, ts)
+
+
+@dataclasses.dataclass
+class SampleGroupInfo:
+    """
+    Debug information about a single sample group.
+    """
+
+    group_id: str
+    # Number of sample nodes in the group per Viridian pangolin lineage.
+    lineage_counts: collections.Counter
+    # The nodes listed for the group, sample or not.
+    nodes: List[int]
+    # Strain names of the sample nodes in the group.
+    strains: List[str]
+    # Tree sequence simplified to the group's samples and, when one exists,
+    # the first ancestor outside the group.
+    ts: tskit.TreeSequence