From 9a0e133f4e86eb9d335050227d0e9f6d6fa6f718 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 20 Sep 2024 17:26:57 +0100 Subject: [PATCH] Move recombinant processing offline Because it slows down inference a lot. We have only a few recombinants, so we don't effectively parallelise, and we do it three times. Also, we're doing some expensive matching on things which will mostly not get into the final ARG. --- sc2ts/inference.py | 13 +------- tests/test_inference.py | 69 ++--------------------------------------- 2 files changed, 3 insertions(+), 79 deletions(-) diff --git a/sc2ts/inference.py b/sc2ts/inference.py index f6b7689..73fc74b 100644 --- a/sc2ts/inference.py +++ b/sc2ts/inference.py @@ -461,21 +461,10 @@ def match_samples( cost = hmm_match.get_hmm_cost(num_mismatches) # print(f"Final HMM pass:{sample.strain} hmm_cost={cost} {sample.summary()}") logger.debug(f"Final HMM pass hmm_cost={cost} {sample.summary()}") - if sample.is_recombinant: - recombinants.append(sample) - - if len(recombinants) > 0: - match_recombinants( - recombinants, - base_ts, - num_mismatches=num_mismatches, - show_progress=show_progress, - num_threads=num_threads, - ) - return samples + def check_base_ts(ts): md = ts.metadata assert "sc2ts" in md diff --git a/tests/test_inference.py b/tests/test_inference.py index 12af229..6559356 100644 --- a/tests/test_inference.py +++ b/tests/test_inference.py @@ -630,37 +630,7 @@ def test_recombinant_example_1(self, tmp_path, fx_ts_map, fx_alignment_store): {"left": 15324, "parent": 52, "right": 29904}, ], } - assert smd["hmm_reruns"] == { - "forward": { - "mutations": [], - "path": [ - {"left": 0, "parent": 36, "right": 15324}, - {"left": 15324, "parent": 52, "right": 29904}, - ], - }, - "no_recombination": { - "mutations": [ - { - "derived_state": "T", - "inherited_state": "C", - "site_position": 15324, - }, - { - "derived_state": "T", - "inherited_state": "C", - "site_position": 29303, - }, - ], - "path": [{"left": 0, "parent": 36, "right": 29904}], - }, - "reverse": { - "mutations": [], - "path": [ - {"left": 0, "parent": 36, "right": 3788}, - {"left": 3788, "parent": 52, "right": 29904}, - ], - }, - } + assert smd["hmm_reruns"] == {} recomb_node = ts.node(ts.num_nodes - 1) assert recomb_node.flags == sc2ts.NODE_IS_RECOMBINANT @@ -755,42 +725,7 @@ def test_recombinant_example_2(self, tmp_path, fx_ts_map, fx_alignment_store): ], } - assert smd["hmm_reruns"] == { - "forward": { - "mutations": [], - "path": [ - {"left": 0, "parent": 62, "right": 29800}, - {"left": 29800, "parent": 63, "right": 29904}, - ], - }, - "no_recombination": { - "mutations": [ - { - "derived_state": "A", - "inherited_state": "G", - "site_position": 29800, - }, - { - "derived_state": "A", - "inherited_state": "C", - "site_position": 29801, - }, - { - "derived_state": "A", - "inherited_state": "C", - "site_position": 29802, - }, - ], - "path": [{"left": 0, "parent": 62, "right": 29904}], - }, - "reverse": { - "mutations": [], - "path": [ - {"left": 0, "parent": 62, "right": 113}, - {"left": 113, "parent": 63, "right": 29904}, - ], - }, - } + assert smd["hmm_reruns"] == { } def test_all_As(self, tmp_path, fx_ts_map, fx_alignment_store): # Same as the recombinant_example_1() function above