From 4f8799c9a8b3f7fc3a0875c911d9a9f87a4b6213 Mon Sep 17 00:00:00 2001
From: Benjamin Gallusser <bgallusser@googlemail.com>
Date: Tue, 31 Oct 2023 12:15:19 +0100
Subject: [PATCH 01/26] Check in 3d benchmark

---
 tests/bench.py | 180 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 143 insertions(+), 37 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index 41451bad..71b21574 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -9,11 +9,12 @@
 from traccuracy.metrics import CTCMetrics, DivisionMetrics
 
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+TIMEOUT_2D = 20
+TIMEOUT_3D = 30
 
 
-def download_gt_data():
+def download_gt_data(url):
     # Download GT data -- look into cacheing this in github actions
-    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
     data_dir = os.path.join(ROOT_DIR, "downloads")
 
     if not os.path.exists(data_dir):
@@ -30,71 +31,128 @@ def download_gt_data():
             zip_ref.extractall(data_dir)
 
 
+# TODO look into fixture with params for 2d/3d
 @pytest.fixture(scope="module")
-def gt_data():
-    download_gt_data()
+def gt_data_2d():
+    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
+    path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
+    download_gt_data(url)
     return load_ctc_data(
-        os.path.join(ROOT_DIR, "downloads/Fluo-N2DL-HeLa/01_GT/TRA"),
-        os.path.join(ROOT_DIR, "downloads/Fluo-N2DL-HeLa/01_GT/TRA/man_track.txt"),
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "man_track.txt"),
     )
 
 
 @pytest.fixture(scope="module")
-def pred_data():
+def gt_data_3d():
+    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N3DH-CE.zip"
+    path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
+    download_gt_data(url)
     return load_ctc_data(
-        os.path.join(ROOT_DIR, "examples/sample-data/Fluo-N2DL-HeLa/01_RES"),
-        os.path.join(
-            ROOT_DIR, "examples/sample-data/Fluo-N2DL-HeLa/01_RES/res_track.txt"
-        ),
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "man_track.txt"),
     )
 
 
 @pytest.fixture(scope="module")
-def ctc_matched(gt_data, pred_data):
-    return CTCMatched(gt_data, pred_data)
+def pred_data_2d():
+    path = "examples/sample-data/Fluo-N2DL-HeLa/01_RES"
+    return load_ctc_data(
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "res_track.txt"),
+    )
 
 
 @pytest.fixture(scope="module")
-def iou_matched(gt_data, pred_data):
-    return IOUMatched(gt_data, pred_data, iou_threshold=0.1)
+def pred_data_3d(gt_data_3d):
+    # For the time being, this is also GT data.
+    return gt_data_3d
+
 
+@pytest.fixture(scope="module")
+def ctc_matched_2d(gt_data_2d, pred_data_2d):
+    return CTCMatched(gt_data_2d, pred_data_2d)
+
+
+@pytest.fixture(scope="module")
+def ctc_matched_3d(gt_data_3d, pred_data_3d):
+    return CTCMatched(gt_data_3d, pred_data_3d)
+
+
+@pytest.fixture(scope="module")
+def iou_matched_2d(gt_data_2d, pred_data_2d):
+    return IOUMatched(gt_data_2d, pred_data_2d, iou_threshold=0.1)
 
-def test_load_gt_data(benchmark):
-    download_gt_data()
+
+@pytest.fixture(scope="module")
+def iou_matched_3d(gt_data_3d, pred_data_3d):
+    return IOUMatched(gt_data_3d, pred_data_3d, iou_threshold=0.1)
+
+
+@pytest.mark.parametrize("dataset", ["Fluo-N2DL-HeLa", "Fluo-N3DH-CE"])
+def test_load_gt_ctc_data(
+    benchmark,
+    dataset,
+):
+    url = f"http://data.celltrackingchallenge.net/training-datasets/{dataset}.zip"
+    path = f"downloads/{dataset}/01_GT/TRA"
+    download_gt_data(url)
 
     benchmark.pedantic(
         load_ctc_data,
         args=(
-            "downloads/Fluo-N2DL-HeLa/01_GT/TRA",
-            "downloads/Fluo-N2DL-HeLa/01_GT/TRA/man_track.txt",
+            os.path.join(ROOT_DIR, path),
+            os.path.join(ROOT_DIR, path, "man_track.txt"),
         ),
         rounds=1,
         iterations=1,
     )
 
 
-def test_load_pred_data(benchmark):
+# TODO Add 3d results
+@pytest.mark.parametrize(
+    "path",
+    [
+        "examples/sample-data/Fluo-N2DL-HeLa/01_RES",
+    ],
+)
+def test_load_pred_ctc_data(benchmark, path):
     benchmark.pedantic(
         load_ctc_data,
         args=(
-            os.path.join(ROOT_DIR, "examples/sample-data/Fluo-N2DL-HeLa/01_RES"),
-            os.path.join(
-                ROOT_DIR, "examples/sample-data/Fluo-N2DL-HeLa/01_RES/res_track.txt"
-            ),
+            os.path.join(ROOT_DIR, path),
+            os.path.join(ROOT_DIR, path, "res_track.txt"),
         ),
         rounds=1,
         iterations=1,
     )
 
 
-def test_ctc_matched(benchmark, gt_data, pred_data):
-    benchmark(CTCMatched, gt_data, pred_data)
+@pytest.mark.timeout(TIMEOUT_2D)
+def test_ctc_matched_2d(benchmark, gt_data_2d, pred_data_2d):
+    benchmark.pedantic(
+        CTCMatched,
+        args=(gt_data_2d, pred_data_2d),
+        rounds=1,
+        iterations=1,
+    )
 
 
-@pytest.mark.timeout(300)
-def test_ctc_metrics(benchmark, ctc_matched):
+@pytest.mark.xfail
+@pytest.mark.timeout(TIMEOUT_3D)
+def test_ctc_matched_3d(benchmark, gt_data_3d, pred_data_3d):
+    benchmark.pedantic(
+        CTCMatched,
+        args=(gt_data_3d, pred_data_3d),
+        rounds=1,
+        iterations=1,
+    )
+
+
+@pytest.mark.timeout(TIMEOUT_2D)
+def test_ctc_metrics_2d(benchmark, ctc_matched_2d):
     def run_compute():
-        return CTCMetrics(copy.deepcopy(ctc_matched)).compute()
+        return CTCMetrics(copy.deepcopy(ctc_matched_2d)).compute()
 
     ctc_results = benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
@@ -106,27 +164,75 @@ def run_compute():
     assert ctc_results["ws_edges"] == 47
 
 
-def test_ctc_div_metrics(benchmark, ctc_matched):
+@pytest.mark.xfail
+@pytest.mark.timeout(TIMEOUT_3D)
+def test_ctc_metrics_3d(benchmark, ctc_matched_3d):
+    def run_compute():
+        return CTCMetrics(copy.deepcopy(ctc_matched_3d)).compute()
+
+    benchmark.pedantic(run_compute, rounds=1, iterations=1)
+
+
+@pytest.mark.timeout(TIMEOUT_2D)
+def test_ctc_div_metrics_2d(benchmark, ctc_matched_2d):
     def run_compute():
-        return DivisionMetrics(copy.deepcopy(ctc_matched)).compute()
+        return DivisionMetrics(copy.deepcopy(ctc_matched_2d)).compute()
 
-    div_results = benchmark(run_compute)
+    div_results = benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
     assert div_results["Frame Buffer 0"]["False Negative Divisions"] == 18
     assert div_results["Frame Buffer 0"]["False Positive Divisions"] == 30
     assert div_results["Frame Buffer 0"]["True Positive Divisions"] == 76
 
 
-def test_iou_matched(benchmark, gt_data, pred_data):
-    benchmark(IOUMatched, gt_data, pred_data, iou_threshold=0.5)
+@pytest.mark.xfail
+@pytest.mark.timeout(TIMEOUT_3D)
+def test_ctc_div_metrics_3d(benchmark, ctc_matched_3d):
+    def run_compute():
+        return DivisionMetrics(copy.deepcopy(ctc_matched_3d)).compute()
+
+    benchmark.pedantic(run_compute, rounds=1, iterations=1)
+
+
+@pytest.mark.timeout(TIMEOUT_2D)
+def test_iou_matched_2d(benchmark, gt_data_2d, pred_data_2d):
+    benchmark.pedantic(
+        IOUMatched,
+        args=(gt_data_2d, pred_data_2d),
+        kwargs={"iou_threshold": 0.5},
+        rounds=1,
+        iterations=1,
+    )
+
+
+@pytest.mark.xfail
+@pytest.mark.timeout(TIMEOUT_3D)
+def test_iou_matched_3d(benchmark, gt_data_3d, pred_data_3d):
+    benchmark.pedantic(
+        IOUMatched,
+        args=(gt_data_3d, pred_data_3d),
+        kwargs={"iou_threshold": 0.5},
+        rounds=1,
+        iterations=1,
+    )
 
 
-def test_iou_div_metrics(benchmark, iou_matched):
+@pytest.mark.timeout(TIMEOUT_2D)
+def test_iou_div_metrics_2d(benchmark, iou_matched_2d):
     def run_compute():
-        return DivisionMetrics(copy.deepcopy(iou_matched)).compute()
+        return DivisionMetrics(copy.deepcopy(iou_matched_2d)).compute()
 
-    div_results = benchmark(run_compute)
+    div_results = benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
     assert div_results["Frame Buffer 0"]["False Negative Divisions"] == 25
     assert div_results["Frame Buffer 0"]["False Positive Divisions"] == 31
     assert div_results["Frame Buffer 0"]["True Positive Divisions"] == 69
+
+
+@pytest.mark.xfail
+@pytest.mark.timeout(TIMEOUT_3D)
+def test_iou_div_metrics_3d(benchmark, iou_matched_3d):
+    def run_compute():
+        return DivisionMetrics(copy.deepcopy(iou_matched_3d)).compute()
+
+    benchmark.pedantic(run_compute, rounds=1, iterations=1)

From fddadd0a1b6d58323e75856b24cd666f0d9ae794 Mon Sep 17 00:00:00 2001
From: Benjamin Gallusser <bgallusser@googlemail.com>
Date: Tue, 31 Oct 2023 14:20:44 +0100
Subject: [PATCH 02/26] Reduce some duplicate code

---
 tests/bench.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index 71b21574..7b70a645 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -31,11 +31,7 @@ def download_gt_data(url):
             zip_ref.extractall(data_dir)
 
 
-# TODO look into fixture with params for 2d/3d
-@pytest.fixture(scope="module")
-def gt_data_2d():
-    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
-    path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
+def gt_data(url, path):
     download_gt_data(url)
     return load_ctc_data(
         os.path.join(ROOT_DIR, path),
@@ -43,15 +39,18 @@ def gt_data_2d():
     )
 
 
+@pytest.fixture(scope="module")
+def gt_data_2d():
+    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
+    path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
+    return gt_data(url, path)
+
+
 @pytest.fixture(scope="module")
 def gt_data_3d():
     url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N3DH-CE.zip"
     path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
-    download_gt_data(url)
-    return load_ctc_data(
-        os.path.join(ROOT_DIR, path),
-        os.path.join(ROOT_DIR, path, "man_track.txt"),
-    )
+    return gt_data(url, path)
 
 
 @pytest.fixture(scope="module")

From 8c83c2716f145470e126ccd6990f474e2303ccda Mon Sep 17 00:00:00 2001
From: Benjamin Gallusser <bgallusser@googlemail.com>
Date: Fri, 12 Apr 2024 14:59:52 +0200
Subject: [PATCH 03/26] Clean up benchmarks

---
 tests/bench.py                    | 167 +++++++++---------------------
 tests/metrics/test_ctc_metrics.py |  41 +++++++-
 tests/metrics/test_divisions.py   |  45 +++++++-
 tests/test_utils.py               |  31 ++++++
 4 files changed, 161 insertions(+), 123 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index 2ee55475..2f45cc41 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -1,7 +1,6 @@
 import copy
 import os
-import urllib.request
-import zipfile
+from pathlib import Path
 
 import pandas as pd
 import pytest
@@ -14,65 +13,35 @@
 from traccuracy.matchers import CTCMatcher, IOUMatcher
 from traccuracy.metrics import CTCMetrics, DivisionMetrics
 
-ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-TIMEOUT_2D = 20
-TIMEOUT_3D = 30
+from tests.test_utils import download_gt_data, gt_data
 
-
-def download_gt_data(url):
-    # Download GT data -- look into caching this in github actions
-    data_dir = os.path.join(ROOT_DIR, "downloads")
-
-    if not os.path.exists(data_dir):
-        os.mkdir(data_dir)
-
-    filename = url.split("/")[-1]
-    file_path = os.path.join(data_dir, filename)
-
-    if not os.path.exists(file_path):
-        urllib.request.urlretrieve(url, file_path)
-
-        # Unzip the data
-        with zipfile.ZipFile(file_path, "r") as zip_ref:
-            zip_ref.extractall(data_dir)
-
-
-def gt_data(url, path):
-    download_gt_data(url)
-    return load_ctc_data(
-        os.path.join(ROOT_DIR, path),
-        os.path.join(ROOT_DIR, path, "man_track.txt"),
-    )
+ROOT_DIR = Path(__file__).resolve().parents[1]
+TIMEOUT = 20
 
 
 @pytest.fixture(scope="module")
 def gt_data_2d():
-    # url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
     url = "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip"
     path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
-    return gt_data(url, path)
+    return gt_data(url, ROOT_DIR, path)
 
 
 @pytest.fixture(scope="module")
 def gt_data_3d():
     url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N3DH-CE.zip"
     path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
-    return gt_data(url, path)
+    return gt_data(url, ROOT_DIR, path)
 
 
 @pytest.fixture(scope="module")
 def pred_data_2d(gt_data_2d):
-    # path = "examples/sample-data/Fluo-N2DL-HeLa/01_RES"
-    # return load_ctc_data(
-    #     os.path.join(ROOT_DIR, path),
-    #     os.path.join(ROOT_DIR, path, "res_track.txt"),
-    # )
+    # For now this is also GT data.
     return copy.deepcopy(gt_data_2d)
 
 
 @pytest.fixture(scope="module")
 def pred_data_3d(gt_data_3d):
-    # For the time being, this is also GT data.
+    # For now this is also GT data.
     return copy.deepcopy(gt_data_3d)
 
 
@@ -96,14 +65,18 @@ def iou_matched_3d(gt_data_3d, pred_data_3d):
     return IOUMatcher(iou_threshold=0.1).compute_mapping(gt_data_3d, pred_data_3d)
 
 
-@pytest.mark.parametrize("dataset", ["Fluo-N2DL-HeLa", "Fluo-N3DH-CE"])
+@pytest.mark.parametrize(
+    "dataset",
+    ["Fluo-N2DL-HeLa", "Fluo-N3DH-CE"],
+    ids=["2d", "3d"],
+)
 def test_load_gt_ctc_data(
     benchmark,
     dataset,
 ):
     url = f"http://data.celltrackingchallenge.net/training-datasets/{dataset}.zip"
     path = f"downloads/{dataset}/01_GT/TRA"
-    download_gt_data(url)
+    download_gt_data(url, ROOT_DIR)
 
     benchmark.pedantic(
         load_ctc_data,
@@ -118,12 +91,12 @@ def test_load_gt_ctc_data(
 
 
 # TODO Add 3d results
-@pytest.mark.skip
 @pytest.mark.parametrize(
     "path",
     [
         "examples/sample-data/Fluo-N2DL-HeLa/01_RES",
     ],
+    ids=["2d"],
 )
 def test_load_pred_ctc_data(benchmark, path):
     benchmark.pedantic(
@@ -138,24 +111,6 @@ def test_load_pred_ctc_data(benchmark, path):
     )
 
 
-@pytest.mark.parametrize(
-    "gt_data,pred_data",
-    [
-        ("gt_data_2d", "pred_data_2d"),
-        ("gt_data_3d", "pred_data_3d"),
-    ],
-)
-def test_ctc_matcher(benchmark, gt_data, pred_data, request):
-    gt_data = request.getfixturevalue(gt_data)
-    pred_data = request.getfixturevalue(pred_data)
-    benchmark.pedantic(
-        CTCMatcher().compute_mapping,
-        args=(gt_data, pred_data),
-        rounds=1,
-        iterations=1,
-    )
-
-
 def test_ctc_checks(benchmark):
     names = ["Cell_ID", "Start", "End", "Parent_ID"]
 
@@ -175,58 +130,41 @@ def test_ctc_checks(benchmark):
     benchmark(_check_ctc, tracks, detections, masks)
 
 
-@pytest.mark.timeout(TIMEOUT_2D)
-def test_ctc_metrics_2d(benchmark, ctc_matched_2d):
-    def run_compute():
-        return CTCMetrics().compute(copy.deepcopy(ctc_matched_2d))
-
-    ctc_results = benchmark.pedantic(run_compute, rounds=1, iterations=1)
+@pytest.mark.parametrize(
+    "gt_data,pred_data",
+    [
+        ("gt_data_2d", "pred_data_2d"),
+        ("gt_data_3d", "pred_data_3d"),
+    ],
+    ids=["2d", "3d"],
+)
+def test_ctc_matcher(benchmark, gt_data, pred_data, request):
+    gt_data = request.getfixturevalue(gt_data)
+    pred_data = request.getfixturevalue(pred_data)
+    benchmark.pedantic(
+        CTCMatcher().compute_mapping,
+        args=(gt_data, pred_data),
+        rounds=1,
+        iterations=1,
+    )
 
-    assert ctc_results.results["fn_edges"] == 87
-    assert ctc_results.results["fn_nodes"] == 39
-    assert ctc_results.results["fp_edges"] == 60
-    assert ctc_results.results["fp_nodes"] == 0
-    assert ctc_results.results["ns_nodes"] == 0
-    assert ctc_results.results["ws_edges"] == 47
 
+@pytest.mark.parametrize(
+    "ctc_matched",
+    ["ctc_matched_2d", "ctc_matched_3d"],
+    ids=["2d", "3d"],
+)
+def test_ctc_metrics(benchmark, ctc_matched, request):
+    ctc_matched = request.getfixturevalue(ctc_matched)
 
-@pytest.mark.xfail
-@pytest.mark.timeout(TIMEOUT_3D)
-def test_ctc_metrics_3d(benchmark, ctc_matched_3d):
     def run_compute():
-        return CTCMetrics().compute(copy.deepcopy(ctc_matched_3d))
+        return CTCMetrics().compute(copy.deepcopy(ctc_matched))
 
     benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
 
-@pytest.mark.timeout(TIMEOUT_2D)
-def test_ctc_div_metrics_2d(benchmark, ctc_matched_2d):
-    def run_compute():
-        return DivisionMetrics().compute(copy.deepcopy(ctc_matched_2d))
-
-    div_results = benchmark.pedantic(run_compute, rounds=1, iterations=1)
-
-    assert div_results.results["Frame Buffer 0"]["False Negative Divisions"] == 18
-    assert div_results.results["Frame Buffer 0"]["False Positive Divisions"] == 30
-    assert div_results.results["Frame Buffer 0"]["True Positive Divisions"] == 76
-
-
 @pytest.mark.xfail
-@pytest.mark.timeout(TIMEOUT_3D)
-def test_ctc_div_metrics_3d(benchmark, ctc_matched_3d):
-    def run_compute():
-        return DivisionMetrics().compute(copy.deepcopy(ctc_matched_3d))
-
-    benchmark.pedantic(run_compute, rounds=1, iterations=1)
-
-
-def test_iou_div_metrics(benchmark, iou_matched):
-    def run_compute():
-        return DivisionMetrics().compute(copy.deepcopy(iou_matched))
-
-    benchmark.pedantic(run_compute, rounds=1, iterations=1)
-
-
+@pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
     [
@@ -244,22 +182,15 @@ def test_iou_matcher(benchmark, gt_data, pred_data):
     )
 
 
-@pytest.mark.timeout(TIMEOUT_2D)
-def test_iou_div_metrics_2d(benchmark, iou_matched_2d):
-    def run_compute():
-        return DivisionMetrics().compute(copy.deepcopy(iou_matched_2d))
-
-    div_results = benchmark.pedantic(run_compute, rounds=1, iterations=1)
-
-    assert div_results["Frame Buffer 0"]["False Negative Divisions"] == 25
-    assert div_results["Frame Buffer 0"]["False Positive Divisions"] == 31
-    assert div_results["Frame Buffer 0"]["True Positive Divisions"] == 69
-
-
 @pytest.mark.xfail
-@pytest.mark.timeout(TIMEOUT_3D)
-def test_iou_div_metrics_3d(benchmark, iou_matched_3d):
+@pytest.mark.timeout(TIMEOUT)
+@pytest.mark.parametrize(
+    "iou_matched",
+    ["iou_matched_2d", "iou_matched_3d"],
+    ids=["2d", "3d"],
+)
+def test_iou_div_metrics(benchmark, iou_matched):
     def run_compute():
-        return DivisionMetrics().compute(copy.deepcopy(iou_matched_3d))
+        return DivisionMetrics().compute(copy.deepcopy(iou_matched))
 
     benchmark.pedantic(run_compute, rounds=1, iterations=1)
diff --git a/tests/metrics/test_ctc_metrics.py b/tests/metrics/test_ctc_metrics.py
index 2bf18a5e..adc805d3 100644
--- a/tests/metrics/test_ctc_metrics.py
+++ b/tests/metrics/test_ctc_metrics.py
@@ -1,7 +1,42 @@
-from traccuracy.matchers._ctc import CTCMatcher
-from traccuracy.metrics._ctc import CTCMetrics
+import os
+from pathlib import Path
 
-from tests.test_utils import get_movie_with_graph
+import pytest
+from traccuracy.loaders import load_ctc_data
+from traccuracy.matchers import CTCMatcher
+from traccuracy.metrics import CTCMetrics
+
+from tests.test_utils import get_movie_with_graph, gt_data
+
+ROOT_DIR = Path(__file__).resolve().parents[2]
+
+
+@pytest.fixture(scope="module")
+def gt_hela():
+    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
+    path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
+    return gt_data(url, ROOT_DIR, path)
+
+
+@pytest.fixture(scope="module")
+def pred_hela():
+    path = "examples/sample-data/Fluo-N2DL-HeLa/01_RES"
+    return load_ctc_data(
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "res_track.txt"),
+    )
+
+
+def test_ctc_metrics(gt_hela, pred_hela):
+    ctc_matched = CTCMatcher().compute_mapping(gt_hela, pred_hela)
+    ctc_results = CTCMetrics().compute(ctc_matched)
+
+    assert ctc_results.results["fn_edges"] == 87
+    assert ctc_results.results["fn_nodes"] == 39
+    assert ctc_results.results["fp_edges"] == 60
+    assert ctc_results.results["fp_nodes"] == 0
+    assert ctc_results.results["ns_nodes"] == 0
+    assert ctc_results.results["ws_edges"] == 47
 
 
 def test_compute_mapping():
diff --git a/tests/metrics/test_divisions.py b/tests/metrics/test_divisions.py
index ce2d978c..dbf95391 100644
--- a/tests/metrics/test_divisions.py
+++ b/tests/metrics/test_divisions.py
@@ -1,8 +1,49 @@
+import os
+from pathlib import Path
+
+import pytest
 from traccuracy import TrackingGraph
-from traccuracy.matchers import Matched
+from traccuracy.loaders import load_ctc_data
+from traccuracy.matchers import CTCMatcher, IOUMatcher, Matched
 from traccuracy.metrics._divisions import DivisionMetrics
 
-from tests.test_utils import get_division_graphs
+from tests.test_utils import get_division_graphs, gt_data
+
+ROOT_DIR = Path(__file__).resolve().parents[2]
+
+
+@pytest.fixture(scope="module")
+def gt_hela():
+    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
+    path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
+    return gt_data(url, ROOT_DIR, path)
+
+
+@pytest.fixture(scope="module")
+def pred_hela():
+    path = "examples/sample-data/Fluo-N2DL-HeLa/01_RES"
+    return load_ctc_data(
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "res_track.txt"),
+    )
+
+
+def test_ctc_div_metrics(gt_hela, pred_hela):
+    ctc_matched = CTCMatcher().compute_mapping(gt_hela, pred_hela)
+    div_results = DivisionMetrics().compute(ctc_matched)
+
+    assert div_results.results["Frame Buffer 0"]["False Negative Divisions"] == 18
+    assert div_results.results["Frame Buffer 0"]["False Positive Divisions"] == 30
+    assert div_results.results["Frame Buffer 0"]["True Positive Divisions"] == 76
+
+
+def test_iou_div_metrics(gt_hela, pred_hela):
+    iou_matched = IOUMatcher(iou_threshold=0.1).compute_mapping(gt_hela, pred_hela)
+    div_results = DivisionMetrics().compute(iou_matched)
+
+    assert div_results.results["Frame Buffer 0"]["False Negative Divisions"] == 25
+    assert div_results.results["Frame Buffer 0"]["False Positive Divisions"] == 31
+    assert div_results.results["Frame Buffer 0"]["True Positive Divisions"] == 69
 
 
 def test_DivisionMetrics():
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6f854ad1..2cf98f28 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,38 @@
+import os
+import urllib.request
+import zipfile
+
 import networkx as nx
 import numpy as np
 import skimage as sk
 from traccuracy._tracking_graph import TrackingGraph
+from traccuracy.loaders import load_ctc_data
+
+
+def download_gt_data(url, root_dir):
+    # Download GT data -- look into caching this in github actions
+    data_dir = os.path.join(root_dir, "downloads")
+
+    if not os.path.exists(data_dir):
+        os.mkdir(data_dir)
+
+    filename = url.split("/")[-1]
+    file_path = os.path.join(data_dir, filename)
+
+    if not os.path.exists(file_path):
+        urllib.request.urlretrieve(url, file_path)
+
+        # Unzip the data
+        with zipfile.ZipFile(file_path, "r") as zip_ref:
+            zip_ref.extractall(data_dir)
+
+
+def gt_data(url, root_dir, path):
+    download_gt_data(url, root_dir)
+    return load_ctc_data(
+        os.path.join(root_dir, path),
+        os.path.join(root_dir, path, "man_track.txt"),
+    )
 
 
 def get_annotated_image(img_size=256, num_labels=3, sequential=True, seed=1):

From ed0eedf1ccad0dda6835a08b28314361c8cb12ed Mon Sep 17 00:00:00 2001
From: Benjamin Gallusser <bgallusser@googlemail.com>
Date: Fri, 12 Apr 2024 15:36:43 +0200
Subject: [PATCH 04/26] Fix typo

---
 tests/bench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/bench.py b/tests/bench.py
index 2f45cc41..3a23bad7 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -67,7 +67,7 @@ def iou_matched_3d(gt_data_3d, pred_data_3d):
 
 @pytest.mark.parametrize(
     "dataset",
-    ["Fluo-N2DL-HeLa", "Fluo-N3DH-CE"],
+    ["PhC-C2DL-PSC", "Fluo-N3DH-CE"],
     ids=["2d", "3d"],
 )
 def test_load_gt_ctc_data(

From f837bb218dbbe54a4c5b1ef65fa789ec67e82f13 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 10:36:13 -0700
Subject: [PATCH 05/26] Remove expected fail from benchmarking tests

---
 tests/bench.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index 3a23bad7..d3d1cd3f 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -163,7 +163,6 @@ def run_compute():
     benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
 
-@pytest.mark.xfail
 @pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
@@ -173,7 +172,9 @@ def run_compute():
     ],
     ids=["2d", "3d"],
 )
-def test_iou_matcher(benchmark, gt_data, pred_data):
+def test_iou_matcher(benchmark, gt_data, pred_data, request):
+    gt_data = request.getfixturevalue(gt_data)
+    pred_data = request.getfixturevalue(pred_data)
     benchmark.pedantic(
         IOUMatcher(iou_threshold=0.1).compute_mapping,
         args=(gt_data, pred_data),
@@ -182,14 +183,15 @@ def test_iou_matcher(benchmark, gt_data, pred_data):
     )
 
 
-@pytest.mark.xfail
 @pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "iou_matched",
     ["iou_matched_2d", "iou_matched_3d"],
     ids=["2d", "3d"],
 )
-def test_iou_div_metrics(benchmark, iou_matched):
+def test_iou_div_metrics(benchmark, iou_matched, request):
+    iou_matched = request.getfixturevalue(iou_matched)
+
     def run_compute():
         return DivisionMetrics().compute(copy.deepcopy(iou_matched))
 

From a4bdaa7243318a37dea6724b33d8d2b373b53842 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 10:58:04 -0700
Subject: [PATCH 06/26] Setup script and caching for downloading test data for
 benchmarking

---
 .github/workflows/ci.yml      | 12 ++++++++++++
 scripts/download_test_data.py | 37 +++++++++++++++++++++++++++++++++++
 tests/bench.py                | 16 +++++++--------
 3 files changed, 57 insertions(+), 8 deletions(-)
 create mode 100644 scripts/download_test_data.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 509e122f..5c08b61f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,6 +60,18 @@ jobs:
         with:
           fetch-depth: 50 # this is to make sure we obtain the target base commit
 
+      - uses: actions/cache@v3
+        id: cache
+        with:
+          path: downloads
+          key: ${{ hashFiles('scripts/download_samples.py') }}
+
+      - name: Download Samples
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: |
+          pip install requests
+          python scripts/download_test_data.py
+
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
diff --git a/scripts/download_test_data.py b/scripts/download_test_data.py
new file mode 100644
index 00000000..3aba4274
--- /dev/null
+++ b/scripts/download_test_data.py
@@ -0,0 +1,37 @@
+import os
+import urllib.request
+import zipfile
+from pathlib import Path
+
+ROOT_DIR = Path(__file__).resolve().parents[1]
+DATASETS = [
+    "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip",
+    "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip"
+    "http://data.celltrackingchallenge.net/training-datasets/Fluo-N3DH-CE.zip",
+]
+
+
+def download_gt_data(url, root_dir):
+    data_dir = os.path.join(root_dir, "downloads")
+
+    if not os.path.exists(data_dir):
+        os.mkdir(data_dir)
+
+    filename = url.split("/")[-1]
+    file_path = os.path.join(data_dir, filename)
+
+    if not os.path.exists(file_path):
+        urllib.request.urlretrieve(url, file_path)
+
+        # Unzip the data
+        with zipfile.ZipFile(file_path, "r") as zip_ref:
+            zip_ref.extractall(data_dir)
+
+
+def main():
+    for url in DATASETS:
+        download_gt_data(url, ROOT_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/bench.py b/tests/bench.py
index d3d1cd3f..d266b3f2 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -13,24 +13,26 @@
 from traccuracy.matchers import CTCMatcher, IOUMatcher
 from traccuracy.metrics import CTCMetrics, DivisionMetrics
 
-from tests.test_utils import download_gt_data, gt_data
-
 ROOT_DIR = Path(__file__).resolve().parents[1]
 TIMEOUT = 20
 
 
 @pytest.fixture(scope="module")
 def gt_data_2d():
-    url = "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip"
     path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
-    return gt_data(url, ROOT_DIR, path)
+    return load_ctc_data(
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "man_track.txt"),
+    )
 
 
 @pytest.fixture(scope="module")
 def gt_data_3d():
-    url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N3DH-CE.zip"
     path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
-    return gt_data(url, ROOT_DIR, path)
+    return load_ctc_data(
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "man_track.txt"),
+    )
 
 
 @pytest.fixture(scope="module")
@@ -74,9 +76,7 @@ def test_load_gt_ctc_data(
     benchmark,
     dataset,
 ):
-    url = f"http://data.celltrackingchallenge.net/training-datasets/{dataset}.zip"
     path = f"downloads/{dataset}/01_GT/TRA"
-    download_gt_data(url, ROOT_DIR)
 
     benchmark.pedantic(
         load_ctc_data,

From 29a5b55d7c0f0726b8956fa71bc6a721d46fa12b Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 11:00:13 -0700
Subject: [PATCH 07/26] Use unique ids for cache steps in workflow

---
 .github/workflows/ci.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5c08b61f..1abe14ae 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -61,13 +61,13 @@ jobs:
           fetch-depth: 50 # this is to make sure we obtain the target base commit
 
       - uses: actions/cache@v3
-        id: cache
+        id: cache_data
         with:
           path: downloads
           key: ${{ hashFiles('scripts/download_samples.py') }}
 
       - name: Download Samples
-        if: steps.cache.outputs.cache-hit != 'true'
+        if: steps.cache_data.outputs.cache-hit != 'true'
         run: |
           pip install requests
           python scripts/download_test_data.py
@@ -87,20 +87,20 @@ jobs:
 
       - name: Retrieve cached baseline if available
         uses: actions/cache/restore@v4
-        id: cache
+        id: cache_baseline
         with:
           path: baseline.json
           key: ${{ github.event.pull_request.base.sha }}
 
       - name: Run baseline benchmark if not in cache
-        if: steps.cache.outputs.cache-hit != 'true'
+        if: steps.cache_baseline.outputs.cache-hit != 'true'
         run: |
           git checkout ${{ github.event.pull_request.base.sha }}
           pytest tests/bench.py --benchmark-json baseline.json
 
       - name: Cache baseline results
         uses: actions/cache/save@v4
-        if: steps.cache.outputs.cache-hit != 'true'
+        if: steps.cache_baseline.outputs.cache-hit != 'true'
         with:
           path: baseline.json
           key: ${{ github.event.pull_request.base.sha }}

From d381e65239a1492f5e83ab54f28e7e6bd90ce2b1 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 11:12:09 -0700
Subject: [PATCH 08/26] Add missing comma to list of urls

---
 scripts/download_test_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/download_test_data.py b/scripts/download_test_data.py
index 3aba4274..e7597a85 100644
--- a/scripts/download_test_data.py
+++ b/scripts/download_test_data.py
@@ -6,7 +6,7 @@
 ROOT_DIR = Path(__file__).resolve().parents[1]
 DATASETS = [
     "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip",
-    "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip"
+    "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip",
     "http://data.celltrackingchallenge.net/training-datasets/Fluo-N3DH-CE.zip",
 ]
 

From 1b15ec2c48c3572ef6e4bc86c9812fc08b91dc8d Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 11:16:50 -0700
Subject: [PATCH 09/26] Correct data caching steps in workflow

---
 .github/workflows/ci.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1abe14ae..69b713c7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,7 +60,8 @@ jobs:
         with:
           fetch-depth: 50 # this is to make sure we obtain the target base commit
 
-      - uses: actions/cache@v3
+      - name: Retrieve cached data
+        uses: actions/cache/restore@v4
         id: cache_data
         with:
           path: downloads
@@ -72,6 +73,13 @@ jobs:
           pip install requests
           python scripts/download_test_data.py
 
+      - name: Cache sample data
+        uses: actions/cache/save@v4
+        if: steps.cache_data.outputs.cache-hit != 'true'
+        with:
+          path: downloads
+          key: ${{ hashFiles('scripts/download_samples.py') }}
+
       - name: Set up Python
         uses: actions/setup-python@v5
         with:

From 85738d110fd7727314e65b57aa0fe64746b447a1 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 13:24:13 -0700
Subject: [PATCH 10/26] Correct path to script used for data cache key

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 69b713c7..82fd95cb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -65,7 +65,7 @@ jobs:
         id: cache_data
         with:
           path: downloads
-          key: ${{ hashFiles('scripts/download_samples.py') }}
+          key: ${{ hashFiles('scripts/download_test_data.py') }}
 
       - name: Download Samples
         if: steps.cache_data.outputs.cache-hit != 'true'
@@ -78,7 +78,7 @@ jobs:
         if: steps.cache_data.outputs.cache-hit != 'true'
         with:
           path: downloads
-          key: ${{ hashFiles('scripts/download_samples.py') }}
+          key: ${{ hashFiles('scripts/download_test_data.py') }}
 
       - name: Set up Python
         uses: actions/setup-python@v5

From 0e6c8baaf22b6a486d7a17005b14d4cb3ef59dad Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 13:48:34 -0700
Subject: [PATCH 11/26] add verbose flag for benchmarking

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 82fd95cb..530d63ce 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -104,7 +104,7 @@ jobs:
         if: steps.cache_baseline.outputs.cache-hit != 'true'
         run: |
           git checkout ${{ github.event.pull_request.base.sha }}
-          pytest tests/bench.py --benchmark-json baseline.json
+          pytest tests/bench.py -v --benchmark-json baseline.json
 
       - name: Cache baseline results
         uses: actions/cache/save@v4
@@ -116,7 +116,7 @@ jobs:
       - name: Run benchmark on PR head commit
         run: |
           git checkout ${{ github.event.pull_request.head.sha }}
-          pytest tests/bench.py --benchmark-json pr.json
+          pytest tests/bench.py -v --benchmark-json pr.json
 
       - name: Generate report
         run: python .github/workflows/benchmark-pr.py baseline.json pr.json report.md

From 7e3113cec3f39d431268ff1af3b4a77f6221da8d Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 14:20:35 -0700
Subject: [PATCH 12/26] Limit 3d data to subset of frames

---
 tests/bench.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/bench.py b/tests/bench.py
index d266b3f2..19d75723 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -29,10 +29,17 @@ def gt_data_2d():
 @pytest.fixture(scope="module")
 def gt_data_3d():
     path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
-    return load_ctc_data(
+    trackgraph = load_ctc_data(
         os.path.join(ROOT_DIR, path),
         os.path.join(ROOT_DIR, path, "man_track.txt"),
     )
+    nodes = set()
+
+    # Limit 3d dataset to a subset of frames to manage memory/cpu footprint
+    for t in range(10):
+        nodes = nodes.union(trackgraph.nodes_by_frame[t])
+
+    return trackgraph.get_subgraph(nodes)
 
 
 @pytest.fixture(scope="module")

From 78c984208e015521854a773a1d0e92141eaaeb34 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 14:26:17 -0700
Subject: [PATCH 13/26] Add timeout for ctc matcher

---
 tests/bench.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/bench.py b/tests/bench.py
index 19d75723..4f1a961d 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -137,6 +137,7 @@ def test_ctc_checks(benchmark):
     benchmark(_check_ctc, tracks, detections, masks)
 
 
+@pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
     [

From a4f35d1ab66f855235d875fb8a38c6027e30c503 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Wed, 18 Sep 2024 14:38:48 -0700
Subject: [PATCH 14/26] limit 3d data to 3 frames

---
 tests/bench.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index 4f1a961d..d7fe734f 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -36,7 +36,7 @@ def gt_data_3d():
     nodes = set()
 
     # Limit 3d dataset to a subset of frames to manage memory/cpu footprint
-    for t in range(10):
+    for t in range(3):
         nodes = nodes.union(trackgraph.nodes_by_frame[t])
 
     return trackgraph.get_subgraph(nodes)
@@ -137,7 +137,7 @@ def test_ctc_checks(benchmark):
     benchmark(_check_ctc, tracks, detections, masks)
 
 
-@pytest.mark.timeout(TIMEOUT)
+@pytest.mark.timeout(TIMEOUT * 2)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
     [
@@ -171,7 +171,7 @@ def run_compute():
     benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
 
-@pytest.mark.timeout(TIMEOUT)
+@pytest.mark.timeout(TIMEOUT * 2)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
     [
@@ -191,7 +191,7 @@ def test_iou_matcher(benchmark, gt_data, pred_data, request):
     )
 
 
-@pytest.mark.timeout(TIMEOUT)
+@pytest.mark.timeout(TIMEOUT * 2)
 @pytest.mark.parametrize(
     "iou_matched",
     ["iou_matched_2d", "iou_matched_3d"],

From e98c47a727efc5eeb8039790392f2ceb2fadd6a9 Mon Sep 17 00:00:00 2001
From: Benjamin Gallusser <bgallusser@googlemail.com>
Date: Thu, 19 Sep 2024 12:17:47 +0200
Subject: [PATCH 15/26] Revert benchmark to full 3d video

---
 tests/bench.py | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index d7fe734f..b8d169d2 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -14,7 +14,7 @@
 from traccuracy.metrics import CTCMetrics, DivisionMetrics
 
 ROOT_DIR = Path(__file__).resolve().parents[1]
-TIMEOUT = 20
+TIMEOUT = 30
 
 
 @pytest.fixture(scope="module")
@@ -23,23 +23,18 @@ def gt_data_2d():
     return load_ctc_data(
         os.path.join(ROOT_DIR, path),
         os.path.join(ROOT_DIR, path, "man_track.txt"),
+        run_checks=False,
     )
 
 
 @pytest.fixture(scope="module")
 def gt_data_3d():
     path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
-    trackgraph = load_ctc_data(
+    return load_ctc_data(
         os.path.join(ROOT_DIR, path),
         os.path.join(ROOT_DIR, path, "man_track.txt"),
+        run_checks=False,
     )
-    nodes = set()
-
-    # Limit 3d dataset to a subset of frames to manage memory/cpu footprint
-    for t in range(3):
-        nodes = nodes.union(trackgraph.nodes_by_frame[t])
-
-    return trackgraph.get_subgraph(nodes)
 
 
 @pytest.fixture(scope="module")
@@ -118,26 +113,26 @@ def test_load_pred_ctc_data(benchmark, path):
     )
 
 
-def test_ctc_checks(benchmark):
+@pytest.mark.parametrize(
+    "dataset",
+    ["PhC-C2DL-PSC", "Fluo-N3DH-CE"],
+    ids=["2d", "3d"],
+)
+def test_ctc_checks(benchmark, dataset):
+    path = f"downloads/{dataset}/01_GT/TRA"
     names = ["Cell_ID", "Start", "End", "Parent_ID"]
-
     tracks = pd.read_csv(
-        os.path.join(
-            ROOT_DIR, "examples/sample-data/Fluo-N2DL-HeLa/01_RES/res_track.txt"
-        ),
+        os.path.join(ROOT_DIR, path, "man_track.txt"),
         header=None,
         sep=" ",
         names=names,
     )
-
-    masks = _load_tiffs(
-        os.path.join(ROOT_DIR, "examples/sample-data/Fluo-N2DL-HeLa/01_RES")
-    )
+    masks = _load_tiffs(os.path.join(ROOT_DIR, path))
     detections = _get_node_attributes(masks)
     benchmark(_check_ctc, tracks, detections, masks)
 
 
-@pytest.mark.timeout(TIMEOUT * 2)
+@pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
     [
@@ -171,7 +166,7 @@ def run_compute():
     benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
 
-@pytest.mark.timeout(TIMEOUT * 2)
+@pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "gt_data,pred_data",
     [
@@ -191,7 +186,7 @@ def test_iou_matcher(benchmark, gt_data, pred_data, request):
     )
 
 
-@pytest.mark.timeout(TIMEOUT * 2)
+@pytest.mark.timeout(TIMEOUT)
 @pytest.mark.parametrize(
     "iou_matched",
     ["iou_matched_2d", "iou_matched_3d"],

From 35519aeecddc56c22355399133bf99ceb1ff39d7 Mon Sep 17 00:00:00 2001
From: Benjamin Gallusser <bgallusser@googlemail.com>
Date: Thu, 19 Sep 2024 12:21:49 +0200
Subject: [PATCH 16/26] Add progress bar to ctc loader feature extraction

---
 src/traccuracy/loaders/_ctc.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/traccuracy/loaders/_ctc.py b/src/traccuracy/loaders/_ctc.py
index e710ba10..401dc473 100644
--- a/src/traccuracy/loaders/_ctc.py
+++ b/src/traccuracy/loaders/_ctc.py
@@ -66,7 +66,10 @@ def _get_node_attributes(masks):
             segmentation_id, x, y, z, t
     """
     data_df = pd.concat(
-        [_detections_from_image(masks, idx) for idx in range(masks.shape[0])]
+        [
+            _detections_from_image(masks, idx)
+            for idx in tqdm(range(masks.shape[0]), desc="Computing node attributes")
+        ],
     ).reset_index(drop=True)
     data_df = data_df.rename(
         columns={
@@ -193,9 +196,9 @@ def _check_ctc(tracks: pd.DataFrame, detections: pd.DataFrame, masks: np.ndarray
             parent_end = tracks[tracks["Cell_ID"] == row["Parent_ID"]]["End"].iloc[0]
             if parent_end >= row["Start"]:
                 raise ValueError(
-                    f"Invalid tracklet connection: Daughter tracklet with ID {row['Cell_ID']} "
-                    f"starts at t={row['Start']}, "
-                    f"but parent tracklet with ID {row['Parent_ID']} only ends at t={parent_end}."
+                    "Invalid tracklet connection: Daughter tracklet with ID"
+                    f" {row['Cell_ID']} starts at t={row['Start']}, but parent tracklet"
+                    f" with ID {row['Parent_ID']} only ends at t={parent_end}."
                 )
 
     for t in range(tracks["Start"].min(), tracks["End"].max()):
@@ -241,12 +244,13 @@ def load_ctc_data(data_dir, track_path=None, name=None, run_checks=True):
         track_paths = list(glob.glob(os.path.join(data_dir, "*_track.txt")))
         if not track_paths:
             raise ValueError(
-                f"No track_path passed and a *_track.txt file could not be found in {data_dir}"
+                "No track_path passed and a *_track.txt file could not be found in"
+                f" {data_dir}"
             )
         if len(track_paths) > 1:
             raise ValueError(
-                f"No track_path passed and multiple *_track.txt files found: {track_paths}."
-                + " Please pick one and pass it explicitly."
+                "No track_path passed and multiple *_track.txt files found:"
+                f" {track_paths}." + " Please pick one and pass it explicitly."
             )
         track_path = track_paths[0]
 

From 9bab2a7a6c735c1dfbad469ab697361ea1c8176c Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 19 Sep 2024 14:30:36 -0700
Subject: [PATCH 17/26] Try setting ubuntu runner to 22.04 instead of latest

---
 .github/workflows/ci.yml          |  2 +-
 src/traccuracy/_tracking_graph.py | 39 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 530d63ce..c8b17584 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,7 +27,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.9", "3.10", "3.11"]
-        platform: [ubuntu-latest, macos-latest, windows-latest]
+        platform: [ubuntu-22.04, macos-latest, windows-latest]
 
     steps:
       - uses: actions/checkout@v4
diff --git a/src/traccuracy/_tracking_graph.py b/src/traccuracy/_tracking_graph.py
index 93c8198d..6fb6b24a 100644
--- a/src/traccuracy/_tracking_graph.py
+++ b/src/traccuracy/_tracking_graph.py
@@ -483,3 +483,42 @@ def get_tracklets(
                         tracklet.add(parent)
 
         return [self.get_subgraph(g) for g in tracklets]
+
+    def get_nodes_by_roi(self, **kwargs):
+        """Gets the nodes in a given region of interest (ROI). The ROI is
+        defined by keyword arguments that correspond to the frame key and
+        location keys, where each argument should be a (start, end) tuple
+        (the end is exclusive). Dimensions that are not passed as arguments
+        are unbounded. None can be passed as an element of the tuple to
+        signify an unbounded ROI on that side.
+
+        For example, if frame_key='t' and location_keys=('x', 'y'):
+            `graph.get_nodes_by_roi(t=(10, None), x=(0, 100))`
+        would return all nodes with time >= 10, and 0 <= x < 100, with no limit
+        on the y values.
+
+        Returns:
+            list of hashable: A list of node_ids for all nodes in the ROI.
+        """
+        dimensions = []
+        for dim, limit in kwargs.items():
+            if not (dim == self.frame_key or dim in self.location_keys):
+                raise ValueError(
+                    f"Provided argument {dim} is neither the frame key"
+                    f" {self.frame_key} or one of the location keys"
+                    f" {self.location_keys}."
+                )
+            dimensions.append((dim, limit[0], limit[1]))
+        nodes = []
+        for node, attrs in self.graph.nodes().items():
+            inside = True
+            for dim, start, end in dimensions:
+                if start is not None and attrs[dim] < start:
+                    inside = False
+                    break
+                if end is not None and attrs[dim] >= end:
+                    inside = False
+                    break
+            if inside:
+                nodes.append(node)
+        return nodes

From 25760d3a7c1d7539ee23d3b4ea078de25098cde6 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 19 Sep 2024 14:55:39 -0700
Subject: [PATCH 18/26] Revert "Try setting ubuntu runner to 22.04 instead of
 latest"

This reverts commit 9bab2a7a6c735c1dfbad469ab697361ea1c8176c.
---
 .github/workflows/ci.yml          |  2 +-
 src/traccuracy/_tracking_graph.py | 39 -------------------------------
 2 files changed, 1 insertion(+), 40 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c8b17584..530d63ce 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,7 +27,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.9", "3.10", "3.11"]
-        platform: [ubuntu-22.04, macos-latest, windows-latest]
+        platform: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
       - uses: actions/checkout@v4
diff --git a/src/traccuracy/_tracking_graph.py b/src/traccuracy/_tracking_graph.py
index 6fb6b24a..93c8198d 100644
--- a/src/traccuracy/_tracking_graph.py
+++ b/src/traccuracy/_tracking_graph.py
@@ -483,42 +483,3 @@ def get_tracklets(
                         tracklet.add(parent)
 
         return [self.get_subgraph(g) for g in tracklets]
-
-    def get_nodes_by_roi(self, **kwargs):
-        """Gets the nodes in a given region of interest (ROI). The ROI is
-        defined by keyword arguments that correspond to the frame key and
-        location keys, where each argument should be a (start, end) tuple
-        (the end is exclusive). Dimensions that are not passed as arguments
-        are unbounded. None can be passed as an element of the tuple to
-        signify an unbounded ROI on that side.
-
-        For example, if frame_key='t' and location_keys=('x', 'y'):
-            `graph.get_nodes_by_roi(t=(10, None), x=(0, 100))`
-        would return all nodes with time >= 10, and 0 <= x < 100, with no limit
-        on the y values.
-
-        Returns:
-            list of hashable: A list of node_ids for all nodes in the ROI.
-        """
-        dimensions = []
-        for dim, limit in kwargs.items():
-            if not (dim == self.frame_key or dim in self.location_keys):
-                raise ValueError(
-                    f"Provided argument {dim} is neither the frame key"
-                    f" {self.frame_key} or one of the location keys"
-                    f" {self.location_keys}."
-                )
-            dimensions.append((dim, limit[0], limit[1]))
-        nodes = []
-        for node, attrs in self.graph.nodes().items():
-            inside = True
-            for dim, start, end in dimensions:
-                if start is not None and attrs[dim] < start:
-                    inside = False
-                    break
-                if end is not None and attrs[dim] >= end:
-                    inside = False
-                    break
-            if inside:
-                nodes.append(node)
-        return nodes

From dfce2e7d84a3c6155dce4ef093f4242d4c0dc4e2 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 19 Sep 2024 14:56:17 -0700
Subject: [PATCH 19/26] Show print outputs during benchmarking to try to
 identify failure point

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 530d63ce..23c5b82f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,7 +116,7 @@ jobs:
       - name: Run benchmark on PR head commit
         run: |
           git checkout ${{ github.event.pull_request.head.sha }}
-          pytest tests/bench.py -v --benchmark-json pr.json
+          pytest tests/bench.py -v -s --benchmark-json pr.json
 
       - name: Generate report
         run: python .github/workflows/benchmark-pr.py baseline.json pr.json report.md

From d8c595a059d039bf301f87c14f9d18d6748cdd23 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 19 Sep 2024 15:03:22 -0700
Subject: [PATCH 20/26] Stop copying graphs when initializing matcher

---
 src/traccuracy/matchers/_base.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/traccuracy/matchers/_base.py b/src/traccuracy/matchers/_base.py
index d3056356..3017455f 100644
--- a/src/traccuracy/matchers/_base.py
+++ b/src/traccuracy/matchers/_base.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import copy
 import logging
 from abc import ABC, abstractmethod
 from typing import Any
@@ -41,10 +40,7 @@ def compute_mapping(
                 "Input data must be a TrackingData object with a graph and segmentations"
             )
 
-        # Copy graphs to avoid possible changes to graphs while computing mapping
-        matched = self._compute_mapping(
-            copy.deepcopy(gt_graph), copy.deepcopy(pred_graph)
-        )
+        matched = self._compute_mapping(gt_graph, pred_graph)
 
         # Record matcher info on Matched object
         matched.matcher_info = self.info

From d32818246f7d2918bacc28636c81a129a66856b5 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 7 Nov 2024 11:29:43 -0500
Subject: [PATCH 21/26] Limit fixture scope to function and remove unnecessary
 copy.deepcopy

---
 tests/bench.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/bench.py b/tests/bench.py
index b8d169d2..848c1197 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -17,7 +17,7 @@
 TIMEOUT = 30
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def gt_data_2d():
     path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
     return load_ctc_data(
@@ -27,7 +27,7 @@ def gt_data_2d():
     )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def gt_data_3d():
     path = "downloads/Fluo-N3DH-CE/01_GT/TRA"
     return load_ctc_data(
@@ -37,34 +37,34 @@ def gt_data_3d():
     )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def pred_data_2d(gt_data_2d):
     # For now this is also GT data.
     return copy.deepcopy(gt_data_2d)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def pred_data_3d(gt_data_3d):
     # For now this is also GT data.
     return copy.deepcopy(gt_data_3d)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def ctc_matched_2d(gt_data_2d, pred_data_2d):
     return CTCMatcher().compute_mapping(gt_data_2d, pred_data_2d)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def ctc_matched_3d(gt_data_3d, pred_data_3d):
     return CTCMatcher().compute_mapping(gt_data_3d, pred_data_3d)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def iou_matched_2d(gt_data_2d, pred_data_2d):
     return IOUMatcher(iou_threshold=0.1).compute_mapping(gt_data_2d, pred_data_2d)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def iou_matched_3d(gt_data_3d, pred_data_3d):
     return IOUMatcher(iou_threshold=0.1).compute_mapping(gt_data_3d, pred_data_3d)
 
@@ -161,7 +161,7 @@ def test_ctc_metrics(benchmark, ctc_matched, request):
     ctc_matched = request.getfixturevalue(ctc_matched)
 
     def run_compute():
-        return CTCMetrics().compute(copy.deepcopy(ctc_matched))
+        return CTCMetrics().compute(ctc_matched)
 
     benchmark.pedantic(run_compute, rounds=1, iterations=1)
 
@@ -196,6 +196,6 @@ def test_iou_div_metrics(benchmark, iou_matched, request):
     iou_matched = request.getfixturevalue(iou_matched)
 
     def run_compute():
-        return DivisionMetrics().compute(copy.deepcopy(iou_matched))
+        return DivisionMetrics().compute(iou_matched)
 
     benchmark.pedantic(run_compute, rounds=1, iterations=1)

From d330c07d9a2f0eef928b3db13c964b58b286e37f Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 7 Nov 2024 11:42:20 -0500
Subject: [PATCH 22/26] Bump timeout to 300s to allow for additional time
 loading and prepping data

---
 tests/bench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/bench.py b/tests/bench.py
index 848c1197..c07c9851 100644
--- a/tests/bench.py
+++ b/tests/bench.py
@@ -14,7 +14,7 @@
 from traccuracy.metrics import CTCMetrics, DivisionMetrics
 
 ROOT_DIR = Path(__file__).resolve().parents[1]
-TIMEOUT = 30
+TIMEOUT = 300
 
 
 @pytest.fixture(scope="function")

From c3e92f6abd0592b0573a2e73699ef0ca51680313 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 7 Nov 2024 13:29:01 -0500
Subject: [PATCH 23/26] Remove print statements from benchmarking ci output

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 23c5b82f..530d63ce 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,7 +116,7 @@ jobs:
       - name: Run benchmark on PR head commit
         run: |
           git checkout ${{ github.event.pull_request.head.sha }}
-          pytest tests/bench.py -v -s --benchmark-json pr.json
+          pytest tests/bench.py -v --benchmark-json pr.json
 
       - name: Generate report
         run: python .github/workflows/benchmark-pr.py baseline.json pr.json report.md

From 1f4a47b3474a49babb080220b52da9fdd1cc9763 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 7 Nov 2024 13:29:43 -0500
Subject: [PATCH 24/26] Fix bug introduced by eliminating data copying in
 matcher

---
 tests/metrics/test_divisions.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/metrics/test_divisions.py b/tests/metrics/test_divisions.py
index f9c529d9..db6edd98 100644
--- a/tests/metrics/test_divisions.py
+++ b/tests/metrics/test_divisions.py
@@ -7,19 +7,27 @@
 from traccuracy.matchers import CTCMatcher, IOUMatcher, Matched
 from traccuracy.metrics._divisions import DivisionMetrics
 
-from tests.test_utils import get_division_graphs, gt_data
+from tests.test_utils import download_gt_data, get_division_graphs
 
 ROOT_DIR = Path(__file__).resolve().parents[2]
 
 
 @pytest.fixture(scope="module")
-def gt_hela():
+def download_gt_hela():
     url = "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip"
+    download_gt_data(url, ROOT_DIR)
+
+
+@pytest.fixture(scope="function")
+def gt_hela():
     path = "downloads/Fluo-N2DL-HeLa/01_GT/TRA"
-    return gt_data(url, ROOT_DIR, path)
+    return load_ctc_data(
+        os.path.join(ROOT_DIR, path),
+        os.path.join(ROOT_DIR, path, "man_track.txt"),
+    )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def pred_hela():
     path = "examples/sample-data/Fluo-N2DL-HeLa/01_RES"
     return load_ctc_data(

From c1ba1d587565458908f5df56bd8f292cc6e10848 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Thu, 7 Nov 2024 13:32:02 -0500
Subject: [PATCH 25/26] Revert "Stop copying graphs when initializing matcher"

This reverts commit d8c595a059d039bf301f87c14f9d18d6748cdd23.
---
 src/traccuracy/matchers/_base.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/traccuracy/matchers/_base.py b/src/traccuracy/matchers/_base.py
index 3017455f..d3056356 100644
--- a/src/traccuracy/matchers/_base.py
+++ b/src/traccuracy/matchers/_base.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import copy
 import logging
 from abc import ABC, abstractmethod
 from typing import Any
@@ -40,7 +41,10 @@ def compute_mapping(
                 "Input data must be a TrackingData object with a graph and segmentations"
             )
 
-        matched = self._compute_mapping(gt_graph, pred_graph)
+        # Copy graphs to avoid possible changes to graphs while computing mapping
+        matched = self._compute_mapping(
+            copy.deepcopy(gt_graph), copy.deepcopy(pred_graph)
+        )
 
         # Record matcher info on Matched object
         matched.matcher_info = self.info

From 9aa9a7a4bfeceb7465a61c3edf50e9557bb75296 Mon Sep 17 00:00:00 2001
From: msschwartz21 <msschwartz21@gmail.com>
Date: Mon, 11 Nov 2024 11:32:10 -0500
Subject: [PATCH 26/26] Revert "Revert "Stop copying graphs when initializing
 matcher""

Tried to move this change into a different PR, but that caused the action to start failing.

This reverts commit c1ba1d587565458908f5df56bd8f292cc6e10848.
---
 src/traccuracy/matchers/_base.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/traccuracy/matchers/_base.py b/src/traccuracy/matchers/_base.py
index d3056356..3017455f 100644
--- a/src/traccuracy/matchers/_base.py
+++ b/src/traccuracy/matchers/_base.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import copy
 import logging
 from abc import ABC, abstractmethod
 from typing import Any
@@ -41,10 +40,7 @@ def compute_mapping(
                 "Input data must be a TrackingData object with a graph and segmentations"
             )
 
-        # Copy graphs to avoid possible changes to graphs while computing mapping
-        matched = self._compute_mapping(
-            copy.deepcopy(gt_graph), copy.deepcopy(pred_graph)
-        )
+        matched = self._compute_mapping(gt_graph, pred_graph)
 
         # Record matcher info on Matched object
         matched.matcher_info = self.info