dev - update timeseries + tests

expand tests for masked_timeseries and refactored masked_timeseries
demidenm · May 29, 2024 · 8a35438 · 8a35438
1 parent b5de518
commit 8a35438
Show file tree

Hide file tree

Showing 2 changed files with 171 additions and 9 deletions.
diff --git a/pyrelimri/masked_timeseries.py b/pyrelimri/masked_timeseries.py
@@ -85,8 +85,7 @@ def extract_time_series_values(behave_df: pd.DataFrame, time_series_array: np.nd
 
 
 def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = None, roi_mask: str = None,
-                        roi_coords: tuple = None, radius_mm: int = None, bold_tr: float = None, detrend=True,
-                        fwhm_smooth: float = None):
+                        roi_coords: tuple = None, radius_mm: int = None, detrend=True, fwhm_smooth: float = None):
     """
     For each BOLD path, extract timeseries for either a specified mask or ROI coordinate. Mask and coordinate should be
     in same space/affine as BOLD data. Function leverages NiftiLabelsMasker (mask path) NiftiSpheresMasker (coordinates)
@@ -99,7 +98,6 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
         roi_mask (str or None): Path to the ROI mask image (required if roi_type is 'mask').
         roi_coords (tuple or None): Coordinates (x,y,z) for the sphere center (required if roi_type is 'coords').
         radius_mm (int or None): Radius of the sphere in mm (required if roi_type is 'coords').
-        bold_tr (float or None): TR value for acquisition of BOLD data.
         detrend: True/False, whether to use Nilearn's detrend function.
         fwhm_smooth (float or None): FWHM for spatial smoothing of data.
 
@@ -111,14 +109,33 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
     if roi_type not in roi_options:
         raise ValueError("Invalid ROI type. Choose 'mask' or 'coords'.")
 
+    bold_id_list = []
+    for bold_path in bold_paths:
+        bold_name = os.path.basename(bold_path)
+        path_parts = bold_name.split('_')
+
+        assert any('sub-' in part for part in path_parts), "The path must contain 'sub-' separated by '-'"
+        assert any('run-' in part for part in path_parts), "The path must contain 'run-' separated by '-'"
+
+        sub_id, run_id = None, None
+        for val in path_parts:
+            if 'sub-' in val:
+                sub_id = val.split('-')[1]
+            elif 'run-' in val:
+                run_id = val.split('-')[1]
+        sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
+        bold_id_list.append(sub_info)
+
     if roi_type == 'mask':
         roi_series_list = []
 
         # Iterate over each path in bold_paths
         for bold_path in bold_paths:
             img = [load_img(i) for i in [bold_path, roi_mask]]
-            assert img[0].shape[0:3] == img[1].shape, 'images of different shape, BOLD {} and ROI {}'.format(
+            assert img[0].shape[0:3] == img[1].shape, 'Position [0:3] of images differ in shape, BOLD {} and ROI {}'.format(
                 img[0].shape, img[1].shape)
+
+
             # Mask data by ROI and smooth and then clean data
             masked_data = apply_mask(bold_path, roi_mask, smoothing_fwhm=fwhm_smooth)
             clean_timeseries = clean(masked_data, standardize='psc', detrend=detrend,
@@ -128,7 +145,7 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
             time_series_avg = np.mean(clean_timeseries, axis=1)[:, None]
             roi_series_list.append(time_series_avg)
 
-        return roi_series_list
+        return roi_series_list, bold_id_list
 
     elif roi_type == 'coords':
         coord_series_list = []
@@ -144,6 +161,7 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
             img = [load_img(i) for i in [bold_path, coord_mask]]
             assert img[0].shape[0:3] == img[1].shape, 'images of different shape, BOLD {} and ROI {}'.format(
                 img[0].shape[0:3], img[1].shape)
+
             # Mask data by ROI and smooth and then clean data
             masked_data = apply_mask(bold_path, coord_mask, smoothing_fwhm=fwhm_smooth)
             clean_timeseries = clean(masked_data, standardize='psc', detrend=detrend,
@@ -152,15 +170,15 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
             # get avg at volumes across voxels, return a (volumnes,1) array
             time_series_avg = np.mean(clean_timeseries, axis=1)[:, None]
             coord_series_list.append(time_series_avg)
-        return coord_series_list, coord_mask
+        return coord_series_list, coord_mask, bold_id_list
 
     else:
         print(f'roi_type: {roi_type}, is not in [{roi_options}]')
 
 
 def extract_postcue_trs_for_conditions(events_data: list, onset: str, trial_name: str,
                                        bold_tr: float, bold_vols: int, time_series: np.ndarray,
-                                       conditions: list, tr_delay: int):
+                                       conditions: list, tr_delay: int, list_trpaths: list):
     """
     Extract TR coinciding with condition onset, plus TRs for specified delay for each file. Save this to a pandas
     dataframe (long) with associated Mean Signal value, for each subject, trial of condition and cue across the range
@@ -175,13 +193,34 @@ def extract_postcue_trs_for_conditions(events_data: list, onset: str, trial_name
         bold_vols (int): Number of volumes for BOLD.
         time_series (numpy.ndarray): numpy array of time series data.
         conditions (list): List of cue conditions to iterate over, min 1.
-        tr_delay (int): Number of TRs to serve as delay (post onset)
+        list_trpaths (list): List or sub-##_run-## exported with timeseries extraction to match beh order
 
     Returns:
         pd.DataFrame: DataFrame containing mean signal intensity values, subject labels,
             trial labels, TR values, and cue labels for all specified conditions.
     """
     dfs = []
+
+    # check array names first
+    beh_id_list = []
+    for beh_path in events_data:
+        # create sub ID array to text again bold array
+        beh_name = os.path.basename(beh_path)
+        path_parts = beh_name.split('_')
+        sub_id, run_id = None, None
+        for val in path_parts:
+            if 'sub-' in val:
+                sub_id = val.split('-')[1]
+            elif 'run-' in val:
+                run_id = val.split('-')[1]
+        sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
+        beh_id_list.append(sub_info)
+
+    assert len(beh_id_list) == len(list_trpaths), f"Length of behavioral files {len(beh_id_list)} " \
+                                              f"does not match TR list {len(list_trpaths)}"
+    assert (np.array(beh_id_list) == np.array(list_trpaths)).all(), "Provided list_trpaths does not match" \
+                                                                f"Beh path order {beh_id_list}"
+
     for cue in conditions:
         cue_dfs = [] # creating separate cue dfs to accomodate different number of trials for cue types
         sub_n = 0

diff --git a/tests/test_similarity_ref.py → tests/test_modules.py b/tests/test_similarity_ref.py → tests/test_modules.py
@@ -9,6 +9,8 @@
 from pyrelimri.tetrachoric_correlation import tetrachoric_corr
 from pyrelimri.brain_icc import (voxelwise_icc, setup_atlas, roi_icc)
 from pyrelimri.icc import sumsq_icc
+from pyrelimri.masked_timeseries import (trlocked_events, extract_time_series_values,
+                                         extract_time_series, extract_postcue_trs_for_conditions)
 from collections import namedtuple
 from nilearn.datasets import fetch_neurovault_ids
 from nilearn.masking import compute_multi_brain_mask
@@ -241,4 +243,125 @@ def test_roiicc_msc(tmp_path_factory):
     result = roi_icc(multisession_list=[ses1, ses2], type_atlas='shaefer_2018',
                      atlas_dir = tmpdir, icc_type='icc_3')
 
-    assert np.allclose(result['est'][200], .70, atol=.01)
+    assert np.allclose(result['est'][200], .70, atol=.01)
+
+
+# tests for masked timeseries
+def test_miss_sub_boldpath():
+    bold_paths = ['/tmp/NDA_run-01_bold.nii.gz']
+    roi_type = 'mask'
+    roi_mask = '/tmp/roi_mask.nii.gz'
+
+    with pytest.raises(AssertionError):
+        extract_time_series(bold_paths, roi_type, roi_mask=roi_mask)
+
+def test_miss_sub_boldpath():
+    bold_paths = ['/tmp/sub-NDA_01_bold.nii.gz']
+    roi_type = 'mask'
+    roi_mask = '/tmp/roi_mask.nii.gz'
+
+    with pytest.raises(AssertionError):
+        extract_time_series(bold_paths, roi_type, roi_mask=roi_mask)
+
+
+def test_mismatched_shapes_boldroi(tmp_path):
+    # testing that error is thrown when mask and BOLD images are not similar shape
+    bold_path = tmp_path / "sub-01_run-01_bold.nii.gz"
+    roi_mask_path = tmp_path / "roi_mask.nii.gz"
+
+    # Create dummy BOLD and ROI NIfTI files with mismatched shapes
+    create_dummy_nifti((64, 64, 36, 2), np.eye(4), bold_path)
+    create_dummy_nifti((64, 64, 34), np.eye(4), roi_mask_path)
+
+    bold_paths = [bold_path]
+    roi_type = 'mask'
+
+    # Ensure that AssertionError is raised when shapes are mismatched
+    with pytest.raises(AssertionError):
+        extract_time_series(bold_paths, roi_type, roi_mask=str(roi_mask_path))
+
+def test_wrongorder_behbold_ids():
+    # testing that order of sub & run paths for BOLD paths != Behavioral paths
+    bold_path_list = [f"sub-0{i:02d}_run-01.nii.gz" for i in range(20)] + \
+                     [f"sub-0{i:02d}_run-02.nii.gz" for i in range(20)]
+    beh_path_list = [f"sub-0{i:02d}_run-01.nii.gz" for i in range(15)] + \
+                     [f"sub-0{i:02d}_run-02.nii.gz" for i in range(20)]
+
+    with pytest.raises(AssertionError):
+        extract_postcue_trs_for_conditions(events_data=beh_path_list, onset='Test',
+                                           trial_name='test', bold_tr=.800, bold_vols=200,
+                                           time_series=[0,1,2,3], conditions=['test'], tr_delay=15,
+                                           list_trpaths= bold_path_list)
+
+def test_wrongroi_type():
+    # Define invalid ROI type
+    wrong_roi_lab = 'fookwrng'
+
+    # Define other function arguments
+    bold_paths = ["sub-01_run-01_bold.nii.gz"]
+    high_pass_sec = 100
+    roi_mask = "roi_mask.nii.gz"
+
+    # Test if ValueError is raised for invalid ROI type
+    with pytest.raises(ValueError):
+        extract_time_series(bold_paths, wrong_roi_lab, high_pass_sec=high_pass_sec, roi_mask=roi_mask)
+
+def test_missing_file():
+    # test when events file is not found
+    events_path = "missing_file_name.csv"
+    onsets_column = "onsets"
+    trial_name = "trial"
+    bold_tr = 2.0
+    bold_vols = 10
+
+    # Test if FileNotFoundError is raised when the file does not exist
+    with pytest.raises(FileNotFoundError):
+        trlocked_events(events_path, onsets_column, trial_name, bold_tr, bold_vols)
+
+
+def test_missing_eventscol(tmp_path):
+    # testing missing column "trial" in events file
+    events_path = tmp_path / "test_events.csv"
+    with open(events_path, "w") as f:
+        f.write("onsets\n0.0\n1.0\n2.0\n")
+
+    # Define function arguments
+    onsets_column = "onsets"
+    trial_name = "trial"
+    bold_tr = 2.0
+    bold_vols = 10
+
+    # Test if KeyError is raised when columns are missing
+    with pytest.raises(KeyError):
+        trlocked_events(events_path, onsets_column, trial_name, bold_tr, bold_vols)
+
+
+def test_lenbold_mismatchtrlen(tmp_path):
+    # The length of the resulting TR locked values (length) should be similar N to BOLD.
+    # assume to always be true but confirm
+    events_path = tmp_path / "test_events.csv"
+    onset_name = 'onsets'
+    trial_name = 'trial'
+    bold_tr = 2.0
+    bold_vols = 5  # Mismatched bold_vols compared to the expected length of merged_df
+
+    with open(events_path, "w") as f:
+        f.write(f"{onset_name},{trial_name}\n0.0,Test1\n1.0,Test2\n2.0,Test1\n")
+
+    with pytest.raises(ValueError):
+        trlocked_events(events_path=events_path, onsets_column=onset_name, trial_name=trial_name,
+                        bold_tr=bold_tr, bold_vols=bold_vols, separator=',')
+
+def test_runtrlocked_events(tmp_path):
+    # The length of the resulting TR locked values (length) should be similar N to BOLD.
+    # assume to always be true but confirm
+    events_path = tmp_path / "test_events.csv"
+    onset_name = 'onsets'
+    trial_name = 'trial'
+    bold_tr = 2.0
+    bold_vols = 3
+    with open(events_path, "w") as f:
+        f.write(f"{onset_name},{trial_name}\n0.0,Test1\n2.0,Test2\n4.0,Test1\n")
+
+    trlocked_events(events_path=events_path, onsets_column=onset_name, trial_name=trial_name,
+                    bold_tr=bold_tr, bold_vols=bold_vols, separator=',')