Skip to content

Commit

Permalink
dev - update timeseries + tests
Browse files Browse the repository at this point in the history
expand tests for masked_timeseries and refactored masked_timeseries
  • Loading branch information
demidenm committed May 29, 2024
1 parent b5de518 commit 8a35438
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 9 deletions.
55 changes: 47 additions & 8 deletions pyrelimri/masked_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ def extract_time_series_values(behave_df: pd.DataFrame, time_series_array: np.nd


def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = None, roi_mask: str = None,
roi_coords: tuple = None, radius_mm: int = None, bold_tr: float = None, detrend=True,
fwhm_smooth: float = None):
roi_coords: tuple = None, radius_mm: int = None, detrend=True, fwhm_smooth: float = None):
"""
For each BOLD path, extract timeseries for either a specified mask or ROI coordinate. Mask and coordinate should be
in same space/affine as BOLD data. Function leverages NiftiLabelsMasker (mask path) NiftiSpheresMasker (coordinates)
Expand All @@ -99,7 +98,6 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
roi_mask (str or None): Path to the ROI mask image (required if roi_type is 'mask').
roi_coords (tuple or None): Coordinates (x,y,z) for the sphere center (required if roi_type is 'coords').
radius_mm (int or None): Radius of the sphere in mm (required if roi_type is 'coords').
bold_tr (float or None): TR value for acquisition of BOLD data.
detrend: True/False, whether to use Nilearn's detrend function.
fwhm_smooth (float or None): FWHM for spatial smoothing of data.
Expand All @@ -111,14 +109,33 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
if roi_type not in roi_options:
raise ValueError("Invalid ROI type. Choose 'mask' or 'coords'.")

bold_id_list = []
for bold_path in bold_paths:
bold_name = os.path.basename(bold_path)
path_parts = bold_name.split('_')

assert any('sub-' in part for part in path_parts), "The path must contain 'sub-' separated by '-'"
assert any('run-' in part for part in path_parts), "The path must contain 'run-' separated by '-'"

sub_id, run_id = None, None
for val in path_parts:
if 'sub-' in val:
sub_id = val.split('-')[1]
elif 'run-' in val:
run_id = val.split('-')[1]
sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
bold_id_list.append(sub_info)

if roi_type == 'mask':
roi_series_list = []

# Iterate over each path in bold_paths
for bold_path in bold_paths:
img = [load_img(i) for i in [bold_path, roi_mask]]
assert img[0].shape[0:3] == img[1].shape, 'images of different shape, BOLD {} and ROI {}'.format(
assert img[0].shape[0:3] == img[1].shape, 'Position [0:3] of images differ in shape, BOLD {} and ROI {}'.format(
img[0].shape, img[1].shape)


# Mask data by ROI and smooth and then clean data
masked_data = apply_mask(bold_path, roi_mask, smoothing_fwhm=fwhm_smooth)
clean_timeseries = clean(masked_data, standardize='psc', detrend=detrend,
Expand All @@ -128,7 +145,7 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
time_series_avg = np.mean(clean_timeseries, axis=1)[:, None]
roi_series_list.append(time_series_avg)

return roi_series_list
return roi_series_list, bold_id_list

elif roi_type == 'coords':
coord_series_list = []
Expand All @@ -144,6 +161,7 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
img = [load_img(i) for i in [bold_path, coord_mask]]
assert img[0].shape[0:3] == img[1].shape, 'images of different shape, BOLD {} and ROI {}'.format(
img[0].shape[0:3], img[1].shape)

# Mask data by ROI and smooth and then clean data
masked_data = apply_mask(bold_path, coord_mask, smoothing_fwhm=fwhm_smooth)
clean_timeseries = clean(masked_data, standardize='psc', detrend=detrend,
Expand All @@ -152,15 +170,15 @@ def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = No
# get avg at volumes across voxels, return a (volumnes,1) array
time_series_avg = np.mean(clean_timeseries, axis=1)[:, None]
coord_series_list.append(time_series_avg)
return coord_series_list, coord_mask
return coord_series_list, coord_mask, bold_id_list

else:
print(f'roi_type: {roi_type}, is not in [{roi_options}]')


def extract_postcue_trs_for_conditions(events_data: list, onset: str, trial_name: str,
bold_tr: float, bold_vols: int, time_series: np.ndarray,
conditions: list, tr_delay: int):
conditions: list, tr_delay: int, list_trpaths: list):
"""
Extract TR coinciding with condition onset, plus TRs for specified delay for each file. Save this to a pandas
dataframe (long) with associated Mean Signal value, for each subject, trial of condition and cue across the range
Expand All @@ -175,13 +193,34 @@ def extract_postcue_trs_for_conditions(events_data: list, onset: str, trial_name
bold_vols (int): Number of volumes for BOLD.
time_series (numpy.ndarray): numpy array of time series data.
conditions (list): List of cue conditions to iterate over, min 1.
tr_delay (int): Number of TRs to serve as delay (post onset)
list_trpaths (list): List or sub-##_run-## exported with timeseries extraction to match beh order
Returns:
pd.DataFrame: DataFrame containing mean signal intensity values, subject labels,
trial labels, TR values, and cue labels for all specified conditions.
"""
dfs = []

# check array names first
beh_id_list = []
for beh_path in events_data:
# create sub ID array to text again bold array
beh_name = os.path.basename(beh_path)
path_parts = beh_name.split('_')
sub_id, run_id = None, None
for val in path_parts:
if 'sub-' in val:
sub_id = val.split('-')[1]
elif 'run-' in val:
run_id = val.split('-')[1]
sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
beh_id_list.append(sub_info)

assert len(beh_id_list) == len(list_trpaths), f"Length of behavioral files {len(beh_id_list)} " \
f"does not match TR list {len(list_trpaths)}"
assert (np.array(beh_id_list) == np.array(list_trpaths)).all(), "Provided list_trpaths does not match" \
f"Beh path order {beh_id_list}"

for cue in conditions:
cue_dfs = [] # creating separate cue dfs to accomodate different number of trials for cue types
sub_n = 0
Expand Down
125 changes: 124 additions & 1 deletion tests/test_similarity_ref.py → tests/test_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from pyrelimri.tetrachoric_correlation import tetrachoric_corr
from pyrelimri.brain_icc import (voxelwise_icc, setup_atlas, roi_icc)
from pyrelimri.icc import sumsq_icc
from pyrelimri.masked_timeseries import (trlocked_events, extract_time_series_values,
extract_time_series, extract_postcue_trs_for_conditions)
from collections import namedtuple
from nilearn.datasets import fetch_neurovault_ids
from nilearn.masking import compute_multi_brain_mask
Expand Down Expand Up @@ -241,4 +243,125 @@ def test_roiicc_msc(tmp_path_factory):
result = roi_icc(multisession_list=[ses1, ses2], type_atlas='shaefer_2018',
atlas_dir = tmpdir, icc_type='icc_3')

assert np.allclose(result['est'][200], .70, atol=.01)
assert np.allclose(result['est'][200], .70, atol=.01)


# tests for masked timeseries
def test_miss_sub_boldpath():
bold_paths = ['/tmp/NDA_run-01_bold.nii.gz']
roi_type = 'mask'
roi_mask = '/tmp/roi_mask.nii.gz'

with pytest.raises(AssertionError):
extract_time_series(bold_paths, roi_type, roi_mask=roi_mask)

def test_miss_sub_boldpath():
bold_paths = ['/tmp/sub-NDA_01_bold.nii.gz']
roi_type = 'mask'
roi_mask = '/tmp/roi_mask.nii.gz'

with pytest.raises(AssertionError):
extract_time_series(bold_paths, roi_type, roi_mask=roi_mask)


def test_mismatched_shapes_boldroi(tmp_path):
# testing that error is thrown when mask and BOLD images are not similar shape
bold_path = tmp_path / "sub-01_run-01_bold.nii.gz"
roi_mask_path = tmp_path / "roi_mask.nii.gz"

# Create dummy BOLD and ROI NIfTI files with mismatched shapes
create_dummy_nifti((64, 64, 36, 2), np.eye(4), bold_path)
create_dummy_nifti((64, 64, 34), np.eye(4), roi_mask_path)

bold_paths = [bold_path]
roi_type = 'mask'

# Ensure that AssertionError is raised when shapes are mismatched
with pytest.raises(AssertionError):
extract_time_series(bold_paths, roi_type, roi_mask=str(roi_mask_path))

def test_wrongorder_behbold_ids():
# testing that order of sub & run paths for BOLD paths != Behavioral paths
bold_path_list = [f"sub-0{i:02d}_run-01.nii.gz" for i in range(20)] + \
[f"sub-0{i:02d}_run-02.nii.gz" for i in range(20)]
beh_path_list = [f"sub-0{i:02d}_run-01.nii.gz" for i in range(15)] + \
[f"sub-0{i:02d}_run-02.nii.gz" for i in range(20)]

with pytest.raises(AssertionError):
extract_postcue_trs_for_conditions(events_data=beh_path_list, onset='Test',
trial_name='test', bold_tr=.800, bold_vols=200,
time_series=[0,1,2,3], conditions=['test'], tr_delay=15,
list_trpaths= bold_path_list)

def test_wrongroi_type():
# Define invalid ROI type
wrong_roi_lab = 'fookwrng'

# Define other function arguments
bold_paths = ["sub-01_run-01_bold.nii.gz"]
high_pass_sec = 100
roi_mask = "roi_mask.nii.gz"

# Test if ValueError is raised for invalid ROI type
with pytest.raises(ValueError):
extract_time_series(bold_paths, wrong_roi_lab, high_pass_sec=high_pass_sec, roi_mask=roi_mask)

def test_missing_file():
# test when events file is not found
events_path = "missing_file_name.csv"
onsets_column = "onsets"
trial_name = "trial"
bold_tr = 2.0
bold_vols = 10

# Test if FileNotFoundError is raised when the file does not exist
with pytest.raises(FileNotFoundError):
trlocked_events(events_path, onsets_column, trial_name, bold_tr, bold_vols)


def test_missing_eventscol(tmp_path):
# testing missing column "trial" in events file
events_path = tmp_path / "test_events.csv"
with open(events_path, "w") as f:
f.write("onsets\n0.0\n1.0\n2.0\n")

# Define function arguments
onsets_column = "onsets"
trial_name = "trial"
bold_tr = 2.0
bold_vols = 10

# Test if KeyError is raised when columns are missing
with pytest.raises(KeyError):
trlocked_events(events_path, onsets_column, trial_name, bold_tr, bold_vols)


def test_lenbold_mismatchtrlen(tmp_path):
# The length of the resulting TR locked values (length) should be similar N to BOLD.
# assume to always be true but confirm
events_path = tmp_path / "test_events.csv"
onset_name = 'onsets'
trial_name = 'trial'
bold_tr = 2.0
bold_vols = 5 # Mismatched bold_vols compared to the expected length of merged_df

with open(events_path, "w") as f:
f.write(f"{onset_name},{trial_name}\n0.0,Test1\n1.0,Test2\n2.0,Test1\n")

with pytest.raises(ValueError):
trlocked_events(events_path=events_path, onsets_column=onset_name, trial_name=trial_name,
bold_tr=bold_tr, bold_vols=bold_vols, separator=',')

def test_runtrlocked_events(tmp_path):
# The length of the resulting TR locked values (length) should be similar N to BOLD.
# assume to always be true but confirm
events_path = tmp_path / "test_events.csv"
onset_name = 'onsets'
trial_name = 'trial'
bold_tr = 2.0
bold_vols = 3
with open(events_path, "w") as f:
f.write(f"{onset_name},{trial_name}\n0.0,Test1\n2.0,Test2\n4.0,Test1\n")

trlocked_events(events_path=events_path, onsets_column=onset_name, trial_name=trial_name,
bold_tr=bold_tr, bold_vols=bold_vols, separator=',')

0 comments on commit 8a35438

Please sign in to comment.