From 2c5b94cbb90a7bfc923febffe01ecfb26277715c Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:29:01 +1000 Subject: [PATCH 01/20] Rerun pipeline with 1hz high pass filter and add broadband decoding --- bidsify.py | 5 +++-- config.py | 5 +++-- csp.py | 14 ++++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/bidsify.py b/bidsify.py index b155ff5..957056b 100644 --- a/bidsify.py +++ b/bidsify.py @@ -114,11 +114,12 @@ analysis_root = share_root / "analysis" data_root = share_root / "data" del share_root -bids_root = analysis_root / f"{name}-bids" if platform.system() == 'Windows': analysis_root = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") data_root = Path("D:/Work/analysis_ME206/data") - bids_root = Path("E:/M3/Natural_Conversations_study/analysis") / f"{name}-bids" +analysis_root = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") +data_root = Path("/mnt/d/Work/analysis_ME206/data") +bids_root = analysis_root / f"{name}-bids" bids_root.mkdir(exist_ok=True) mne_bids.make_dataset_description( diff --git a/config.py b/config.py index c2a62da..8fb18ac 100644 --- a/config.py +++ b/config.py @@ -12,7 +12,7 @@ study_name = "natural-conversations" if platform.system() == 'Windows': - analysis_path = Path("E:/M3/Natural_Conversations_study/analysis") + analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") else: analysis_path = ( Path(__file__).parent / ".." / "Natural_Conversations_study" / "analysis" @@ -29,7 +29,7 @@ data_type = "meg" eeg_reference = "average" -l_freq = 0.5 +l_freq = 1.0 h_freq = 45.0 h_trans_bandwidth = 5 epochs_decim = 5 @@ -71,6 +71,7 @@ 'alpha': [8, 13], 'beta': [14, 30], 'gamma': [31, 45], + #'broadband': [4, 45], # makes the plot weird in html report due to overlapping freqs } # TFRs diff --git a/csp.py b/csp.py index 5f610bd..b32204e 100644 --- a/csp.py +++ b/csp.py @@ -51,6 +51,7 @@ ) csp_freqs = config.decoding_csp_freqs +csp_freqs['broadband'] = [4, 45] # add broadband decoding n_components = 4 random_state = 42 n_splits = 5 @@ -60,13 +61,13 @@ whiten = True # default is True rerun = False # force re-run / overwrite of existing files src_type = 'surf' # surf or vol -mode = 'glass_brain' # stat_map or glass_brain, vol source space plotting mode +mode = 'stat_map' # stat_map or glass_brain, vol source space plotting mode randomize = False # False or nonzero int, randomize the trial labels -decode = "participant" # "participant" or "interviewer" turns, or "bada" +decode = "interviewer" # "participant" or "interviewer" turns, or "bada" assert src_type in ("vol", "surf"), src_type assert decode in ("participant", "interviewer", "bada"), decode -mode_extra = f"_{mode[0:5]}" if src_type == "vol" else "" +mode_extra = f"_{mode[0:5]}" if src_type == "vol" and mode == "glass_brain" else "" plot_classification = True plot_indiv = False @@ -89,12 +90,13 @@ analysis_path = deriv_path = Path(__file__).parents[1] / "Natural_Conversations_study" / "analysis" if platform.system() == 'Windows': data_path = Path("D:/Work/analysis_ME206/data") - analysis_path = Path("E:/M3/Natural_Conversations_study/analysis") + analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" fig_path = analysis_path / "figures" / "CSP-decoding" if src_type == 'vol': fig_path = analysis_path / "figures" / "CSP-decoding-vol" + results_path = analysis_path / 
"results" / "CSP-decoding-vol" cop_path = analysis_path / "figures" subjects_dir = deriv_path / "freesurfer" / "subjects" if src_type == 'vol': @@ -390,6 +392,10 @@ def clean_brain(brain_img): this_data.reshape(-1, n_vertices).T, vertices=fs_vertices, tmin=0, tstep=1., subject="fsaverage", ) + # save the GA stc - this can be exported as Nifti, then loaded into other software (e.g. xjview) to extract anatomical labels + if subj_key == "": + results_path.mkdir(exist_ok=True) + stc.save(f'{results_path}/decoding{extra}_csp_GA') src = mne.read_source_spaces(src_fname) # for vol src, the plot function returns a matplotlib Figure - # we can't update the clim & time point for this once plotted, so do the actual plotting later From 4c509ca60ffeef5ee91d04cd9177417ef409e12d Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:29:24 +1000 Subject: [PATCH 02/20] Convert coordinates from MNI305 to MNI152 space --- MNI305to152.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 MNI305to152.py diff --git a/MNI305to152.py b/MNI305to152.py new file mode 100644 index 0000000..77136a6 --- /dev/null +++ b/MNI305to152.py @@ -0,0 +1,21 @@ +# Convert coordinates from MNI305 space to MNI152 space + +import numpy as np + +# Transformation matrix +# Ref: https://surfer.nmr.mgh.harvard.edu/fswiki/CoordinateSystems +M = np.array([ + [0.9975, -0.0073, 0.0176, -0.0429], + [0.0146, 1.0009, -0.0024, 1.5496], + [-0.0130, -0.0093, 0.9971, 1.1840] + ]) + +# Specify your RAS coordinates in MNI305 space +v = np.array([29, -40, -15, 1]) +v = v.transpose() + +# Tranform to MNI152 space +print(np.matmul(M, v)) + +# you can then input these coordinates into image viewing software (e.g. MRICron, xjview) +# to read out the corresponding anatomical label From 3b7638c508ff66f30fe16d806d0387de1c99a02f Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:29:52 +1000 Subject: [PATCH 03/20] Run source analysis and extract ROI time series --- traditional_source_analysis_and_ROIs.py | 400 ++++++++++++++++++++++++ 1 file changed, 400 insertions(+) create mode 100644 traditional_source_analysis_and_ROIs.py diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py new file mode 100644 index 0000000..7ce0e6c --- /dev/null +++ b/traditional_source_analysis_and_ROIs.py @@ -0,0 +1,400 @@ +# Run traditional source analysis (surface or volume-based) and save the STCs +# Compute and plot GAs +# Extract time courses from predefined ROIs + +import os +import os.path as op +import numpy as np +import matplotlib.pyplot as plt +import glob +import platform +from pathlib import Path + +import mne +from mne.minimum_norm import read_inverse_operator, apply_inverse +from mne.beamformer import make_lcmv, apply_lcmv + +import config # our config.py file + + +# specify source method etc +source_method = "dSPM" #"MNE" +src_type = 'vol' # 'vol' or 'surface' +spacing = "oct6" # for 'surface' source space only +if src_type != 'surface': + spacing = '' + +# specify how many SSP projectors to use for speech artifact removal +n_proj = 4 # 1 means: 1 for MEG & 1 for EEG + +this_run = f'{n_proj}-proj' +#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks + +# which conditions to compare in ROI analysis +conds_ROI = ['interviewer_conversation','interviewer_repetition'] #['participant_conversation','participant_repetition'] + +subjects = config.subjects +if subjects == "all": 
+ subjects = [f"{sub:02d}" for sub in range(1, 33)] +subjects = [subject for subject in subjects if subject not in config.exclude_subjects] +del config + +use_subjects = subjects # run all of them (could use e.g. subjects[2:3] just to run 03) +del subjects + +data_path = deriv_path = Path(__file__).parents[1] / "Natural_Conversations_study" / "data" +analysis_path = deriv_path = Path(__file__).parents[1] / "Natural_Conversations_study" / "analysis" +if platform.system() == 'Windows': + data_path = Path("D:/Work/analysis_ME206/data") + analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") +#data_path = Path("/mnt/d/Work/analysis_ME206/data") +#analysis_path = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") + +deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" +source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}" / this_run +figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run +# create the folders if needed +source_results_dir.mkdir(parents=True, exist_ok=True) +figures_dir.mkdir(parents=True, exist_ok=True) + +subjects_dir = deriv_path / "freesurfer" / "subjects" +subject = 'fsaverage' +if src_type == 'vol': + src_fname = subjects_dir / subject / "bem" / "fsaverage-vol-5-src.fif" + bem_fname = subjects_dir / subject / "bem" / "fsaverage-5120-5120-5120-bem-sol.fif" +else: + src_fname = subjects_dir / subject / "bem" / "fsaverage-oct6-src.fif" + +# adjust mne options to fix rendering issues (only needed in Linux / WSL) +mne.viz.set_3d_options(antialias = False, depth_peeling = False, + smooth_shading = False, multi_samples = 1) + + +# loop through the subjects we want to analyse +for sub in use_subjects: + path = deriv_path / 'mne-bids-pipeline' / f'sub-{sub}' / 'meg' + epochs_fname = path / f'sub-{sub}_task-conversation_proc-clean_epo.fif' + trans_fname = path / f'sub-{sub}_task-conversation_trans.fif' + fwd_fname = path / f'sub-{sub}_task-conversation_fwd.fif' + cov_fname = path / f'sub-{sub}_task-rest_proc-clean_cov.fif' + inv_fname = path / f'sub-{sub}_task-conversation_inv.fif' + proj_fname = path / f'sub-{sub}_task-conversation_proc-proj_proj.fif' + + stc_filename_prefix = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc') + + + # Read data + epochs = mne.read_epochs(epochs_fname) + + # Apply SSP projectors for speech artifact removal + if n_proj: + all_proj = mne.read_proj(proj_fname) + proj = list() + for ii, kind in enumerate(("MEG", "EEG")): + these_proj = all_proj[10*ii : 10*ii+n_proj] + proj.extend(these_proj) + del all_proj + epochs.add_proj(proj).apply_proj() + + epochs = epochs.pick(["meg", "eeg"], exclude="bads") + + ranks = mne.compute_rank(inst=epochs, tol=1e-3, tol_kind="relative") + rank = sum(ranks.values()) + print(f" Ranks={ranks} (total={rank})") + + if n_proj or src_type == 'vol': + # Recreate inverse taking into account additional projections + cov = mne.read_cov(cov_fname) + if src_type == 'vol': + src = mne.read_source_spaces(src_fname) + fwd = mne.make_forward_solution( + epochs.info, + trans=trans_fname, + src=src, + bem=bem_fname, + meg=True, + eeg=True, + ) + inv = mne.minimum_norm.make_inverse_operator( + epochs.info, fwd, cov, rank=ranks, + ) + else: # surface source space + fwd = mne.read_forward_solution(fwd_fname) + inv = mne.minimum_norm.make_inverse_operator( + epochs.info, fwd, cov, loose=0.2, depth=0.8, rank=ranks, + ) + else: # load the default inv solution (surface source space; no SSP 
projectors for speech artifact removal) + inv = mne.minimum_norm.read_inverse_operator(inv_fname) + + + # compute evoked (averaged over all conditions) + #evoked_allconds = epochs.average() + #evoked_allconds.plot_joint() # average ERF across all conds + + # compute evoked for each cond (ba, da, conversation, repetition) + evokeds = [] + for cond in epochs.event_id: + evokeds.append(epochs[cond].average()) + + # compute source timecourses + stcs = dict() + + for index, evoked in enumerate(evokeds): + cond = evoked.comment + + snr = 3. + lambda2 = 1. / snr ** 2 + stcs[cond], residual = apply_inverse(evoked, inv, lambda2, + method=source_method, pick_ori=None, + return_residual=True, verbose=True) + + # save the source estimates + stcs[cond].save(stc_filename_prefix + '_' + cond, overwrite=True) + + if this_run == "ba+da__ba-da": + # compute "ba+da" (i.e. combining the two conditions) + evoked_ba = epochs['ba'].average() + evoked_da = epochs['da'].average() + evoked_combined = mne.combine_evoked([evoked_ba, evoked_da], weights='equal') + stc_combined, residual = apply_inverse(evoked_combined, inv, lambda2, + method=source_method, pick_ori=None, + return_residual=True, verbose=True) + stc_combined.save(f'{stc_filename_prefix}_ba+da') + + # compute "ba-da" (i.e. difference between the two conditions) + evoked_subtracted = mne.combine_evoked([evoked_ba, evoked_da], weights=[1, -1]) + stc_subtracted, residual = apply_inverse(evoked_subtracted, inv, lambda2, + method=source_method, pick_ori=None, + return_residual=True, verbose=True) + stc_subtracted.save(f'{stc_filename_prefix}_ba-da') + + + # Plot the source timecourses + ''' + for index, evoked in enumerate(evokeds): + cond = evoked.comment + + hemi='both' + #vertno_max, time_max = stcs[cond].get_peak(hemi=hemi, tmin=0.1, tmax=0.27) + initial_time = 0.09 #time_max + surfer_kwargs = dict( + hemi=hemi, subjects_dir=subjects_dir, + initial_time=initial_time, + time_unit='s', title=subject + ' - ' + cond, + views=['lateral','medial'], #['caudal','ventral','lateral','medial'], + show_traces=False, + smoothing_steps=10) + brain = stcs[cond].plot(**surfer_kwargs) + #brain.add_foci(vertno_max, coords_as_verts=True, hemi=hemi, + # color='blue', scale_factor=0.6, alpha=0.5) + brain.save_image(op.join(figures_dir, "indi_subjects", subject + '-' + cond + '.png')) + #brain.save_movie(op.join(figures_dir, "indi_subjects", subject + '-' + cond + '-both.mov'), + # tmin=0, tmax=0.35, interpolation='linear', + # time_dilation=50, time_viewer=True) + + # close all figures before moving onto the next subject + mne.viz.close_all_3d_figures() + ''' + + +# Compute the grand average + +conds = ['ba','da','interviewer_conversation','interviewer_repetition','participant_conversation','participant_repetition'] +if this_run == "ba+da__ba-da": + conds = ['ba+da', 'ba-da'] + +if src_type == 'vol': + src_suffix = '-vl.stc' + #if this_run == "ba+da__ba-da": + # src_suffix = '-vol.stc' # to read a previous version of saved results +elif src_type == 'surface': + src_suffix = '-lh.stc' + +# overwrite the paths here to use a previous version of results for participant-locked epochs +#source_results_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj" +#figures_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj/Figures_ROI" + +GA_stcs = {} +for cond in conds: + # find all the saved stc results + stc_files = glob.glob(op.join(source_results_dir, 'sub*' + cond + src_suffix)) + # Note: for surface-based stcs, only need to supply the filename 
+ # for one hemisphere (e.g. '-lh.stc'), and it will look for both + # https://mne.tools/stable/generated/mne.read_source_estimate.html + + # initialise the sum array to correct size using first subject's stc + stc = mne.read_source_estimate(stc_files[0]) + stcs_sum = stc.data # this contains lh & rh vertices combined together + # there are also separate fields for the 2 hemis (stc.lh_data, stc.rh_data), + # but their content cannot be set directly, so just use the combined one + + # read in the stc for each subsequent subject, add to the sum array + for fname in stc_files[1:]: + stc = mne.read_source_estimate(fname) + stcs_sum = stcs_sum + stc.data + + # divide by number of files + stcs_GA = stcs_sum / len(stc_files) + + # feed into the dummy stc structure + stc.data = stcs_GA + GA_stcs[cond] = stc + + + # Plot the GAs + initial_time = 0.09 + + # Depending on the src type, use diff types of plots + if src_type == 'vol': + src = mne.read_source_spaces(src_fname) + fig = stc.plot(src=src, colormap="viridis", + subject='fsaverage', subjects_dir=subjects_dir, verbose=True, + initial_time=initial_time) + fig.savefig(op.join(figures_dir, 'GA_' + cond + '.png')) + plt.close(fig) + # seems like we can't save movies for these volume-based stcs + + elif src_type == 'surface': + hemi='both' + #vertno_max, time_max = stc.get_peak(hemi=hemi) + #initial_time = time_max + # to get auto clim for a particular time point, crop to a short interval around that time first + # (otherwise clim will be based on the peak activity over the entire epoch) + #stc = stc.crop(tmin=initial_time, tmax=initial_time+0.1) + surfer_kwargs = dict( + hemi=hemi, subject='fsaverage', subjects_dir=subjects_dir, + #clim=dict(kind='value', lims=[8, 12, 15]), + initial_time=initial_time, time_unit='s', + views=['lateral','medial'], show_traces=False, + size=(800, 800), smoothing_steps=10) + brain = stc.plot(**surfer_kwargs) + #brain.add_foci(vertno_max, coords_as_verts=True, hemi=hemi, color='blue', + # scale_factor=0.6, alpha=0.5) + brain.save_image(op.join(figures_dir, 'GA_' + str(initial_time) + 's_' + cond + '.png')) + brain.save_movie(op.join(figures_dir, 'GA-' + cond + '.mov'), + tmin=-1, tmax=1, interpolation='linear', + time_dilation=50, time_viewer=True) + + # Note: if there are any issues with the plots/movies (e.g. showing + # horizontal bands), it's probably a rendering issue in Linux. + # Try running this script in Windows/Mac! 
+ + plt.close('all') + + +# Extract ROI time courses from source estimates + +src = mne.read_source_spaces(src_fname) +Path(op.join(figures_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) + +if src_type == 'vol': + # choose atlas for parcellation + fname_aseg = op.join(subjects_dir, subject, 'mri', 'aparc.a2009s+aseg.mgz') # aparc = cortical; aseg = subcortical + label_names = mne.get_volume_labels_from_aseg(fname_aseg) + rois = ['ctx_lh_G_cingul-Post-dorsal','ctx_lh_G_cingul-Post-ventral','ctx_rh_G_cingul-Post-dorsal','ctx_rh_G_cingul-Post-ventral', + 'ctx_lh_G_pariet_inf-Supramar','ctx_rh_G_pariet_inf-Supramar','ctx_lh_G_precuneus','ctx_rh_G_precuneus', + 'ctx_lh_G_temp_sup-Lateral','ctx_rh_G_temp_sup-Lateral','ctx_lh_Pole_temporal','ctx_rh_Pole_temporal', + 'ctx_lh_S_temporal_sup','ctx_rh_S_temporal_sup'] # can have multiple labels in this list + #roi_idx = label_names.index(rois[0]) + + for label_name in rois: + Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + + # Plot GA ROI time series + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + label_ts = mne.extract_label_time_course( + [GA_stcs[cond]], (fname_aseg, label_name), src, mode="auto" + ) + axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + plt.close(fig) + + # Plot individual-subjects ROI time series + for sub in use_subjects: + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + stc_file = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc_{cond}{src_suffix}') + stc = mne.read_source_estimate(stc_file) + label_ts = mne.extract_label_time_course( + [stc], (fname_aseg, label_name), src, mode="auto" + ) + axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + plt.close(fig) + +elif src_type == 'surface': + # for surface source space, we need to create the Label object first + # by reading from .annot or .label file + # Can't use the mri file like above, as extract_label_time_course() will throw an error + # https://mne.tools/stable/generated/mne.extract_label_time_course.html + + # Get labels for FreeSurfer 'aparc' cortical parcellation (69 labels) + # https://freesurfer.net/fswiki/CorticalParcellation + #labels_parc = mne.read_labels_from_annot(subject, parc='aparc', subjects_dir=subjects_dir) # read all labels from annot file + #labels = [labels_parc[60], labels_parc[61]] # 60: left STG; 61: right STG + + # or use 'aparc.a2009s' parcellation (150 labels) + labels_parc = mne.read_labels_from_annot(subject, parc='aparc.a2009s', subjects_dir=subjects_dir) + labels = [ + labels_parc[18]+labels_parc[20], labels_parc[19]+labels_parc[21], labels_parc[50], labels_parc[51], + labels_parc[58], labels_parc[59], labels_parc[66], labels_parc[67], labels_parc[84], labels_parc[85], labels_parc[144], labels_parc[145] + ] + + # or read a single label (e.g. 
V1, BA44, etc) + #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label')) + + for label in labels: + label_name = label.name + # or set a custom name for combined labels + if label_name == 'G_cingul-Post-dorsal-lh + G_cingul-Post-ventral-lh': + label_name = 'G_cingul-Post-lh' + if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh': + label_name = 'G_cingul-Post-rh' + + Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + + # Plot GA ROI time series + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + label_ts = mne.extract_label_time_course( + [GA_stcs[cond]], label, src, mode="auto" + ) + axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + plt.close(fig) + + # Plot individual-subjects ROI time series + for sub in use_subjects: + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + stc_file = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc_{cond}{src_suffix}') + stc = mne.read_source_estimate(stc_file) + label_ts = mne.extract_label_time_course( + [stc], label, src, mode="auto" + ) + axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + plt.close(fig) From 9b1272a4dd35dd23ffc45f10e0f4acceffd14f31 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:30:47 +1000 Subject: [PATCH 04/20] Update paths --- bidsify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bidsify.py b/bidsify.py index 957056b..1be7a4d 100644 --- a/bidsify.py +++ b/bidsify.py @@ -117,8 +117,8 @@ if platform.system() == 'Windows': analysis_root = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") data_root = Path("D:/Work/analysis_ME206/data") -analysis_root = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") -data_root = Path("/mnt/d/Work/analysis_ME206/data") +#analysis_root = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") +#data_root = Path("/mnt/d/Work/analysis_ME206/data") bids_root = analysis_root / f"{name}-bids" bids_root.mkdir(exist_ok=True) From ddc7842c4b5c441ff2fe6b555dcdf3a2c1920afa Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:37:22 +1000 Subject: [PATCH 05/20] Separate the interview-locked and participant-locked ROI figures --- traditional_source_analysis_and_ROIs.py | 30 +++++++++++++------------ 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index 7ce0e6c..3652a23 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -27,11 +27,12 @@ # specify how many SSP projectors to use for speech artifact removal n_proj = 4 # 1 means: 1 for MEG & 1 for EEG -this_run = f'{n_proj}-proj' -#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks - # which conditions to compare in ROI 
analysis -conds_ROI = ['interviewer_conversation','interviewer_repetition'] #['participant_conversation','participant_repetition'] +comparison = "participant" # "interviewer" or "participant" +conds_ROI = [f"{comparison}_conversation", f"{comparison}_repetition"] + +this_run = f"{n_proj}-proj" +#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks subjects = config.subjects if subjects == "all": @@ -53,9 +54,10 @@ deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}" / this_run figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run +figures_ROI_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run / f"{comparison}_ROI" # create the folders if needed source_results_dir.mkdir(parents=True, exist_ok=True) -figures_dir.mkdir(parents=True, exist_ok=True) +figures_ROI_dir.mkdir(parents=True, exist_ok=True) subjects_dir = deriv_path / "freesurfer" / "subjects" subject = 'fsaverage' @@ -285,7 +287,7 @@ # Extract ROI time courses from source estimates src = mne.read_source_spaces(src_fname) -Path(op.join(figures_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) +Path(op.join(figures_ROI_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) if src_type == 'vol': # choose atlas for parcellation @@ -298,7 +300,7 @@ #roi_idx = label_names.index(rois[0]) for label_name in rois: - Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -312,8 +314,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -331,7 +333,7 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) plt.close(fig) elif src_type == 'surface': @@ -363,7 +365,7 @@ if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh': label_name = 'G_cingul-Post-rh' - Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -377,8 +379,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -396,5 +398,5 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + 
fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) plt.close(fig) From c4895d4e18f1d8f0c341c63b3e120d8e7a14f480 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:29:14 +1000 Subject: [PATCH 06/20] Tidying up --- traditional_source_analysis_and_ROIs.py | 42 +++++++++++-------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index 3652a23..d4c8be0 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -104,7 +104,8 @@ rank = sum(ranks.values()) print(f" Ranks={ranks} (total={rank})") - if n_proj or src_type == 'vol': + # TEMP - always recreate inv for now, until we change to free orientation in the pipeline + if 1: #n_proj or src_type == 'vol': # Recreate inverse taking into account additional projections cov = mne.read_cov(cov_fname) if src_type == 'vol': @@ -123,7 +124,7 @@ else: # surface source space fwd = mne.read_forward_solution(fwd_fname) inv = mne.minimum_norm.make_inverse_operator( - epochs.info, fwd, cov, loose=0.2, depth=0.8, rank=ranks, + epochs.info, fwd, cov, loose=1., depth=0.8, rank=ranks, # use free orientation, so that the surface- and volume-based results are more comparable ) else: # load the default inv solution (surface source space; no SSP projectors for speech artifact removal) inv = mne.minimum_norm.read_inverse_operator(inv_fname) @@ -226,20 +227,15 @@ # initialise the sum array to correct size using first subject's stc stc = mne.read_source_estimate(stc_files[0]) - stcs_sum = stc.data # this contains lh & rh vertices combined together - # there are also separate fields for the 2 hemis (stc.lh_data, stc.rh_data), - # but their content cannot be set directly, so just use the combined one # read in the stc for each subsequent subject, add to the sum array for fname in stc_files[1:]: - stc = mne.read_source_estimate(fname) - stcs_sum = stcs_sum + stc.data + stc += mne.read_source_estimate(fname) # divide by number of files - stcs_GA = stcs_sum / len(stc_files) + stc /= len(stc_files) - # feed into the dummy stc structure - stc.data = stcs_GA + # store in the GA struct GA_stcs[cond] = stc @@ -252,7 +248,7 @@ fig = stc.plot(src=src, colormap="viridis", subject='fsaverage', subjects_dir=subjects_dir, verbose=True, initial_time=initial_time) - fig.savefig(op.join(figures_dir, 'GA_' + cond + '.png')) + fig.savefig(figures_dir / f'GA_{cond}.png') plt.close(fig) # seems like we can't save movies for these volume-based stcs @@ -272,8 +268,8 @@ brain = stc.plot(**surfer_kwargs) #brain.add_foci(vertno_max, coords_as_verts=True, hemi=hemi, color='blue', # scale_factor=0.6, alpha=0.5) - brain.save_image(op.join(figures_dir, 'GA_' + str(initial_time) + 's_' + cond + '.png')) - brain.save_movie(op.join(figures_dir, 'GA-' + cond + '.mov'), + brain.save_image(figures_dir / f'GA_{str(initial_time)}s_{cond}.png') + brain.save_movie(figures_dir / f'GA-{cond}.mov', tmin=-1, tmax=1, interpolation='linear', time_dilation=50, time_viewer=True) @@ -287,11 +283,11 @@ # Extract ROI time courses from source estimates src = mne.read_source_spaces(src_fname) -Path(op.join(figures_ROI_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) +(figures_ROI_dir / "all_ROIs").mkdir(parents=True, exist_ok=True) if src_type == 'vol': # choose atlas for parcellation - fname_aseg = op.join(subjects_dir, subject, 'mri', 'aparc.a2009s+aseg.mgz') # aparc = cortical; aseg = subcortical 
+ fname_aseg = subjects_dir / subject / 'mri' / 'aparc.a2009s+aseg.mgz' # aparc = cortical; aseg = subcortical label_names = mne.get_volume_labels_from_aseg(fname_aseg) rois = ['ctx_lh_G_cingul-Post-dorsal','ctx_lh_G_cingul-Post-ventral','ctx_rh_G_cingul-Post-dorsal','ctx_rh_G_cingul-Post-ventral', 'ctx_lh_G_pariet_inf-Supramar','ctx_rh_G_pariet_inf-Supramar','ctx_lh_G_precuneus','ctx_rh_G_precuneus', @@ -300,7 +296,7 @@ #roi_idx = label_names.index(rois[0]) for label_name in rois: - Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) + (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -314,8 +310,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(figures_ROI_dir / label_name / "GA.png") + fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -333,7 +329,7 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) + fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") plt.close(fig) elif src_type == 'surface': @@ -365,7 +361,7 @@ if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh': label_name = 'G_cingul-Post-rh' - Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) + (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -379,8 +375,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(figures_ROI_dir / label_name / "GA.png") + fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -398,5 +394,5 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) + fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") plt.close(fig) From f2f36fc45cd12763e9415a51f21f00645eaa5c73 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Wed, 24 Jul 2024 11:39:47 +1200 Subject: [PATCH 07/20] initial commit of Paul's beamformer code --- LCMV_conversation_batch.py | 451 +++++++++++++++++++++++++++++ LCMV_conversation_grand_average.py | 74 +++++ LCMV_conversation_summarise.py | 187 ++++++++++++ plot_stc.py | 20 ++ 4 files changed, 732 insertions(+) create mode 100644 LCMV_conversation_batch.py create mode 100644 LCMV_conversation_grand_average.py create mode 100644 LCMV_conversation_summarise.py create mode 100644 plot_stc.py diff --git a/LCMV_conversation_batch.py b/LCMV_conversation_batch.py new file mode 100644 index 0000000..082a533 --- /dev/null +++ b/LCMV_conversation_batch.py @@ -0,0 +1,451 @@ +import os +import mne +import numpy as np +import h5py +from mne.beamformer import apply_lcmv_epochs, make_lcmv +from mne.cov import compute_covariance +from scipy.signal 
import hilbert + +# Configuration options - change as needed saving +# cropped data takes up a lot of space +EQUALIZE_EVENT_COUNTS = False +SAVE_CROPPED_DATA_H5 = False + + +def average_stc_in_time(stc, window_size=0.1): + """ + Average the SourceEstimate in the time domain. + + Parameters: + ----------- + stc : mne.SourceEstimate + The source estimate to average. + window_size : float + The size of the averaging window in seconds. + + Returns: + -------- + mne.SourceEstimate + A new SourceEstimate with time-averaged data. + """ + sfreq = 1 / stc.tstep + n_samples = int(window_size * sfreq) + + # Calculate the number of full windows + n_windows = ( + len(stc.times) - 1 + ) // n_samples # -1 to ensure we don't exceed the time range + + # Prepare the data for averaging + data_to_average = stc.data[:, : n_windows * n_samples] + + # Reshape and average + averaged_data = data_to_average.reshape( + data_to_average.shape[0], n_windows, n_samples + ).mean(axis=2) + + # Create new time array + new_times = np.arange(n_windows) * window_size + stc.tmin + (window_size / 2) + + return mne.SourceEstimate( + averaged_data, + vertices=stc.vertices, + tmin=new_times[0], + tstep=window_size, + subject=stc.subject, + ) + + +def is_subject_processed(subject, output_dir): + # Check if all expected output files exist for the subject + frequency_bands = ["broadband", "alpha", "beta", "gamma"] + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] + + for band in frequency_bands: + for condition in conditions: + roi_fname = f"{subject}_task-{condition}_{band}_lcmv_beamformer_roi_time_courses.npy" + stc_fname = ( + f"{subject}_task-{condition}_{band}_lcmv_beamformer_averaged-stc-lh.stc" + ) + if not ( + os.path.exists(os.path.join(output_dir, roi_fname)) + and os.path.exists(os.path.join(output_dir, stc_fname)) + ): + return False + return True + + +def setup_directories(): + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4" + os.makedirs(output_dir, exist_ok=True) + return data_dir, output_dir + + +def load_data(subject, data_dir): + fwd_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" + ) + epochs_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" + ) + noise_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" + ) + + if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): + return None + + fwd = mne.read_forward_solution(fwd_fname) + fwd_fixed = mne.convert_forward_solution( + fwd, surf_ori=True, force_fixed=False, use_cps=True + ) # Convert to fixed orientation - necessary for LCMV ori = normal + all_epochs = mne.read_epochs(epochs_fname) + noise_raw = mne.io.read_raw_fif(noise_fname) + return fwd_fixed, all_epochs, noise_raw + + +def prepare_epochs(all_epochs, noise_raw): + print(f"Initial all_epochs: {len(all_epochs)}") + + sfreq = all_epochs.info["sfreq"] + ch_names = all_epochs.ch_names + tmin, tmax = all_epochs.tmin, all_epochs.tmax + n_samples = len(all_epochs.times) + + if noise_raw.info["sfreq"] != sfreq: + noise_raw = noise_raw.resample(sfreq) + + print(f"Task epochs - tmin: {tmin}, tmax: {tmax}, n_samples: {n_samples}") + + noise_events = mne.make_fixed_length_events(noise_raw, 
duration=tmax - tmin) + noise_epochs = mne.Epochs( + noise_raw, + noise_events, + event_id={"noise": 1}, + tmin=tmin, + tmax=tmax, + baseline=None, + preload=True, + ) + + print(f"Initial noise_epochs: {len(noise_epochs)}") + print( + f"Noise epochs - tmin: {noise_epochs.tmin}, tmax: {noise_epochs.tmax}, n_samples: {len(noise_epochs.times)}" + ) + + noise_epochs = noise_epochs.copy().pick(ch_names) + + if len(all_epochs.times) > len(noise_epochs.times): + print( + f"Cropping task epochs from {len(all_epochs.times)} to {len(noise_epochs.times)} samples" + ) + all_epochs = all_epochs.copy().crop( + tmin=tmin, tmax=tmin + (len(noise_epochs.times) - 1) / sfreq + ) + + print(f"After cropping - Task epochs n_samples: {len(all_epochs.times)}") + print(f"After cropping - Noise epochs n_samples: {len(noise_epochs.times)}") + + all_epochs.metadata = None + noise_epochs.metadata = None + + if not np.allclose(all_epochs.times, noise_epochs.times): + raise ValueError("Time points still don't match after cropping") + + combined_epochs = mne.concatenate_epochs([all_epochs, noise_epochs]) + + print(f"Final combined_epochs: {len(combined_epochs)}") + print(f"Time points: {len(combined_epochs.times)}") + + if EQUALIZE_EVENT_COUNTS: + combined_epochs.equalize_event_counts() + return combined_epochs + + +def filter_data(epochs, fmin, fmax): + """Filters epochs in the specified frequency band.""" + return epochs.copy().filter(fmin, fmax) + + +def create_and_apply_beamformer(epochs_filt, fwd, noise_epochs_filt): + data_cov = compute_covariance(epochs_filt, tmin=None, tmax=None, method="empirical") + noise_cov = compute_covariance( + noise_epochs_filt, tmin=None, tmax=None, method="empirical" + ) + filters_lcmv = make_lcmv( + epochs_filt.info, + fwd, + data_cov=data_cov, + noise_cov=noise_cov, + reg=0.05, + pick_ori="normal", + ) + return apply_lcmv_epochs(epochs_filt, filters_lcmv, return_generator=True) + + +def stc_to_matrix(stc, parcellation): + """Parcellate a SourceEstimate and return a matrix of ROI time courses.""" + roi_time_courses = [ + np.mean(stc.in_label(label).data, axis=0) for label in parcellation + ] + return np.array(roi_time_courses) + + +def load_parcellation(): + subjects_dir = os.environ.get( + "SUBJECTS_DIR", + "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects", + ) + return mne.read_labels_from_annot( + "fsaverage", parc="HCPMMP1", subjects_dir=subjects_dir, hemi="both" + ) + + +def compute_source_estimate( + epochs_stcs, fwd, epochs, subject, output_dir, condition, band_name, parcellation +): + n_sources = fwd["nsource"] + n_times = len(epochs.times) + averaged_data = np.zeros((n_sources, n_times), dtype=complex) + all_data = [] + n_epochs = 0 + + vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] + + if SAVE_CROPPED_DATA_H5: + h5_filename = f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + h5_filepath = os.path.join(output_dir, h5_filename) + h5f = h5py.File(h5_filepath, "w") + + crop_time = 0.1 # in seconds + crop_samples = int(crop_time * epochs.info["sfreq"]) + + # Calculate the new time array after cropping + cropped_times = epochs.times[crop_samples:-crop_samples] + + for i, stc in enumerate(epochs_stcs): + cropped_data = stc.data[:, crop_samples:-crop_samples] + analytic_signal = hilbert(cropped_data, axis=1) + averaged_data[:, crop_samples:-crop_samples] += analytic_signal + all_data.append(analytic_signal) 
+ n_epochs += 1 + + if SAVE_CROPPED_DATA_H5: + h5f.create_dataset(f"epoch_{i}", data=cropped_data) + + if SAVE_CROPPED_DATA_H5: + h5f.attrs["subject"] = subject + h5f.attrs["condition"] = condition + h5f.attrs["band_name"] = band_name + h5f.attrs["n_epochs"] = n_epochs + h5f.close() + + if n_epochs == 0: + raise ValueError("No epochs were processed") + + averaged_data /= n_epochs + envelope = np.abs(averaged_data) + + # Use the cropped time array for tmin and adjust the data shape + averaged_stc = mne.SourceEstimate( + envelope, + vertices=[vertices_lh, vertices_rh], + tmin=epochs.times[crop_samples], + tstep=epochs.times[1] - epochs.times[0], + subject="fsaverage", + ) + + # Create time-domain averaged version + time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) + + # Compute ROI time courses for both original and time-averaged data + roi_time_courses = stc_to_matrix(averaged_stc, parcellation) + time_averaged_roi_time_courses = stc_to_matrix(time_averaged_stc, parcellation) + + # Save original ROI time courses + roi_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_roi_time_courses.npy" + ) + np.save(os.path.join(output_dir, roi_fname), roi_time_courses) + + # Save time-averaged ROI time courses + time_avg_roi_fname = f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged_roi_time_courses.npy" + np.save( + os.path.join(output_dir, time_avg_roi_fname), time_averaged_roi_time_courses + ) + + # Save original SourceEstimate + averaged_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_averaged-stc" + ) + averaged_stc.save(os.path.join(output_dir, averaged_fname), overwrite=True) + + # Save time-averaged SourceEstimate + time_avg_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged-stc" + ) + time_averaged_stc.save(os.path.join(output_dir, time_avg_fname), overwrite=True) + + print( + f"Saved original SourceEstimate to {os.path.join(output_dir, averaged_fname)}" + ) + print( + f"Saved time-averaged SourceEstimate to {os.path.join(output_dir, time_avg_fname)}" + ) + print(f"Saved original ROI time courses to {os.path.join(output_dir, roi_fname)}") + print( + f"Saved time-averaged ROI time courses to {os.path.join(output_dir, time_avg_roi_fname)}" + ) + + return ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) + + +def process_subject(subject, data_dir, output_dir): + try: + fwd, all_epochs, noise_raw = load_data(subject, data_dir) + if fwd is None: + print(f"Skipping {subject}: Missing required files") + return + + print(f"All epochs info:") + print(f"Number of epochs: {len(all_epochs)}") + print(f"Time points: {len(all_epochs.times)}") + print(f"tmin: {all_epochs.tmin}, tmax: {all_epochs.tmax}") + + combined_epochs = prepare_epochs(all_epochs, noise_raw) + parcellation = load_parcellation() + + frequency_bands = { + "broadband": (1, 40), + "alpha": (8, 12), + "beta": (13, 30), + "gamma": (30, 40), + } + + for band_name, (fmin, fmax) in frequency_bands.items(): + print(f"Processing {band_name} band ({fmin}-{fmax} Hz)") + filtered_epochs = filter_data(combined_epochs, fmin, fmax) + + averaged_stc_dict = {} + roi_time_courses_dict = {} + time_averaged_stc_dict = {} + time_averaged_roi_time_courses_dict = {} + + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] + + for condition in conditions: + condition_epochs = filtered_epochs[condition] + 
noise_epochs = filtered_epochs["noise"] + + epochs_stcs = create_and_apply_beamformer( + condition_epochs, fwd, noise_epochs + ) + ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) = compute_source_estimate( + epochs_stcs, + fwd, + condition_epochs, + subject, + output_dir, + condition, + band_name, + parcellation, + ) + + averaged_stc_dict[condition] = averaged_stc + roi_time_courses_dict[condition] = roi_time_courses + time_averaged_stc_dict[condition] = time_averaged_stc + time_averaged_roi_time_courses_dict[condition] = ( + time_averaged_roi_time_courses + ) + + diff_pairs = [ + ("interviewer_conversation", "interviewer_repetition"), + ("participant_conversation", "participant_repetition"), + ] + + for cond1, cond2 in diff_pairs: + # Original difference + diff_stc = averaged_stc_dict[cond1] - averaged_stc_dict[cond2] + diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_difference-stc", + ) + diff_stc.save(diff_fname, overwrite=True) + + # Time-averaged difference + time_avg_diff_stc = ( + time_averaged_stc_dict[cond1] - time_averaged_stc_dict[cond2] + ) + time_avg_diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_time_averaged_difference-stc", + ) + time_avg_diff_stc.save(time_avg_diff_fname, overwrite=True) + + print(f"Finished processing {subject}") + except Exception as e: + print(f"Error processing {subject}: {str(e)}") + import traceback + + traceback.print_exc() + + +def update_progress(subject, output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + with open(progress_file, "a") as f: + f.write(f"{subject}\n") + + +def get_processed_subjects(output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + if os.path.exists(progress_file): + with open(progress_file, "r") as f: + return set(line.strip() for line in f) + return set() + + +def main(): + data_dir, output_dir = setup_directories() + subject_dirs = [ + d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit() + ] + + processed_subjects = get_processed_subjects(output_dir) + + for subject in subject_dirs: + if subject in processed_subjects or is_subject_processed(subject, output_dir): + print(f"Skipping {subject}: Already processed") + continue + process_subject(subject, data_dir, output_dir) + update_progress(subject, output_dir) + + print("All subjects processed") + + +if __name__ == "__main__": + main() diff --git a/LCMV_conversation_grand_average.py b/LCMV_conversation_grand_average.py new file mode 100644 index 0000000..7f52ad4 --- /dev/null +++ b/LCMV_conversation_grand_average.py @@ -0,0 +1,74 @@ +import os +import mne +import numpy as np +from glob import glob +import re + +# Set the path to the folder containing the .stc files +stc_folder = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_analysis" + +# Create a subfolder for average STCs +average_folder = os.path.join(stc_folder, "average_stc") +os.makedirs(average_folder, exist_ok=True) + + +# Function to extract subject number and condition from filename +def parse_filename(filename): + # Extract subject number + match = re.search(r"sub-(\d+)", filename) + if match: + subject_number = int(match.group(1)) + else: + raise ValueError(f"Cannot extract subject number from filename: {filename}") + + # Extract condition (everything after 'sub-XX_' and before '.stc') + condition = 
filename.split("sub-")[1].split(".stc")[0].split("_", 1)[1] + + return subject_number, condition + + +# Get all .stc files +all_files = glob(os.path.join(stc_folder, "sub-*_*.stc")) + +# Extract all unique conditions +conditions = list(set([parse_filename(f)[1] for f in all_files])) + +# Process each condition +for condition in conditions: + print(f"Processing condition: {condition}") + + # Get all .stc files for this condition + condition_files = [f for f in all_files if parse_filename(f)[1] == condition] + + # Sort files by subject number + condition_files.sort(key=lambda f: parse_filename(f)[0]) + + # Read all STCs for this condition + stcs = [mne.read_source_estimate(f) for f in condition_files] + + # Check if all STCs have the same number of time points + n_times = [stc.data.shape[1] for stc in stcs] + if len(set(n_times)) > 1: + raise ValueError("Not all STCs have the same number of time points") + + # Stack the data from all subjects + data = np.stack([stc.data for stc in stcs]) + + # Calculate the average across subjects + avg_data = np.mean(data, axis=0) + + # Create a new STC with the averaged data + avg_stc = mne.SourceEstimate( + data=avg_data, + vertices=stcs[0].vertices, + tmin=stcs[0].tmin, + tstep=stcs[0].tstep, + subject=stcs[0].subject, + ) + + # Save the averaged STC in the new subfolder + avg_stc.save(os.path.join(average_folder, f"avg_{condition}"), overwrite=True) + +print( + "Averaging complete for all conditions. Results saved in 'average_stc' subfolder." +) diff --git a/LCMV_conversation_summarise.py b/LCMV_conversation_summarise.py new file mode 100644 index 0000000..c3501e3 --- /dev/null +++ b/LCMV_conversation_summarise.py @@ -0,0 +1,187 @@ +import os +import mne +import numpy as np +from scipy import stats +from mne.stats import spatio_temporal_cluster_1samp_test, summarize_clusters_stc +from scipy.ndimage import label + +# Define constants +SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" +STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_analysis" +SRC_FNAME = os.path.join(SUBJECTS_DIR, "fsaverage/bem/fsaverage-oct6-src.fif") +FREQUENCY_BANDS = ["broadband"] + + +def load_stcs(directory, band): + """Load Source Time Courses (STCs) for a given frequency band and crop them.""" + stcs = [] + for filename in os.listdir(directory): + if filename.endswith( + f"{band}_lcmv_beamformer_participant_conversation_vs_participant_repetition_difference-stc-rh.stc" + ): + stc_path = os.path.join(directory, filename) + stc = mne.read_source_estimate(stc_path) + + # Crop the STC + cropped_stc = stc.crop(tmin=0.25, tmax=0.75) + + stcs.append(cropped_stc) + return stcs + + +def process_frequency_band(band, src): + """Process data for a single frequency band and return aggregated timeseries for significant clusters.""" + print(f"Processing {band} band") + + stcs = load_stcs(STC_DIR, band) + if not stcs: + print(f"No STCs found for {band} band") + return None + + avg_stc = sum(stcs) / len(stcs) + + X = np.array([stc.data for stc in stcs]) + X = np.transpose(X, [0, 2, 1]) + + t_threshold = stats.distributions.t.ppf(1 - 0.001, len(X) - 1) + adjacency = mne.spatial_src_adjacency(src[:]) + + cluster_stats = spatio_temporal_cluster_1samp_test( + X, + threshold=t_threshold, + adjacency=adjacency, + n_jobs=-1, + buffer_size=None, + n_permutations=50, + ) + + t_vals, clusters, 
cluster_pvals, H0 = cluster_stats + + # Find significant clusters + good_cluster_inds = np.where(cluster_pvals < 0.05)[0] + + if len(good_cluster_inds) == 0: + print("No significant clusters found.") + return None + + # Create a 3D mask for significant clusters + mask = np.zeros(t_vals.shape, dtype=bool) + for cluster_ind in good_cluster_inds: + mask[clusters[cluster_ind]] = True + + # Label connected components in the mask + labeled_mask, num_clusters = label(mask) + + # Initialize a dictionary to store aggregated timeseries for each cluster + cluster_timeseries = {} + + for i in range(1, num_clusters + 1): + cluster_mask = labeled_mask == i + cluster_data = t_vals[cluster_mask].reshape(-1, t_vals.shape[2]) + + # Aggregate timeseries for this cluster (using mean, but you could use median or other methods) + aggregated_timeseries = np.mean(cluster_data, axis=0) + + # Store the aggregated timeseries + cluster_timeseries[f"cluster_{i}"] = aggregated_timeseries + + # Create a new STC with one timeseries per cluster + n_clusters = len(cluster_timeseries) + aggregated_data = np.zeros((n_clusters, t_vals.shape[2])) + for i, timeseries in enumerate(cluster_timeseries.values()): + aggregated_data[i, :] = timeseries + + # Use the first vertex of each hemisphere as placeholder locations + vertices = [ + src[0]["vertno"][:n_clusters], + src[1]["vertno"][:0], + ] # All in left hemisphere for simplicity + + aggregated_stc = mne.SourceEstimate( + aggregated_data, + vertices=vertices, + tmin=avg_stc.tmin, + tstep=avg_stc.tstep, + subject=avg_stc.subject, + ) + + # Save aggregated STC + aggregated_fname = os.path.join( + STC_DIR, f"aggregated_clusters_{band}_lcmv_beamformer-stc" + ) + aggregated_stc.save(aggregated_fname) + print(f"Saved aggregated SourceEstimate to {aggregated_fname}") + + visualize_clusters(cluster_stats, stcs, src) + + return aggregated_stc, cluster_timeseries + + +def visualize_clusters(cluster_stats, stcs, src): + """Visualize cluster results.""" + print("Visualizing clusters.") + tmin = stcs[0].tmin * 100 + tstep = stcs[0].tstep * 100 # convert to milliseconds + + fsave_vertices = [s["vertno"] for s in src] + stc_all_cluster_vis = summarize_clusters_stc( + cluster_stats, tstep=tstep, vertices=fsave_vertices, subject="fsaverage" + ) + plot_fmax = np.max(stc_all_cluster_vis.data) * 1.2 + brain = stc_all_cluster_vis.plot( + hemi="both", + views="lateral", + brain_kwargs=dict(show=False), + add_data_kwargs=dict( + fmin=plot_fmax / 10, + fmid=plot_fmax / 2, + fmax=plot_fmax, + scale_factor=0.0001, + colorbar_kwargs=dict(label_font_size=10), + ), + subjects_dir=SUBJECTS_DIR, + time_label="temporal extent (ms)", + size=(800, 800), + ) + + +def plot_average_stc(avg_stc): + """Plot the average Source Time Course.""" + plot_fmax = np.max(avg_stc.data) * 0.8 + brain = avg_stc.plot( + subject="fsaverage", + hemi="both", + views="dorsal", + initial_time=1.2, + brain_kwargs=dict(show=False), + add_data_kwargs=dict( + fmin=plot_fmax / 10, + fmid=plot_fmax / 2, + fmax=plot_fmax, + scale_factor=0.0001, + colorbar_kwargs=dict(label_font_size=10), + ), + ) + + +def main(): + src = mne.read_source_spaces(SRC_FNAME) + aggregated_stcs = {} + all_cluster_timeseries = {} + for band in FREQUENCY_BANDS: + result = process_frequency_band(band, src) + if result is not None: + aggregated_stc, cluster_timeseries = result + aggregated_stcs[band] = aggregated_stc + all_cluster_timeseries[band] = cluster_timeseries + + print("Analysis complete") + return aggregated_stcs, all_cluster_timeseries + + +if __name__ == 
"__main__": + aggregated_stcs, all_cluster_timeseries = main() + + +if __name__ == "__main__": + main() diff --git a/plot_stc.py b/plot_stc.py new file mode 100644 index 0000000..acd23dc --- /dev/null +++ b/plot_stc.py @@ -0,0 +1,20 @@ +import mne +import os + +SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" +STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4/" +DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_time_averaged-stc-lh.stc" + + +# Load the saved STC file (replace with your actual file path) +stc_file = os.path.join(STC_DIR, DATA_FILE) +stc = mne.read_source_estimate(stc_file) + +stc.plot( + subjects_dir=SUBJECTS_DIR, + subject="fsaverage", + initial_time=0.1, + hemi="both", + views="lateral", + clim=dict(kind="value", lims=[3, 6, 9]), +) From 91baa58a5579475423bf9854ef6d407aa4f877d1 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 10:54:07 +1200 Subject: [PATCH 08/20] fix time axis --- LCMV_CSF_conversation_batch.py | 456 +++++++++++++++++++++++++++++ LCMV_conversation_batch.py | 89 +++--- LCMV_conversation_grand_average.py | 2 +- plot_stc.py | 4 +- 4 files changed, 502 insertions(+), 49 deletions(-) create mode 100644 LCMV_CSF_conversation_batch.py diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py new file mode 100644 index 0000000..2e46966 --- /dev/null +++ b/LCMV_CSF_conversation_batch.py @@ -0,0 +1,456 @@ +import os +import mne +import numpy as np +import h5py +from mne.beamformer import apply_lcmv_epochs, make_lcmv +from mne.cov import compute_covariance +from scipy.signal import hilbert + +# Configuration options - change as needed saving +# cropped data takes up a lot of space +EQUALIZE_EVENT_COUNTS = False +SAVE_CROPPED_DATA_H5 = False + + +def setup_directories(): + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1" + os.makedirs(output_dir, exist_ok=True) + return data_dir, output_dir + + +def load_data(subject, data_dir): + fwd_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" + ) + epochs_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" + ) + noise_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" + ) + + if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): + return None + + fwd = mne.read_forward_solution(fwd_fname) + fwd_fixed = mne.convert_forward_solution( + fwd, surf_ori=True, force_fixed=False, use_cps=True + ) # Convert to fixed orientation - necessary for LCMV ori = normal + all_epochs = mne.read_epochs(epochs_fname) + noise_raw = mne.io.read_raw_fif(noise_fname) + return fwd_fixed, all_epochs, noise_raw + + +def average_stc_in_time(stc, window_size=0.1): + + sfreq = 1 / stc.tstep + n_samples = int(window_size * sfreq) + + # Calculate the number of full windows + n_windows = ( + len(stc.times) - 1 + ) // n_samples # -1 to ensure we don't exceed the time range + + # Prepare the data for averaging + data_to_average = stc.data[:, : n_windows * n_samples] + + # Reshape and average + averaged_data = data_to_average.reshape( + data_to_average.shape[0], n_windows, n_samples + 
).mean(axis=2) + + # Create new time array + new_times = np.arange(n_windows) * window_size + stc.tmin + (window_size / 2) + + return mne.SourceEstimate( + averaged_data, + vertices=stc.vertices, + tmin=new_times[0], + tstep=window_size, + subject=stc.subject, + ) + + +def is_subject_processed(subject, output_dir): + # Check if all expected output files exist for the subject + frequency_bands = ["broadband", "alpha", "beta", "gamma"] + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] + + for band in frequency_bands: + for condition in conditions: + roi_fname = f"{subject}_task-{condition}_{band}_lcmv_beamformer_roi_time_courses.npy" + stc_fname = ( + f"{subject}_task-{condition}_{band}_lcmv_beamformer_averaged-stc-lh.stc" + ) + if not ( + os.path.exists(os.path.join(output_dir, roi_fname)) + and os.path.exists(os.path.join(output_dir, stc_fname)) + ): + return False + return True + + +def prepare_epochs(all_epochs, noise_raw): + print(f"Initial all_epochs: {len(all_epochs)}") + + sfreq = all_epochs.info["sfreq"] + ch_names = all_epochs.ch_names + tmin, tmax = all_epochs.tmin, all_epochs.tmax + n_samples = len(all_epochs.times) + + if noise_raw.info["sfreq"] != sfreq: + noise_raw = noise_raw.resample(sfreq) + + print(f"Task epochs - tmin: {tmin}, tmax: {tmax}, n_samples: {n_samples}") + + noise_events = mne.make_fixed_length_events(noise_raw, duration=tmax - tmin) + noise_epochs = mne.Epochs( + noise_raw, + noise_events, + event_id={"noise": 1}, + tmin=tmin, + tmax=tmax, + baseline=None, + preload=True, + ) + + print(f"Initial noise_epochs: {len(noise_epochs)}") + print( + f"Noise epochs - tmin: {noise_epochs.tmin}, tmax: {noise_epochs.tmax}, n_samples: {len(noise_epochs.times)}" + ) + + noise_epochs = noise_epochs.copy().pick(ch_names) + + if len(all_epochs.times) > len(noise_epochs.times): + print( + f"Cropping task epochs from {len(all_epochs.times)} to {len(noise_epochs.times)} samples" + ) + all_epochs = all_epochs.copy().crop( + tmin=tmin, tmax=tmin + (len(noise_epochs.times) - 1) / sfreq + ) + + # Crop 200 ms from each end + crop_time = 0.2 + all_epochs = all_epochs.copy().crop( + tmin=all_epochs.tmin + crop_time, tmax=all_epochs.tmax - crop_time + ) + noise_epochs = noise_epochs.copy().crop( + tmin=noise_epochs.tmin + crop_time, tmax=noise_epochs.tmax - crop_time + ) + + print(f"After cropping - Task epochs n_samples: {len(all_epochs.times)}") + print(f"After cropping - Noise epochs n_samples: {len(noise_epochs.times)}") + + all_epochs.metadata = None + noise_epochs.metadata = None + + if not np.allclose(all_epochs.times, noise_epochs.times): + raise ValueError("Time points still don't match after cropping") + + combined_epochs = mne.concatenate_epochs([all_epochs, noise_epochs]) + + print(f"Final combined_epochs: {len(combined_epochs)}") + print(f"Time points: {len(combined_epochs.times)}") + + if EQUALIZE_EVENT_COUNTS: + combined_epochs.equalize_event_counts() + return combined_epochs + + +def filter_data(epochs, fmin, fmax): + """Filters epochs in the specified frequency band.""" + return epochs.copy().filter(fmin, fmax) + + +def create_lcmv_filter(epochs_filt, fwd, noise_epochs_filt): + data_cov = compute_covariance(epochs_filt, tmin=None, tmax=None, method="empirical") + noise_cov = compute_covariance( + noise_epochs_filt, tmin=None, tmax=None, method="empirical" + ) + filters_lcmv = make_lcmv( + epochs_filt.info, + fwd, + data_cov=data_cov, + noise_cov=noise_cov, + reg=0.05, + 
pick_ori="normal", + ) + return filters_lcmv + + +def apply_lcmv_filter(epochs_filt, filters_lcmv): + return apply_lcmv_epochs(epochs_filt, filters_lcmv, return_generator=True) + + +def stc_to_matrix(stc, parcellation): + """Parcellate a SourceEstimate and return a matrix of ROI time courses.""" + roi_time_courses = [ + np.mean(stc.in_label(label).data, axis=0) for label in parcellation + ] + return np.array(roi_time_courses) + + +def load_parcellation(): + subjects_dir = os.environ.get( + "SUBJECTS_DIR", + "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects", + ) + return mne.read_labels_from_annot( + "fsaverage", parc="HCPMMP1", subjects_dir=subjects_dir, hemi="both" + ) + + +def compute_source_estimate( + epochs_stcs, fwd, epochs, subject, output_dir, condition, band_name, parcellation +): + n_sources = fwd["nsource"] + n_times = len(epochs.times) + averaged_data = np.zeros((n_sources, n_times), dtype=complex) + all_data = [] + n_epochs = 0 + + vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] + + if SAVE_CROPPED_DATA_H5: + h5_filename = f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + h5_filepath = os.path.join(output_dir, h5_filename) + h5f = h5py.File(h5_filepath, "w") + + crop_time = 0.1 # in seconds + crop_samples = int(crop_time * epochs.info["sfreq"]) + + # Calculate the new time array after cropping + cropped_times = epochs.times[crop_samples:-crop_samples] + + for i, stc in enumerate(epochs_stcs): + cropped_data = stc.data[:, crop_samples:-crop_samples] + analytic_signal = hilbert(cropped_data, axis=1) + averaged_data[:, crop_samples:-crop_samples] += analytic_signal + all_data.append(analytic_signal) + n_epochs += 1 + + if SAVE_CROPPED_DATA_H5: + h5f.create_dataset(f"epoch_{i}", data=cropped_data) + + if SAVE_CROPPED_DATA_H5: + h5f.attrs["subject"] = subject + h5f.attrs["condition"] = condition + h5f.attrs["band_name"] = band_name + h5f.attrs["n_epochs"] = n_epochs + h5f.close() + + if n_epochs == 0: + raise ValueError("No epochs were processed") + + averaged_data /= n_epochs + envelope = np.abs(averaged_data) + + # Use the cropped time array for tmin and adjust the data shape + averaged_stc = mne.SourceEstimate( + envelope, + vertices=[vertices_lh, vertices_rh], + tmin=cropped_times[0], + tstep=epochs.times[1] - epochs.times[0], + subject="fsaverage", + ) + + # Create time-domain averaged version + time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) + + # Compute ROI time courses for both original and time-averaged data + roi_time_courses = stc_to_matrix(averaged_stc, parcellation) + time_averaged_roi_time_courses = stc_to_matrix(time_averaged_stc, parcellation) + + # Save original ROI time courses + roi_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_roi_time_courses.npy" + ) + np.save(os.path.join(output_dir, roi_fname), roi_time_courses) + + # Save time-averaged ROI time courses + time_avg_roi_fname = f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged_roi_time_courses.npy" + np.save( + os.path.join(output_dir, time_avg_roi_fname), time_averaged_roi_time_courses + ) + + # Save original SourceEstimate + averaged_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_averaged-stc" + ) + averaged_stc.save(os.path.join(output_dir, averaged_fname), overwrite=True) + + # Save time-averaged SourceEstimate + time_avg_fname = ( + 
f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged-stc" + ) + time_averaged_stc.save(os.path.join(output_dir, time_avg_fname), overwrite=True) + + print( + f"Saved original SourceEstimate to {os.path.join(output_dir, averaged_fname)}" + ) + print( + f"Saved time-averaged SourceEstimate to {os.path.join(output_dir, time_avg_fname)}" + ) + print(f"Saved original ROI time courses to {os.path.join(output_dir, roi_fname)}") + print( + f"Saved time-averaged ROI time courses to {os.path.join(output_dir, time_avg_roi_fname)}" + ) + + return ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) + + +def process_subject(subject, data_dir, output_dir): + try: + fwd, all_epochs, noise_raw = load_data(subject, data_dir) + if fwd is None: + print(f"Skipping {subject}: Missing required files") + return + + print(f"All epochs info:") + print(f"Number of epochs: {len(all_epochs)}") + print(f"Time points: {len(all_epochs.times)}") + print(f"tmin: {all_epochs.tmin}, tmax: {all_epochs.tmax}") + + combined_epochs = prepare_epochs(all_epochs, noise_raw) + parcellation = load_parcellation() + + frequency_bands = { + "broadband": (1, 40), + "alpha": (8, 12), + "beta": (13, 30), + "gamma": (30, 40), + } + + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] # TODO see below re: removing localisers + + for band_name, (fmin, fmax) in frequency_bands.items(): + print(f"Processing {band_name} band ({fmin}-{fmax} Hz)") + filtered_epochs = filter_data(combined_epochs, fmin, fmax) + + # Create a common filter using all conditions + all_condition_epochs = mne.concatenate_epochs( + [filtered_epochs[cond] for cond in conditions] + ) + noise_epochs = filtered_epochs["noise"] + common_filter = create_lcmv_filter( + all_condition_epochs, fwd, noise_epochs + ) # TODO don't include localisers? Mayby do a separate run for each type so that code doesn't become too complex? 
+ + averaged_stc_dict = {} + roi_time_courses_dict = {} + time_averaged_stc_dict = {} + time_averaged_roi_time_courses_dict = {} + + for condition in conditions: + condition_epochs = filtered_epochs[condition] + + # Apply the common filter to each condition + epochs_stcs = apply_lcmv_filter(condition_epochs, common_filter) + + ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) = compute_source_estimate( + epochs_stcs, + fwd, + condition_epochs, + subject, + output_dir, + condition, + band_name, + parcellation, + ) + + averaged_stc_dict[condition] = averaged_stc + roi_time_courses_dict[condition] = roi_time_courses + time_averaged_stc_dict[condition] = time_averaged_stc + time_averaged_roi_time_courses_dict[condition] = ( + time_averaged_roi_time_courses + ) + + diff_pairs = [ + ("interviewer_conversation", "interviewer_repetition"), + ("participant_conversation", "participant_repetition"), + ] + + for cond1, cond2 in diff_pairs: + # Original difference + diff_stc = averaged_stc_dict[cond1] - averaged_stc_dict[cond2] + diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_difference-stc", + ) + diff_stc.save(diff_fname, overwrite=True) + + # Time-averaged difference + time_avg_diff_stc = ( + time_averaged_stc_dict[cond1] - time_averaged_stc_dict[cond2] + ) + time_avg_diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_time_averaged_difference-stc", + ) + time_avg_diff_stc.save(time_avg_diff_fname, overwrite=True) + + print(f"Finished processing {subject}") + update_progress(subject, output_dir) + except Exception as e: + print(f"Error processing {subject}: {str(e)}") + import traceback + traceback.print_exc( + + +def update_progress(subject, output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + with open(progress_file, "a") as f: + f.write(f"{subject}\n") + + +def get_processed_subjects(output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + if os.path.exists(progress_file): + with open(progress_file, "r") as f: + return set(line.strip() for line in f) + return set() + + +def main(): + data_dir, output_dir = setup_directories() + subject_dirs = [d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit()] + + processed_subjects = get_processed_subjects(output_dir) + + for subject in subject_dirs: + if subject in processed_subjects or is_subject_processed(subject, output_dir): + print(f"Skipping {subject}: Already processed") + continue + process_subject(subject, data_dir, output_dir) + # Removed: update_progress(subject, output_dir) + + print("All subjects processed") + + +if __name__ == "__main__": + main() diff --git a/LCMV_conversation_batch.py b/LCMV_conversation_batch.py index 082a533..f0d02cc 100644 --- a/LCMV_conversation_batch.py +++ b/LCMV_conversation_batch.py @@ -12,22 +12,38 @@ SAVE_CROPPED_DATA_H5 = False +def setup_directories(): + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output2" + os.makedirs(output_dir, exist_ok=True) + return data_dir, output_dir + + +def load_data(subject, data_dir): + fwd_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" + ) + epochs_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" + ) + noise_fname 
= os.path.join( + data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" + ) + + if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): + return None + + fwd = mne.read_forward_solution(fwd_fname) + fwd_fixed = mne.convert_forward_solution( + fwd, surf_ori=True, force_fixed=False, use_cps=True + ) # Convert to fixed orientation - necessary for LCMV ori = normal + all_epochs = mne.read_epochs(epochs_fname) + noise_raw = mne.io.read_raw_fif(noise_fname) + return fwd_fixed, all_epochs, noise_raw + + def average_stc_in_time(stc, window_size=0.1): - """ - Average the SourceEstimate in the time domain. - - Parameters: - ----------- - stc : mne.SourceEstimate - The source estimate to average. - window_size : float - The size of the averaging window in seconds. - - Returns: - -------- - mne.SourceEstimate - A new SourceEstimate with time-averaged data. - """ + sfreq = 1 / stc.tstep n_samples = int(window_size * sfreq) @@ -82,36 +98,6 @@ def is_subject_processed(subject, output_dir): return True -def setup_directories(): - data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" - output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4" - os.makedirs(output_dir, exist_ok=True) - return data_dir, output_dir - - -def load_data(subject, data_dir): - fwd_fname = os.path.join( - data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" - ) - epochs_fname = os.path.join( - data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" - ) - noise_fname = os.path.join( - data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" - ) - - if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): - return None - - fwd = mne.read_forward_solution(fwd_fname) - fwd_fixed = mne.convert_forward_solution( - fwd, surf_ori=True, force_fixed=False, use_cps=True - ) # Convert to fixed orientation - necessary for LCMV ori = normal - all_epochs = mne.read_epochs(epochs_fname) - noise_raw = mne.io.read_raw_fif(noise_fname) - return fwd_fixed, all_epochs, noise_raw - - def prepare_epochs(all_epochs, noise_raw): print(f"Initial all_epochs: {len(all_epochs)}") @@ -258,11 +244,22 @@ def compute_source_estimate( averaged_stc = mne.SourceEstimate( envelope, vertices=[vertices_lh, vertices_rh], - tmin=epochs.times[crop_samples], + tmin=cropped_times[0], tstep=epochs.times[1] - epochs.times[0], subject="fsaverage", ) + print( + f"Original STC time range: {averaged_stc.times[0]:.3f}s to {averaged_stc.times[-1]:.3f}s" + ) + print(f"Original STC time points: {len(averaged_stc.times)}") + # Apply time-domain averaging + time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) + + print( + f"Time-averaged STC time range: {time_averaged_stc.times[0]:.3f}s to {time_averaged_stc.times[-1]:.3f}s" + ) + print(f"Time-averaged STC time points: {len(time_averaged_stc.times)}") # Create time-domain averaged version time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) diff --git a/LCMV_conversation_grand_average.py b/LCMV_conversation_grand_average.py index 7f52ad4..fffad22 100644 --- a/LCMV_conversation_grand_average.py +++ b/LCMV_conversation_grand_average.py @@ -5,7 +5,7 @@ import re # Set the path to the folder containing the .stc files -stc_folder = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_analysis" +stc_folder = 
"/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_output/" # Create a subfolder for average STCs average_folder = os.path.join(stc_folder, "average_stc") diff --git a/plot_stc.py b/plot_stc.py index acd23dc..f107171 100644 --- a/plot_stc.py +++ b/plot_stc.py @@ -2,8 +2,8 @@ import os SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" -STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4/" -DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_time_averaged-stc-lh.stc" +STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1/" +DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_averaged-stc-lh.stc" # Load the saved STC file (replace with your actual file path) From d530e45e461c4ff5f9e3accab4d34f5e61382307 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 12:26:04 +1200 Subject: [PATCH 09/20] fix x axis --- LCMV_CSF_conversation_batch.py | 112 +++++++++++++++++++++++---------- plot_stc.py | 2 +- 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 2e46966..6da0829 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -5,11 +5,13 @@ from mne.beamformer import apply_lcmv_epochs, make_lcmv from mne.cov import compute_covariance from scipy.signal import hilbert +import matplotlib.pyplot as plt # Configuration options - change as needed saving # cropped data takes up a lot of space EQUALIZE_EVENT_COUNTS = False SAVE_CROPPED_DATA_H5 = False +DIAGNOSTIC_PLOTS = False def setup_directories(): @@ -146,8 +148,12 @@ def prepare_epochs(all_epochs, noise_raw): tmin=noise_epochs.tmin + crop_time, tmax=noise_epochs.tmax - crop_time ) - print(f"After cropping - Task epochs n_samples: {len(all_epochs.times)}") - print(f"After cropping - Noise epochs n_samples: {len(noise_epochs.times)}") + print( + f"After cropping - Task epochs tmin: {all_epochs.tmin}, tmax: {all_epochs.tmax}, n_samples: {len(all_epochs.times)}" + ) + print( + f"After cropping - Noise epochs tmin: {noise_epochs.tmin}, tmax: {noise_epochs.tmax}, n_samples: {len(noise_epochs.times)}" + ) all_epochs.metadata = None noise_epochs.metadata = None @@ -159,6 +165,38 @@ def prepare_epochs(all_epochs, noise_raw): print(f"Final combined_epochs: {len(combined_epochs)}") print(f"Time points: {len(combined_epochs.times)}") + print(f"Combined epochs tmin: {combined_epochs.tmin}, tmax: {combined_epochs.tmax}") + if DIAGNOSTIC_PLOTS: + # Diagnostic plots + plt.figure(figsize=(15, 10)) + + plt.subplot(311) + plt.plot(combined_epochs.times, combined_epochs.get_data().mean(axis=(0, 1))) + plt.title("Average of all channels and epochs") + plt.xlabel("Time (s)") + + plt.subplot(312) + plt.plot(combined_epochs.times, combined_epochs.get_data()[0, 0, :]) + plt.title("First channel of first epoch") + plt.xlabel("Time (s)") + + plt.subplot(313) + plt.imshow( + combined_epochs.get_data().mean(axis=0), + aspect="auto", + extent=[ + combined_epochs.times[0], + combined_epochs.times[-1], + 0, + combined_epochs.get_data().shape[1], + ], + ) + plt.title("Heatmap of all channels (averaged across epochs)") + plt.xlabel("Time (s)") + plt.ylabel("Channels") + + plt.tight_layout() + plt.show() if EQUALIZE_EVENT_COUNTS: 
combined_epochs.equalize_event_counts() @@ -211,6 +249,26 @@ def load_parcellation(): def compute_source_estimate( epochs_stcs, fwd, epochs, subject, output_dir, condition, band_name, parcellation ): + """Computes source estimates, ROI time courses, and saves them. + + Args: + epochs_stcs (list): List of SourceEstimate objects. + fwd (dict): Forward solution dictionary. + epochs (mne.Epochs): Evoked data object. + subject (str): Subject name. + output_dir (str): Output directory path. + condition (str): Experimental condition. + band_name (str): Band name. + parcellation (mne.Parcellation): Parcellation object. + + Returns: + tuple: A tuple containing the following elements: + - averaged_stc (mne.SourceEstimate): The averaged source estimate. + - roi_time_courses (np.ndarray): Original ROI time courses. + - time_averaged_stc (mne.SourceEstimate): Time-averaged source estimate. + - time_averaged_roi_time_courses (np.ndarray): Time-averaged ROI time courses. + """ + n_sources = fwd["nsource"] n_times = len(epochs.times) averaged_data = np.zeros((n_sources, n_times), dtype=complex) @@ -219,33 +277,24 @@ def compute_source_estimate( vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] - if SAVE_CROPPED_DATA_H5: - h5_filename = f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" - h5_filepath = os.path.join(output_dir, h5_filename) - h5f = h5py.File(h5_filepath, "w") - - crop_time = 0.1 # in seconds - crop_samples = int(crop_time * epochs.info["sfreq"]) - - # Calculate the new time array after cropping - cropped_times = epochs.times[crop_samples:-crop_samples] + with h5py.File( + os.path.join( + output_dir, f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + ), + "w", + ) as h5f: + for i, stc in enumerate(epochs_stcs): + analytic_signal = hilbert(stc.data, axis=1) + averaged_data += analytic_signal + all_data.append(analytic_signal) + n_epochs += 1 - for i, stc in enumerate(epochs_stcs): - cropped_data = stc.data[:, crop_samples:-crop_samples] - analytic_signal = hilbert(cropped_data, axis=1) - averaged_data[:, crop_samples:-crop_samples] += analytic_signal - all_data.append(analytic_signal) - n_epochs += 1 + h5f.create_dataset(f"epoch_{i}", data=stc.data) - if SAVE_CROPPED_DATA_H5: - h5f.create_dataset(f"epoch_{i}", data=cropped_data) - - if SAVE_CROPPED_DATA_H5: h5f.attrs["subject"] = subject h5f.attrs["condition"] = condition h5f.attrs["band_name"] = band_name h5f.attrs["n_epochs"] = n_epochs - h5f.close() if n_epochs == 0: raise ValueError("No epochs were processed") @@ -253,41 +302,35 @@ def compute_source_estimate( averaged_data /= n_epochs envelope = np.abs(averaged_data) - # Use the cropped time array for tmin and adjust the data shape averaged_stc = mne.SourceEstimate( envelope, vertices=[vertices_lh, vertices_rh], - tmin=cropped_times[0], + tmin=epochs.times[0], tstep=epochs.times[1] - epochs.times[0], subject="fsaverage", ) - # Create time-domain averaged version time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) - # Compute ROI time courses for both original and time-averaged data roi_time_courses = stc_to_matrix(averaged_stc, parcellation) time_averaged_roi_time_courses = stc_to_matrix(time_averaged_stc, parcellation) - # Save original ROI time courses + # Save outputs roi_fname = ( f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_roi_time_courses.npy" ) np.save(os.path.join(output_dir, roi_fname), roi_time_courses) - # Save time-averaged ROI time courses time_avg_roi_fname = 
f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged_roi_time_courses.npy" np.save( os.path.join(output_dir, time_avg_roi_fname), time_averaged_roi_time_courses ) - # Save original SourceEstimate averaged_fname = ( f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_averaged-stc" ) averaged_stc.save(os.path.join(output_dir, averaged_fname), overwrite=True) - # Save time-averaged SourceEstimate time_avg_fname = ( f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged-stc" ) @@ -419,7 +462,8 @@ def process_subject(subject, data_dir, output_dir): except Exception as e: print(f"Error processing {subject}: {str(e)}") import traceback - traceback.print_exc( + + traceback.print_exc() def update_progress(subject, output_dir): @@ -438,7 +482,9 @@ def get_processed_subjects(output_dir): def main(): data_dir, output_dir = setup_directories() - subject_dirs = [d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit()] + subject_dirs = [ + d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit() + ] processed_subjects = get_processed_subjects(output_dir) diff --git a/plot_stc.py b/plot_stc.py index f107171..5b16893 100644 --- a/plot_stc.py +++ b/plot_stc.py @@ -3,7 +3,7 @@ SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1/" -DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_averaged-stc-lh.stc" +DATA_FILE = "sub-01_task-da_broadband_lcmv_beamformer_averaged-stc-lh.stc" # Load the saved STC file (replace with your actual file path) From 371f2ce16b2adf47a0b36eaf36364c20838b98d1 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 12:26:53 +1200 Subject: [PATCH 10/20] remove debugging plots --- LCMV_CSF_conversation_batch.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 6da0829..94202c0 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -11,7 +11,6 @@ # cropped data takes up a lot of space EQUALIZE_EVENT_COUNTS = False SAVE_CROPPED_DATA_H5 = False -DIAGNOSTIC_PLOTS = False def setup_directories(): @@ -166,37 +165,6 @@ def prepare_epochs(all_epochs, noise_raw): print(f"Final combined_epochs: {len(combined_epochs)}") print(f"Time points: {len(combined_epochs.times)}") print(f"Combined epochs tmin: {combined_epochs.tmin}, tmax: {combined_epochs.tmax}") - if DIAGNOSTIC_PLOTS: - # Diagnostic plots - plt.figure(figsize=(15, 10)) - - plt.subplot(311) - plt.plot(combined_epochs.times, combined_epochs.get_data().mean(axis=(0, 1))) - plt.title("Average of all channels and epochs") - plt.xlabel("Time (s)") - - plt.subplot(312) - plt.plot(combined_epochs.times, combined_epochs.get_data()[0, 0, :]) - plt.title("First channel of first epoch") - plt.xlabel("Time (s)") - - plt.subplot(313) - plt.imshow( - combined_epochs.get_data().mean(axis=0), - aspect="auto", - extent=[ - combined_epochs.times[0], - combined_epochs.times[-1], - 0, - combined_epochs.get_data().shape[1], - ], - ) - plt.title("Heatmap of all channels (averaged across epochs)") - plt.xlabel("Time (s)") - plt.ylabel("Channels") - - plt.tight_layout() - plt.show() if EQUALIZE_EVENT_COUNTS: combined_epochs.equalize_event_counts() From 
b8696e36883e06c89634f3cada1074b99cd827c1 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 12:44:15 +1200 Subject: [PATCH 11/20] tidy up --- LCMV_CSF_conversation_batch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 94202c0..5f82659 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -14,8 +14,8 @@ def setup_directories(): - data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" - output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1" + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/mne-bids-pipeline-20240628/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_output/" os.makedirs(output_dir, exist_ok=True) return data_dir, output_dir From ea643366dda1a230c715723d26c911e58154f17d Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 14:44:04 +1200 Subject: [PATCH 12/20] refix h5 save option --- LCMV_CSF_conversation_batch.py | 39 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 5f82659..2f5eaf1 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -5,7 +5,6 @@ from mne.beamformer import apply_lcmv_epochs, make_lcmv from mne.cov import compute_covariance from scipy.signal import hilbert -import matplotlib.pyplot as plt # Configuration options - change as needed saving # cropped data takes up a lot of space @@ -244,25 +243,25 @@ def compute_source_estimate( n_epochs = 0 vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] - - with h5py.File( - os.path.join( - output_dir, f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" - ), - "w", - ) as h5f: - for i, stc in enumerate(epochs_stcs): - analytic_signal = hilbert(stc.data, axis=1) - averaged_data += analytic_signal - all_data.append(analytic_signal) - n_epochs += 1 - - h5f.create_dataset(f"epoch_{i}", data=stc.data) - - h5f.attrs["subject"] = subject - h5f.attrs["condition"] = condition - h5f.attrs["band_name"] = band_name - h5f.attrs["n_epochs"] = n_epochs + if SAVE_CROPPED_DATA_H5: + with h5py.File( + os.path.join( + output_dir, f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + ), + "w", + ) as h5f: + for i, stc in enumerate(epochs_stcs): + analytic_signal = hilbert(stc.data, axis=1) + averaged_data += analytic_signal + all_data.append(analytic_signal) + n_epochs += 1 + + h5f.create_dataset(f"epoch_{i}", data=stc.data) + + h5f.attrs["subject"] = subject + h5f.attrs["condition"] = condition + h5f.attrs["band_name"] = band_name + h5f.attrs["n_epochs"] = n_epochs if n_epochs == 0: raise ValueError("No epochs were processed") From 3e667db5ac8060076cf4ba662ebe3cddc921e934 Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:44:35 +1200 Subject: [PATCH 13/20] For calculating wpli and graph metrics from LCMV --- Connectivity.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 Connectivity.py diff --git a/Connectivity.py b/Connectivity.py new file mode 100644 index 0000000..74fd281 --- 
/dev/null +++ b/Connectivity.py @@ -0,0 +1,98 @@ +# This is script to calculate connectivity using raw np arrays, +# each assumed to be in the shape (channels/ROI's, time points) + +import numpy as np +from datetime import datetime +import os +import dyconnmap +import bct +from scipy import stats +import pathlib + +def operations_to_perform(): # Change as desired + Cal_wpli = True + Cal_graph_metrics = True + return Cal_wpli, Cal_graph_metrics + + +def setup_dependencies(): # Change as desired + input_dir = r'C:\Users\em17531\Desktop\New_project\source_loc_output' # The directory containing files of interest + output_dir = r'' # The output directory + array_type = 'participant_conversation_alpha' # This is the type of array that will be loaded for further calculations, eg. "participant_conversation_alpha" + target_fb = [8,12] # The target frequency band TODO: set this up so variable can be a this can be list of bands and each generates it's own adjacency matrix + fs = 250 # The sampling frequency + saving = True + os.makedirs(output_dir, exist_ok=True) + return input_dir, output_dir, array_type, fs, target_fb, saving + + +def prep_data(input_dir, array_type): + data = [] + folder = pathlib.Path(input_dir).glob('*') + for file in folder: + if (os.path.basename(file)).endswith('.npy'): + if array_type in os.path.basename(file): + data.append(np.load(file)) + return np.array(data) + + +def wpli_conn(array, target_fb, fs): # (participants, roi's, time_points) + adj_matrix = [] + for participant in array: + adj_matrix.append(dyconnmap.fc.wpli(participant, fs=fs, fb=target_fb)) + return np.array(adj_matrix) + + +def graph_metrics(adj_matrix): # TODO: Add in stats, maybe nbs? Could use fdc as well? + # Strength calculator + Strength = [] + for participant in adj_matrix: + Strength.append(bct.strengths_und(np.nan_to_num(participant))) + Strength = np.array(Strength) + + # Zeroing negative phasing + Strength[Strength < 0] = 0 + + # Betweenness centrality calculator + Betweenness = [] + for participant in adj_matrix: + Betweenness.append(bct.betweenness_wei(np.nan_to_num(participant))) + Betweenness = np.array(Betweenness) + + # Eigenvector centrality calculator + Eigenvector = [] + for participant in adj_matrix: + Eigenvector.append(bct.eigenvector_centrality_und(np.nan_to_num(participant))) + Eigenvector = np.array(Eigenvector) + + # Clustering calculator + Clustering = [] + for participant in adj_matrix: + Clustering.append(bct.clustering_coef_wu(np.nan_to_num(participant))) + Clustering = np.array(Clustering) + + return Strength, Betweenness, Eigenvector, Clustering + + +def main(): # TODO: put in the elif statements + # Prep + Cal_wpli, Cal_graph_metrics = operations_to_perform() + input_dir, output_dir, array_type, fs, target_fb, saving = setup_dependencies() + data = prep_data(input_dir, array_type) + + # Core functions + if Cal_wpli: + adj_matrix = wpli_conn(data, target_fb, fs) + if Cal_graph_metrics: + Strength, Betweenness, Eigenvector, Clustering = graph_metrics(adj_matrix) + + # Saving + if saving: + np.save(output_dir + 'All_Strength_' + array_type, Strength) + np.save(output_dir + 'All_Betweenness_' + array_type, Betweenness) + np.save(output_dir + 'All_Eigenvector_' + array_type, Eigenvector) + np.save(output_dir + 'All_Clustering_' + array_type, Clustering) + + +if __name__ == "__main__": + main() From cfbc80ccbb3ce033265885a45f9d98a838bf7e33 Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:43:17 +1200 Subject: 
[PATCH 14/20] Updated Connectivity.py --- Connectivity.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Connectivity.py b/Connectivity.py index 74fd281..245fd3b 100644 --- a/Connectivity.py +++ b/Connectivity.py @@ -2,12 +2,13 @@ # each assumed to be in the shape (channels/ROI's, time points) import numpy as np -from datetime import datetime +import time import os import dyconnmap import bct from scipy import stats import pathlib +start = time.time() def operations_to_perform(): # Change as desired Cal_wpli = True @@ -16,7 +17,7 @@ def operations_to_perform(): # Change as desired def setup_dependencies(): # Change as desired - input_dir = r'C:\Users\em17531\Desktop\New_project\source_loc_output' # The directory containing files of interest + input_dir = r'' # The directory containing files of interest output_dir = r'' # The output directory array_type = 'participant_conversation_alpha' # This is the type of array that will be loaded for further calculations, eg. "participant_conversation_alpha" target_fb = [8,12] # The target frequency band TODO: set this up so variable can be a this can be list of bands and each generates it's own adjacency matrix @@ -48,7 +49,7 @@ def graph_metrics(adj_matrix): # TODO: Add in stats, maybe nbs? Could use fdc a Strength = [] for participant in adj_matrix: Strength.append(bct.strengths_und(np.nan_to_num(participant))) - Strength = np.array(Strength) + Strength = np.array(Strength) # Zeroing negative phasing Strength[Strength < 0] = 0 @@ -57,19 +58,19 @@ def graph_metrics(adj_matrix): # TODO: Add in stats, maybe nbs? Could use fdc a Betweenness = [] for participant in adj_matrix: Betweenness.append(bct.betweenness_wei(np.nan_to_num(participant))) - Betweenness = np.array(Betweenness) + Betweenness = np.array(Betweenness) # Eigenvector centrality calculator Eigenvector = [] for participant in adj_matrix: Eigenvector.append(bct.eigenvector_centrality_und(np.nan_to_num(participant))) - Eigenvector = np.array(Eigenvector) + Eigenvector = np.array(Eigenvector) # Clustering calculator Clustering = [] for participant in adj_matrix: Clustering.append(bct.clustering_coef_wu(np.nan_to_num(participant))) - Clustering = np.array(Clustering) + Clustering = np.array(Clustering) return Strength, Betweenness, Eigenvector, Clustering @@ -96,3 +97,5 @@ def main(): # TODO: put in the elif statements if __name__ == "__main__": main() + +print('\n' + "EXECUTION TIME: " + str(time.time()-start)) From 39c36d54fc8de05e2064dfe3441b3aa7621bfae0 Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:17:55 +1200 Subject: [PATCH 15/20] Update Connectivity.py --- Connectivity.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Connectivity.py b/Connectivity.py index 245fd3b..e873e5a 100644 --- a/Connectivity.py +++ b/Connectivity.py @@ -89,10 +89,10 @@ def main(): # TODO: put in the elif statements # Saving if saving: - np.save(output_dir + 'All_Strength_' + array_type, Strength) - np.save(output_dir + 'All_Betweenness_' + array_type, Betweenness) - np.save(output_dir + 'All_Eigenvector_' + array_type, Eigenvector) - np.save(output_dir + 'All_Clustering_' + array_type, Clustering) + np.save(output_dir + '_All_Strength_' + array_type, Strength) + np.save(output_dir + '_All_Betweenness_' + array_type, Betweenness) + np.save(output_dir + '_All_Eigenvector_' + array_type, Eigenvector) + np.save(output_dir + '_All_Clustering_' + array_type, Clustering) if __name__ == 
"__main__": From bd76c6e3e7a5bd339be6c4c312e9ef206a33a90e Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:03:14 +1000 Subject: [PATCH 16/20] Add z-scores version of ROI time series --- traditional_source_analysis_and_ROIs.py | 96 +++++++++++++++++++------ 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index d4c8be0..6d055a1 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -2,7 +2,6 @@ # Compute and plot GAs # Extract time courses from predefined ROIs -import os import os.path as op import numpy as np import matplotlib.pyplot as plt @@ -26,13 +25,19 @@ # specify how many SSP projectors to use for speech artifact removal n_proj = 4 # 1 means: 1 for MEG & 1 for EEG +this_run = f"{n_proj}-proj" +#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks # which conditions to compare in ROI analysis comparison = "participant" # "interviewer" or "participant" conds_ROI = [f"{comparison}_conversation", f"{comparison}_repetition"] -this_run = f"{n_proj}-proj" -#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks +ch_type = None # "meg" or "eeg" # None means all +if ch_type: + path_suffix = f"_{ch_type}-only" +else: + path_suffix = "" + subjects = config.subjects if subjects == "all": @@ -52,12 +57,15 @@ #analysis_path = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" -source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}" / this_run -figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run -figures_ROI_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run / f"{comparison}_ROI" +source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}{path_suffix}" / this_run +figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}{path_suffix}" / this_run +figures_ROI_dir = figures_dir / f"{comparison}_ROI" +#figures_ROI_zscores_dir = figures_dir / f"{comparison}_ROI_zscores" +figures_ROI_zscores_dir = figures_dir / f"{comparison}_ROI_zscores-timeslices" # create the folders if needed source_results_dir.mkdir(parents=True, exist_ok=True) figures_ROI_dir.mkdir(parents=True, exist_ok=True) +figures_ROI_zscores_dir.mkdir(parents=True, exist_ok=True) subjects_dir = deriv_path / "freesurfer" / "subjects" subject = 'fsaverage' @@ -99,13 +107,15 @@ epochs.add_proj(proj).apply_proj() epochs = epochs.pick(["meg", "eeg"], exclude="bads") + if ch_type: + epochs.pick(ch_type) ranks = mne.compute_rank(inst=epochs, tol=1e-3, tol_kind="relative") rank = sum(ranks.values()) print(f" Ranks={ranks} (total={rank})") # TEMP - always recreate inv for now, until we change to free orientation in the pipeline - if 1: #n_proj or src_type == 'vol': + if 1: #n_proj or ch_type or src_type == 'vol': # Recreate inverse taking into account additional projections cov = mne.read_cov(cov_fname) if src_type == 'vol': @@ -227,13 +237,27 @@ # initialise the sum array to correct size using first subject's stc stc = mne.read_source_estimate(stc_files[0]) - + ''' # read in the stc for each subsequent subject, add to the sum array for fname in stc_files[1:]: stc += mne.read_source_estimate(fname) - # divide by number of files stc /= len(stc_files) + ''' + + # the above only seems 
to work for surface-based stcs, + # so we are retaining the code below for now to handle vol-based stcs + + stcs_sum = stc.data # this contains lh & rh vertices combined together + # there are also separate fields for the 2 hemis (stc.lh_data, stc.rh_data), + # but their content cannot be set directly, so just use the combined one + + # read in the stc for each subsequent subject, add to the sum array + for fname in stc_files[1:]: + stc = mne.read_source_estimate(fname) + stcs_sum = stcs_sum + stc.data + # divide by number of files + stc.data = stcs_sum / len(stc_files) # store in the GA struct GA_stcs[cond] = stc @@ -282,8 +306,13 @@ # Extract ROI time courses from source estimates -src = mne.read_source_spaces(src_fname) +window_size = 200 # sliding window size (ms) for calculating z-scores +window = window_size / 5 # 10 = 50ms, 20 = 100ms, 40 = 200ms + (figures_ROI_dir / "all_ROIs").mkdir(parents=True, exist_ok=True) +(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms").mkdir(parents=True, exist_ok=True) + +src = mne.read_source_spaces(src_fname) if src_type == 'vol': # choose atlas for parcellation @@ -297,22 +326,48 @@ for label_name in rois: (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) + (figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") + fig_z, axes_z = plt.subplots(1, layout="constrained") for cond in conds_ROI: label_ts = mne.extract_label_time_course( [GA_stcs[cond]], (fname_aseg, label_name), src, mode="auto" ) - axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) - - axes.axvline(linestyle='--') # add verticle line at time 0 + label_ts = label_ts[0][0] + axes.plot(1e3 * GA_stcs[cond].times, label_ts, label=cond) + + # calculate z-score at each time point (using a sliding time window) + mu = np.mean(label_ts) # demean is based on the whole epoch + label_ts = label_ts - mu + + zscores = [0] * (len(label_ts) - window) # initialise the z-scores array + for t in range(0, len(label_ts) - window): + sigma = np.std(label_ts[t:t+window], mean=0) # sigma is calculated on the time slice only, but need to manually set the mean to 0 + zscores[t] = label_ts[t] / sigma + axes_z.plot(1e3 * GA_stcs[cond].times[:len(zscores)], zscores, label=cond) + ''' + # take the time window centred on current time point (rather than on the RHS) + window_half = int(window/2) + zscores = [0] * (len(label_ts) - window_half) # initialise the z-scores array + for t in range(window_half, len(label_ts) - window_half): + sigma = np.std(label_ts[t-window_half:t+window_half], mean=0) # sigma is calculated on the time slice only, but need to manually set the mean to 0 + zscores[t] = label_ts[t] / sigma + axes_z.plot(1e3 * GA_stcs[cond].times[window_half:len(zscores)], zscores[window_half:], label=cond) + ''' + + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() + axes_z.axvline(linestyle='-', color='k') # add verticle line at time 0 + axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)") + axes_z.legend() fig.savefig(figures_ROI_dir / label_name / "GA.png") fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder - plt.close(fig) + fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png") + plt.close('all') # Plot individual-subjects ROI time series for sub in use_subjects: @@ -323,14 
+378,14 @@ label_ts = mne.extract_label_time_course( [stc], (fname_aseg, label_name), src, mode="auto" ) - axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) + axes.plot(1e3 * stc.times, label_ts[0][0], label=cond) - axes.axvline(linestyle='--') # add verticle line at time 0 + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") - plt.close(fig) + plt.close('all') elif src_type == 'surface': # for surface source space, we need to create the Label object first @@ -352,7 +407,8 @@ # or read a single label (e.g. V1, BA44, etc) #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label')) - + + for label in labels: label_name = label.name # or set a custom name for combined labels @@ -371,7 +427,7 @@ ) axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) - axes.axvline(linestyle='--') # add verticle line at time 0 + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() @@ -390,7 +446,7 @@ ) axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) - axes.axvline(linestyle='--') # add verticle line at time 0 + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() From 72a1b48c7d2b571b9e95be7b58f54d8af0cedb16 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:17:44 +1000 Subject: [PATCH 17/20] Add saving of GA stc --- csp.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/csp.py b/csp.py index b32204e..cb81503 100644 --- a/csp.py +++ b/csp.py @@ -93,10 +93,12 @@ analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" -fig_path = analysis_path / "figures" / "CSP-decoding" if src_type == 'vol': fig_path = analysis_path / "figures" / "CSP-decoding-vol" results_path = analysis_path / "results" / "CSP-decoding-vol" +else: + fig_path = analysis_path / "figures" / "CSP-decoding" + results_path = analysis_path / "results" / "CSP-decoding" cop_path = analysis_path / "figures" subjects_dir = deriv_path / "freesurfer" / "subjects" if src_type == 'vol': @@ -141,7 +143,7 @@ if decode != "participant": extra += f"-{decode}" title += f", {decode}" -for si, sub in enumerate(use_subjects): # just 03 for now +for si, sub in enumerate(use_subjects): path = deriv_path / 'mne-bids-pipeline' / f'sub-{sub}' / 'meg' epochs_fname = path / f'sub-{sub}_task-conversation_proc-clean_epo.fif' trans_fname = path / f'sub-{sub}_task-conversation_trans.fif' @@ -396,6 +398,7 @@ def clean_brain(brain_img): if subj_key == "": results_path.mkdir(exist_ok=True) stc.save(f'{results_path}/decoding{extra}_csp_GA') + src = mne.read_source_spaces(src_fname) # for vol src, the plot function returns a matplotlib Figure - # we can't update the clim & time point for this once plotted, so do the actual plotting later @@ -404,6 +407,11 @@ def clean_brain(brain_img): this_data.reshape(-1, n_vertices).T, vertices=fs_vertices, tmin=0, tstep=1., subject="fsaverage", ) + # save the GA stc + if subj_key == "": + results_path.mkdir(exist_ok=True) + stc.save(f'{results_path}/decoding{extra}_csp_GA') + brain = stc.plot( initial_time=0., transparent=True, colormap="viridis", clim=dict(kind="value", lims=[0, 1, 2]), From 
f47cbfc540edad395ac02f29d0dc718ba4736c11 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 29 Jul 2024 22:42:27 +1000 Subject: [PATCH 18/20] Equalise event counts. Add more ROIs from HCPMMP1 atlas --- traditional_source_analysis_and_ROIs.py | 93 +++++++++++++++++++++---- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index 6d055a1..51d5d7b 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -18,7 +18,7 @@ # specify source method etc source_method = "dSPM" #"MNE" -src_type = 'vol' # 'vol' or 'surface' +src_type = 'surface' # 'vol' or 'surface' spacing = "oct6" # for 'surface' source space only if src_type != 'surface': spacing = '' @@ -38,6 +38,13 @@ else: path_suffix = "" +if src_type == 'vol': + src_suffix = '-vl.stc' + #if this_run == "ba+da__ba-da": + # src_suffix = '-vol.stc' # to read a previous version of saved results +elif src_type == 'surface': + src_suffix = '-lh.stc' + subjects = config.subjects if subjects == "all": @@ -92,6 +99,8 @@ stc_filename_prefix = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc') + if Path(stc_filename_prefix + '_ba' + src_suffix).exists(): + continue # Read data epochs = mne.read_epochs(epochs_fname) @@ -144,6 +153,10 @@ #evoked_allconds = epochs.average() #evoked_allconds.plot_joint() # average ERF across all conds + epochs.equalize_event_counts(['ba', 'da']) + epochs.equalize_event_counts(['interviewer_conversation', 'interviewer_repetition']) + epochs.equalize_event_counts(['participant_conversation', 'participant_repetition']) + # compute evoked for each cond (ba, da, conversation, repetition) evokeds = [] for cond in epochs.event_id: @@ -216,13 +229,6 @@ if this_run == "ba+da__ba-da": conds = ['ba+da', 'ba-da'] -if src_type == 'vol': - src_suffix = '-vl.stc' - #if this_run == "ba+da__ba-da": - # src_suffix = '-vol.stc' # to read a previous version of saved results -elif src_type == 'surface': - src_suffix = '-lh.stc' - # overwrite the paths here to use a previous version of results for participant-locked epochs #source_results_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj" #figures_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj/Figures_ROI" @@ -244,7 +250,7 @@ # divide by number of files stc /= len(stc_files) ''' - + # the above only seems to work for surface-based stcs, # so we are retaining the code below for now to handle vol-based stcs @@ -307,7 +313,7 @@ # Extract ROI time courses from source estimates window_size = 200 # sliding window size (ms) for calculating z-scores -window = window_size / 5 # 10 = 50ms, 20 = 100ms, 40 = 200ms +window = int(window_size / 5) # e.g. 
10 = 50ms, 20 = 100ms, 40 = 200ms (figures_ROI_dir / "all_ROIs").mkdir(parents=True, exist_ok=True) (figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms").mkdir(parents=True, exist_ok=True) @@ -324,6 +330,22 @@ 'ctx_lh_S_temporal_sup','ctx_rh_S_temporal_sup'] # can have multiple labels in this list #roi_idx = label_names.index(rois[0]) + # use the HCP-MMP parcellation + # create the volumetric atlas first: + # https://gist.github.com/larsoner/8e664205cd8285ca7c46211403ad12ce + ''' + # for some reason the label names are still not found, + # even though they come from the correct lut + fname_aseg = subjects_dir / subject / 'mri' / 'HCPMMP1_combined+aseg.mgz' # HCPMMP1 = cortical; aseg = subcortical + fname_lut = subjects_dir / subject / 'mri' / 'HCPMMP1_combinedColorLUT.txt' + lut = mne.read_freesurfer_lut(fname_lut) + label_names = mne.get_volume_labels_from_aseg(fname_aseg, atlas_ids=lut[0]) + rois = ['Anterior_Cingulate_and_Medial_Prefrontal_Cortex-lh', 'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-rh', + 'DorsoLateral_Prefrontal_Cortex-lh', 'DorsoLateral_Prefrontal_Cortex-rh', + 'Posterior_Cingulate_Cortex-lh', 'Posterior_Cingulate_Cortex-rh', + 'Temporo-Parieto-Occipital_Junction-lh', 'Temporo-Parieto-Occipital_Junction-rh'] + ''' + for label_name in rois: (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) (figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True) @@ -372,6 +394,7 @@ # Plot individual-subjects ROI time series for sub in use_subjects: fig, axes = plt.subplots(1, layout="constrained") + fig_z, axes_z = plt.subplots(1, layout="constrained") for cond in conds_ROI: stc_file = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc_{cond}{src_suffix}') stc = mne.read_source_estimate(stc_file) @@ -380,11 +403,21 @@ ) axes.plot(1e3 * stc.times, label_ts[0][0], label=cond) + # calculate z-score at each time point + #mu = np.mean(label_ts[0][0]) + #sigma = np.std(label_ts[0][0]) + #zscores = (label_ts - mu) / sigma + #axes_z.plot(1e3 * stc.times, zscores[0][0], label=cond) + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() + #axes_z.axvline(linestyle='-', color='k') # add verticle line at time 0 + #axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)") + #axes_z.legend() fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") + #fig_z.savefig(figures_ROI_zscores_dir / label_name / f"sub-{sub}.png") plt.close('all') elif src_type == 'surface': @@ -408,6 +441,26 @@ # or read a single label (e.g. 
V1, BA44, etc) #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label')) + + # use the HCP-MMP parcellation + # https://balsa.wustl.edu/WN56 + #mne.datasets.fetch_hcp_mmp_parcellation(subjects_dir=subjects_dir) + # can use the fine-grained (~360 labels) "HCPMMP1" or coarse (~44 labels) "HCPMMP1_combined" atlas + labels_parc = mne.read_labels_from_annot( + 'fsaverage', 'HCPMMP1_combined', subjects_dir=subjects_dir) + labels = [labels_parc[2], labels_parc[3], labels_parc[8], labels_parc[9], + labels_parc[30], labels_parc[31], labels_parc[42], labels_parc[43]] + #figures_ROI_dir = figures_ROI_dir.parent / (figures_ROI_dir.name + "_HCPMMP") + ''' + # to check the locations of ROIs + brain = mne.viz.Brain('fsaverage', 'lh', 'inflated', subjects_dir=subjects_dir, + cortex='low_contrast', background='white', size=(800, 600)) + #brain.add_annotation('HCPMMP1') # this adds "border lines" showing the parcellation + #aud_label = [label for label in labels if label.name == 'L_A1_ROI-lh'][0] + #brain.add_label(aud_label, borders=False) # this "colours in" the region + for label in labels: + brain.add_label(label) + ''' for label in labels: label_name = label.name @@ -421,19 +474,35 @@ # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") + fig_z, axes_z = plt.subplots(1, layout="constrained") for cond in conds_ROI: label_ts = mne.extract_label_time_course( [GA_stcs[cond]], label, src, mode="auto" ) - axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) + label_ts = label_ts[0][0] + axes.plot(1e3 * GA_stcs[cond].times, label_ts, label=cond) + + # calculate z-score at each time point (using a sliding time window) + mu = np.mean(label_ts) # demean is based on the whole epoch + label_ts = label_ts - mu + + zscores = [0] * (len(label_ts) - window) # initialise the z-scores array + for t in range(0, len(label_ts) - window): + sigma = np.std(label_ts[t:t+window], mean=0) # sigma is calculated on the time slice only, but need to manually set the mean to 0 + zscores[t] = label_ts[t] / sigma + axes_z.plot(1e3 * GA_stcs[cond].times[:len(zscores)], zscores, label=cond) axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() + axes_z.axvline(linestyle='-', color='k') # add verticle line at time 0 + axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)") + axes_z.legend() fig.savefig(figures_ROI_dir / label_name / "GA.png") fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder - plt.close(fig) + fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png") + plt.close('all') # Plot individual-subjects ROI time series for sub in use_subjects: From 024824adba9f10c545b2b7773d1605c0403d1a3e Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:52:47 +1200 Subject: [PATCH 19/20] Corrected for target frequency issue This update may not be needed, depending on the properties of the saved source files, but it does no damage to have the updated version. 
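A minimal sketch (not part of this patch) of how one might verify that assumption: load one of the saved ROI arrays and inspect its spectrum to see whether it is already confined to the target band. The file name below follows the naming convention used in the LCMV batch scripts and fs mirrors the value assumed in Connectivity.py's setup_dependencies(); both are illustrative assumptions here.

import numpy as np
from scipy.signal import welch

# hypothetical example file, named per the LCMV batch output convention
fname = "sub-01_task-participant_conversation_alpha_lcmv_beamformer_roi_time_courses.npy"
rois = np.load(fname)                  # shape: (n_ROIs, n_times)
fs = 250                               # sampling rate assumed in Connectivity.py
freqs, psd = welch(rois[0], fs=fs, nperseg=min(512, rois.shape[1]))
print(f"peak power at {freqs[np.argmax(psd)]:.1f} Hz")  # ~8-12 Hz if already alpha-band
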
---
 Connectivity.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Connectivity.py b/Connectivity.py
index e873e5a..1acb22e 100644
--- a/Connectivity.py
+++ b/Connectivity.py
@@ -27,13 +27,15 @@ def setup_dependencies(): # Change as desired
     return input_dir, output_dir, array_type, fs, target_fb, saving
 
 
-def prep_data(input_dir, array_type):
+def prep_data(input_dir, array_type, target_fb, fs):
     data = []
     folder = pathlib.Path(input_dir).glob('*')
     for file in folder:
         if (os.path.basename(file)).endswith('.npy'):
             if array_type in os.path.basename(file):
-                data.append(np.load(file))
+                array = np.load(file)
+                _, _, filtered_array = dyconnmap.analytic_signal(array, fb=target_fb, fs=fs)
+                data.append(filtered_array)
 
     return np.array(data)
 
@@ -79,7 +81,7 @@ def main(): # TODO: put in the elif statements
     # Prep
    Cal_wpli, Cal_graph_metrics = operations_to_perform()
    input_dir, output_dir, array_type, fs, target_fb, saving = setup_dependencies()
-    data = prep_data(input_dir, array_type)
+    data = prep_data(input_dir, array_type, target_fb, fs)
 
     # Core functions
     if Cal_wpli:
@@ -93,6 +95,7 @@ def main(): # TODO: put in the elif statements
         np.save(output_dir + '_All_Betweenness_' + array_type, Betweenness)
         np.save(output_dir + '_All_Eigenvector_' + array_type, Eigenvector)
         np.save(output_dir + '_All_Clustering_' + array_type, Clustering)
+        np.save(output_dir + '_Master_adj_matrix_' + array_type, adj_matrix)
 
 
 if __name__ == "__main__":

From b87503c9de646c5311716d4ff429c6fe143c93bf Mon Sep 17 00:00:00 2001
From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:59:54 +1000
Subject: [PATCH 20/20] FIX: use HCPMMP parcellation with volumetric source space

---
 traditional_source_analysis_and_ROIs.py | 49 +++++++++++++++----------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py
index 51d5d7b..935579f 100644
--- a/traditional_source_analysis_and_ROIs.py
+++ b/traditional_source_analysis_and_ROIs.py
@@ -321,7 +321,9 @@
     src = mne.read_source_spaces(src_fname)
 
     if src_type == 'vol':
-        # choose atlas for parcellation
+        # choose atlas
+
+        # can use aparc 2009 parcellation
         fname_aseg = subjects_dir / subject / 'mri' / 'aparc.a2009s+aseg.mgz' # aparc = cortical; aseg = subcortical
         label_names = mne.get_volume_labels_from_aseg(fname_aseg)
         rois = ['ctx_lh_G_cingul-Post-dorsal','ctx_lh_G_cingul-Post-ventral','ctx_rh_G_cingul-Post-dorsal','ctx_rh_G_cingul-Post-ventral',
                 'ctx_lh_S_temporal_sup','ctx_rh_S_temporal_sup'] # can have multiple labels in this list
         #roi_idx = label_names.index(rois[0])
 
-        # use the HCP-MMP parcellation
-        # create the volumetric atlas first:
+        # or use the HCP-MMP parcellation
+        # Note: need to create the volumetric atlas & custom lookup table (LUT) for this first - see link below:
         # https://gist.github.com/larsoner/8e664205cd8285ca7c46211403ad12ce
-        '''
-        # for some reason the label names are still not found,
-        # even though they come from the correct lut
         fname_aseg = subjects_dir / subject / 'mri' / 'HCPMMP1_combined+aseg.mgz' # HCPMMP1 = cortical; aseg = subcortical
         fname_lut = subjects_dir / subject / 'mri' / 'HCPMMP1_combinedColorLUT.txt'
         lut = mne.read_freesurfer_lut(fname_lut)
         label_names = mne.get_volume_labels_from_aseg(fname_aseg, atlas_ids=lut[0])
-        rois = ['Anterior_Cingulate_and_Medial_Prefrontal_Cortex-lh', 'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-rh',
-                'DorsoLateral_Prefrontal_Cortex-lh', 'DorsoLateral_Prefrontal_Cortex-rh',
-                'Posterior_Cingulate_Cortex-lh', 'Posterior_Cingulate_Cortex-rh',
-                'Temporo-Parieto-Occipital_Junction-lh', 'Temporo-Parieto-Occipital_Junction-rh']
-        '''
-
-        for label_name in rois:
-            (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
-            (figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True)
+        # need to supply a list of dicts here (rather than strings), as we are using a custom LUT
+        rois = [{'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-lh': 1001},
+                {'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-rh': 2001},
+                {'DorsoLateral_Prefrontal_Cortex-lh': 1004},
+                {'DorsoLateral_Prefrontal_Cortex-rh': 2004},
+                {'Posterior_Cingulate_Cortex-lh': 1015},
+                {'Posterior_Cingulate_Cortex-rh': 2015},
+                {'Temporo-Parieto-Occipital_Junction-lh': 1021},
+                {'Temporo-Parieto-Occipital_Junction-rh': 2021}]
+
+        for label in rois:
+            # check if label is a dict, if so then we extract the label_name here
+            if isinstance(label, dict):
+                label_name = list(label.keys())[0]
+
+            #(figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
+            #(figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True)
 
             # Plot GA ROI time series
             fig, axes = plt.subplots(1, layout="constrained")
             fig_z, axes_z = plt.subplots(1, layout="constrained")
             for cond in conds_ROI:
                 label_ts = mne.extract_label_time_course(
-                    [GA_stcs[cond]], (fname_aseg, label_name), src, mode="auto"
+                    [GA_stcs[cond]], (fname_aseg, label), src, mode="auto"
                 )
                 label_ts = label_ts[0][0]
                 axes.plot(1e3 * GA_stcs[cond].times, label_ts, label=cond)
@@ -386,7 +393,7 @@
             axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)")
             axes_z.legend()
 
-            fig.savefig(figures_ROI_dir / label_name / "GA.png")
+            #fig.savefig(figures_ROI_dir / label_name / "GA.png")
             fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder
             fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png")
             plt.close('all')
@@ -437,12 +444,14 @@
                   labels_parc[18]+labels_parc[20], labels_parc[19]+labels_parc[21],
                   labels_parc[50], labels_parc[51], labels_parc[58], labels_parc[59],
                   labels_parc[66], labels_parc[67], labels_parc[84], labels_parc[85],
                   labels_parc[144], labels_parc[145]
                   ]
+        # can check if a particular label is available in the atlas
+        #print([label for label in labels_parc if "fusiform" in label.name])
 
         # or read a single label (e.g. V1, BA44, etc)
         #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label'))
 
-        # use the HCP-MMP parcellation
+        # or use the HCP-MMP parcellation
         # https://balsa.wustl.edu/WN56
         #mne.datasets.fetch_hcp_mmp_parcellation(subjects_dir=subjects_dir)
         # can use the fine-grained (~360 labels) "HCPMMP1" or coarse (~44 labels) "HCPMMP1_combined" atlas
@@ -470,7 +479,7 @@
         if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh':
             label_name = 'G_cingul-Post-rh'
 
-        (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
+        #(figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
 
         # Plot GA ROI time series
         fig, axes = plt.subplots(1, layout="constrained")
@@ -499,7 +508,7 @@
         axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)")
         axes_z.legend()
 
-        fig.savefig(figures_ROI_dir / label_name / "GA.png")
+        #fig.savefig(figures_ROI_dir / label_name / "GA.png")
         fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder
         fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png")
         plt.close('all')
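
A note on the sliding-window z-scores added to the ROI time-course plots above:
the label time course is first demeaned against the whole-epoch mean, and each
sample is then divided by the standard deviation of the window of samples that
starts at that point, with the mean pinned at zero (the mean= keyword of np.std
used in the diff requires a recent NumPy, 2.0 or later). A small self-contained
sketch of the same computation that avoids the NumPy-2.0-only keyword; the
window length and the input series below are made up for illustration:

import numpy as np

def sliding_zscore(label_ts, window):
    # Mirror of the loop in the diff: demean against the whole epoch, then divide each
    # sample by the RMS of the `window` samples that follow it (std with the mean fixed at 0).
    ts = np.asarray(label_ts, dtype=float)
    ts = ts - ts.mean()
    zscores = np.zeros(len(ts) - window)
    for t in range(len(ts) - window):
        sigma = np.sqrt(np.mean(ts[t:t + window] ** 2))  # equivalent to np.std(..., mean=0)
        zscores[t] = ts[t] / sigma
    return zscores

# Example with made-up numbers: a 1000-sample time course and a 100-sample window.
rng = np.random.default_rng(0)
z = sliding_zscore(rng.standard_normal(1000), window=100)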