From 2c5b94cbb90a7bfc923febffe01ecfb26277715c Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:29:01 +1000 Subject: [PATCH 01/20] Rerun pipeline with 1hz high pass filter and add broadband decoding --- bidsify.py | 5 +++-- config.py | 5 +++-- csp.py | 14 ++++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/bidsify.py b/bidsify.py index b155ff5..957056b 100644 --- a/bidsify.py +++ b/bidsify.py @@ -114,11 +114,12 @@ analysis_root = share_root / "analysis" data_root = share_root / "data" del share_root -bids_root = analysis_root / f"{name}-bids" if platform.system() == 'Windows': analysis_root = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") data_root = Path("D:/Work/analysis_ME206/data") - bids_root = Path("E:/M3/Natural_Conversations_study/analysis") / f"{name}-bids" +analysis_root = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") +data_root = Path("/mnt/d/Work/analysis_ME206/data") +bids_root = analysis_root / f"{name}-bids" bids_root.mkdir(exist_ok=True) mne_bids.make_dataset_description( diff --git a/config.py b/config.py index c2a62da..8fb18ac 100644 --- a/config.py +++ b/config.py @@ -12,7 +12,7 @@ study_name = "natural-conversations" if platform.system() == 'Windows': - analysis_path = Path("E:/M3/Natural_Conversations_study/analysis") + analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") else: analysis_path = ( Path(__file__).parent / ".." / "Natural_Conversations_study" / "analysis" @@ -29,7 +29,7 @@ data_type = "meg" eeg_reference = "average" -l_freq = 0.5 +l_freq = 1.0 h_freq = 45.0 h_trans_bandwidth = 5 epochs_decim = 5 @@ -71,6 +71,7 @@ 'alpha': [8, 13], 'beta': [14, 30], 'gamma': [31, 45], + #'broadband': [4, 45], # makes the plot weird in html report due to overlapping freqs } # TFRs diff --git a/csp.py b/csp.py index 5f610bd..b32204e 100644 --- a/csp.py +++ b/csp.py @@ -51,6 +51,7 @@ ) csp_freqs = config.decoding_csp_freqs +csp_freqs['broadband'] = [4, 45] # add broadband decoding n_components = 4 random_state = 42 n_splits = 5 @@ -60,13 +61,13 @@ whiten = True # default is True rerun = False # force re-run / overwrite of existing files src_type = 'surf' # surf or vol -mode = 'glass_brain' # stat_map or glass_brain, vol source space plotting mode +mode = 'stat_map' # stat_map or glass_brain, vol source space plotting mode randomize = False # False or nonzero int, randomize the trial labels -decode = "participant" # "participant" or "interviewer" turns, or "bada" +decode = "interviewer" # "participant" or "interviewer" turns, or "bada" assert src_type in ("vol", "surf"), src_type assert decode in ("participant", "interviewer", "bada"), decode -mode_extra = f"_{mode[0:5]}" if src_type == "vol" else "" +mode_extra = f"_{mode[0:5]}" if src_type == "vol" and mode == "glass_brain" else "" plot_classification = True plot_indiv = False @@ -89,12 +90,13 @@ analysis_path = deriv_path = Path(__file__).parents[1] / "Natural_Conversations_study" / "analysis" if platform.system() == 'Windows': data_path = Path("D:/Work/analysis_ME206/data") - analysis_path = Path("E:/M3/Natural_Conversations_study/analysis") + analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" fig_path = analysis_path / "figures" / "CSP-decoding" if src_type == 'vol': fig_path = analysis_path / "figures" / "CSP-decoding-vol" + results_path = analysis_path / 
"results" / "CSP-decoding-vol" cop_path = analysis_path / "figures" subjects_dir = deriv_path / "freesurfer" / "subjects" if src_type == 'vol': @@ -390,6 +392,10 @@ def clean_brain(brain_img): this_data.reshape(-1, n_vertices).T, vertices=fs_vertices, tmin=0, tstep=1., subject="fsaverage", ) + # save the GA stc - this can be exported as Nifti, then loaded into other software (e.g. xjview) to extract anatomical labels + if subj_key == "": + results_path.mkdir(exist_ok=True) + stc.save(f'{results_path}/decoding{extra}_csp_GA') src = mne.read_source_spaces(src_fname) # for vol src, the plot function returns a matplotlib Figure - # we can't update the clim & time point for this once plotted, so do the actual plotting later From 4c509ca60ffeef5ee91d04cd9177417ef409e12d Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:29:24 +1000 Subject: [PATCH 02/20] Convert coordinates from MNI305 to MNI152 space --- MNI305to152.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 MNI305to152.py diff --git a/MNI305to152.py b/MNI305to152.py new file mode 100644 index 0000000..77136a6 --- /dev/null +++ b/MNI305to152.py @@ -0,0 +1,21 @@ +# Convert coordinates from MNI305 space to MNI152 space + +import numpy as np + +# Transformation matrix +# Ref: https://surfer.nmr.mgh.harvard.edu/fswiki/CoordinateSystems +M = np.array([ + [0.9975, -0.0073, 0.0176, -0.0429], + [0.0146, 1.0009, -0.0024, 1.5496], + [-0.0130, -0.0093, 0.9971, 1.1840] + ]) + +# Specify your RAS coordinates in MNI305 space +v = np.array([29, -40, -15, 1]) +v = v.transpose() + +# Tranform to MNI152 space +print(np.matmul(M, v)) + +# you can then input these coordinates into image viewing software (e.g. MRICron, xjview) +# to read out the corresponding anatomical label From 3b7638c508ff66f30fe16d806d0387de1c99a02f Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:29:52 +1000 Subject: [PATCH 03/20] Run source analysis and extract ROI time series --- traditional_source_analysis_and_ROIs.py | 400 ++++++++++++++++++++++++ 1 file changed, 400 insertions(+) create mode 100644 traditional_source_analysis_and_ROIs.py diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py new file mode 100644 index 0000000..7ce0e6c --- /dev/null +++ b/traditional_source_analysis_and_ROIs.py @@ -0,0 +1,400 @@ +# Run traditional source analysis (surface or volume-based) and save the STCs +# Compute and plot GAs +# Extract time courses from predefined ROIs + +import os +import os.path as op +import numpy as np +import matplotlib.pyplot as plt +import glob +import platform +from pathlib import Path + +import mne +from mne.minimum_norm import read_inverse_operator, apply_inverse +from mne.beamformer import make_lcmv, apply_lcmv + +import config # our config.py file + + +# specify source method etc +source_method = "dSPM" #"MNE" +src_type = 'vol' # 'vol' or 'surface' +spacing = "oct6" # for 'surface' source space only +if src_type != 'surface': + spacing = '' + +# specify how many SSP projectors to use for speech artifact removal +n_proj = 4 # 1 means: 1 for MEG & 1 for EEG + +this_run = f'{n_proj}-proj' +#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks + +# which conditions to compare in ROI analysis +conds_ROI = ['interviewer_conversation','interviewer_repetition'] #['participant_conversation','participant_repetition'] + +subjects = config.subjects +if subjects == "all": 
+ subjects = [f"{sub:02d}" for sub in range(1, 33)] +subjects = [subject for subject in subjects if subject not in config.exclude_subjects] +del config + +use_subjects = subjects # run all of them (could use e.g. subjects[2:3] just to run 03) +del subjects + +data_path = deriv_path = Path(__file__).parents[1] / "Natural_Conversations_study" / "data" +analysis_path = deriv_path = Path(__file__).parents[1] / "Natural_Conversations_study" / "analysis" +if platform.system() == 'Windows': + data_path = Path("D:/Work/analysis_ME206/data") + analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") +#data_path = Path("/mnt/d/Work/analysis_ME206/data") +#analysis_path = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") + +deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" +source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}" / this_run +figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run +# create the folders if needed +source_results_dir.mkdir(parents=True, exist_ok=True) +figures_dir.mkdir(parents=True, exist_ok=True) + +subjects_dir = deriv_path / "freesurfer" / "subjects" +subject = 'fsaverage' +if src_type == 'vol': + src_fname = subjects_dir / subject / "bem" / "fsaverage-vol-5-src.fif" + bem_fname = subjects_dir / subject / "bem" / "fsaverage-5120-5120-5120-bem-sol.fif" +else: + src_fname = subjects_dir / subject / "bem" / "fsaverage-oct6-src.fif" + +# adjust mne options to fix rendering issues (only needed in Linux / WSL) +mne.viz.set_3d_options(antialias = False, depth_peeling = False, + smooth_shading = False, multi_samples = 1) + + +# loop through the subjects we want to analyse +for sub in use_subjects: + path = deriv_path / 'mne-bids-pipeline' / f'sub-{sub}' / 'meg' + epochs_fname = path / f'sub-{sub}_task-conversation_proc-clean_epo.fif' + trans_fname = path / f'sub-{sub}_task-conversation_trans.fif' + fwd_fname = path / f'sub-{sub}_task-conversation_fwd.fif' + cov_fname = path / f'sub-{sub}_task-rest_proc-clean_cov.fif' + inv_fname = path / f'sub-{sub}_task-conversation_inv.fif' + proj_fname = path / f'sub-{sub}_task-conversation_proc-proj_proj.fif' + + stc_filename_prefix = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc') + + + # Read data + epochs = mne.read_epochs(epochs_fname) + + # Apply SSP projectors for speech artifact removal + if n_proj: + all_proj = mne.read_proj(proj_fname) + proj = list() + for ii, kind in enumerate(("MEG", "EEG")): + these_proj = all_proj[10*ii : 10*ii+n_proj] + proj.extend(these_proj) + del all_proj + epochs.add_proj(proj).apply_proj() + + epochs = epochs.pick(["meg", "eeg"], exclude="bads") + + ranks = mne.compute_rank(inst=epochs, tol=1e-3, tol_kind="relative") + rank = sum(ranks.values()) + print(f" Ranks={ranks} (total={rank})") + + if n_proj or src_type == 'vol': + # Recreate inverse taking into account additional projections + cov = mne.read_cov(cov_fname) + if src_type == 'vol': + src = mne.read_source_spaces(src_fname) + fwd = mne.make_forward_solution( + epochs.info, + trans=trans_fname, + src=src, + bem=bem_fname, + meg=True, + eeg=True, + ) + inv = mne.minimum_norm.make_inverse_operator( + epochs.info, fwd, cov, rank=ranks, + ) + else: # surface source space + fwd = mne.read_forward_solution(fwd_fname) + inv = mne.minimum_norm.make_inverse_operator( + epochs.info, fwd, cov, loose=0.2, depth=0.8, rank=ranks, + ) + else: # load the default inv solution (surface source space; no SSP 
projectors for speech artifact removal) + inv = mne.minimum_norm.read_inverse_operator(inv_fname) + + + # compute evoked (averaged over all conditions) + #evoked_allconds = epochs.average() + #evoked_allconds.plot_joint() # average ERF across all conds + + # compute evoked for each cond (ba, da, conversation, repetition) + evokeds = [] + for cond in epochs.event_id: + evokeds.append(epochs[cond].average()) + + # compute source timecourses + stcs = dict() + + for index, evoked in enumerate(evokeds): + cond = evoked.comment + + snr = 3. + lambda2 = 1. / snr ** 2 + stcs[cond], residual = apply_inverse(evoked, inv, lambda2, + method=source_method, pick_ori=None, + return_residual=True, verbose=True) + + # save the source estimates + stcs[cond].save(stc_filename_prefix + '_' + cond, overwrite=True) + + if this_run == "ba+da__ba-da": + # compute "ba+da" (i.e. combining the two conditions) + evoked_ba = epochs['ba'].average() + evoked_da = epochs['da'].average() + evoked_combined = mne.combine_evoked([evoked_ba, evoked_da], weights='equal') + stc_combined, residual = apply_inverse(evoked_combined, inv, lambda2, + method=source_method, pick_ori=None, + return_residual=True, verbose=True) + stc_combined.save(f'{stc_filename_prefix}_ba+da') + + # compute "ba-da" (i.e. difference between the two conditions) + evoked_subtracted = mne.combine_evoked([evoked_ba, evoked_da], weights=[1, -1]) + stc_subtracted, residual = apply_inverse(evoked_subtracted, inv, lambda2, + method=source_method, pick_ori=None, + return_residual=True, verbose=True) + stc_subtracted.save(f'{stc_filename_prefix}_ba-da') + + + # Plot the source timecourses + ''' + for index, evoked in enumerate(evokeds): + cond = evoked.comment + + hemi='both' + #vertno_max, time_max = stcs[cond].get_peak(hemi=hemi, tmin=0.1, tmax=0.27) + initial_time = 0.09 #time_max + surfer_kwargs = dict( + hemi=hemi, subjects_dir=subjects_dir, + initial_time=initial_time, + time_unit='s', title=subject + ' - ' + cond, + views=['lateral','medial'], #['caudal','ventral','lateral','medial'], + show_traces=False, + smoothing_steps=10) + brain = stcs[cond].plot(**surfer_kwargs) + #brain.add_foci(vertno_max, coords_as_verts=True, hemi=hemi, + # color='blue', scale_factor=0.6, alpha=0.5) + brain.save_image(op.join(figures_dir, "indi_subjects", subject + '-' + cond + '.png')) + #brain.save_movie(op.join(figures_dir, "indi_subjects", subject + '-' + cond + '-both.mov'), + # tmin=0, tmax=0.35, interpolation='linear', + # time_dilation=50, time_viewer=True) + + # close all figures before moving onto the next subject + mne.viz.close_all_3d_figures() + ''' + + +# Compute the grand average + +conds = ['ba','da','interviewer_conversation','interviewer_repetition','participant_conversation','participant_repetition'] +if this_run == "ba+da__ba-da": + conds = ['ba+da', 'ba-da'] + +if src_type == 'vol': + src_suffix = '-vl.stc' + #if this_run == "ba+da__ba-da": + # src_suffix = '-vol.stc' # to read a previous version of saved results +elif src_type == 'surface': + src_suffix = '-lh.stc' + +# overwrite the paths here to use a previous version of results for participant-locked epochs +#source_results_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj" +#figures_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj/Figures_ROI" + +GA_stcs = {} +for cond in conds: + # find all the saved stc results + stc_files = glob.glob(op.join(source_results_dir, 'sub*' + cond + src_suffix)) + # Note: for surface-based stcs, only need to supply the filename 
+ # for one hemisphere (e.g. '-lh.stc'), and it will look for both + # https://mne.tools/stable/generated/mne.read_source_estimate.html + + # initialise the sum array to correct size using first subject's stc + stc = mne.read_source_estimate(stc_files[0]) + stcs_sum = stc.data # this contains lh & rh vertices combined together + # there are also separate fields for the 2 hemis (stc.lh_data, stc.rh_data), + # but their content cannot be set directly, so just use the combined one + + # read in the stc for each subsequent subject, add to the sum array + for fname in stc_files[1:]: + stc = mne.read_source_estimate(fname) + stcs_sum = stcs_sum + stc.data + + # divide by number of files + stcs_GA = stcs_sum / len(stc_files) + + # feed into the dummy stc structure + stc.data = stcs_GA + GA_stcs[cond] = stc + + + # Plot the GAs + initial_time = 0.09 + + # Depending on the src type, use diff types of plots + if src_type == 'vol': + src = mne.read_source_spaces(src_fname) + fig = stc.plot(src=src, colormap="viridis", + subject='fsaverage', subjects_dir=subjects_dir, verbose=True, + initial_time=initial_time) + fig.savefig(op.join(figures_dir, 'GA_' + cond + '.png')) + plt.close(fig) + # seems like we can't save movies for these volume-based stcs + + elif src_type == 'surface': + hemi='both' + #vertno_max, time_max = stc.get_peak(hemi=hemi) + #initial_time = time_max + # to get auto clim for a particular time point, crop to a short interval around that time first + # (otherwise clim will be based on the peak activity over the entire epoch) + #stc = stc.crop(tmin=initial_time, tmax=initial_time+0.1) + surfer_kwargs = dict( + hemi=hemi, subject='fsaverage', subjects_dir=subjects_dir, + #clim=dict(kind='value', lims=[8, 12, 15]), + initial_time=initial_time, time_unit='s', + views=['lateral','medial'], show_traces=False, + size=(800, 800), smoothing_steps=10) + brain = stc.plot(**surfer_kwargs) + #brain.add_foci(vertno_max, coords_as_verts=True, hemi=hemi, color='blue', + # scale_factor=0.6, alpha=0.5) + brain.save_image(op.join(figures_dir, 'GA_' + str(initial_time) + 's_' + cond + '.png')) + brain.save_movie(op.join(figures_dir, 'GA-' + cond + '.mov'), + tmin=-1, tmax=1, interpolation='linear', + time_dilation=50, time_viewer=True) + + # Note: if there are any issues with the plots/movies (e.g. showing + # horizontal bands), it's probably a rendering issue in Linux. + # Try running this script in Windows/Mac! 
+ + plt.close('all') + + +# Extract ROI time courses from source estimates + +src = mne.read_source_spaces(src_fname) +Path(op.join(figures_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) + +if src_type == 'vol': + # choose atlas for parcellation + fname_aseg = op.join(subjects_dir, subject, 'mri', 'aparc.a2009s+aseg.mgz') # aparc = cortical; aseg = subcortical + label_names = mne.get_volume_labels_from_aseg(fname_aseg) + rois = ['ctx_lh_G_cingul-Post-dorsal','ctx_lh_G_cingul-Post-ventral','ctx_rh_G_cingul-Post-dorsal','ctx_rh_G_cingul-Post-ventral', + 'ctx_lh_G_pariet_inf-Supramar','ctx_rh_G_pariet_inf-Supramar','ctx_lh_G_precuneus','ctx_rh_G_precuneus', + 'ctx_lh_G_temp_sup-Lateral','ctx_rh_G_temp_sup-Lateral','ctx_lh_Pole_temporal','ctx_rh_Pole_temporal', + 'ctx_lh_S_temporal_sup','ctx_rh_S_temporal_sup'] # can have multiple labels in this list + #roi_idx = label_names.index(rois[0]) + + for label_name in rois: + Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + + # Plot GA ROI time series + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + label_ts = mne.extract_label_time_course( + [GA_stcs[cond]], (fname_aseg, label_name), src, mode="auto" + ) + axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + plt.close(fig) + + # Plot individual-subjects ROI time series + for sub in use_subjects: + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + stc_file = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc_{cond}{src_suffix}') + stc = mne.read_source_estimate(stc_file) + label_ts = mne.extract_label_time_course( + [stc], (fname_aseg, label_name), src, mode="auto" + ) + axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + plt.close(fig) + +elif src_type == 'surface': + # for surface source space, we need to create the Label object first + # by reading from .annot or .label file + # Can't use the mri file like above, as extract_label_time_course() will throw an error + # https://mne.tools/stable/generated/mne.extract_label_time_course.html + + # Get labels for FreeSurfer 'aparc' cortical parcellation (69 labels) + # https://freesurfer.net/fswiki/CorticalParcellation + #labels_parc = mne.read_labels_from_annot(subject, parc='aparc', subjects_dir=subjects_dir) # read all labels from annot file + #labels = [labels_parc[60], labels_parc[61]] # 60: left STG; 61: right STG + + # or use 'aparc.a2009s' parcellation (150 labels) + labels_parc = mne.read_labels_from_annot(subject, parc='aparc.a2009s', subjects_dir=subjects_dir) + labels = [ + labels_parc[18]+labels_parc[20], labels_parc[19]+labels_parc[21], labels_parc[50], labels_parc[51], + labels_parc[58], labels_parc[59], labels_parc[66], labels_parc[67], labels_parc[84], labels_parc[85], labels_parc[144], labels_parc[145] + ] + + # or read a single label (e.g. 
V1, BA44, etc) + #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label')) + + for label in labels: + label_name = label.name + # or set a custom name for combined labels + if label_name == 'G_cingul-Post-dorsal-lh + G_cingul-Post-ventral-lh': + label_name = 'G_cingul-Post-lh' + if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh': + label_name = 'G_cingul-Post-rh' + + Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + + # Plot GA ROI time series + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + label_ts = mne.extract_label_time_course( + [GA_stcs[cond]], label, src, mode="auto" + ) + axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + plt.close(fig) + + # Plot individual-subjects ROI time series + for sub in use_subjects: + fig, axes = plt.subplots(1, layout="constrained") + for cond in conds_ROI: + stc_file = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc_{cond}{src_suffix}') + stc = mne.read_source_estimate(stc_file) + label_ts = mne.extract_label_time_course( + [stc], label, src, mode="auto" + ) + axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) + + axes.axvline(linestyle='--') # add verticle line at time 0 + axes.set(xlabel="Time (ms)", ylabel="Activation") + axes.legend() + + fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + plt.close(fig) From 9b1272a4dd35dd23ffc45f10e0f4acceffd14f31 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:30:47 +1000 Subject: [PATCH 04/20] Update paths --- bidsify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bidsify.py b/bidsify.py index 957056b..1be7a4d 100644 --- a/bidsify.py +++ b/bidsify.py @@ -117,8 +117,8 @@ if platform.system() == 'Windows': analysis_root = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") data_root = Path("D:/Work/analysis_ME206/data") -analysis_root = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") -data_root = Path("/mnt/d/Work/analysis_ME206/data") +#analysis_root = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") +#data_root = Path("/mnt/d/Work/analysis_ME206/data") bids_root = analysis_root / f"{name}-bids" bids_root.mkdir(exist_ok=True) From ddc7842c4b5c441ff2fe6b555dcdf3a2c1920afa Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:37:22 +1000 Subject: [PATCH 05/20] Separate the interview-locked and participant-locked ROI figures --- traditional_source_analysis_and_ROIs.py | 30 +++++++++++++------------ 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index 7ce0e6c..3652a23 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -27,11 +27,12 @@ # specify how many SSP projectors to use for speech artifact removal n_proj = 4 # 1 means: 1 for MEG & 1 for EEG -this_run = f'{n_proj}-proj' -#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks - # which conditions to compare in ROI 
analysis -conds_ROI = ['interviewer_conversation','interviewer_repetition'] #['participant_conversation','participant_repetition'] +comparison = "participant" # "interviewer" or "participant" +conds_ROI = [f"{comparison}_conversation", f"{comparison}_repetition"] + +this_run = f"{n_proj}-proj" +#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks subjects = config.subjects if subjects == "all": @@ -53,9 +54,10 @@ deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}" / this_run figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run +figures_ROI_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run / f"{comparison}_ROI" # create the folders if needed source_results_dir.mkdir(parents=True, exist_ok=True) -figures_dir.mkdir(parents=True, exist_ok=True) +figures_ROI_dir.mkdir(parents=True, exist_ok=True) subjects_dir = deriv_path / "freesurfer" / "subjects" subject = 'fsaverage' @@ -285,7 +287,7 @@ # Extract ROI time courses from source estimates src = mne.read_source_spaces(src_fname) -Path(op.join(figures_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) +Path(op.join(figures_ROI_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) if src_type == 'vol': # choose atlas for parcellation @@ -298,7 +300,7 @@ #roi_idx = label_names.index(rois[0]) for label_name in rois: - Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -312,8 +314,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -331,7 +333,7 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) plt.close(fig) elif src_type == 'surface': @@ -363,7 +365,7 @@ if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh': label_name = 'G_cingul-Post-rh' - Path(op.join(figures_dir, label_name)).mkdir(parents=True, exist_ok=True) + Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -377,8 +379,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) + fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -396,5 +398,5 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_dir, label_name, "sub-" + sub + ".png")) + 
fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) plt.close(fig) From c4895d4e18f1d8f0c341c63b3e120d8e7a14f480 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:29:14 +1000 Subject: [PATCH 06/20] Tidying up --- traditional_source_analysis_and_ROIs.py | 42 +++++++++++-------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index 3652a23..d4c8be0 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -104,7 +104,8 @@ rank = sum(ranks.values()) print(f" Ranks={ranks} (total={rank})") - if n_proj or src_type == 'vol': + # TEMP - always recreate inv for now, until we change to free orientation in the pipeline + if 1: #n_proj or src_type == 'vol': # Recreate inverse taking into account additional projections cov = mne.read_cov(cov_fname) if src_type == 'vol': @@ -123,7 +124,7 @@ else: # surface source space fwd = mne.read_forward_solution(fwd_fname) inv = mne.minimum_norm.make_inverse_operator( - epochs.info, fwd, cov, loose=0.2, depth=0.8, rank=ranks, + epochs.info, fwd, cov, loose=1., depth=0.8, rank=ranks, # use free orientation, so that the surface- and volume-based results are more comparable ) else: # load the default inv solution (surface source space; no SSP projectors for speech artifact removal) inv = mne.minimum_norm.read_inverse_operator(inv_fname) @@ -226,20 +227,15 @@ # initialise the sum array to correct size using first subject's stc stc = mne.read_source_estimate(stc_files[0]) - stcs_sum = stc.data # this contains lh & rh vertices combined together - # there are also separate fields for the 2 hemis (stc.lh_data, stc.rh_data), - # but their content cannot be set directly, so just use the combined one # read in the stc for each subsequent subject, add to the sum array for fname in stc_files[1:]: - stc = mne.read_source_estimate(fname) - stcs_sum = stcs_sum + stc.data + stc += mne.read_source_estimate(fname) # divide by number of files - stcs_GA = stcs_sum / len(stc_files) + stc /= len(stc_files) - # feed into the dummy stc structure - stc.data = stcs_GA + # store in the GA struct GA_stcs[cond] = stc @@ -252,7 +248,7 @@ fig = stc.plot(src=src, colormap="viridis", subject='fsaverage', subjects_dir=subjects_dir, verbose=True, initial_time=initial_time) - fig.savefig(op.join(figures_dir, 'GA_' + cond + '.png')) + fig.savefig(figures_dir / f'GA_{cond}.png') plt.close(fig) # seems like we can't save movies for these volume-based stcs @@ -272,8 +268,8 @@ brain = stc.plot(**surfer_kwargs) #brain.add_foci(vertno_max, coords_as_verts=True, hemi=hemi, color='blue', # scale_factor=0.6, alpha=0.5) - brain.save_image(op.join(figures_dir, 'GA_' + str(initial_time) + 's_' + cond + '.png')) - brain.save_movie(op.join(figures_dir, 'GA-' + cond + '.mov'), + brain.save_image(figures_dir / f'GA_{str(initial_time)}s_{cond}.png') + brain.save_movie(figures_dir / f'GA-{cond}.mov', tmin=-1, tmax=1, interpolation='linear', time_dilation=50, time_viewer=True) @@ -287,11 +283,11 @@ # Extract ROI time courses from source estimates src = mne.read_source_spaces(src_fname) -Path(op.join(figures_ROI_dir, "all_ROIs")).mkdir(parents=True, exist_ok=True) +(figures_ROI_dir / "all_ROIs").mkdir(parents=True, exist_ok=True) if src_type == 'vol': # choose atlas for parcellation - fname_aseg = op.join(subjects_dir, subject, 'mri', 'aparc.a2009s+aseg.mgz') # aparc = cortical; aseg = subcortical 
+ fname_aseg = subjects_dir / subject / 'mri' / 'aparc.a2009s+aseg.mgz' # aparc = cortical; aseg = subcortical label_names = mne.get_volume_labels_from_aseg(fname_aseg) rois = ['ctx_lh_G_cingul-Post-dorsal','ctx_lh_G_cingul-Post-ventral','ctx_rh_G_cingul-Post-dorsal','ctx_rh_G_cingul-Post-ventral', 'ctx_lh_G_pariet_inf-Supramar','ctx_rh_G_pariet_inf-Supramar','ctx_lh_G_precuneus','ctx_rh_G_precuneus', @@ -300,7 +296,7 @@ #roi_idx = label_names.index(rois[0]) for label_name in rois: - Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) + (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -314,8 +310,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(figures_ROI_dir / label_name / "GA.png") + fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -333,7 +329,7 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) + fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") plt.close(fig) elif src_type == 'surface': @@ -365,7 +361,7 @@ if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh': label_name = 'G_cingul-Post-rh' - Path(op.join(figures_ROI_dir, label_name)).mkdir(parents=True, exist_ok=True) + (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") @@ -379,8 +375,8 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "GA.png")) - fig.savefig(op.join(figures_ROI_dir, "all_ROIs", label_name + "_GA.png")) # to save an additional copy of all GA plots into one folder + fig.savefig(figures_ROI_dir / label_name / "GA.png") + fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder plt.close(fig) # Plot individual-subjects ROI time series @@ -398,5 +394,5 @@ axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() - fig.savefig(op.join(figures_ROI_dir, label_name, "sub-" + sub + ".png")) + fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") plt.close(fig) From f2f36fc45cd12763e9415a51f21f00645eaa5c73 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Wed, 24 Jul 2024 11:39:47 +1200 Subject: [PATCH 07/20] initial commit of Paul's beamformer code --- LCMV_conversation_batch.py | 451 +++++++++++++++++++++++++++++ LCMV_conversation_grand_average.py | 74 +++++ LCMV_conversation_summarise.py | 187 ++++++++++++ plot_stc.py | 20 ++ 4 files changed, 732 insertions(+) create mode 100644 LCMV_conversation_batch.py create mode 100644 LCMV_conversation_grand_average.py create mode 100644 LCMV_conversation_summarise.py create mode 100644 plot_stc.py diff --git a/LCMV_conversation_batch.py b/LCMV_conversation_batch.py new file mode 100644 index 0000000..082a533 --- /dev/null +++ b/LCMV_conversation_batch.py @@ -0,0 +1,451 @@ +import os +import mne +import numpy as np +import h5py +from mne.beamformer import apply_lcmv_epochs, make_lcmv +from mne.cov import compute_covariance +from scipy.signal 
import hilbert + +# Configuration options - change as needed saving +# cropped data takes up a lot of space +EQUALIZE_EVENT_COUNTS = False +SAVE_CROPPED_DATA_H5 = False + + +def average_stc_in_time(stc, window_size=0.1): + """ + Average the SourceEstimate in the time domain. + + Parameters: + ----------- + stc : mne.SourceEstimate + The source estimate to average. + window_size : float + The size of the averaging window in seconds. + + Returns: + -------- + mne.SourceEstimate + A new SourceEstimate with time-averaged data. + """ + sfreq = 1 / stc.tstep + n_samples = int(window_size * sfreq) + + # Calculate the number of full windows + n_windows = ( + len(stc.times) - 1 + ) // n_samples # -1 to ensure we don't exceed the time range + + # Prepare the data for averaging + data_to_average = stc.data[:, : n_windows * n_samples] + + # Reshape and average + averaged_data = data_to_average.reshape( + data_to_average.shape[0], n_windows, n_samples + ).mean(axis=2) + + # Create new time array + new_times = np.arange(n_windows) * window_size + stc.tmin + (window_size / 2) + + return mne.SourceEstimate( + averaged_data, + vertices=stc.vertices, + tmin=new_times[0], + tstep=window_size, + subject=stc.subject, + ) + + +def is_subject_processed(subject, output_dir): + # Check if all expected output files exist for the subject + frequency_bands = ["broadband", "alpha", "beta", "gamma"] + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] + + for band in frequency_bands: + for condition in conditions: + roi_fname = f"{subject}_task-{condition}_{band}_lcmv_beamformer_roi_time_courses.npy" + stc_fname = ( + f"{subject}_task-{condition}_{band}_lcmv_beamformer_averaged-stc-lh.stc" + ) + if not ( + os.path.exists(os.path.join(output_dir, roi_fname)) + and os.path.exists(os.path.join(output_dir, stc_fname)) + ): + return False + return True + + +def setup_directories(): + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4" + os.makedirs(output_dir, exist_ok=True) + return data_dir, output_dir + + +def load_data(subject, data_dir): + fwd_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" + ) + epochs_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" + ) + noise_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" + ) + + if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): + return None + + fwd = mne.read_forward_solution(fwd_fname) + fwd_fixed = mne.convert_forward_solution( + fwd, surf_ori=True, force_fixed=False, use_cps=True + ) # Convert to fixed orientation - necessary for LCMV ori = normal + all_epochs = mne.read_epochs(epochs_fname) + noise_raw = mne.io.read_raw_fif(noise_fname) + return fwd_fixed, all_epochs, noise_raw + + +def prepare_epochs(all_epochs, noise_raw): + print(f"Initial all_epochs: {len(all_epochs)}") + + sfreq = all_epochs.info["sfreq"] + ch_names = all_epochs.ch_names + tmin, tmax = all_epochs.tmin, all_epochs.tmax + n_samples = len(all_epochs.times) + + if noise_raw.info["sfreq"] != sfreq: + noise_raw = noise_raw.resample(sfreq) + + print(f"Task epochs - tmin: {tmin}, tmax: {tmax}, n_samples: {n_samples}") + + noise_events = mne.make_fixed_length_events(noise_raw, 
duration=tmax - tmin) + noise_epochs = mne.Epochs( + noise_raw, + noise_events, + event_id={"noise": 1}, + tmin=tmin, + tmax=tmax, + baseline=None, + preload=True, + ) + + print(f"Initial noise_epochs: {len(noise_epochs)}") + print( + f"Noise epochs - tmin: {noise_epochs.tmin}, tmax: {noise_epochs.tmax}, n_samples: {len(noise_epochs.times)}" + ) + + noise_epochs = noise_epochs.copy().pick(ch_names) + + if len(all_epochs.times) > len(noise_epochs.times): + print( + f"Cropping task epochs from {len(all_epochs.times)} to {len(noise_epochs.times)} samples" + ) + all_epochs = all_epochs.copy().crop( + tmin=tmin, tmax=tmin + (len(noise_epochs.times) - 1) / sfreq + ) + + print(f"After cropping - Task epochs n_samples: {len(all_epochs.times)}") + print(f"After cropping - Noise epochs n_samples: {len(noise_epochs.times)}") + + all_epochs.metadata = None + noise_epochs.metadata = None + + if not np.allclose(all_epochs.times, noise_epochs.times): + raise ValueError("Time points still don't match after cropping") + + combined_epochs = mne.concatenate_epochs([all_epochs, noise_epochs]) + + print(f"Final combined_epochs: {len(combined_epochs)}") + print(f"Time points: {len(combined_epochs.times)}") + + if EQUALIZE_EVENT_COUNTS: + combined_epochs.equalize_event_counts() + return combined_epochs + + +def filter_data(epochs, fmin, fmax): + """Filters epochs in the specified frequency band.""" + return epochs.copy().filter(fmin, fmax) + + +def create_and_apply_beamformer(epochs_filt, fwd, noise_epochs_filt): + data_cov = compute_covariance(epochs_filt, tmin=None, tmax=None, method="empirical") + noise_cov = compute_covariance( + noise_epochs_filt, tmin=None, tmax=None, method="empirical" + ) + filters_lcmv = make_lcmv( + epochs_filt.info, + fwd, + data_cov=data_cov, + noise_cov=noise_cov, + reg=0.05, + pick_ori="normal", + ) + return apply_lcmv_epochs(epochs_filt, filters_lcmv, return_generator=True) + + +def stc_to_matrix(stc, parcellation): + """Parcellate a SourceEstimate and return a matrix of ROI time courses.""" + roi_time_courses = [ + np.mean(stc.in_label(label).data, axis=0) for label in parcellation + ] + return np.array(roi_time_courses) + + +def load_parcellation(): + subjects_dir = os.environ.get( + "SUBJECTS_DIR", + "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects", + ) + return mne.read_labels_from_annot( + "fsaverage", parc="HCPMMP1", subjects_dir=subjects_dir, hemi="both" + ) + + +def compute_source_estimate( + epochs_stcs, fwd, epochs, subject, output_dir, condition, band_name, parcellation +): + n_sources = fwd["nsource"] + n_times = len(epochs.times) + averaged_data = np.zeros((n_sources, n_times), dtype=complex) + all_data = [] + n_epochs = 0 + + vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] + + if SAVE_CROPPED_DATA_H5: + h5_filename = f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + h5_filepath = os.path.join(output_dir, h5_filename) + h5f = h5py.File(h5_filepath, "w") + + crop_time = 0.1 # in seconds + crop_samples = int(crop_time * epochs.info["sfreq"]) + + # Calculate the new time array after cropping + cropped_times = epochs.times[crop_samples:-crop_samples] + + for i, stc in enumerate(epochs_stcs): + cropped_data = stc.data[:, crop_samples:-crop_samples] + analytic_signal = hilbert(cropped_data, axis=1) + averaged_data[:, crop_samples:-crop_samples] += analytic_signal + all_data.append(analytic_signal) 
+ n_epochs += 1 + + if SAVE_CROPPED_DATA_H5: + h5f.create_dataset(f"epoch_{i}", data=cropped_data) + + if SAVE_CROPPED_DATA_H5: + h5f.attrs["subject"] = subject + h5f.attrs["condition"] = condition + h5f.attrs["band_name"] = band_name + h5f.attrs["n_epochs"] = n_epochs + h5f.close() + + if n_epochs == 0: + raise ValueError("No epochs were processed") + + averaged_data /= n_epochs + envelope = np.abs(averaged_data) + + # Use the cropped time array for tmin and adjust the data shape + averaged_stc = mne.SourceEstimate( + envelope, + vertices=[vertices_lh, vertices_rh], + tmin=epochs.times[crop_samples], + tstep=epochs.times[1] - epochs.times[0], + subject="fsaverage", + ) + + # Create time-domain averaged version + time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) + + # Compute ROI time courses for both original and time-averaged data + roi_time_courses = stc_to_matrix(averaged_stc, parcellation) + time_averaged_roi_time_courses = stc_to_matrix(time_averaged_stc, parcellation) + + # Save original ROI time courses + roi_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_roi_time_courses.npy" + ) + np.save(os.path.join(output_dir, roi_fname), roi_time_courses) + + # Save time-averaged ROI time courses + time_avg_roi_fname = f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged_roi_time_courses.npy" + np.save( + os.path.join(output_dir, time_avg_roi_fname), time_averaged_roi_time_courses + ) + + # Save original SourceEstimate + averaged_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_averaged-stc" + ) + averaged_stc.save(os.path.join(output_dir, averaged_fname), overwrite=True) + + # Save time-averaged SourceEstimate + time_avg_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged-stc" + ) + time_averaged_stc.save(os.path.join(output_dir, time_avg_fname), overwrite=True) + + print( + f"Saved original SourceEstimate to {os.path.join(output_dir, averaged_fname)}" + ) + print( + f"Saved time-averaged SourceEstimate to {os.path.join(output_dir, time_avg_fname)}" + ) + print(f"Saved original ROI time courses to {os.path.join(output_dir, roi_fname)}") + print( + f"Saved time-averaged ROI time courses to {os.path.join(output_dir, time_avg_roi_fname)}" + ) + + return ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) + + +def process_subject(subject, data_dir, output_dir): + try: + fwd, all_epochs, noise_raw = load_data(subject, data_dir) + if fwd is None: + print(f"Skipping {subject}: Missing required files") + return + + print(f"All epochs info:") + print(f"Number of epochs: {len(all_epochs)}") + print(f"Time points: {len(all_epochs.times)}") + print(f"tmin: {all_epochs.tmin}, tmax: {all_epochs.tmax}") + + combined_epochs = prepare_epochs(all_epochs, noise_raw) + parcellation = load_parcellation() + + frequency_bands = { + "broadband": (1, 40), + "alpha": (8, 12), + "beta": (13, 30), + "gamma": (30, 40), + } + + for band_name, (fmin, fmax) in frequency_bands.items(): + print(f"Processing {band_name} band ({fmin}-{fmax} Hz)") + filtered_epochs = filter_data(combined_epochs, fmin, fmax) + + averaged_stc_dict = {} + roi_time_courses_dict = {} + time_averaged_stc_dict = {} + time_averaged_roi_time_courses_dict = {} + + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] + + for condition in conditions: + condition_epochs = filtered_epochs[condition] + 
noise_epochs = filtered_epochs["noise"] + + epochs_stcs = create_and_apply_beamformer( + condition_epochs, fwd, noise_epochs + ) + ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) = compute_source_estimate( + epochs_stcs, + fwd, + condition_epochs, + subject, + output_dir, + condition, + band_name, + parcellation, + ) + + averaged_stc_dict[condition] = averaged_stc + roi_time_courses_dict[condition] = roi_time_courses + time_averaged_stc_dict[condition] = time_averaged_stc + time_averaged_roi_time_courses_dict[condition] = ( + time_averaged_roi_time_courses + ) + + diff_pairs = [ + ("interviewer_conversation", "interviewer_repetition"), + ("participant_conversation", "participant_repetition"), + ] + + for cond1, cond2 in diff_pairs: + # Original difference + diff_stc = averaged_stc_dict[cond1] - averaged_stc_dict[cond2] + diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_difference-stc", + ) + diff_stc.save(diff_fname, overwrite=True) + + # Time-averaged difference + time_avg_diff_stc = ( + time_averaged_stc_dict[cond1] - time_averaged_stc_dict[cond2] + ) + time_avg_diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_time_averaged_difference-stc", + ) + time_avg_diff_stc.save(time_avg_diff_fname, overwrite=True) + + print(f"Finished processing {subject}") + except Exception as e: + print(f"Error processing {subject}: {str(e)}") + import traceback + + traceback.print_exc() + + +def update_progress(subject, output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + with open(progress_file, "a") as f: + f.write(f"{subject}\n") + + +def get_processed_subjects(output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + if os.path.exists(progress_file): + with open(progress_file, "r") as f: + return set(line.strip() for line in f) + return set() + + +def main(): + data_dir, output_dir = setup_directories() + subject_dirs = [ + d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit() + ] + + processed_subjects = get_processed_subjects(output_dir) + + for subject in subject_dirs: + if subject in processed_subjects or is_subject_processed(subject, output_dir): + print(f"Skipping {subject}: Already processed") + continue + process_subject(subject, data_dir, output_dir) + update_progress(subject, output_dir) + + print("All subjects processed") + + +if __name__ == "__main__": + main() diff --git a/LCMV_conversation_grand_average.py b/LCMV_conversation_grand_average.py new file mode 100644 index 0000000..7f52ad4 --- /dev/null +++ b/LCMV_conversation_grand_average.py @@ -0,0 +1,74 @@ +import os +import mne +import numpy as np +from glob import glob +import re + +# Set the path to the folder containing the .stc files +stc_folder = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_analysis" + +# Create a subfolder for average STCs +average_folder = os.path.join(stc_folder, "average_stc") +os.makedirs(average_folder, exist_ok=True) + + +# Function to extract subject number and condition from filename +def parse_filename(filename): + # Extract subject number + match = re.search(r"sub-(\d+)", filename) + if match: + subject_number = int(match.group(1)) + else: + raise ValueError(f"Cannot extract subject number from filename: {filename}") + + # Extract condition (everything after 'sub-XX_' and before '.stc') + condition = 
filename.split("sub-")[1].split(".stc")[0].split("_", 1)[1] + + return subject_number, condition + + +# Get all .stc files +all_files = glob(os.path.join(stc_folder, "sub-*_*.stc")) + +# Extract all unique conditions +conditions = list(set([parse_filename(f)[1] for f in all_files])) + +# Process each condition +for condition in conditions: + print(f"Processing condition: {condition}") + + # Get all .stc files for this condition + condition_files = [f for f in all_files if parse_filename(f)[1] == condition] + + # Sort files by subject number + condition_files.sort(key=lambda f: parse_filename(f)[0]) + + # Read all STCs for this condition + stcs = [mne.read_source_estimate(f) for f in condition_files] + + # Check if all STCs have the same number of time points + n_times = [stc.data.shape[1] for stc in stcs] + if len(set(n_times)) > 1: + raise ValueError("Not all STCs have the same number of time points") + + # Stack the data from all subjects + data = np.stack([stc.data for stc in stcs]) + + # Calculate the average across subjects + avg_data = np.mean(data, axis=0) + + # Create a new STC with the averaged data + avg_stc = mne.SourceEstimate( + data=avg_data, + vertices=stcs[0].vertices, + tmin=stcs[0].tmin, + tstep=stcs[0].tstep, + subject=stcs[0].subject, + ) + + # Save the averaged STC in the new subfolder + avg_stc.save(os.path.join(average_folder, f"avg_{condition}"), overwrite=True) + +print( + "Averaging complete for all conditions. Results saved in 'average_stc' subfolder." +) diff --git a/LCMV_conversation_summarise.py b/LCMV_conversation_summarise.py new file mode 100644 index 0000000..c3501e3 --- /dev/null +++ b/LCMV_conversation_summarise.py @@ -0,0 +1,187 @@ +import os +import mne +import numpy as np +from scipy import stats +from mne.stats import spatio_temporal_cluster_1samp_test, summarize_clusters_stc +from scipy.ndimage import label + +# Define constants +SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" +STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_analysis" +SRC_FNAME = os.path.join(SUBJECTS_DIR, "fsaverage/bem/fsaverage-oct6-src.fif") +FREQUENCY_BANDS = ["broadband"] + + +def load_stcs(directory, band): + """Load Source Time Courses (STCs) for a given frequency band and crop them.""" + stcs = [] + for filename in os.listdir(directory): + if filename.endswith( + f"{band}_lcmv_beamformer_participant_conversation_vs_participant_repetition_difference-stc-rh.stc" + ): + stc_path = os.path.join(directory, filename) + stc = mne.read_source_estimate(stc_path) + + # Crop the STC + cropped_stc = stc.crop(tmin=0.25, tmax=0.75) + + stcs.append(cropped_stc) + return stcs + + +def process_frequency_band(band, src): + """Process data for a single frequency band and return aggregated timeseries for significant clusters.""" + print(f"Processing {band} band") + + stcs = load_stcs(STC_DIR, band) + if not stcs: + print(f"No STCs found for {band} band") + return None + + avg_stc = sum(stcs) / len(stcs) + + X = np.array([stc.data for stc in stcs]) + X = np.transpose(X, [0, 2, 1]) + + t_threshold = stats.distributions.t.ppf(1 - 0.001, len(X) - 1) + adjacency = mne.spatial_src_adjacency(src[:]) + + cluster_stats = spatio_temporal_cluster_1samp_test( + X, + threshold=t_threshold, + adjacency=adjacency, + n_jobs=-1, + buffer_size=None, + n_permutations=50, + ) + + t_vals, clusters, 
cluster_pvals, H0 = cluster_stats + + # Find significant clusters + good_cluster_inds = np.where(cluster_pvals < 0.05)[0] + + if len(good_cluster_inds) == 0: + print("No significant clusters found.") + return None + + # Create a 3D mask for significant clusters + mask = np.zeros(t_vals.shape, dtype=bool) + for cluster_ind in good_cluster_inds: + mask[clusters[cluster_ind]] = True + + # Label connected components in the mask + labeled_mask, num_clusters = label(mask) + + # Initialize a dictionary to store aggregated timeseries for each cluster + cluster_timeseries = {} + + for i in range(1, num_clusters + 1): + cluster_mask = labeled_mask == i + cluster_data = t_vals[cluster_mask].reshape(-1, t_vals.shape[2]) + + # Aggregate timeseries for this cluster (using mean, but you could use median or other methods) + aggregated_timeseries = np.mean(cluster_data, axis=0) + + # Store the aggregated timeseries + cluster_timeseries[f"cluster_{i}"] = aggregated_timeseries + + # Create a new STC with one timeseries per cluster + n_clusters = len(cluster_timeseries) + aggregated_data = np.zeros((n_clusters, t_vals.shape[2])) + for i, timeseries in enumerate(cluster_timeseries.values()): + aggregated_data[i, :] = timeseries + + # Use the first vertex of each hemisphere as placeholder locations + vertices = [ + src[0]["vertno"][:n_clusters], + src[1]["vertno"][:0], + ] # All in left hemisphere for simplicity + + aggregated_stc = mne.SourceEstimate( + aggregated_data, + vertices=vertices, + tmin=avg_stc.tmin, + tstep=avg_stc.tstep, + subject=avg_stc.subject, + ) + + # Save aggregated STC + aggregated_fname = os.path.join( + STC_DIR, f"aggregated_clusters_{band}_lcmv_beamformer-stc" + ) + aggregated_stc.save(aggregated_fname) + print(f"Saved aggregated SourceEstimate to {aggregated_fname}") + + visualize_clusters(cluster_stats, stcs, src) + + return aggregated_stc, cluster_timeseries + + +def visualize_clusters(cluster_stats, stcs, src): + """Visualize cluster results.""" + print("Visualizing clusters.") + tmin = stcs[0].tmin * 100 + tstep = stcs[0].tstep * 100 # convert to milliseconds + + fsave_vertices = [s["vertno"] for s in src] + stc_all_cluster_vis = summarize_clusters_stc( + cluster_stats, tstep=tstep, vertices=fsave_vertices, subject="fsaverage" + ) + plot_fmax = np.max(stc_all_cluster_vis.data) * 1.2 + brain = stc_all_cluster_vis.plot( + hemi="both", + views="lateral", + brain_kwargs=dict(show=False), + add_data_kwargs=dict( + fmin=plot_fmax / 10, + fmid=plot_fmax / 2, + fmax=plot_fmax, + scale_factor=0.0001, + colorbar_kwargs=dict(label_font_size=10), + ), + subjects_dir=SUBJECTS_DIR, + time_label="temporal extent (ms)", + size=(800, 800), + ) + + +def plot_average_stc(avg_stc): + """Plot the average Source Time Course.""" + plot_fmax = np.max(avg_stc.data) * 0.8 + brain = avg_stc.plot( + subject="fsaverage", + hemi="both", + views="dorsal", + initial_time=1.2, + brain_kwargs=dict(show=False), + add_data_kwargs=dict( + fmin=plot_fmax / 10, + fmid=plot_fmax / 2, + fmax=plot_fmax, + scale_factor=0.0001, + colorbar_kwargs=dict(label_font_size=10), + ), + ) + + +def main(): + src = mne.read_source_spaces(SRC_FNAME) + aggregated_stcs = {} + all_cluster_timeseries = {} + for band in FREQUENCY_BANDS: + result = process_frequency_band(band, src) + if result is not None: + aggregated_stc, cluster_timeseries = result + aggregated_stcs[band] = aggregated_stc + all_cluster_timeseries[band] = cluster_timeseries + + print("Analysis complete") + return aggregated_stcs, all_cluster_timeseries + + +if __name__ == 
"__main__": + aggregated_stcs, all_cluster_timeseries = main() + + +if __name__ == "__main__": + main() diff --git a/plot_stc.py b/plot_stc.py new file mode 100644 index 0000000..acd23dc --- /dev/null +++ b/plot_stc.py @@ -0,0 +1,20 @@ +import mne +import os + +SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" +STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4/" +DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_time_averaged-stc-lh.stc" + + +# Load the saved STC file (replace with your actual file path) +stc_file = os.path.join(STC_DIR, DATA_FILE) +stc = mne.read_source_estimate(stc_file) + +stc.plot( + subjects_dir=SUBJECTS_DIR, + subject="fsaverage", + initial_time=0.1, + hemi="both", + views="lateral", + clim=dict(kind="value", lims=[3, 6, 9]), +) From 91baa58a5579475423bf9854ef6d407aa4f877d1 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 10:54:07 +1200 Subject: [PATCH 08/20] fix time axis --- LCMV_CSF_conversation_batch.py | 456 +++++++++++++++++++++++++++++ LCMV_conversation_batch.py | 89 +++--- LCMV_conversation_grand_average.py | 2 +- plot_stc.py | 4 +- 4 files changed, 502 insertions(+), 49 deletions(-) create mode 100644 LCMV_CSF_conversation_batch.py diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py new file mode 100644 index 0000000..2e46966 --- /dev/null +++ b/LCMV_CSF_conversation_batch.py @@ -0,0 +1,456 @@ +import os +import mne +import numpy as np +import h5py +from mne.beamformer import apply_lcmv_epochs, make_lcmv +from mne.cov import compute_covariance +from scipy.signal import hilbert + +# Configuration options - change as needed saving +# cropped data takes up a lot of space +EQUALIZE_EVENT_COUNTS = False +SAVE_CROPPED_DATA_H5 = False + + +def setup_directories(): + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1" + os.makedirs(output_dir, exist_ok=True) + return data_dir, output_dir + + +def load_data(subject, data_dir): + fwd_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" + ) + epochs_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" + ) + noise_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" + ) + + if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): + return None + + fwd = mne.read_forward_solution(fwd_fname) + fwd_fixed = mne.convert_forward_solution( + fwd, surf_ori=True, force_fixed=False, use_cps=True + ) # Convert to fixed orientation - necessary for LCMV ori = normal + all_epochs = mne.read_epochs(epochs_fname) + noise_raw = mne.io.read_raw_fif(noise_fname) + return fwd_fixed, all_epochs, noise_raw + + +def average_stc_in_time(stc, window_size=0.1): + + sfreq = 1 / stc.tstep + n_samples = int(window_size * sfreq) + + # Calculate the number of full windows + n_windows = ( + len(stc.times) - 1 + ) // n_samples # -1 to ensure we don't exceed the time range + + # Prepare the data for averaging + data_to_average = stc.data[:, : n_windows * n_samples] + + # Reshape and average + averaged_data = data_to_average.reshape( + data_to_average.shape[0], n_windows, n_samples + 
).mean(axis=2) + + # Create new time array + new_times = np.arange(n_windows) * window_size + stc.tmin + (window_size / 2) + + return mne.SourceEstimate( + averaged_data, + vertices=stc.vertices, + tmin=new_times[0], + tstep=window_size, + subject=stc.subject, + ) + + +def is_subject_processed(subject, output_dir): + # Check if all expected output files exist for the subject + frequency_bands = ["broadband", "alpha", "beta", "gamma"] + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] + + for band in frequency_bands: + for condition in conditions: + roi_fname = f"{subject}_task-{condition}_{band}_lcmv_beamformer_roi_time_courses.npy" + stc_fname = ( + f"{subject}_task-{condition}_{band}_lcmv_beamformer_averaged-stc-lh.stc" + ) + if not ( + os.path.exists(os.path.join(output_dir, roi_fname)) + and os.path.exists(os.path.join(output_dir, stc_fname)) + ): + return False + return True + + +def prepare_epochs(all_epochs, noise_raw): + print(f"Initial all_epochs: {len(all_epochs)}") + + sfreq = all_epochs.info["sfreq"] + ch_names = all_epochs.ch_names + tmin, tmax = all_epochs.tmin, all_epochs.tmax + n_samples = len(all_epochs.times) + + if noise_raw.info["sfreq"] != sfreq: + noise_raw = noise_raw.resample(sfreq) + + print(f"Task epochs - tmin: {tmin}, tmax: {tmax}, n_samples: {n_samples}") + + noise_events = mne.make_fixed_length_events(noise_raw, duration=tmax - tmin) + noise_epochs = mne.Epochs( + noise_raw, + noise_events, + event_id={"noise": 1}, + tmin=tmin, + tmax=tmax, + baseline=None, + preload=True, + ) + + print(f"Initial noise_epochs: {len(noise_epochs)}") + print( + f"Noise epochs - tmin: {noise_epochs.tmin}, tmax: {noise_epochs.tmax}, n_samples: {len(noise_epochs.times)}" + ) + + noise_epochs = noise_epochs.copy().pick(ch_names) + + if len(all_epochs.times) > len(noise_epochs.times): + print( + f"Cropping task epochs from {len(all_epochs.times)} to {len(noise_epochs.times)} samples" + ) + all_epochs = all_epochs.copy().crop( + tmin=tmin, tmax=tmin + (len(noise_epochs.times) - 1) / sfreq + ) + + # Crop 200 ms from each end + crop_time = 0.2 + all_epochs = all_epochs.copy().crop( + tmin=all_epochs.tmin + crop_time, tmax=all_epochs.tmax - crop_time + ) + noise_epochs = noise_epochs.copy().crop( + tmin=noise_epochs.tmin + crop_time, tmax=noise_epochs.tmax - crop_time + ) + + print(f"After cropping - Task epochs n_samples: {len(all_epochs.times)}") + print(f"After cropping - Noise epochs n_samples: {len(noise_epochs.times)}") + + all_epochs.metadata = None + noise_epochs.metadata = None + + if not np.allclose(all_epochs.times, noise_epochs.times): + raise ValueError("Time points still don't match after cropping") + + combined_epochs = mne.concatenate_epochs([all_epochs, noise_epochs]) + + print(f"Final combined_epochs: {len(combined_epochs)}") + print(f"Time points: {len(combined_epochs.times)}") + + if EQUALIZE_EVENT_COUNTS: + combined_epochs.equalize_event_counts() + return combined_epochs + + +def filter_data(epochs, fmin, fmax): + """Filters epochs in the specified frequency band.""" + return epochs.copy().filter(fmin, fmax) + + +def create_lcmv_filter(epochs_filt, fwd, noise_epochs_filt): + data_cov = compute_covariance(epochs_filt, tmin=None, tmax=None, method="empirical") + noise_cov = compute_covariance( + noise_epochs_filt, tmin=None, tmax=None, method="empirical" + ) + filters_lcmv = make_lcmv( + epochs_filt.info, + fwd, + data_cov=data_cov, + noise_cov=noise_cov, + reg=0.05, + 
pick_ori="normal", + ) + return filters_lcmv + + +def apply_lcmv_filter(epochs_filt, filters_lcmv): + return apply_lcmv_epochs(epochs_filt, filters_lcmv, return_generator=True) + + +def stc_to_matrix(stc, parcellation): + """Parcellate a SourceEstimate and return a matrix of ROI time courses.""" + roi_time_courses = [ + np.mean(stc.in_label(label).data, axis=0) for label in parcellation + ] + return np.array(roi_time_courses) + + +def load_parcellation(): + subjects_dir = os.environ.get( + "SUBJECTS_DIR", + "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects", + ) + return mne.read_labels_from_annot( + "fsaverage", parc="HCPMMP1", subjects_dir=subjects_dir, hemi="both" + ) + + +def compute_source_estimate( + epochs_stcs, fwd, epochs, subject, output_dir, condition, band_name, parcellation +): + n_sources = fwd["nsource"] + n_times = len(epochs.times) + averaged_data = np.zeros((n_sources, n_times), dtype=complex) + all_data = [] + n_epochs = 0 + + vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] + + if SAVE_CROPPED_DATA_H5: + h5_filename = f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + h5_filepath = os.path.join(output_dir, h5_filename) + h5f = h5py.File(h5_filepath, "w") + + crop_time = 0.1 # in seconds + crop_samples = int(crop_time * epochs.info["sfreq"]) + + # Calculate the new time array after cropping + cropped_times = epochs.times[crop_samples:-crop_samples] + + for i, stc in enumerate(epochs_stcs): + cropped_data = stc.data[:, crop_samples:-crop_samples] + analytic_signal = hilbert(cropped_data, axis=1) + averaged_data[:, crop_samples:-crop_samples] += analytic_signal + all_data.append(analytic_signal) + n_epochs += 1 + + if SAVE_CROPPED_DATA_H5: + h5f.create_dataset(f"epoch_{i}", data=cropped_data) + + if SAVE_CROPPED_DATA_H5: + h5f.attrs["subject"] = subject + h5f.attrs["condition"] = condition + h5f.attrs["band_name"] = band_name + h5f.attrs["n_epochs"] = n_epochs + h5f.close() + + if n_epochs == 0: + raise ValueError("No epochs were processed") + + averaged_data /= n_epochs + envelope = np.abs(averaged_data) + + # Use the cropped time array for tmin and adjust the data shape + averaged_stc = mne.SourceEstimate( + envelope, + vertices=[vertices_lh, vertices_rh], + tmin=cropped_times[0], + tstep=epochs.times[1] - epochs.times[0], + subject="fsaverage", + ) + + # Create time-domain averaged version + time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) + + # Compute ROI time courses for both original and time-averaged data + roi_time_courses = stc_to_matrix(averaged_stc, parcellation) + time_averaged_roi_time_courses = stc_to_matrix(time_averaged_stc, parcellation) + + # Save original ROI time courses + roi_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_roi_time_courses.npy" + ) + np.save(os.path.join(output_dir, roi_fname), roi_time_courses) + + # Save time-averaged ROI time courses + time_avg_roi_fname = f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged_roi_time_courses.npy" + np.save( + os.path.join(output_dir, time_avg_roi_fname), time_averaged_roi_time_courses + ) + + # Save original SourceEstimate + averaged_fname = ( + f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_averaged-stc" + ) + averaged_stc.save(os.path.join(output_dir, averaged_fname), overwrite=True) + + # Save time-averaged SourceEstimate + time_avg_fname = ( + 
f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged-stc" + ) + time_averaged_stc.save(os.path.join(output_dir, time_avg_fname), overwrite=True) + + print( + f"Saved original SourceEstimate to {os.path.join(output_dir, averaged_fname)}" + ) + print( + f"Saved time-averaged SourceEstimate to {os.path.join(output_dir, time_avg_fname)}" + ) + print(f"Saved original ROI time courses to {os.path.join(output_dir, roi_fname)}") + print( + f"Saved time-averaged ROI time courses to {os.path.join(output_dir, time_avg_roi_fname)}" + ) + + return ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) + + +def process_subject(subject, data_dir, output_dir): + try: + fwd, all_epochs, noise_raw = load_data(subject, data_dir) + if fwd is None: + print(f"Skipping {subject}: Missing required files") + return + + print(f"All epochs info:") + print(f"Number of epochs: {len(all_epochs)}") + print(f"Time points: {len(all_epochs.times)}") + print(f"tmin: {all_epochs.tmin}, tmax: {all_epochs.tmax}") + + combined_epochs = prepare_epochs(all_epochs, noise_raw) + parcellation = load_parcellation() + + frequency_bands = { + "broadband": (1, 40), + "alpha": (8, 12), + "beta": (13, 30), + "gamma": (30, 40), + } + + conditions = [ + "ba", + "da", + "interviewer_conversation", + "interviewer_repetition", + "participant_conversation", + "participant_repetition", + ] # TODO see below re: removing localisers + + for band_name, (fmin, fmax) in frequency_bands.items(): + print(f"Processing {band_name} band ({fmin}-{fmax} Hz)") + filtered_epochs = filter_data(combined_epochs, fmin, fmax) + + # Create a common filter using all conditions + all_condition_epochs = mne.concatenate_epochs( + [filtered_epochs[cond] for cond in conditions] + ) + noise_epochs = filtered_epochs["noise"] + common_filter = create_lcmv_filter( + all_condition_epochs, fwd, noise_epochs + ) # TODO don't include localisers? Mayby do a separate run for each type so that code doesn't become too complex? 
+ + averaged_stc_dict = {} + roi_time_courses_dict = {} + time_averaged_stc_dict = {} + time_averaged_roi_time_courses_dict = {} + + for condition in conditions: + condition_epochs = filtered_epochs[condition] + + # Apply the common filter to each condition + epochs_stcs = apply_lcmv_filter(condition_epochs, common_filter) + + ( + averaged_stc, + roi_time_courses, + time_averaged_stc, + time_averaged_roi_time_courses, + ) = compute_source_estimate( + epochs_stcs, + fwd, + condition_epochs, + subject, + output_dir, + condition, + band_name, + parcellation, + ) + + averaged_stc_dict[condition] = averaged_stc + roi_time_courses_dict[condition] = roi_time_courses + time_averaged_stc_dict[condition] = time_averaged_stc + time_averaged_roi_time_courses_dict[condition] = ( + time_averaged_roi_time_courses + ) + + diff_pairs = [ + ("interviewer_conversation", "interviewer_repetition"), + ("participant_conversation", "participant_repetition"), + ] + + for cond1, cond2 in diff_pairs: + # Original difference + diff_stc = averaged_stc_dict[cond1] - averaged_stc_dict[cond2] + diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_difference-stc", + ) + diff_stc.save(diff_fname, overwrite=True) + + # Time-averaged difference + time_avg_diff_stc = ( + time_averaged_stc_dict[cond1] - time_averaged_stc_dict[cond2] + ) + time_avg_diff_fname = os.path.join( + output_dir, + f"{subject}_{band_name}_lcmv_beamformer_{cond1}_vs_{cond2}_time_averaged_difference-stc", + ) + time_avg_diff_stc.save(time_avg_diff_fname, overwrite=True) + + print(f"Finished processing {subject}") + update_progress(subject, output_dir) + except Exception as e: + print(f"Error processing {subject}: {str(e)}") + import traceback + traceback.print_exc( + + +def update_progress(subject, output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + with open(progress_file, "a") as f: + f.write(f"{subject}\n") + + +def get_processed_subjects(output_dir): + progress_file = os.path.join(output_dir, "progress.txt") + if os.path.exists(progress_file): + with open(progress_file, "r") as f: + return set(line.strip() for line in f) + return set() + + +def main(): + data_dir, output_dir = setup_directories() + subject_dirs = [d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit()] + + processed_subjects = get_processed_subjects(output_dir) + + for subject in subject_dirs: + if subject in processed_subjects or is_subject_processed(subject, output_dir): + print(f"Skipping {subject}: Already processed") + continue + process_subject(subject, data_dir, output_dir) + # Removed: update_progress(subject, output_dir) + + print("All subjects processed") + + +if __name__ == "__main__": + main() diff --git a/LCMV_conversation_batch.py b/LCMV_conversation_batch.py index 082a533..f0d02cc 100644 --- a/LCMV_conversation_batch.py +++ b/LCMV_conversation_batch.py @@ -12,22 +12,38 @@ SAVE_CROPPED_DATA_H5 = False +def setup_directories(): + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output2" + os.makedirs(output_dir, exist_ok=True) + return data_dir, output_dir + + +def load_data(subject, data_dir): + fwd_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" + ) + epochs_fname = os.path.join( + data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" + ) + noise_fname 
= os.path.join( + data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" + ) + + if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): + return None + + fwd = mne.read_forward_solution(fwd_fname) + fwd_fixed = mne.convert_forward_solution( + fwd, surf_ori=True, force_fixed=False, use_cps=True + ) # Convert to fixed orientation - necessary for LCMV ori = normal + all_epochs = mne.read_epochs(epochs_fname) + noise_raw = mne.io.read_raw_fif(noise_fname) + return fwd_fixed, all_epochs, noise_raw + + def average_stc_in_time(stc, window_size=0.1): - """ - Average the SourceEstimate in the time domain. - - Parameters: - ----------- - stc : mne.SourceEstimate - The source estimate to average. - window_size : float - The size of the averaging window in seconds. - - Returns: - -------- - mne.SourceEstimate - A new SourceEstimate with time-averaged data. - """ + sfreq = 1 / stc.tstep n_samples = int(window_size * sfreq) @@ -82,36 +98,6 @@ def is_subject_processed(subject, output_dir): return True -def setup_directories(): - data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" - output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4" - os.makedirs(output_dir, exist_ok=True) - return data_dir, output_dir - - -def load_data(subject, data_dir): - fwd_fname = os.path.join( - data_dir, subject, "meg", f"{subject}_task-conversation_fwd.fif" - ) - epochs_fname = os.path.join( - data_dir, subject, "meg", f"{subject}_task-conversation_proc-clean_epo.fif" - ) - noise_fname = os.path.join( - data_dir, subject, "meg", f"{subject}_task-rest_proc-clean_raw.fif" - ) - - if not all(os.path.exists(f) for f in [fwd_fname, epochs_fname, noise_fname]): - return None - - fwd = mne.read_forward_solution(fwd_fname) - fwd_fixed = mne.convert_forward_solution( - fwd, surf_ori=True, force_fixed=False, use_cps=True - ) # Convert to fixed orientation - necessary for LCMV ori = normal - all_epochs = mne.read_epochs(epochs_fname) - noise_raw = mne.io.read_raw_fif(noise_fname) - return fwd_fixed, all_epochs, noise_raw - - def prepare_epochs(all_epochs, noise_raw): print(f"Initial all_epochs: {len(all_epochs)}") @@ -258,11 +244,22 @@ def compute_source_estimate( averaged_stc = mne.SourceEstimate( envelope, vertices=[vertices_lh, vertices_rh], - tmin=epochs.times[crop_samples], + tmin=cropped_times[0], tstep=epochs.times[1] - epochs.times[0], subject="fsaverage", ) + print( + f"Original STC time range: {averaged_stc.times[0]:.3f}s to {averaged_stc.times[-1]:.3f}s" + ) + print(f"Original STC time points: {len(averaged_stc.times)}") + # Apply time-domain averaging + time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) + + print( + f"Time-averaged STC time range: {time_averaged_stc.times[0]:.3f}s to {time_averaged_stc.times[-1]:.3f}s" + ) + print(f"Time-averaged STC time points: {len(time_averaged_stc.times)}") # Create time-domain averaged version time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) diff --git a/LCMV_conversation_grand_average.py b/LCMV_conversation_grand_average.py index 7f52ad4..fffad22 100644 --- a/LCMV_conversation_grand_average.py +++ b/LCMV_conversation_grand_average.py @@ -5,7 +5,7 @@ import re # Set the path to the folder containing the .stc files -stc_folder = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_analysis" +stc_folder = 
"/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_output/" # Create a subfolder for average STCs average_folder = os.path.join(stc_folder, "average_stc") diff --git a/plot_stc.py b/plot_stc.py index acd23dc..f107171 100644 --- a/plot_stc.py +++ b/plot_stc.py @@ -2,8 +2,8 @@ import os SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" -STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_4/" -DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_time_averaged-stc-lh.stc" +STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1/" +DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_averaged-stc-lh.stc" # Load the saved STC file (replace with your actual file path) From d530e45e461c4ff5f9e3accab4d34f5e61382307 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 12:26:04 +1200 Subject: [PATCH 09/20] fix x axis --- LCMV_CSF_conversation_batch.py | 112 +++++++++++++++++++++++---------- plot_stc.py | 2 +- 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 2e46966..6da0829 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -5,11 +5,13 @@ from mne.beamformer import apply_lcmv_epochs, make_lcmv from mne.cov import compute_covariance from scipy.signal import hilbert +import matplotlib.pyplot as plt # Configuration options - change as needed saving # cropped data takes up a lot of space EQUALIZE_EVENT_COUNTS = False SAVE_CROPPED_DATA_H5 = False +DIAGNOSTIC_PLOTS = False def setup_directories(): @@ -146,8 +148,12 @@ def prepare_epochs(all_epochs, noise_raw): tmin=noise_epochs.tmin + crop_time, tmax=noise_epochs.tmax - crop_time ) - print(f"After cropping - Task epochs n_samples: {len(all_epochs.times)}") - print(f"After cropping - Noise epochs n_samples: {len(noise_epochs.times)}") + print( + f"After cropping - Task epochs tmin: {all_epochs.tmin}, tmax: {all_epochs.tmax}, n_samples: {len(all_epochs.times)}" + ) + print( + f"After cropping - Noise epochs tmin: {noise_epochs.tmin}, tmax: {noise_epochs.tmax}, n_samples: {len(noise_epochs.times)}" + ) all_epochs.metadata = None noise_epochs.metadata = None @@ -159,6 +165,38 @@ def prepare_epochs(all_epochs, noise_raw): print(f"Final combined_epochs: {len(combined_epochs)}") print(f"Time points: {len(combined_epochs.times)}") + print(f"Combined epochs tmin: {combined_epochs.tmin}, tmax: {combined_epochs.tmax}") + if DIAGNOSTIC_PLOTS: + # Diagnostic plots + plt.figure(figsize=(15, 10)) + + plt.subplot(311) + plt.plot(combined_epochs.times, combined_epochs.get_data().mean(axis=(0, 1))) + plt.title("Average of all channels and epochs") + plt.xlabel("Time (s)") + + plt.subplot(312) + plt.plot(combined_epochs.times, combined_epochs.get_data()[0, 0, :]) + plt.title("First channel of first epoch") + plt.xlabel("Time (s)") + + plt.subplot(313) + plt.imshow( + combined_epochs.get_data().mean(axis=0), + aspect="auto", + extent=[ + combined_epochs.times[0], + combined_epochs.times[-1], + 0, + combined_epochs.get_data().shape[1], + ], + ) + plt.title("Heatmap of all channels (averaged across epochs)") + plt.xlabel("Time (s)") + plt.ylabel("Channels") + + plt.tight_layout() + plt.show() if EQUALIZE_EVENT_COUNTS: 
combined_epochs.equalize_event_counts() @@ -211,6 +249,26 @@ def load_parcellation(): def compute_source_estimate( epochs_stcs, fwd, epochs, subject, output_dir, condition, band_name, parcellation ): + """Computes source estimates, ROI time courses, and saves them. + + Args: + epochs_stcs (list): List of SourceEstimate objects. + fwd (dict): Forward solution dictionary. + epochs (mne.Epochs): Evoked data object. + subject (str): Subject name. + output_dir (str): Output directory path. + condition (str): Experimental condition. + band_name (str): Band name. + parcellation (mne.Parcellation): Parcellation object. + + Returns: + tuple: A tuple containing the following elements: + - averaged_stc (mne.SourceEstimate): The averaged source estimate. + - roi_time_courses (np.ndarray): Original ROI time courses. + - time_averaged_stc (mne.SourceEstimate): Time-averaged source estimate. + - time_averaged_roi_time_courses (np.ndarray): Time-averaged ROI time courses. + """ + n_sources = fwd["nsource"] n_times = len(epochs.times) averaged_data = np.zeros((n_sources, n_times), dtype=complex) @@ -219,33 +277,24 @@ def compute_source_estimate( vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] - if SAVE_CROPPED_DATA_H5: - h5_filename = f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" - h5_filepath = os.path.join(output_dir, h5_filename) - h5f = h5py.File(h5_filepath, "w") - - crop_time = 0.1 # in seconds - crop_samples = int(crop_time * epochs.info["sfreq"]) - - # Calculate the new time array after cropping - cropped_times = epochs.times[crop_samples:-crop_samples] + with h5py.File( + os.path.join( + output_dir, f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + ), + "w", + ) as h5f: + for i, stc in enumerate(epochs_stcs): + analytic_signal = hilbert(stc.data, axis=1) + averaged_data += analytic_signal + all_data.append(analytic_signal) + n_epochs += 1 - for i, stc in enumerate(epochs_stcs): - cropped_data = stc.data[:, crop_samples:-crop_samples] - analytic_signal = hilbert(cropped_data, axis=1) - averaged_data[:, crop_samples:-crop_samples] += analytic_signal - all_data.append(analytic_signal) - n_epochs += 1 + h5f.create_dataset(f"epoch_{i}", data=stc.data) - if SAVE_CROPPED_DATA_H5: - h5f.create_dataset(f"epoch_{i}", data=cropped_data) - - if SAVE_CROPPED_DATA_H5: h5f.attrs["subject"] = subject h5f.attrs["condition"] = condition h5f.attrs["band_name"] = band_name h5f.attrs["n_epochs"] = n_epochs - h5f.close() if n_epochs == 0: raise ValueError("No epochs were processed") @@ -253,41 +302,35 @@ def compute_source_estimate( averaged_data /= n_epochs envelope = np.abs(averaged_data) - # Use the cropped time array for tmin and adjust the data shape averaged_stc = mne.SourceEstimate( envelope, vertices=[vertices_lh, vertices_rh], - tmin=cropped_times[0], + tmin=epochs.times[0], tstep=epochs.times[1] - epochs.times[0], subject="fsaverage", ) - # Create time-domain averaged version time_averaged_stc = average_stc_in_time(averaged_stc, window_size=0.1) - # Compute ROI time courses for both original and time-averaged data roi_time_courses = stc_to_matrix(averaged_stc, parcellation) time_averaged_roi_time_courses = stc_to_matrix(time_averaged_stc, parcellation) - # Save original ROI time courses + # Save outputs roi_fname = ( f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_roi_time_courses.npy" ) np.save(os.path.join(output_dir, roi_fname), roi_time_courses) - # Save time-averaged ROI time courses time_avg_roi_fname = 
f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged_roi_time_courses.npy" np.save( os.path.join(output_dir, time_avg_roi_fname), time_averaged_roi_time_courses ) - # Save original SourceEstimate averaged_fname = ( f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_averaged-stc" ) averaged_stc.save(os.path.join(output_dir, averaged_fname), overwrite=True) - # Save time-averaged SourceEstimate time_avg_fname = ( f"{subject}_task-{condition}_{band_name}_lcmv_beamformer_time_averaged-stc" ) @@ -419,7 +462,8 @@ def process_subject(subject, data_dir, output_dir): except Exception as e: print(f"Error processing {subject}: {str(e)}") import traceback - traceback.print_exc( + + traceback.print_exc() def update_progress(subject, output_dir): @@ -438,7 +482,9 @@ def get_processed_subjects(output_dir): def main(): data_dir, output_dir = setup_directories() - subject_dirs = [d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit()] + subject_dirs = [ + d for d in os.listdir(data_dir) if d.startswith("sub-") and d[4:].isdigit() + ] processed_subjects = get_processed_subjects(output_dir) diff --git a/plot_stc.py b/plot_stc.py index f107171..5b16893 100644 --- a/plot_stc.py +++ b/plot_stc.py @@ -3,7 +3,7 @@ SUBJECTS_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/freesurfer/subjects" STC_DIR = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1/" -DATA_FILE = "sub-01_task-ba_broadband_lcmv_beamformer_averaged-stc-lh.stc" +DATA_FILE = "sub-01_task-da_broadband_lcmv_beamformer_averaged-stc-lh.stc" # Load the saved STC file (replace with your actual file path) From 371f2ce16b2adf47a0b36eaf36364c20838b98d1 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 12:26:53 +1200 Subject: [PATCH 10/20] remove debugging plots --- LCMV_CSF_conversation_batch.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 6da0829..94202c0 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -11,7 +11,6 @@ # cropped data takes up a lot of space EQUALIZE_EVENT_COUNTS = False SAVE_CROPPED_DATA_H5 = False -DIAGNOSTIC_PLOTS = False def setup_directories(): @@ -166,37 +165,6 @@ def prepare_epochs(all_epochs, noise_raw): print(f"Final combined_epochs: {len(combined_epochs)}") print(f"Time points: {len(combined_epochs.times)}") print(f"Combined epochs tmin: {combined_epochs.tmin}, tmax: {combined_epochs.tmax}") - if DIAGNOSTIC_PLOTS: - # Diagnostic plots - plt.figure(figsize=(15, 10)) - - plt.subplot(311) - plt.plot(combined_epochs.times, combined_epochs.get_data().mean(axis=(0, 1))) - plt.title("Average of all channels and epochs") - plt.xlabel("Time (s)") - - plt.subplot(312) - plt.plot(combined_epochs.times, combined_epochs.get_data()[0, 0, :]) - plt.title("First channel of first epoch") - plt.xlabel("Time (s)") - - plt.subplot(313) - plt.imshow( - combined_epochs.get_data().mean(axis=0), - aspect="auto", - extent=[ - combined_epochs.times[0], - combined_epochs.times[-1], - 0, - combined_epochs.get_data().shape[1], - ], - ) - plt.title("Heatmap of all channels (averaged across epochs)") - plt.xlabel("Time (s)") - plt.ylabel("Channels") - - plt.tight_layout() - plt.show() if EQUALIZE_EVENT_COUNTS: combined_epochs.equalize_event_counts() From 
b8696e36883e06c89634f3cada1074b99cd827c1 Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 12:44:15 +1200 Subject: [PATCH 11/20] tidy up --- LCMV_CSF_conversation_batch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 94202c0..5f82659 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -14,8 +14,8 @@ def setup_directories(): - data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_data/" - output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/Test_output_1" + data_dir = "/Users/em18033/Library/CloudStorage/OneDrive-SharedLibraries-MacquarieUniversity/Natural_Conversations_study - Documents/analysis/natural-conversations-bids/derivatives/mne-bids-pipeline-20240628/" + output_dir = "/Users/em18033/Library/CloudStorage/OneDrive-AUTUniversity/Projects/Conversational_AI/LCMV_output/" os.makedirs(output_dir, exist_ok=True) return data_dir, output_dir From ea643366dda1a230c715723d26c911e58154f17d Mon Sep 17 00:00:00 2001 From: paulsowman Date: Thu, 25 Jul 2024 14:44:04 +1200 Subject: [PATCH 12/20] refix h5 save option --- LCMV_CSF_conversation_batch.py | 39 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/LCMV_CSF_conversation_batch.py b/LCMV_CSF_conversation_batch.py index 5f82659..2f5eaf1 100644 --- a/LCMV_CSF_conversation_batch.py +++ b/LCMV_CSF_conversation_batch.py @@ -5,7 +5,6 @@ from mne.beamformer import apply_lcmv_epochs, make_lcmv from mne.cov import compute_covariance from scipy.signal import hilbert -import matplotlib.pyplot as plt # Configuration options - change as needed saving # cropped data takes up a lot of space @@ -244,25 +243,25 @@ def compute_source_estimate( n_epochs = 0 vertices_lh, vertices_rh = fwd["src"][0]["vertno"], fwd["src"][1]["vertno"] - - with h5py.File( - os.path.join( - output_dir, f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" - ), - "w", - ) as h5f: - for i, stc in enumerate(epochs_stcs): - analytic_signal = hilbert(stc.data, axis=1) - averaged_data += analytic_signal - all_data.append(analytic_signal) - n_epochs += 1 - - h5f.create_dataset(f"epoch_{i}", data=stc.data) - - h5f.attrs["subject"] = subject - h5f.attrs["condition"] = condition - h5f.attrs["band_name"] = band_name - h5f.attrs["n_epochs"] = n_epochs + if SAVE_CROPPED_DATA_H5: + with h5py.File( + os.path.join( + output_dir, f"{subject}_task-{condition}_{band_name}_epochs_stcs.h5" + ), + "w", + ) as h5f: + for i, stc in enumerate(epochs_stcs): + analytic_signal = hilbert(stc.data, axis=1) + averaged_data += analytic_signal + all_data.append(analytic_signal) + n_epochs += 1 + + h5f.create_dataset(f"epoch_{i}", data=stc.data) + + h5f.attrs["subject"] = subject + h5f.attrs["condition"] = condition + h5f.attrs["band_name"] = band_name + h5f.attrs["n_epochs"] = n_epochs if n_epochs == 0: raise ValueError("No epochs were processed") From 3e667db5ac8060076cf4ba662ebe3cddc921e934 Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:44:35 +1200 Subject: [PATCH 13/20] For calculating wpli and graph metrics from LCMV --- Connectivity.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 Connectivity.py diff --git a/Connectivity.py b/Connectivity.py new file mode 100644 index 0000000..74fd281 --- 
/dev/null +++ b/Connectivity.py @@ -0,0 +1,98 @@ +# This is script to calculate connectivity using raw np arrays, +# each assumed to be in the shape (channels/ROI's, time points) + +import numpy as np +from datetime import datetime +import os +import dyconnmap +import bct +from scipy import stats +import pathlib + +def operations_to_perform(): # Change as desired + Cal_wpli = True + Cal_graph_metrics = True + return Cal_wpli, Cal_graph_metrics + + +def setup_dependencies(): # Change as desired + input_dir = r'C:\Users\em17531\Desktop\New_project\source_loc_output' # The directory containing files of interest + output_dir = r'' # The output directory + array_type = 'participant_conversation_alpha' # This is the type of array that will be loaded for further calculations, eg. "participant_conversation_alpha" + target_fb = [8,12] # The target frequency band TODO: set this up so variable can be a this can be list of bands and each generates it's own adjacency matrix + fs = 250 # The sampling frequency + saving = True + os.makedirs(output_dir, exist_ok=True) + return input_dir, output_dir, array_type, fs, target_fb, saving + + +def prep_data(input_dir, array_type): + data = [] + folder = pathlib.Path(input_dir).glob('*') + for file in folder: + if (os.path.basename(file)).endswith('.npy'): + if array_type in os.path.basename(file): + data.append(np.load(file)) + return np.array(data) + + +def wpli_conn(array, target_fb, fs): # (participants, roi's, time_points) + adj_matrix = [] + for participant in array: + adj_matrix.append(dyconnmap.fc.wpli(participant, fs=fs, fb=target_fb)) + return np.array(adj_matrix) + + +def graph_metrics(adj_matrix): # TODO: Add in stats, maybe nbs? Could use fdc as well? + # Strength calculator + Strength = [] + for participant in adj_matrix: + Strength.append(bct.strengths_und(np.nan_to_num(participant))) + Strength = np.array(Strength) + + # Zeroing negative phasing + Strength[Strength < 0] = 0 + + # Betweenness centrality calculator + Betweenness = [] + for participant in adj_matrix: + Betweenness.append(bct.betweenness_wei(np.nan_to_num(participant))) + Betweenness = np.array(Betweenness) + + # Eigenvector centrality calculator + Eigenvector = [] + for participant in adj_matrix: + Eigenvector.append(bct.eigenvector_centrality_und(np.nan_to_num(participant))) + Eigenvector = np.array(Eigenvector) + + # Clustering calculator + Clustering = [] + for participant in adj_matrix: + Clustering.append(bct.clustering_coef_wu(np.nan_to_num(participant))) + Clustering = np.array(Clustering) + + return Strength, Betweenness, Eigenvector, Clustering + + +def main(): # TODO: put in the elif statements + # Prep + Cal_wpli, Cal_graph_metrics = operations_to_perform() + input_dir, output_dir, array_type, fs, target_fb, saving = setup_dependencies() + data = prep_data(input_dir, array_type) + + # Core functions + if Cal_wpli: + adj_matrix = wpli_conn(data, target_fb, fs) + if Cal_graph_metrics: + Strength, Betweenness, Eigenvector, Clustering = graph_metrics(adj_matrix) + + # Saving + if saving: + np.save(output_dir + 'All_Strength_' + array_type, Strength) + np.save(output_dir + 'All_Betweenness_' + array_type, Betweenness) + np.save(output_dir + 'All_Eigenvector_' + array_type, Eigenvector) + np.save(output_dir + 'All_Clustering_' + array_type, Clustering) + + +if __name__ == "__main__": + main() From cfbc80ccbb3ce033265885a45f9d98a838bf7e33 Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:43:17 +1200 Subject: 
[PATCH 14/20] Updated Connectivity.py --- Connectivity.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Connectivity.py b/Connectivity.py index 74fd281..245fd3b 100644 --- a/Connectivity.py +++ b/Connectivity.py @@ -2,12 +2,13 @@ # each assumed to be in the shape (channels/ROI's, time points) import numpy as np -from datetime import datetime +import time import os import dyconnmap import bct from scipy import stats import pathlib +start = time.time() def operations_to_perform(): # Change as desired Cal_wpli = True @@ -16,7 +17,7 @@ def operations_to_perform(): # Change as desired def setup_dependencies(): # Change as desired - input_dir = r'C:\Users\em17531\Desktop\New_project\source_loc_output' # The directory containing files of interest + input_dir = r'' # The directory containing files of interest output_dir = r'' # The output directory array_type = 'participant_conversation_alpha' # This is the type of array that will be loaded for further calculations, eg. "participant_conversation_alpha" target_fb = [8,12] # The target frequency band TODO: set this up so variable can be a this can be list of bands and each generates it's own adjacency matrix @@ -48,7 +49,7 @@ def graph_metrics(adj_matrix): # TODO: Add in stats, maybe nbs? Could use fdc a Strength = [] for participant in adj_matrix: Strength.append(bct.strengths_und(np.nan_to_num(participant))) - Strength = np.array(Strength) + Strength = np.array(Strength) # Zeroing negative phasing Strength[Strength < 0] = 0 @@ -57,19 +58,19 @@ def graph_metrics(adj_matrix): # TODO: Add in stats, maybe nbs? Could use fdc a Betweenness = [] for participant in adj_matrix: Betweenness.append(bct.betweenness_wei(np.nan_to_num(participant))) - Betweenness = np.array(Betweenness) + Betweenness = np.array(Betweenness) # Eigenvector centrality calculator Eigenvector = [] for participant in adj_matrix: Eigenvector.append(bct.eigenvector_centrality_und(np.nan_to_num(participant))) - Eigenvector = np.array(Eigenvector) + Eigenvector = np.array(Eigenvector) # Clustering calculator Clustering = [] for participant in adj_matrix: Clustering.append(bct.clustering_coef_wu(np.nan_to_num(participant))) - Clustering = np.array(Clustering) + Clustering = np.array(Clustering) return Strength, Betweenness, Eigenvector, Clustering @@ -96,3 +97,5 @@ def main(): # TODO: put in the elif statements if __name__ == "__main__": main() + +print('\n' + "EXECUTION TIME: " + str(time.time()-start)) From 39c36d54fc8de05e2064dfe3441b3aa7621bfae0 Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:17:55 +1200 Subject: [PATCH 15/20] Update Connectivity.py --- Connectivity.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Connectivity.py b/Connectivity.py index 245fd3b..e873e5a 100644 --- a/Connectivity.py +++ b/Connectivity.py @@ -89,10 +89,10 @@ def main(): # TODO: put in the elif statements # Saving if saving: - np.save(output_dir + 'All_Strength_' + array_type, Strength) - np.save(output_dir + 'All_Betweenness_' + array_type, Betweenness) - np.save(output_dir + 'All_Eigenvector_' + array_type, Eigenvector) - np.save(output_dir + 'All_Clustering_' + array_type, Clustering) + np.save(output_dir + '_All_Strength_' + array_type, Strength) + np.save(output_dir + '_All_Betweenness_' + array_type, Betweenness) + np.save(output_dir + '_All_Eigenvector_' + array_type, Eigenvector) + np.save(output_dir + '_All_Clustering_' + array_type, Clustering) if __name__ == 
"__main__": From bd76c6e3e7a5bd339be6c4c312e9ef206a33a90e Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:03:14 +1000 Subject: [PATCH 16/20] Add z-scores version of ROI time series --- traditional_source_analysis_and_ROIs.py | 96 +++++++++++++++++++------ 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index d4c8be0..6d055a1 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -2,7 +2,6 @@ # Compute and plot GAs # Extract time courses from predefined ROIs -import os import os.path as op import numpy as np import matplotlib.pyplot as plt @@ -26,13 +25,19 @@ # specify how many SSP projectors to use for speech artifact removal n_proj = 4 # 1 means: 1 for MEG & 1 for EEG +this_run = f"{n_proj}-proj" +#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks # which conditions to compare in ROI analysis comparison = "participant" # "interviewer" or "participant" conds_ROI = [f"{comparison}_conversation", f"{comparison}_repetition"] -this_run = f"{n_proj}-proj" -#this_run = "ba+da__ba-da" # for the "ba+da" and "ba-da" sanity checks +ch_type = None # "meg" or "eeg" # None means all +if ch_type: + path_suffix = f"_{ch_type}-only" +else: + path_suffix = "" + subjects = config.subjects if subjects == "all": @@ -52,12 +57,15 @@ #analysis_path = Path("/mnt/d/Work/analysis_ME206/Natural_Conversations_study/analysis") deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" -source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}" / this_run -figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run -figures_ROI_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}" / this_run / f"{comparison}_ROI" +source_results_dir = analysis_path / "results" / f"source-{source_method}-{src_type}{path_suffix}" / this_run +figures_dir = analysis_path / "figures" / f"source-{source_method}-{src_type}{path_suffix}" / this_run +figures_ROI_dir = figures_dir / f"{comparison}_ROI" +#figures_ROI_zscores_dir = figures_dir / f"{comparison}_ROI_zscores" +figures_ROI_zscores_dir = figures_dir / f"{comparison}_ROI_zscores-timeslices" # create the folders if needed source_results_dir.mkdir(parents=True, exist_ok=True) figures_ROI_dir.mkdir(parents=True, exist_ok=True) +figures_ROI_zscores_dir.mkdir(parents=True, exist_ok=True) subjects_dir = deriv_path / "freesurfer" / "subjects" subject = 'fsaverage' @@ -99,13 +107,15 @@ epochs.add_proj(proj).apply_proj() epochs = epochs.pick(["meg", "eeg"], exclude="bads") + if ch_type: + epochs.pick(ch_type) ranks = mne.compute_rank(inst=epochs, tol=1e-3, tol_kind="relative") rank = sum(ranks.values()) print(f" Ranks={ranks} (total={rank})") # TEMP - always recreate inv for now, until we change to free orientation in the pipeline - if 1: #n_proj or src_type == 'vol': + if 1: #n_proj or ch_type or src_type == 'vol': # Recreate inverse taking into account additional projections cov = mne.read_cov(cov_fname) if src_type == 'vol': @@ -227,13 +237,27 @@ # initialise the sum array to correct size using first subject's stc stc = mne.read_source_estimate(stc_files[0]) - + ''' # read in the stc for each subsequent subject, add to the sum array for fname in stc_files[1:]: stc += mne.read_source_estimate(fname) - # divide by number of files stc /= len(stc_files) + ''' + + # the above only seems 
to work for surface-based stcs, + # so we are retaining the code below for now to handle vol-based stcs + + stcs_sum = stc.data # this contains lh & rh vertices combined together + # there are also separate fields for the 2 hemis (stc.lh_data, stc.rh_data), + # but their content cannot be set directly, so just use the combined one + + # read in the stc for each subsequent subject, add to the sum array + for fname in stc_files[1:]: + stc = mne.read_source_estimate(fname) + stcs_sum = stcs_sum + stc.data + # divide by number of files + stc.data = stcs_sum / len(stc_files) # store in the GA struct GA_stcs[cond] = stc @@ -282,8 +306,13 @@ # Extract ROI time courses from source estimates -src = mne.read_source_spaces(src_fname) +window_size = 200 # sliding window size (ms) for calculating z-scores +window = window_size / 5 # 10 = 50ms, 20 = 100ms, 40 = 200ms + (figures_ROI_dir / "all_ROIs").mkdir(parents=True, exist_ok=True) +(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms").mkdir(parents=True, exist_ok=True) + +src = mne.read_source_spaces(src_fname) if src_type == 'vol': # choose atlas for parcellation @@ -297,22 +326,48 @@ for label_name in rois: (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) + (figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True) # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") + fig_z, axes_z = plt.subplots(1, layout="constrained") for cond in conds_ROI: label_ts = mne.extract_label_time_course( [GA_stcs[cond]], (fname_aseg, label_name), src, mode="auto" ) - axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) - - axes.axvline(linestyle='--') # add verticle line at time 0 + label_ts = label_ts[0][0] + axes.plot(1e3 * GA_stcs[cond].times, label_ts, label=cond) + + # calculate z-score at each time point (using a sliding time window) + mu = np.mean(label_ts) # demean is based on the whole epoch + label_ts = label_ts - mu + + zscores = [0] * (len(label_ts) - window) # initialise the z-scores array + for t in range(0, len(label_ts) - window): + sigma = np.std(label_ts[t:t+window], mean=0) # sigma is calculated on the time slice only, but need to manually set the mean to 0 + zscores[t] = label_ts[t] / sigma + axes_z.plot(1e3 * GA_stcs[cond].times[:len(zscores)], zscores, label=cond) + ''' + # take the time window centred on current time point (rather than on the RHS) + window_half = int(window/2) + zscores = [0] * (len(label_ts) - window_half) # initialise the z-scores array + for t in range(window_half, len(label_ts) - window_half): + sigma = np.std(label_ts[t-window_half:t+window_half], mean=0) # sigma is calculated on the time slice only, but need to manually set the mean to 0 + zscores[t] = label_ts[t] / sigma + axes_z.plot(1e3 * GA_stcs[cond].times[window_half:len(zscores)], zscores[window_half:], label=cond) + ''' + + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() + axes_z.axvline(linestyle='-', color='k') # add verticle line at time 0 + axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)") + axes_z.legend() fig.savefig(figures_ROI_dir / label_name / "GA.png") fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder - plt.close(fig) + fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png") + plt.close('all') # Plot individual-subjects ROI time series for sub in use_subjects: @@ -323,14 
+378,14 @@ label_ts = mne.extract_label_time_course( [stc], (fname_aseg, label_name), src, mode="auto" ) - axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) + axes.plot(1e3 * stc.times, label_ts[0][0], label=cond) - axes.axvline(linestyle='--') # add verticle line at time 0 + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") - plt.close(fig) + plt.close('all') elif src_type == 'surface': # for surface source space, we need to create the Label object first @@ -352,7 +407,8 @@ # or read a single label (e.g. V1, BA44, etc) #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label')) - + + for label in labels: label_name = label.name # or set a custom name for combined labels @@ -371,7 +427,7 @@ ) axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) - axes.axvline(linestyle='--') # add verticle line at time 0 + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() @@ -390,7 +446,7 @@ ) axes.plot(1e3 * stc.times, label_ts[0][0, :], label=cond) - axes.axvline(linestyle='--') # add verticle line at time 0 + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() From 72a1b48c7d2b571b9e95be7b58f54d8af0cedb16 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:17:44 +1000 Subject: [PATCH 17/20] Add saving of GA stc --- csp.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/csp.py b/csp.py index b32204e..cb81503 100644 --- a/csp.py +++ b/csp.py @@ -93,10 +93,12 @@ analysis_path = Path("D:/Work/analysis_ME206/Natural_Conversations_study/analysis") deriv_path = analysis_path / "natural-conversations-bids" / "derivatives" -fig_path = analysis_path / "figures" / "CSP-decoding" if src_type == 'vol': fig_path = analysis_path / "figures" / "CSP-decoding-vol" results_path = analysis_path / "results" / "CSP-decoding-vol" +else: + fig_path = analysis_path / "figures" / "CSP-decoding" + results_path = analysis_path / "results" / "CSP-decoding" cop_path = analysis_path / "figures" subjects_dir = deriv_path / "freesurfer" / "subjects" if src_type == 'vol': @@ -141,7 +143,7 @@ if decode != "participant": extra += f"-{decode}" title += f", {decode}" -for si, sub in enumerate(use_subjects): # just 03 for now +for si, sub in enumerate(use_subjects): path = deriv_path / 'mne-bids-pipeline' / f'sub-{sub}' / 'meg' epochs_fname = path / f'sub-{sub}_task-conversation_proc-clean_epo.fif' trans_fname = path / f'sub-{sub}_task-conversation_trans.fif' @@ -396,6 +398,7 @@ def clean_brain(brain_img): if subj_key == "": results_path.mkdir(exist_ok=True) stc.save(f'{results_path}/decoding{extra}_csp_GA') + src = mne.read_source_spaces(src_fname) # for vol src, the plot function returns a matplotlib Figure - # we can't update the clim & time point for this once plotted, so do the actual plotting later @@ -404,6 +407,11 @@ def clean_brain(brain_img): this_data.reshape(-1, n_vertices).T, vertices=fs_vertices, tmin=0, tstep=1., subject="fsaverage", ) + # save the GA stc + if subj_key == "": + results_path.mkdir(exist_ok=True) + stc.save(f'{results_path}/decoding{extra}_csp_GA') + brain = stc.plot( initial_time=0., transparent=True, colormap="viridis", clim=dict(kind="value", lims=[0, 1, 2]), From 
f47cbfc540edad395ac02f29d0dc718ba4736c11 Mon Sep 17 00:00:00 2001 From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com> Date: Mon, 29 Jul 2024 22:42:27 +1000 Subject: [PATCH 18/20] Equalise event counts. Add more ROIs from HCPMMP1 atlas --- traditional_source_analysis_and_ROIs.py | 93 +++++++++++++++++++++---- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py index 6d055a1..51d5d7b 100644 --- a/traditional_source_analysis_and_ROIs.py +++ b/traditional_source_analysis_and_ROIs.py @@ -18,7 +18,7 @@ # specify source method etc source_method = "dSPM" #"MNE" -src_type = 'vol' # 'vol' or 'surface' +src_type = 'surface' # 'vol' or 'surface' spacing = "oct6" # for 'surface' source space only if src_type != 'surface': spacing = '' @@ -38,6 +38,13 @@ else: path_suffix = "" +if src_type == 'vol': + src_suffix = '-vl.stc' + #if this_run == "ba+da__ba-da": + # src_suffix = '-vol.stc' # to read a previous version of saved results +elif src_type == 'surface': + src_suffix = '-lh.stc' + subjects = config.subjects if subjects == "all": @@ -92,6 +99,8 @@ stc_filename_prefix = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc') + if Path(stc_filename_prefix + '_ba' + src_suffix).exists(): + continue # Read data epochs = mne.read_epochs(epochs_fname) @@ -144,6 +153,10 @@ #evoked_allconds = epochs.average() #evoked_allconds.plot_joint() # average ERF across all conds + epochs.equalize_event_counts(['ba', 'da']) + epochs.equalize_event_counts(['interviewer_conversation', 'interviewer_repetition']) + epochs.equalize_event_counts(['participant_conversation', 'participant_repetition']) + # compute evoked for each cond (ba, da, conversation, repetition) evokeds = [] for cond in epochs.event_id: @@ -216,13 +229,6 @@ if this_run == "ba+da__ba-da": conds = ['ba+da', 'ba-da'] -if src_type == 'vol': - src_suffix = '-vl.stc' - #if this_run == "ba+da__ba-da": - # src_suffix = '-vol.stc' # to read a previous version of saved results -elif src_type == 'surface': - src_suffix = '-lh.stc' - # overwrite the paths here to use a previous version of results for participant-locked epochs #source_results_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj" #figures_dir = "/mnt/d/Work/analysis_ME206/results/bids/source/MNE_surface_4proj/Figures_ROI" @@ -244,7 +250,7 @@ # divide by number of files stc /= len(stc_files) ''' - + # the above only seems to work for surface-based stcs, # so we are retaining the code below for now to handle vol-based stcs @@ -307,7 +313,7 @@ # Extract ROI time courses from source estimates window_size = 200 # sliding window size (ms) for calculating z-scores -window = window_size / 5 # 10 = 50ms, 20 = 100ms, 40 = 200ms +window = int(window_size / 5) # e.g. 
10 = 50ms, 20 = 100ms, 40 = 200ms (figures_ROI_dir / "all_ROIs").mkdir(parents=True, exist_ok=True) (figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms").mkdir(parents=True, exist_ok=True) @@ -324,6 +330,22 @@ 'ctx_lh_S_temporal_sup','ctx_rh_S_temporal_sup'] # can have multiple labels in this list #roi_idx = label_names.index(rois[0]) + # use the HCP-MMP parcellation + # create the volumetric atlas first: + # https://gist.github.com/larsoner/8e664205cd8285ca7c46211403ad12ce + ''' + # for some reason the label names are still not found, + # even though they come from the correct lut + fname_aseg = subjects_dir / subject / 'mri' / 'HCPMMP1_combined+aseg.mgz' # HCPMMP1 = cortical; aseg = subcortical + fname_lut = subjects_dir / subject / 'mri' / 'HCPMMP1_combinedColorLUT.txt' + lut = mne.read_freesurfer_lut(fname_lut) + label_names = mne.get_volume_labels_from_aseg(fname_aseg, atlas_ids=lut[0]) + rois = ['Anterior_Cingulate_and_Medial_Prefrontal_Cortex-lh', 'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-rh', + 'DorsoLateral_Prefrontal_Cortex-lh', 'DorsoLateral_Prefrontal_Cortex-rh', + 'Posterior_Cingulate_Cortex-lh', 'Posterior_Cingulate_Cortex-rh', + 'Temporo-Parieto-Occipital_Junction-lh', 'Temporo-Parieto-Occipital_Junction-rh'] + ''' + for label_name in rois: (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True) (figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True) @@ -372,6 +394,7 @@ # Plot individual-subjects ROI time series for sub in use_subjects: fig, axes = plt.subplots(1, layout="constrained") + fig_z, axes_z = plt.subplots(1, layout="constrained") for cond in conds_ROI: stc_file = op.join(source_results_dir, f'sub-{sub}_task-conversation_proc_{cond}{src_suffix}') stc = mne.read_source_estimate(stc_file) @@ -380,11 +403,21 @@ ) axes.plot(1e3 * stc.times, label_ts[0][0], label=cond) + # calculate z-score at each time point + #mu = np.mean(label_ts[0][0]) + #sigma = np.std(label_ts[0][0]) + #zscores = (label_ts - mu) / sigma + #axes_z.plot(1e3 * stc.times, zscores[0][0], label=cond) + axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() + #axes_z.axvline(linestyle='-', color='k') # add verticle line at time 0 + #axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)") + #axes_z.legend() fig.savefig(figures_ROI_dir / label_name / f"sub-{sub}.png") + #fig_z.savefig(figures_ROI_zscores_dir / label_name / f"sub-{sub}.png") plt.close('all') elif src_type == 'surface': @@ -408,6 +441,26 @@ # or read a single label (e.g. 
V1, BA44, etc) #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label')) + + # use the HCP-MMP parcellation + # https://balsa.wustl.edu/WN56 + #mne.datasets.fetch_hcp_mmp_parcellation(subjects_dir=subjects_dir) + # can use the fine-grained (~360 labels) "HCPMMP1" or coarse (~44 labels) "HCPMMP1_combined" atlas + labels_parc = mne.read_labels_from_annot( + 'fsaverage', 'HCPMMP1_combined', subjects_dir=subjects_dir) + labels = [labels_parc[2], labels_parc[3], labels_parc[8], labels_parc[9], + labels_parc[30], labels_parc[31], labels_parc[42], labels_parc[43]] + #figures_ROI_dir = figures_ROI_dir.parent / (figures_ROI_dir.name + "_HCPMMP") + ''' + # to check the locations of ROIs + brain = mne.viz.Brain('fsaverage', 'lh', 'inflated', subjects_dir=subjects_dir, + cortex='low_contrast', background='white', size=(800, 600)) + #brain.add_annotation('HCPMMP1') # this adds "border lines" showing the parcellation + #aud_label = [label for label in labels if label.name == 'L_A1_ROI-lh'][0] + #brain.add_label(aud_label, borders=False) # this "colours in" the region + for label in labels: + brain.add_label(label) + ''' for label in labels: label_name = label.name @@ -421,19 +474,35 @@ # Plot GA ROI time series fig, axes = plt.subplots(1, layout="constrained") + fig_z, axes_z = plt.subplots(1, layout="constrained") for cond in conds_ROI: label_ts = mne.extract_label_time_course( [GA_stcs[cond]], label, src, mode="auto" ) - axes.plot(1e3 * GA_stcs[cond].times, label_ts[0][0, :], label=cond) + label_ts = label_ts[0][0] + axes.plot(1e3 * GA_stcs[cond].times, label_ts, label=cond) + + # calculate z-score at each time point (using a sliding time window) + mu = np.mean(label_ts) # demean is based on the whole epoch + label_ts = label_ts - mu + + zscores = [0] * (len(label_ts) - window) # initialise the z-scores array + for t in range(0, len(label_ts) - window): + sigma = np.std(label_ts[t:t+window], mean=0) # sigma is calculated on the time slice only, but need to manually set the mean to 0 + zscores[t] = label_ts[t] / sigma + axes_z.plot(1e3 * GA_stcs[cond].times[:len(zscores)], zscores, label=cond) axes.axvline(linestyle='-', color='k') # add verticle line at time 0 axes.set(xlabel="Time (ms)", ylabel="Activation") axes.legend() + axes_z.axvline(linestyle='-', color='k') # add verticle line at time 0 + axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)") + axes_z.legend() fig.savefig(figures_ROI_dir / label_name / "GA.png") fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder - plt.close(fig) + fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png") + plt.close('all') # Plot individual-subjects ROI time series for sub in use_subjects: From 024824adba9f10c545b2b7773d1605c0403d1a3e Mon Sep 17 00:00:00 2001 From: Geet Vashista <148626773+geetvashista@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:52:47 +1200 Subject: [PATCH 19/20] Corrected for target frequency issue This update may not be needed, depending on the properties of the saved source files, but it does no damage to have the updated version. 
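A minimal sketch (not part of this patch) of how one might verify that assumption: load one of the saved ROI arrays and inspect its spectrum to see whether it is already confined to the target band. The file name below follows the naming convention used in the LCMV batch scripts and fs mirrors the value assumed in Connectivity.py's setup_dependencies(); both are illustrative assumptions here.

import numpy as np
from scipy.signal import welch

# hypothetical example file, named per the LCMV batch output convention
fname = "sub-01_task-participant_conversation_alpha_lcmv_beamformer_roi_time_courses.npy"
rois = np.load(fname)                  # shape: (n_ROIs, n_times)
fs = 250                               # sampling rate assumed in Connectivity.py
freqs, psd = welch(rois[0], fs=fs, nperseg=min(512, rois.shape[1]))
print(f"peak power at {freqs[np.argmax(psd)]:.1f} Hz")  # ~8-12 Hz if already alpha-band
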
---
 Connectivity.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Connectivity.py b/Connectivity.py
index e873e5a..1acb22e 100644
--- a/Connectivity.py
+++ b/Connectivity.py
@@ -27,13 +27,15 @@ def setup_dependencies(): # Change as desired
     return input_dir, output_dir, array_type, fs, target_fb, saving
 
 
-def prep_data(input_dir, array_type):
+def prep_data(input_dir, array_type, target_fb, fs):
     data = []
     folder = pathlib.Path(input_dir).glob('*')
     for file in folder:
         if (os.path.basename(file)).endswith('.npy'):
             if array_type in os.path.basename(file):
-                data.append(np.load(file))
+                array = np.load(file)
+                _, _, filtered_array = dyconnmap.analytic_signal(array, fb=target_fb, fs=fs)
+                data.append(filtered_array)
 
     return np.array(data)
 
@@ -79,7 +81,7 @@ def main(): # TODO: put in the elif statements
     # Prep
    Cal_wpli, Cal_graph_metrics = operations_to_perform()
    input_dir, output_dir, array_type, fs, target_fb, saving = setup_dependencies()
-    data = prep_data(input_dir, array_type)
+    data = prep_data(input_dir, array_type, target_fb, fs)
 
     # Core functions
     if Cal_wpli:
@@ -93,6 +95,7 @@ def main(): # TODO: put in the elif statements
         np.save(output_dir + '_All_Betweenness_' + array_type, Betweenness)
         np.save(output_dir + '_All_Eigenvector_' + array_type, Eigenvector)
         np.save(output_dir + '_All_Clustering_' + array_type, Clustering)
+        np.save(output_dir + '_Master_adj_matrix_' + array_type, adj_matrix)
 
 
 if __name__ == "__main__":

From b87503c9de646c5311716d4ff429c6fe143c93bf Mon Sep 17 00:00:00 2001
From: Judy D Zhu <38392787+JD-Zhu@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:59:54 +1000
Subject: [PATCH 20/20] FIX: use HCPMMP parcellation with volumetric source space

---
 traditional_source_analysis_and_ROIs.py | 49 +++++++++++++++----------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/traditional_source_analysis_and_ROIs.py b/traditional_source_analysis_and_ROIs.py
index 51d5d7b..935579f 100644
--- a/traditional_source_analysis_and_ROIs.py
+++ b/traditional_source_analysis_and_ROIs.py
@@ -321,7 +321,9 @@
     src = mne.read_source_spaces(src_fname)
 
     if src_type == 'vol':
-        # choose atlas for parcellation
+        # choose atlas
+
+        # can use aparc 2009 parcellation
         fname_aseg = subjects_dir / subject / 'mri' / 'aparc.a2009s+aseg.mgz' # aparc = cortical; aseg = subcortical
         label_names = mne.get_volume_labels_from_aseg(fname_aseg)
         rois = ['ctx_lh_G_cingul-Post-dorsal','ctx_lh_G_cingul-Post-ventral','ctx_rh_G_cingul-Post-dorsal','ctx_rh_G_cingul-Post-ventral',
                 'ctx_lh_S_temporal_sup','ctx_rh_S_temporal_sup'] # can have multiple labels in this list
         #roi_idx = label_names.index(rois[0])
 
-        # use the HCP-MMP parcellation
-        # create the volumetric atlas first:
+        # or use the HCP-MMP parcellation
+        # Note: need to create the volumetric atlas & custom lookup table (LUT) for this first - see link below:
         # https://gist.github.com/larsoner/8e664205cd8285ca7c46211403ad12ce
-        '''
-        # for some reason the label names are still not found,
-        # even though they come from the correct lut
         fname_aseg = subjects_dir / subject / 'mri' / 'HCPMMP1_combined+aseg.mgz' # HCPMMP1 = cortical; aseg = subcortical
         fname_lut = subjects_dir / subject / 'mri' / 'HCPMMP1_combinedColorLUT.txt'
         lut = mne.read_freesurfer_lut(fname_lut)
         label_names = mne.get_volume_labels_from_aseg(fname_aseg, atlas_ids=lut[0])
-        rois = ['Anterior_Cingulate_and_Medial_Prefrontal_Cortex-lh', 'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-rh',
-                'DorsoLateral_Prefrontal_Cortex-lh', 'DorsoLateral_Prefrontal_Cortex-rh',
-                'Posterior_Cingulate_Cortex-lh', 'Posterior_Cingulate_Cortex-rh',
-                'Temporo-Parieto-Occipital_Junction-lh', 'Temporo-Parieto-Occipital_Junction-rh']
-        '''
-
-        for label_name in rois:
-            (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
-            (figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True)
+        # need to supply a list of dicts here (rather than strings), as we are using a custom LUT
+        rois = [{'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-lh': 1001},
+                {'Anterior_Cingulate_and_Medial_Prefrontal_Cortex-rh': 2001},
+                {'DorsoLateral_Prefrontal_Cortex-lh': 1004},
+                {'DorsoLateral_Prefrontal_Cortex-rh': 2004},
+                {'Posterior_Cingulate_Cortex-lh': 1015},
+                {'Posterior_Cingulate_Cortex-rh': 2015},
+                {'Temporo-Parieto-Occipital_Junction-lh': 1021},
+                {'Temporo-Parieto-Occipital_Junction-rh': 2021}]
+
+        for label in rois:
+            # check if label is a dict, if so then we extract the label_name here
+            if isinstance(label, dict):
+                label_name = list(label.keys())[0]
+
+            #(figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
+            #(figures_ROI_zscores_dir / label_name).mkdir(parents=True, exist_ok=True)
 
             # Plot GA ROI time series
             fig, axes = plt.subplots(1, layout="constrained")
             fig_z, axes_z = plt.subplots(1, layout="constrained")
             for cond in conds_ROI:
                 label_ts = mne.extract_label_time_course(
-                    [GA_stcs[cond]], (fname_aseg, label_name), src, mode="auto"
+                    [GA_stcs[cond]], (fname_aseg, label), src, mode="auto"
                 )
                 label_ts = label_ts[0][0]
                 axes.plot(1e3 * GA_stcs[cond].times, label_ts, label=cond)
@@ -386,7 +393,7 @@
             axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)")
             axes_z.legend()
 
-            fig.savefig(figures_ROI_dir / label_name / "GA.png")
+            #fig.savefig(figures_ROI_dir / label_name / "GA.png")
             fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder
             fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png")
             plt.close('all')
@@ -437,12 +444,14 @@
                   labels_parc[18]+labels_parc[20], labels_parc[19]+labels_parc[21],
                   labels_parc[50], labels_parc[51], labels_parc[58], labels_parc[59],
                   labels_parc[66], labels_parc[67], labels_parc[84], labels_parc[85],
                   labels_parc[144], labels_parc[145]
                   ]
+        # can check if a particular label is available in the atlas
+        #print([label for label in labels_parc if "fusiform" in label.name])
 
         # or read a single label (e.g. V1, BA44, etc)
         #labels_parc = mne.read_label(op.join(subjects_dir, subject, 'label', 'lh.V1.label'))
 
-        # use the HCP-MMP parcellation
+        # or use the HCP-MMP parcellation
         # https://balsa.wustl.edu/WN56
         #mne.datasets.fetch_hcp_mmp_parcellation(subjects_dir=subjects_dir)
         # can use the fine-grained (~360 labels) "HCPMMP1" or coarse (~44 labels) "HCPMMP1_combined" atlas
@@ -470,7 +479,7 @@
         if label_name == 'G_cingul-Post-dorsal-rh + G_cingul-Post-ventral-rh':
             label_name = 'G_cingul-Post-rh'
 
-        (figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
+        #(figures_ROI_dir / label_name).mkdir(parents=True, exist_ok=True)
 
         # Plot GA ROI time series
         fig, axes = plt.subplots(1, layout="constrained")
@@ -499,7 +508,7 @@
         axes_z.set(xlabel="Time (ms)", ylabel="Activation (z-score)")
         axes_z.legend()
 
-        fig.savefig(figures_ROI_dir / label_name / "GA.png")
+        #fig.savefig(figures_ROI_dir / label_name / "GA.png")
         fig.savefig(figures_ROI_dir / "all_ROIs" / f"{label_name}_GA.png") # to save an additional copy of all GA plots into one folder
         fig_z.savefig(figures_ROI_zscores_dir / f"all_ROIs_{window_size}ms" / f"{label_name}_GA.png")
         plt.close('all')
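
A note on the sliding-window z-scores added to the ROI time-course plots above:
the label time course is first demeaned against the whole-epoch mean, and each
sample is then divided by the standard deviation of the window of samples that
starts at that point, with the mean pinned at zero (the mean= keyword of np.std
used in the diff requires a recent NumPy, 2.0 or later). A small self-contained
sketch of the same computation that avoids the NumPy-2.0-only keyword; the
window length and the input series below are made up for illustration:

import numpy as np

def sliding_zscore(label_ts, window):
    # Mirror of the loop in the diff: demean against the whole epoch, then divide each
    # sample by the RMS of the `window` samples that follow it (std with the mean fixed at 0).
    ts = np.asarray(label_ts, dtype=float)
    ts = ts - ts.mean()
    zscores = np.zeros(len(ts) - window)
    for t in range(len(ts) - window):
        sigma = np.sqrt(np.mean(ts[t:t + window] ** 2))  # equivalent to np.std(..., mean=0)
        zscores[t] = ts[t] / sigma
    return zscores

# Example with made-up numbers: a 1000-sample time course and a 100-sample window.
rng = np.random.default_rng(0)
z = sliding_zscore(rng.standard_normal(1000), window=100)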