Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix eager dtype conversion of value column #1353

Merged
merged 10 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ["auto_examples/index.rst", "_build", "Thumbs.db", ".DS_Store"]

nitpick_ignore_regex = [
# needs https://github.com/sphinx-doc/sphinx/issues/13178
("py:class", r".*pathlib\._local\.Path"),
]

# HTML options (e.g., theme)
html_show_sourcelink = False
html_copy_source = False
Expand Down
4 changes: 2 additions & 2 deletions examples/convert_eeg_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
# to the "eyes closed" task.
subject = 1
run = 2
eegbci.load_data(subject=subject, runs=run, update_path=True)
eegbci.load_data(subjects=subject, runs=run, update_path=True)

# %%
# Let's see whether the data has been downloaded using a quick visualization
Expand Down Expand Up @@ -94,7 +94,7 @@
# It prevents the data from being loaded and modified when converting to BIDS.

# Load the data from "2 minutes eyes closed rest"
edf_path = eegbci.load_data(subject=subject, runs=run)[0]
edf_path = eegbci.load_data(subjects=subject, runs=run)[0]
raw = mne.io.read_raw_edf(edf_path, preload=False)
raw.info["line_freq"] = 50 # specify power line frequency as required by BIDS

Expand Down
4 changes: 2 additions & 2 deletions examples/convert_group_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
run_map = dict(zip(runs, range(1, 4)))

for subject_id in subject_ids:
eegbci.load_data(subject=subject_id, runs=runs, update_path=True)
eegbci.load_data(subjects=subject_id, runs=runs, update_path=True)

# get path to MNE directory with the downloaded example data
mne_data_dir = mne.get_config("MNE_DATASETS_EEGBCI_PATH")
Expand Down Expand Up @@ -81,7 +81,7 @@
bids_list = list()
for subject_id in subject_ids:
for run in runs:
raw_fname = eegbci.load_data(subject=subject_id, runs=run)[0]
raw_fname = eegbci.load_data(subjects=subject_id, runs=run)[0]
raw = mne.io.read_raw_edf(raw_fname)
raw.info["line_freq"] = 50 # specify power line frequency
raw_list.append(raw)
Expand Down
38 changes: 24 additions & 14 deletions mne_bids/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,8 @@
logger.info(f"Reading events from {events_fname}.")
events_dict = _from_tsv(events_fname)

# drop events where onset is n/a
# drop events where onset is n/a; we can't annotate them and thus don't need entries
# for them in event_id either
events_dict = _drop(events_dict, "n/a", "onset")

# Get event descriptions. Use `trial_type` column if available.
Expand All @@ -547,9 +548,11 @@
# If we lack proper event descriptions, perhaps we have at least an event value?
elif "value" in events_dict:
trial_type_col_name = "value"
# Worst case: all events will become `n/a` and all values will be `1`
# Worst case: all events become `n/a` and all values become `1`
else:
trial_type_col_name = None
descrs = np.full(len(events_dict["onset"]), "n/a")
event_id = {descrs[0]: 1}

if trial_type_col_name is not None:
# Drop events unrelated to a trial type
Expand All @@ -569,26 +572,33 @@
"Creating hierarchical event names."
)
for ii in idx:
value = values[ii]
value = "na" if value == "n/a" else value
# strip `/` from `n/a` before incorporating into trial type name
value = values[ii] if values[ii] != "n/a" else "na"
new_name = f"{trial_type}/{value}"
logger.info(f" Renaming event: {trial_type} -> {new_name}")
trial_types[ii] = new_name
# drop rows where `value` is `n/a` & convert remaining `value` to int (only
# when making our `event_id` dict; `value = n/a` doesn't prevent annotation)
# make a copy with rows dropped where `value` is `n/a` (only for making our
# `event_id` dict; `value = n/a` doesn't prevent making annotations).
culled = _drop(events_dict, "n/a", "value")
event_id = dict(
zip(culled[trial_type_col_name], np.asarray(culled["value"], dtype=int))
)
# Often (but not always!) the `value` column was written by MNE-BIDS and
# represents integer event IDs (as would be found in MNE-Python events
# arrays / event_id dicts). But in case not, let's be defensive:
culled_vals = culled["value"]
try:
culled_vals = np.asarray(culled_vals, dtype=float)
except ValueError: # contained strings or complex numbers
pass
Copy link
Contributor

@scott-huberty scott-huberty Dec 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pass
# If the values are not numeric, just use the row index as the event ID
culled_vals = np.arange(len(culled_vals))

else:
try:
culled_vals = culled_vals.astype(int)
except ValueError: # numeric, but has some non-integer values
pass

Check warning on line 595 in mne_bids/read.py

View check run for this annotation

Codecov / codecov/patch

mne_bids/read.py#L594-L595

Added lines #L594 - L595 were not covered by tests
event_id = dict(zip(culled[trial_type_col_name], culled_vals))
else:
event_id = dict(zip(trial_types, np.arange(len(trial_types))))
descrs = np.asarray(trial_types, dtype=str)

# Worst case: all events become `n/a` and all values become `1`
else:
descrs = np.full(len(events_dict["onset"]), "n/a")
event_id = {descrs[0]: 1}
# Deal with "n/a" strings before converting to float
# convert onsets & durations to floats ("n/a" onsets were already dropped)
ons = np.asarray(events_dict["onset"], dtype=float)
durs = np.array(
[0 if du == "n/a" else du for du in events_dict["duration"]], dtype=float
Expand Down
10 changes: 10 additions & 0 deletions mne_bids/tests/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,16 @@ def test_handle_events_reading(tmp_path):
ev_arr, ev_dict = mne.events_from_annotations(raw)
assert event_id == ev_dict == {"n/a": 1} # fallback behavior

# Test with only a (non-numeric) `value` column
events = {"onset": [10, 15], "duration": [1, 1], "value": ["A", "B"]}
events_fname = tmp_path / "bids6" / "sub-01_task-test_events.tsv"
events_fname.parent.mkdir()
_to_tsv(events, events_fname)
raw, event_id = _handle_events_reading(events_fname, raw)
# don't pass event_id to mne.events_from_annotations; its values are strings
assert event_id == {"A": "A", "B": "B"}
assert raw.annotations.description.tolist() == ["A", "B"]


@pytest.mark.filterwarnings(warning_str["channel_unit_changed"])
@testing.requires_testing_data
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[build-system]
build-backend = "hatchling.build"
requires = ["hatch-vcs", "hatchling"]
requires = ["hatch-vcs", "hatchling==1.26.3"]

[project]
authors = [{name = "The MNE-BIDS developers"}]
Expand Down
Loading