Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Primarily fixing suite2p loader for missing ROI detection or trace extraction - pulling from staging for other minor updates #109

Merged
merged 12 commits into from
May 22, 2024
43 changes: 31 additions & 12 deletions element_interface/dandi.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import subprocess

from dandi.download import download
from dandi.upload import upload


Expand All @@ -13,6 +12,8 @@ def upload_to_dandi(
api_key: str = None,
sync: bool = False,
existing: str = "refresh",
validation: str = "required",
shell=True, # without this param, subprocess interprets first arg as file/dir
):
"""Upload NWB files to DANDI Archive

Expand All @@ -27,6 +28,7 @@ def upload_to_dandi(
sync (str, optional): If True, delete all files in archive that are not present
in the local directory.
existing (str, optional): see full description from `dandi upload --help`
validation (str, optional): [require|skip|ignore] see full description from `dandi upload --help`
"""

working_directory = working_directory or os.path.curdir
Expand All @@ -38,29 +40,46 @@ def upload_to_dandi(
working_directory, str(dandiset_id)
) # enforce str

dandiset_url = f"https://gui-staging.dandiarchive.org/#/dandiset/{dandiset_id}" if staging else f"https://dandiarchive.org/dandiset/{dandiset_id}/draft"

subprocess.run(
["dandi", "download", "--download", "dandiset.yaml", "-o", working_directory, dandiset_url],
shell=True,
dandiset_url = (
f"https://gui-staging.dandiarchive.org/#/dandiset/{dandiset_id}"
if staging
else f"https://dandiarchive.org/dandiset/{dandiset_id}/draft"
)

subprocess.run(
["dandi", "organize", "-d", dandiset_directory, data_directory, "-f", "dry"],
shell=True, # without this param, subprocess interprets first arg as file/dir
[
"dandi",
"download",
"--download",
"dandiset.yaml",
"-o",
working_directory,
dandiset_url,
],
shell=shell,
)

subprocess.run(
["dandi", "organize", "-d", dandiset_directory, data_directory], shell=True
[
"dandi",
"organize",
"-d",
dandiset_directory,
data_directory,
"--required-field",
"subject_id",
"--required-field",
"session_id",
],
shell=shell,
)

subprocess.run(
["dandi", "validate", dandiset_directory], shell=True
)
subprocess.run(["dandi", "validate", dandiset_directory], shell=shell)

upload(
paths=[dandiset_directory],
dandi_instance="dandi-staging" if staging else "dandi",
existing=existing,
sync=sync,
validation=validation,
)
172 changes: 120 additions & 52 deletions element_interface/prairie_view_loader.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,114 @@
import pathlib
from pathlib import Path
import xml.etree.ElementTree as ET
from datetime import datetime

import numpy as np


def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
"""Extract metadata for scans generated by Prairie View acquisition software.
class PrairieViewMeta:

The Prairie View software generates one `.ome.tif` imaging file per frame
acquired. The metadata for all frames is contained in one .xml file. This
function locates the .xml file and generates a dictionary necessary to
populate the DataJoint `ScanInfo` and `Field` tables. Prairie View works
with resonance scanners with a single field. Prairie View does not support
bidirectional x and y scanning. ROI information is not contained in the
`.xml` file. All images generated using Prairie View have square dimensions(e.g. 512x512).
def __init__(self, prairieview_dir: str):
"""Initialize PrairieViewMeta loader class

Args:
ome_tif_filepath: An absolute path to the .ome.tif image file.
Args:
prairieview_dir (str): string, absolute file path to directory containing PrairieView dataset
"""
# ---- Search and verify CaImAn output file exists ----
# May return multiple xml files. Only need one that contains scan metadata.
self.prairieview_dir = Path(prairieview_dir)

Raises:
FileNotFoundError: No .xml file containing information about the acquired scan
was found at path in parent directory at `ome_tif_filepath`.
for file in self.prairieview_dir.glob("*.xml"):
xml_tree = ET.parse(file)
xml_root = xml_tree.getroot()
if xml_root.find(".//Sequence"):
self.xml_file = file
self._xml_root = xml_root
break
else:
raise FileNotFoundError(
f"No PrarieView metadata .xml file found at {prairieview_dir}"
)

Returns:
metainfo: A dict mapping keys to corresponding metadata values fetched from the
.xml file.
"""
self._meta = None

# May return multiple xml files. Only need one that contains scan metadata.
xml_files_list = pathlib.Path(ome_tif_filepath).parent.glob("*.xml")
@property
def meta(self):
    """Return the scan metadata, extracting it on first access.

    When ``self._meta`` is not None it is returned as is. Otherwise the
    metadata is produced by ``_extract_prairieview_metadata`` from
    ``self.xml_file``, stored in ``self._meta`` and returned.
    """
    cached = self._meta
    if cached is not None:
        return cached
    # First access: extract from the XML file and keep the result for reuse.
    self._meta = _extract_prairieview_metadata(self.xml_file)
    return self._meta

for file in xml_files_list:
xml_tree = ET.parse(file)
xml_file = xml_tree.getroot()
if xml_file.find(".//Sequence"):
break
else:
raise FileNotFoundError(
f"No PrarieView metadata .xml file found at {pathlib.Path(ome_tif_filepath).parent}"
)
def get_prairieview_files(self, plane_idx=None, channel=None):
    """Return the image filenames recorded for one plane and one channel.

    Args:
        plane_idx (int, optional): Plane index, one of
            ``self.meta['plane_indices']``. May be omitted only when
            ``self.meta['num_planes']`` is not greater than 1, in which case
            the first listed plane index is used.
        channel (int, optional): Channel, one of ``self.meta['channels']``.
            May be omitted only when ``self.meta['num_channels']`` is not
            greater than 1, in which case the first listed channel is used.

    Returns:
        list: The ``filename`` attribute of every ``File`` element in the
            metadata XML whose parent ``Frame`` has the requested ``index``
            and whose ``channel`` attribute matches.

    Raises:
        ValueError: An argument was omitted although the scan has more than
            one plane (or channel) to choose from.
        AssertionError: An explicitly given ``plane_idx`` or ``channel`` is
            not among the values listed in the metadata.
    """
    meta = self.meta

    if plane_idx is None:
        if meta['num_planes'] > 1:
            raise ValueError(f"Please specify 'plane_idx' - Plane indices: {meta['plane_indices']}")
        plane_idx = meta['plane_indices'][0]
    elif plane_idx not in meta['plane_indices']:
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(f"Invalid 'plane_idx' - Plane indices: {meta['plane_indices']}")

    if channel is None:
        if meta['num_channels'] > 1:
            raise ValueError(f"Please specify 'channel' - Channels: {meta['channels']}")
        # Default to the only channel. (Previously this value was assigned to
        # `plane_idx`, leaving `channel` as None and matching no files.)
        channel = meta['channels'][0]
    elif channel not in meta['channels']:
        raise AssertionError(f"Invalid 'channel' - Channels: {meta['channels']}")

    frames = self._xml_root.findall(
        f".//Sequence/Frame/[@index='{plane_idx}']/File/[@channel='{channel}']"
    )
    return [f.attrib['filename'] for f in frames]


def _extract_prairieview_metadata(xml_filepath: str):
xml_filepath = Path(xml_filepath)
if not xml_filepath.exists():
raise FileNotFoundError(f"{xml_filepath} does not exist")
xml_tree = ET.parse(xml_filepath)
xml_root = xml_tree.getroot()

bidirectional_scan = False # Does not support bidirectional
roi = 0
n_fields = 1 # Always contains 1 field
recording_start_time = xml_file.find(".//Sequence/[@cycle='1']").attrib.get("time")
recording_start_time = xml_root.find(".//Sequence/[@cycle='1']").attrib.get("time")

# Get all channels and find unique values
channel_list = [
int(channel.attrib.get("channel"))
for channel in xml_file.iterfind(".//Sequence/Frame/File/[@channel]")
for channel in xml_root.iterfind(".//Sequence/Frame/File/[@channel]")
]
n_channels = len(set(channel_list))
n_frames = len(xml_file.findall(".//Sequence/Frame"))
channels = set(channel_list)
n_channels = len(channels)
n_frames = len(xml_root.findall(".//Sequence/Frame"))
framerate = 1 / float(
xml_file.findall('.//PVStateValue/[@key="framePeriod"]')[0].attrib.get("value")
xml_root.findall('.//PVStateValue/[@key="framePeriod"]')[0].attrib.get("value")
) # rate = 1/framePeriod

usec_per_line = (
float(
xml_file.findall(".//PVStateValue/[@key='scanLinePeriod']")[0].attrib.get(
xml_root.findall(".//PVStateValue/[@key='scanLinePeriod']")[0].attrib.get(
"value"
)
)
* 1e6
) # Convert from seconds to microseconds

scan_datetime = datetime.strptime(
xml_file.attrib.get("date"), "%m/%d/%Y %I:%M:%S %p"
xml_root.attrib.get("date"), "%m/%d/%Y %I:%M:%S %p"
)

total_scan_duration = float(
xml_file.findall(".//Sequence/Frame")[-1].attrib.get("relativeTime")
xml_root.findall(".//Sequence/Frame")[-1].attrib.get("relativeTime")
)

pixel_height = int(
xml_file.findall(".//PVStateValue/[@key='pixelsPerLine']")[0].attrib.get(
xml_root.findall(".//PVStateValue/[@key='pixelsPerLine']")[0].attrib.get(
"value"
)
)
# All PrairieView-acquired images have square dimensions (512 x 512; 1024 x 1024)
pixel_width = pixel_height

um_per_pixel = float(
xml_file.find(
xml_root.find(
".//PVStateValue/[@key='micronsPerPixel']/IndexedValue/[@index='XAxis']"
).attrib.get("value")
)
Expand All @@ -92,43 +117,45 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:

# x and y coordinate values for the center of the field
x_field = float(
xml_file.find(
xml_root.find(
".//PVStateValue/[@key='currentScanCenter']/IndexedValue/[@index='XAxis']"
).attrib.get("value")
)
y_field = float(
xml_file.find(
xml_root.find(
".//PVStateValue/[@key='currentScanCenter']/IndexedValue/[@index='YAxis']"
).attrib.get("value")
)

if (
xml_file.find(
xml_root.find(
".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']"
)
is None
):
z_fields = np.float64(
xml_file.find(
xml_root.find(
".//PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue"
).attrib.get("value")
)
n_depths = 1
plane_indices = {0}
assert z_fields.size == n_depths
bidirection_z = False

else:
bidirection_z = (
xml_file.find(".//Sequence").attrib.get("bidirectionalZ") == "True"
xml_root.find(".//Sequence").attrib.get("bidirectionalZ") == "True"
)

# One "Frame" per depth in the .xml file. Gets number of frames in first sequence
planes = [
int(plane.attrib.get("index"))
for plane in xml_file.findall(".//Sequence/[@cycle='1']/Frame")
for plane in xml_root.findall(".//Sequence/[@cycle='1']/Frame")
]
n_depths = len(set(planes))
plane_indices = set(planes)
n_depths = len(plane_indices)

z_controllers = xml_file.findall(
z_controllers = xml_root.findall(
".//Sequence/[@cycle='1']/Frame/[@index='1']/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue"
)

Expand All @@ -137,13 +164,13 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
# must change depths.
if len(z_controllers) > 1:
z_repeats = []
for controller in xml_file.findall(
for controller in xml_root.findall(
".//Sequence/[@cycle='1']/Frame/[@index='1']/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/"
):
z_repeats.append(
[
float(z.attrib.get("value"))
for z in xml_file.findall(
for z in xml_root.findall(
".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue/[@subindex='{0}']".format(
controller.attrib.get("subindex")
)
Expand All @@ -163,7 +190,7 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
else:
z_fields = [
z.attrib.get("value")
for z in xml_file.findall(
for z in xml_root.findall(
".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue/[@subindex='0']"
)
]
Expand Down Expand Up @@ -195,6 +222,47 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
fieldY=y_field,
fieldZ=z_fields,
recording_time=recording_start_time,
channels=list(channels),
plane_indices=list(plane_indices),
)

return metainfo


def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
    """Extract metadata for scans generated by Prairie View acquisition software.

    The Prairie View software generates one `.ome.tif` imaging file per frame
    acquired. The metadata for all frames is contained in one .xml file. This
    function locates the .xml file and generates a dictionary necessary to
    populate the DataJoint `ScanInfo` and `Field` tables. Prairie View works
    with resonance scanners with a single field. Prairie View does not support
    bidirectional x and y scanning. ROI information is not contained in the
    `.xml` file. All images generated using Prairie View have square
    dimensions (e.g. 512x512).

    Args:
        ome_tif_filepath: An absolute path to the .ome.tif image file.

    Raises:
        FileNotFoundError: No .xml file containing information about the
            acquired scan was found in the parent directory of
            `ome_tif_filepath`.

    Returns:
        metainfo: A dict mapping keys to corresponding metadata values fetched
            from the .xml file.
    """
    scan_dir = pathlib.Path(ome_tif_filepath).parent

    # The directory may hold several .xml files; only one with a populated
    # <Sequence> element carries the scan metadata.
    for xml_path in scan_dir.glob("*.xml"):
        sequence = ET.parse(xml_path).getroot().find(".//Sequence")
        # Explicit form of the former `if element:` test (deprecated in
        # ElementTree): the element must exist and have at least one child.
        if sequence is not None and len(sequence) > 0:
            break
    else:
        raise FileNotFoundError(
            f"No PrarieView metadata .xml file found at {scan_dir}"
        )

    return _extract_prairieview_metadata(xml_path)
14 changes: 5 additions & 9 deletions element_interface/suite2p_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,6 @@ def __init__(self, suite2p_plane_dir: str):
)
self.creation_time = datetime.fromtimestamp(ops_fp.stat().st_ctime)

iscell_fp = self.fpath / "iscell.npy"
if not iscell_fp.exists():
raise FileNotFoundError(
'No "iscell.npy" found. Invalid suite2p plane folder: {}'.format(
self.fpath
)
)
self.curation_time = datetime.fromtimestamp(iscell_fp.stat().st_ctime)

# -- Initialize attributes --
for s2p_type in _suite2p_ftypes:
setattr(self, "_{}".format(s2p_type), None)
Expand All @@ -160,6 +151,11 @@ def __init__(self, suite2p_plane_dir: str):

# -- load core files --

@property
def curation_time(self):
    """Deprecated alias of ``creation_time``.

    Issues a ``DeprecationWarning`` and returns ``self.creation_time``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-of-file import list.
    import warnings

    # Reported through the warnings machinery instead of print(), so callers
    # can filter, capture or escalate it; stacklevel=2 attributes the warning
    # to the code accessing the property.
    warnings.warn(
        "'curation_time' is deprecated, set to be the same as 'creation time', no longer reliable.",
        DeprecationWarning,
        stacklevel=2,
    )
    return self.creation_time

@property
def ops(self):
if self._ops is None:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
dandi
dandi>=0.56.0
numpy
Loading