Skip to content

Commit

Permalink
Merge pull request #64 from NeurodataWithoutBorders/zarr-json-to-nwb-lindi-json
Browse files Browse the repository at this point in the history

Update example urls to use new .nwb.lindi.json file extension
  • Loading branch information
magland authored May 9, 2024
2 parents b85b8cf + d3b0779 commit d94c1c5
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 21 deletions.
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ import pynwb
import lindi

# URL of the remote .nwb.lindi.json file
url = 'https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

# Load the h5py-like client
client = lindi.LindiH5pyFile.from_lindi_file(url)
Expand All @@ -112,7 +112,7 @@ import json
import lindi

# URL of the remote .nwb.lindi.json file
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

# Load the h5py-like client for the reference file system
# in read-write mode
Expand All @@ -122,9 +122,7 @@ client = lindi.LindiH5pyFile.from_reference_file_system(url, mode="r+")
client.attrs['new_attribute'] = 'new_value'

# Save the changes to a new .nwb.lindi.json file
rfs_new = client.to_reference_file_system()
with open('new.nwb.lindi.json', 'w') as f:
f.write(json.dumps(rfs_new, indent=2, sort_keys=True))
client.write_lindi_file('new.nwb.lindi.json')
```

### Add datasets to a .nwb.lindi.json file using a local staging area
Expand All @@ -133,7 +131,7 @@ with open('new.nwb.lindi.json', 'w') as f:
import lindi

# URL of the remote .nwb.lindi.json file
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

# Load the h5py-like client for the reference file system
# in read-write mode with a staging area
Expand Down
2 changes: 1 addition & 1 deletion examples/example2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import lindi

# Define the URL for a remote .nwb.lindi.json file
url = 'https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

# Load the h5py-like client from the reference file system
client = lindi.LindiH5pyFile.from_lindi_file(url)
Expand Down
2 changes: 1 addition & 1 deletion examples/example_edit_nwb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


# Define the URL for a remote .nwb.lindi.json file
url = 'https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

# Load the h5py-like client from the reference file system
client = lindi.LindiH5pyFile.from_lindi_file(url, mode='r+')
Expand Down
15 changes: 10 additions & 5 deletions lindi/LindiH5pyFile/LindiH5pyFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,11 +528,16 @@ def _recursive_copy(src_item: Union[h5py.Group, h5py.Dataset], dest: h5py.File,
for src_ref_key in src_ref_keys:
if src_ref_key.startswith(f'{src_item_name}/'):
dst_ref_key = f'{name}/{src_ref_key[len(src_item_name) + 1:]}'
# Even though it's not expected to be a problem, we
# do a deep copy here because a problem resulting
# from one rfs being modified affecting another
# would be very difficult to debug.
dst_rfs['refs'][dst_ref_key] = _deep_copy(src_rfs['refs'][src_ref_key])
# important to do a deep copy
val = _deep_copy(src_rfs['refs'][src_ref_key])
if isinstance(val, list) and len(val) > 0:
# if it's a list then we need to resolve any
# templates in the first element of the list.
# This is very important because the destination
# rfs will probably have different templates.
url0 = _apply_templates(val[0], src_rfs.get('templates', {}))
val[0] = url0
dst_rfs['refs'][dst_ref_key] = val
return

dst_item = dest.create_dataset(name, data=src_item[()], chunks=src_item.chunks)
Expand Down
20 changes: 12 additions & 8 deletions tests/test_remote_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_remote_data_2():
import pynwb

# Define the URL for a remote .nwb.lindi.json file
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

# Load the h5py-like client from the reference file system
client = lindi.LindiH5pyFile.from_reference_file_system(url)
Expand All @@ -50,7 +50,7 @@ def test_remote_data_2():
def test_remote_data_rfs_copy():
# Test that we can copy datasets and groups from one reference file system to another
# and the data itself is not copied, only the references.
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json'
url = 'https://lindi.neurosift.org/dandi/dandisets/000939/assets/56d875d6-a705-48d3-944c-53394a389c85/nwb.lindi.json'

client = lindi.LindiH5pyFile.from_reference_file_system(url)

Expand All @@ -62,11 +62,11 @@ def test_remote_data_rfs_copy():
# This first dataset is a 2D array with chunks
ds = client['processing/behavior/Position/position/data']
assert isinstance(ds, lindi.LindiH5pyDataset)
assert ds.shape == (494315, 2)
assert ds.shape == (360867, 2)

client.copy('processing/behavior/Position/position/data', client2, 'copied_data1')
aa = rfs2['refs']['copied_data1/.zarray']
assert isinstance(aa, str)
assert isinstance(aa, str) or isinstance(aa, dict)
assert 'copied_data1/0.0' in rfs2['refs']
bb = rfs2['refs']['copied_data1/0.0']
assert isinstance(bb, list) # make sure it is a reference, not the actual data
Expand All @@ -77,13 +77,17 @@ def test_remote_data_rfs_copy():

# This next dataset has an _EXTERNAL_ARRAY_LINK which means it has a pointer
# to a dataset in a remote h5py
ds = client['processing/ecephys/LFP/LFP/data']
# https://neurosift.app/?p=/nwb&dandisetId=000409&dandisetVersion=draft&url=https://api.dandiarchive.org/api/assets/ab3998c2-3540-4bda-8b03-3f3795fa602d/download/
url_b = 'https://lindi.neurosift.org/dandi/dandisets/000409/assets/ab3998c2-3540-4bda-8b03-3f3795fa602d/nwb.lindi.json'
client_b = lindi.LindiH5pyFile.from_reference_file_system(url_b)

ds = client_b['acquisition/ElectricalSeriesAp/data']
assert isinstance(ds, lindi.LindiH5pyDataset)
assert ds.shape == (17647830, 64)
assert ds.shape == (109281892, 384)

client.copy('processing/ecephys/LFP/LFP/data', client2, 'copied_data2')
client_b.copy('acquisition/ElectricalSeriesAp/data', client2, 'copied_data2')
aa = rfs2['refs']['copied_data2/.zarray']
assert isinstance(aa, str)
assert isinstance(aa, str) or isinstance(aa, dict)
assert 'copied_data2/0.0' not in rfs2['refs'] # make sure the chunks were not copied

ds2 = client2['copied_data2']
Expand Down

0 comments on commit d94c1c5

Please sign in to comment.