Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve html representation of datasets #1100

Merged
merged 33 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
649f141
improve dev repr
h-mayorquin Apr 19, 2024
475cda9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 19, 2024
7f3c94e
address ruff
h-mayorquin Apr 19, 2024
5128d53
add changelog
h-mayorquin Apr 23, 2024
21ae3cf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 23, 2024
4eb2635
add table representation for hdf5 info
h-mayorquin Apr 26, 2024
08292c6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 26, 2024
59083c2
add test
h-mayorquin Apr 29, 2024
06a064e
Merge remote-tracking branch 'refs/remotes/origin/improve_html_repr_o…
h-mayorquin Apr 29, 2024
7ce5b3f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 29, 2024
fc14d71
ruff
h-mayorquin Apr 29, 2024
a2931e2
Merge remote-tracking branch 'refs/remotes/origin/improve_html_repr_o…
h-mayorquin Apr 29, 2024
96456a4
Merge branch 'dev' into improve_html_repr_of_data
h-mayorquin Apr 29, 2024
133e28d
handle division by zero
h-mayorquin Apr 30, 2024
ae21b61
add zarr, array, hdf5 repr tests
stephprince May 1, 2024
28449a3
generalize array html table description
stephprince May 1, 2024
6e6a84c
remove zarr tests
stephprince May 1, 2024
89fd978
fix nbytes
h-mayorquin May 2, 2024
a0e1736
fix use of nbytes ahead
h-mayorquin May 2, 2024
538ba98
added TODO
h-mayorquin May 2, 2024
e0ad0a1
Merge branch 'dev' into improve_html_repr_of_data
h-mayorquin May 2, 2024
9cbcf64
add html test array data type
stephprince May 2, 2024
5b235e0
Merge branch 'dev' into improve_html_repr_of_data
rly Oct 2, 2024
3813723
Merge branch 'dev' into improve_html_repr_of_data
rly Oct 2, 2024
0a929b3
Merge branch 'dev' into improve_html_repr_of_data
stephprince Oct 24, 2024
2c967dd
add array html repr utils
stephprince Oct 30, 2024
6d007d1
add generate_dataset_html method to io objects
stephprince Oct 30, 2024
3552923
add tests for array html repr
stephprince Oct 30, 2024
4bb38df
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 30, 2024
f1afe81
fix import style
stephprince Oct 30, 2024
495e626
update CHANGELOG
stephprince Oct 30, 2024
03c9f8f
Merge branch 'dev' into improve_html_repr_of_data
rly Oct 31, 2024
01f8f8f
add test for base hdmfio
stephprince Nov 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Enhancements
- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188)
- Improved html representation of data in `Containers` @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100)

### Bug fixes
- Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199)
Expand Down
29 changes: 28 additions & 1 deletion src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
from ...container import Container
from ...data_utils import AbstractDataChunkIterator
from ...spec import RefSpec, DtypeSpec, NamespaceCatalog
from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset
from ...utils import (docval, getargs, popargs, get_data_shape, get_docval, StrDataset,
get_basic_array_info, generate_array_html_repr)
from ..utils import NamespaceToBuilderHelper, WriteStatusTracker

ROOT_NAME = 'root'
Expand Down Expand Up @@ -1603,3 +1604,29 @@
data = H5DataIO(data)
"""
return H5DataIO.__init__(**kwargs)

@staticmethod
def generate_dataset_html(dataset):
    """Generates an html representation for a dataset for the HDF5IO class

    The generic array summary produced by ``get_basic_array_info`` is extended with
    HDF5 storage details (chunk shape, compression filter, compression options and
    compression ratio) and rendered through ``generate_array_html_repr``.

    :param dataset: dataset to describe. This method reads ``id.get_storage_size()``,
                    ``chunks``, ``compression`` and ``compression_opts`` from it
                    (presumably an ``h5py.Dataset`` -- confirm against callers).
    :return: HTML string describing the dataset
    """
    # get info from hdf5 dataset
    compressed_size = dataset.id.get_storage_size()
    if hasattr(dataset, "nbytes"):  # TODO: Remove this after h5py minimal version is larger than 3.0
        uncompressed_size = dataset.nbytes
    else:
        uncompressed_size = dataset.size * dataset.dtype.itemsize

    # A storage size of 0 would raise ZeroDivisionError, so report the ratio as "undefined" instead
    compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined"

    hdf5_info_dict = {"Chunk shape": dataset.chunks,
                      "Compression": dataset.compression,
                      "Compression opts": dataset.compression_opts,
                      "Compression ratio": compression_ratio}

    # get basic array info; the HDF5-specific rows are appended after the generic ones
    array_info_dict = get_basic_array_info(dataset)
    array_info_dict.update(hdf5_info_dict)

    # generate html repr
    repr_html = generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset")

    return repr_html
10 changes: 9 additions & 1 deletion src/hdmf/backends/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ..build import BuildManager, GroupBuilder
from ..container import Container, HERDManager
from .errors import UnsupportedOperation
from ..utils import docval, getargs, popargs
from ..utils import docval, getargs, popargs, get_basic_array_info, generate_array_html_repr
from warnings import warn


Expand Down Expand Up @@ -188,6 +188,14 @@ def close(self):
''' Close this HDMFIO object to further reading/writing'''
pass

@staticmethod
def generate_dataset_html(dataset):
    """Generates an html representation for a dataset

    Backend-agnostic default: only the generic array summary from
    ``get_basic_array_info`` is rendered, and no dataset-type label is passed to
    ``generate_array_html_repr``.

    :param dataset: array-like object to describe
    :return: HTML string describing the dataset
    """
    array_info_dict = get_basic_array_info(dataset)
    repr_html = generate_array_html_repr(array_info_dict, dataset)

    return repr_html

def __enter__(self):
return self

Expand Down
32 changes: 22 additions & 10 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
import pandas as pd

from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator
from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
from .utils import (docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict,
get_basic_array_info, generate_array_html_repr)

from .term_set import TermSet, TermSetWrapper

Expand Down Expand Up @@ -707,8 +708,6 @@
for index, item in enumerate(fields):
access_code += f'[{index}]'
html_repr += self._generate_field_html(index, item, level, access_code)
elif isinstance(fields, np.ndarray):
html_repr += self._generate_array_html(fields, level)
else:
pass

Expand All @@ -724,18 +723,23 @@
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

if hasattr(value, "generate_html_repr"):
html_content = value.generate_html_repr(level + 1, access_code)
is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \
(hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array

if is_array_data:
html_content = self._generate_array_html(value, level + 1)
elif hasattr(value, "generate_html_repr"):
html_content = value.generate_html_repr(level + 1, access_code)

Check warning on line 732 in src/hdmf/container.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/container.py#L732

Added line #L732 was not covered by tests
elif hasattr(value, '__repr_html__'):
stephprince marked this conversation as resolved.
Show resolved Hide resolved
html_content = value.__repr_html__()

elif hasattr(value, "fields"):
elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order
html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True)
elif isinstance(value, (list, dict, np.ndarray)):
html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False)
else:
html_content = f'<span class="field-key">{value}</span>'


html_repr = (
f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
f'class="container-fields field-key" title="{access_code}"><b>{key}</b></summary>'
Expand All @@ -745,10 +749,18 @@

return html_repr


def _generate_array_html(self, array, level):
    """Generates HTML for array data

    Datasets that belong to a backend are described by the IO object the Container was
    read with; everything else gets the generic in-memory summary.

    :param array: the array-like value to render
    :param level: nesting depth, used to indent the block by ``level * 20`` pixels
    :return: HTML ``<div>`` wrapping the array description
    """
    read_io = self.get_read_io()  # if the Container was read from file, get IO object

    # An in-memory NumPy array can sit on a Container that has a read IO (e.g. data replaced
    # after reading). Backend renderers may rely on backend-only attributes -- the HDF5 one
    # calls ``dataset.id.get_storage_size()`` -- so plain ndarrays always use the generic path.
    if read_io is not None and not isinstance(array, np.ndarray):
        repr_html = read_io.generate_dataset_html(array)
    else:
        array_info_dict = get_basic_array_info(array)
        repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array")

    return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

@staticmethod
def __smart_str(v, num_indent):
Expand Down
48 changes: 48 additions & 0 deletions src/hdmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,54 @@

return False

def get_basic_array_info(array):
    """Collect the dtype, shape and human-readable size of an array-like object

    :param array: object exposing ``dtype`` and ``shape``, plus either ``nbytes`` or
                  both ``size`` and ``dtype.itemsize``
    :return: dict with the keys "Data type", "Shape" and "Array size" (in that order)
    """
    def convert_bytes_to_str(bytes_size):
        """Format a byte count with a binary (1024-based) unit and two decimals"""
        suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
        i = 0
        # stop at the last suffix so very large sizes are still reported (in PiB)
        while bytes_size >= 1024 and i < len(suffixes)-1:
            bytes_size /= 1024.
            i += 1
        return f"{bytes_size:.2f} {suffixes[i]}"

    if hasattr(array, "nbytes"):  # TODO: Remove this after h5py minimal version is larger than 3.0
        array_size_in_bytes = array.nbytes
    else:
        array_size_in_bytes = array.size * array.dtype.itemsize
    array_size_repr = convert_bytes_to_str(array_size_in_bytes)
    basic_array_info_dict = {"Data type": array.dtype, "Shape": array.shape, "Array size": array_size_repr}

    return basic_array_info_dict

def generate_array_html_repr(backend_info_dict, array, dataset_type=None):
    """Render an HTML description of an array: optional label, info table, and small-array values

    :param backend_info_dict: mapping of row label -> value shown in the info table
    :param array: array-like object; must expose ``nbytes`` or ``size`` and ``dtype.itemsize``
    :param dataset_type: optional label placed above the table (inserted as-is)
    :return: HTML string
    """
    # Local import: the name is only needed here and keeps the block self-contained
    from html import escape

    def html_table(item_dicts) -> str:
        """
        Generates an html table from a dictionary
        """
        # Keys/values are escaped because their text can contain markup characters,
        # e.g. the NumPy dtype string "<U5" would otherwise be parsed as a tag.
        rows = "".join(
            f"<tr>"
            f'<th style="text-align: left">{escape(str(k), quote=False)}</th>'
            f'<td style="text-align: left">{escape(str(v), quote=False)}</td>'
            f"</tr>"
            for k, v in item_dicts.items()
        )
        return f'<table class="data-info"><tbody>{rows}</tbody></table>'

    array_info_html = html_table(backend_info_dict)
    repr_html = dataset_type + "<br>" + array_info_html if dataset_type is not None else array_info_html

    if hasattr(array, "nbytes"):  # TODO: Remove this after h5py minimal version is larger than 3.0
        array_size = array.nbytes
    else:
        array_size = array.size * array.dtype.itemsize
    array_is_small = array_size < 1024 * 0.1  # 10 % of a kibibyte: only tiny arrays are shown inline
    if array_is_small:
        # Escape the values (string data may contain "<", ">" or "&") and keep one row per
        # line, since raw newlines collapse to spaces when rendered as HTML.
        array_text = escape(str(np.asarray(array)), quote=False).replace("\n", "<br>")
        repr_html += "<br>" + array_text

    return repr_html

class LabelledDict(dict):
"""A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.
Expand Down
94 changes: 94 additions & 0 deletions tests/unit/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from hdmf.utils import docval
from hdmf.common import DynamicTable, VectorData, DynamicTableRegion
from hdmf.backends.hdf5.h5tools import HDF5IO
from hdmf.backends.io import HDMFIO


class Subcontainer(Container):
Expand Down Expand Up @@ -423,6 +424,23 @@ def __init__(self, **kwargs):
self.data = kwargs['data']
self.str = kwargs['str']

class ContainerWithData(Container):
    # Minimal Container fixture for the HTML-repr tests: it exposes an array-like
    # ``data`` field and a plain ``str`` field, both optional (default None).

    __fields__ = (
        "data",
        "str"
    )

    @docval(
        {'name': "data", "doc": 'data', 'type': 'array_data', "default": None},
        {'name': "str", "doc": 'str', 'type': str, "default": None},
    )
    def __init__(self, **kwargs):
        # The container name is fixed to 'test name'; only the two fields vary per test
        super().__init__('test name')
        self.data = kwargs['data']
        self.str = kwargs['str']

def test_repr_html_(self):
child_obj1 = Container('test child 1')
obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello")
Expand Down Expand Up @@ -455,6 +473,82 @@ def test_repr_html_(self):
'class="field-value">hello</span></div></div>'
)

def test_repr_html_array(self):
    """A 4-element int64 NumPy array is rendered as an info table followed by its values."""
    obj = self.ContainerWithData(data=np.array([1, 2, 3, 4], dtype=np.int64), str="hello")
    # Only a fragment of the full repr is checked: label, table rows, then the inline values
    expected_html_table = (
        'class="container-fields">NumPy array<br><table class="data-info"><tbody><tr><th style="text-align: '
        'left">Data type</th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">Shape'
        '</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">Array size</th><td '
        'style="text-align: left">32.00 bytes</td></tr></tbody></table><br>[1 2 3 4]'
    )
    self.assertIn(expected_html_table, obj._repr_html_())

def test_repr_html_array_large_arrays_not_displayed(self):
    """A 200-element int64 array (1.56 KiB) gets the info table but no inline values."""
    obj = self.ContainerWithData(data=np.arange(200, dtype=np.int64), str="hello")
    # The fragment ends with the closing tags right after </table>, i.e. nothing follows the table
    expected_html_table = (
        'class="container-fields">NumPy array<br><table class="data-info"><tbody><tr><th style="text-align: '
        'left">Data type</th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">Shape'
        '</th><td style="text-align: left">(200,)</td></tr><tr><th style="text-align: left">Array size</th><td '
        'style="text-align: left">1.56 KiB</td></tr></tbody></table></div></details>'
    )
    self.assertIn(expected_html_table, obj._repr_html_())

def test_repr_html_hdf5_dataset(self):
    """An HDF5 dataset on a Container read with HDF5IO shows the HDF5-specific table rows."""
    path = 'array_data.h5'
    try:
        # The dataset is only valid while the file is open, so the repr is built inside the `with`
        with HDF5IO(path, mode='w') as io:
            dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64))
            obj = self.ContainerWithData(data=dataset, str="hello")
            obj.read_io = io

            expected_html_table = (
                'class="container-fields">HDF5 dataset<br><table class="data-info"><tbody><tr><th style="text-align: '
                'left">Data type</th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">'
                'Shape</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">Array size'
                '</th><td style="text-align: left">32.00 bytes</td></tr><tr><th style="text-align: left">Chunk shape'
                '</th><td style="text-align: left">None</td></tr><tr><th style="text-align: left">Compression</th><td '
                'style="text-align: left">None</td></tr><tr><th style="text-align: left">Compression opts</th><td '
                'style="text-align: left">None</td></tr><tr><th style="text-align: left">Compression ratio</th><td '
                'style="text-align: left">1.0</td></tr></tbody></table><br>[1 2 3 4]'
            )

            self.assertIn(expected_html_table, obj._repr_html_())
    finally:
        # Remove the scratch file even when the assertion fails, so later tests start clean
        if os.path.exists(path):
            os.remove(path)

def test_repr_html_hdmf_io(self):
    """A backend that does not override generate_dataset_html falls back to the HDMFIO default."""
    path = 'array_data.h5'
    try:
        # The dataset is only valid while the file is open, so the repr is built inside the `with`
        with HDF5IO(path, mode='w') as io:
            dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64))
            obj = self.ContainerWithData(data=dataset, str="hello")

            # Stub backend: every method is a no-op and generate_dataset_html is inherited from HDMFIO
            class OtherIO(HDMFIO):

                @staticmethod
                def can_read(path):
                    pass

                def read_builder(self):
                    pass

                def write_builder(self, **kwargs):
                    pass

                def open(self):
                    pass

                def close(self):
                    pass

            obj.read_io = OtherIO()

            # The default renderer adds no dataset-type label, so the table directly follows the div
            expected_html_table = (
                'class="container-fields"><table class="data-info"><tbody><tr><th style="text-align: '
                'left">Data type</th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">'
                'Shape</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">Array size'
                '</th><td style="text-align: left">32.00 bytes</td></tr></tbody></table><br>[1 2 3 4]'
            )

            self.assertIn(expected_html_table, obj._repr_html_())
    finally:
        # Remove the scratch file even when the assertion fails, so later tests start clean
        if os.path.exists(path):
            os.remove(path)

class TestData(TestCase):

Expand Down