Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve html representation of datasets #1100

Open
wants to merge 24 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
649f141
improve dev repr
h-mayorquin Apr 19, 2024
475cda9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 19, 2024
7f3c94e
address ruff
h-mayorquin Apr 19, 2024
5128d53
add changelog
h-mayorquin Apr 23, 2024
21ae3cf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 23, 2024
4eb2635
add table representation for hdf5 info
h-mayorquin Apr 26, 2024
08292c6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 26, 2024
59083c2
add test
h-mayorquin Apr 29, 2024
06a064e
Merge remote-tracking branch 'refs/remotes/origin/improve_html_repr_o…
h-mayorquin Apr 29, 2024
7ce5b3f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 29, 2024
fc14d71
ruff
h-mayorquin Apr 29, 2024
a2931e2
Merge remote-tracking branch 'refs/remotes/origin/improve_html_repr_o…
h-mayorquin Apr 29, 2024
96456a4
Merge branch 'dev' into improve_html_repr_of_data
h-mayorquin Apr 29, 2024
133e28d
handle division by zer
h-mayorquin Apr 30, 2024
ae21b61
add zarr, array, hdf5 repr tests
stephprince May 1, 2024
28449a3
generalize array html table description
stephprince May 1, 2024
6e6a84c
remove zarr tests
stephprince May 1, 2024
89fd978
fix nbytes
h-mayorquin May 2, 2024
a0e1736
fix use of nbytes ahead
h-mayorquin May 2, 2024
538ba98
added TODO
h-mayorquin May 2, 2024
e0ad0a1
Merge branch 'dev' into improve_html_repr_of_data
h-mayorquin May 2, 2024
9cbcf64
add html test array data type
stephprince May 2, 2024
5b235e0
Merge branch 'dev' into improve_html_repr_of_data
rly Oct 2, 2024
3813723
Merge branch 'dev' into improve_html_repr_of_data
rly Oct 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- Unwrap `TermSetWrapper` within the builder to support different backends more efficiently. @mavaylon1 [#1070](https://github.com/hdmf-dev/hdmf/pull/1070)
- Added docs page that lists limitations of support for the HDMF specification language. @rly [#1069](https://github.com/hdmf-dev/hdmf/pull/1069)
- Added warning when using `add_row` or `add_column` to add a ragged array to `DynamicTable` without an index parameter. @stephprince [#1066](https://github.com/hdmf-dev/hdmf/pull/1066)
- Improve html representation of data in `Containers` @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100)

## HDMF 3.12.2 (February 9, 2024)

Expand Down
86 changes: 78 additions & 8 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,9 @@ def _generate_html_repr(self, fields, level=0, access_code="", is_field=False):
for index, item in enumerate(fields):
access_code += f'[{index}]'
html_repr += self._generate_field_html(index, item, level, access_code)
elif isinstance(fields, np.ndarray):
elif isinstance(fields, (np.ndarray, h5py.Dataset)):
html_repr += self._generate_array_html(fields, level)
elif hasattr(fields, "store") and hasattr(fields, "shape"): # Duck typing for zarr array
html_repr += self._generate_array_html(fields, level)
else:
pass
Expand All @@ -728,18 +730,22 @@ def _generate_field_html(self, key, value, level, access_code):
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

if hasattr(value, "generate_html_repr"):
if isinstance(value, (np.ndarray, h5py.Dataset)):
html_content = self._generate_array_html(value, level + 1)
elif hasattr(value, "store") and hasattr(value, "shape"): # Duck typing for zarr array
html_content = self._generate_array_html(value, level + 1)
elif hasattr(value, "generate_html_repr"):
html_content = value.generate_html_repr(level + 1, access_code)

elif hasattr(value, '__repr_html__'):
stephprince marked this conversation as resolved.
Show resolved Hide resolved
html_content = value.__repr_html__()

elif hasattr(value, "fields"):
elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order
html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True)
elif isinstance(value, (list, dict, np.ndarray)):
html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False)
else:
html_content = f'<span class="field-key">{value}</span>'


html_repr = (
f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
f'class="container-fields field-key" title="{access_code}"><b>{key}</b></summary>'
Expand All @@ -749,10 +755,74 @@ def _generate_field_html(self, key, value, level, access_code):

return html_repr


def _generate_array_html(self, array, level):
"""Generates HTML for a NumPy array."""
str_ = str(array).replace("\n", "</br>")
return f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>'
"""Generates HTML for a NumPy array, h5py Dataset, or Zarr array."""

def convert_bytes_to_str(bytes_size):
suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
i = 0
while bytes_size >= 1024 and i < len(suffixes)-1:
bytes_size /= 1024.
i += 1
return f"{bytes_size:.2f} {suffixes[i]}"

# Generates an html report for the backend info, inspired on zarr info html representation
def html_table(item_dicts) -> str:
report = '<table class="data-info">'
report += "<tbody>"
for k, v in item_dicts.items():
report += (
f"<tr>"
f'<th style="text-align: left">{k}</th>'
f'<td style="text-align: left">{v}</td>'
f"</tr>"
)
report += "</tbody>"
report += "</table>"
return report

array_size_in_bytes = array.nbytes
array_size_repr = convert_bytes_to_str(array_size_in_bytes)
basic_array_info_dict = {"shape": array.shape, "dtype": array.dtype, "Array size": array_size_repr}

if isinstance(array, np.ndarray):
head = "NumPy Array"
backend_info_dict = basic_array_info_dict

if isinstance(array, h5py.Dataset):
hdf5_dataset = array
chunks = hdf5_dataset.chunks
compression = hdf5_dataset.compression
if hasattr(hdf5_dataset, "nbytes"): # TODO, remove if statement when and if h5py 3.0 is minimal
stephprince marked this conversation as resolved.
Show resolved Hide resolved
uncompressed_size = hdf5_dataset.nbytes
else:
uncompressed_size = hdf5_dataset.size * hdf5_dataset.dtype.itemsize
compression_opts = hdf5_dataset.compression_opts
compressed_size = hdf5_dataset.id.get_storage_size()
compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined"

head = "HDF5 Dataset"
hdf5_info_dict = {"chunks": chunks, "compression": compression, "compression_opts": compression_opts,
"compression_ratio": compression_ratio}
backend_info_dict = {**basic_array_info_dict, **hdf5_info_dict}

if hasattr(array, "store") and hasattr(array, "shape"): # Duck typing for zarr array
head = "Zarr Array"
zarr_info_dict = {k:v for k, v in array.info_items()}
backend_info_dict = zarr_info_dict

# Add <br> tags and concatenate the components
head_html = head
backend_info_html = html_table(backend_info_dict)
repr_html = head_html + "<br>" + backend_info_html

# Display data for small datasets
array_is_small = array_size_in_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array
if array_is_small or isinstance(array, np.ndarray):
repr_html += "<br>" + str(np.asarray(array))

return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

@staticmethod
def __smart_str(v, num_indent):
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_container.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from uuid import uuid4, UUID
import os
import h5py

from hdmf.container import AbstractContainer, Container, Data, HERDManager
from hdmf.common.resources import HERD
Expand Down Expand Up @@ -423,6 +424,23 @@ def __init__(self, **kwargs):
self.data = kwargs['data']
self.str = kwargs['str']

class ContainerWithData(Container):

__fields__ = (
"data",
"str"
)

@docval(
{'name': "data", "doc": 'data', 'type': 'array_data', "default": None},
{'name': "str", "doc": 'str', 'type': str, "default": None},

)
def __init__(self, **kwargs):
super().__init__('test name')
self.data = kwargs['data']
self.str = kwargs['str']

def test_repr_html_(self):
child_obj1 = Container('test child 1')
obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello")
Expand Down Expand Up @@ -455,6 +473,39 @@ def test_repr_html_(self):
'class="field-value">hello</span></div></div>'
)

def test_repr_html_array(self):
obj = self.ContainerWithData(data=np.array([1, 2, 3, 4]), str="hello")
expected_html_table = (
'class="container-fields">NumPy Array<br><table class="data-info"><tbody><tr><th style="text-align: '
'left">shape</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">dtype</'
'th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">Array size</th><td '
'style="text-align: left">32.00 bytes</td></tr></tbody></table><br>[1 2 3 4]</div></details><div '
'style="margin-left: 0px;" class="container-fields"><span class="field-key" title=".str">str: </'
'span><span class="field-value">hello</span></div></div>'
)
self.assertIn(expected_html_table, obj._repr_html_())

def test_repr_html_hdf5_dataset(self):
stephprince marked this conversation as resolved.
Show resolved Hide resolved

# Open an HDF5 file in write mode
with h5py.File('data.h5', 'w') as file:
dataset = file.create_dataset(name='my_dataset', data=[1, 2, 3, 4])
obj = self.ContainerWithData(data=dataset, str="hello")
expected_html_table = (
'class="container-fields">HDF5 Dataset<br><table class="data-info"><tbody><tr><th style="text-align: '
'left">shape</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">dtype</'
'th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">Array size</th><td '
'style="text-align: left">32.00 bytes</td></tr><tr><th style="text-align: left">chunks</th><td '
'style="text-align: left">None</td></tr><tr><th style="text-align: left">compression</th><td '
'style="text-align: left">None</td></tr><tr><th style="text-align: left">compression_opts</th><td '
'style="text-align: left">None</td></tr><tr><th style="text-align: left">compression_ratio</th><td '
'style="text-align: left">1.0</td></tr></tbody></table><br>[1 2 3 4]</div></details><div '
'style="margin-left: 0px;" class="container-fields"><span class="field-key" title=".str">str: </'
'span><span class="field-value">hello</span></div></div>'
)

self.assertIn(expected_html_table, obj._repr_html_())
os.remove('data.h5')

class TestData(TestCase):

Expand Down
Loading