diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c76f12bef..221182985 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.1 + rev: v0.6.3 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate diff --git a/CHANGELOG.md b/CHANGELOG.md index ddd100cb2..6f06e5112 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,20 @@ # HDMF Changelog -## HDMF 3.14.4 (Upcoming) +## HDMF 3.14.4 (August 22, 2024) ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) - Added support for overriding backend configurations of h5py.Dataset objects in Container.set_data_io: [#1172](https://github.com/hdmf-dev/hdmf/pull/1172) - Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166) - Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165) +- Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173) +- Add support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135) + +### Bug fixes +- Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176) +- Fixed H5DataIO not exposing `maxshape` on non-dci dsets. @cboulay [#1149](https://github.com/hdmf-dev/hdmf/pull/1149) +- Fixed generation of classes in an extension that contain attributes or datasets storing references to other types defined in the extension. 
+ @rly [#1183](https://github.com/hdmf-dev/hdmf/pull/1183) ## HDMF 3.14.3 (July 29, 2024) diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index d043a351a..04e351c41 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command. For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager that includes conda-forge as a default channel. -.. _Anaconda: https://www.anaconda.com/products/distribution +.. _Anaconda: https://www.anaconda.com/download .. _Mambaforge: https://github.com/conda-forge/miniforge Install from GitHub diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 8102651ff..49fbe07b2 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a conda install -c conda-forge hdmf -.. _Anaconda Distribution: https://www.anaconda.com/products/distribution +.. 
_Anaconda Distribution: https://www.anaconda.com/download diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index e484a43c2..2d7187721 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -17,11 +17,11 @@ import logging from ...array import Array -from ...data_utils import DataIO, AbstractDataChunkIterator +from ...data_utils import DataIO, AbstractDataChunkIterator, append_data from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver from ...region import RegionSlicer from ...spec import SpecWriter, SpecReader -from ...utils import docval, getargs, popargs, get_docval +from ...utils import docval, getargs, popargs, get_docval, get_data_shape class HDF5IODataChunkIteratorQueue(deque): @@ -108,6 +108,20 @@ def ref(self): def shape(self): return self.dataset.shape + def append(self, arg): + # Get Builder + builder = self.io.manager.get_builder(arg) + if builder is None: + raise ValueError( + "The container being appended to the dataset has not yet been built. " + "Please write the container to the file, then open the modified file, and " + "append the read container to the dataset." 
+ ) + + # Get HDF5 Reference + ref = self.io._create_ref(builder) + append_data(self.dataset, ref) + class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta): """ @@ -658,3 +672,14 @@ def valid(self): if isinstance(self.data, Dataset) and not self.data.id.valid: return False return super().valid + + @property + def maxshape(self): + if 'maxshape' in self.io_settings: + return self.io_settings['maxshape'] + elif hasattr(self.data, 'maxshape'): + return self.data.maxshape + elif hasattr(self, "shape"): + return self.shape + else: + return get_data_shape(self.data) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index da07a6a5c..da7f78a91 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -698,6 +698,8 @@ def __read_dataset(self, h5obj, name=None): d = ReferenceBuilder(target_builder) kwargs['data'] = d kwargs['dtype'] = d.dtype + elif h5obj.dtype.kind == 'V': # scalar compound data type + kwargs['data'] = np.array(scalar, dtype=h5obj.dtype) else: kwargs["data"] = scalar else: @@ -1227,6 +1229,8 @@ def _filler(): return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual + elif len(np.shape(data)) == 0: + dset = self.__scalar_fill__(parent, name, data, options) else: dset = self.__list_fill__(parent, name, data, options) # Write a dataset containing references, i.e., a region or object reference. 
@@ -1469,7 +1473,7 @@ def __list_fill__(cls, parent, name, data, options=None): data_shape = io_settings.pop('shape') elif hasattr(data, 'shape'): data_shape = data.shape - elif isinstance(dtype, np.dtype): + elif isinstance(dtype, np.dtype) and len(dtype) > 1: # check if compound dtype data_shape = (len(data),) else: data_shape = get_data_shape(data) @@ -1514,6 +1518,7 @@ def __get_ref(self, **kwargs): self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name)) builder = self.manager.build(container) path = self.__get_path(builder) + self.logger.debug("Getting reference at path '%s'" % path) if isinstance(container, RegionBuilder): region = container.region @@ -1525,6 +1530,14 @@ def __get_ref(self, **kwargs): else: return self.__file[path].ref + @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', + 'default': None}, + {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', + 'default': None}, + returns='the reference', rtype=Reference) + def _create_ref(self, **kwargs): + return self.__get_ref(**kwargs) + def __is_ref(self, dtype): if isinstance(dtype, DtypeSpec): return self.__is_ref(dtype.dtype) diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 25b9b81bd..967c34010 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -7,7 +7,7 @@ from .classgenerator import ClassGenerator, CustomClassGenerator, MCIClassGenerator from ..container import AbstractContainer, Container, Data from ..term_set import TypeConfigurator -from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog +from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog, RefSpec from ..spec.spec import BaseStorageSpec from ..utils import docval, getargs, ExtenderMeta, get_docval @@ -480,6 +480,7 @@ def load_namespaces(self, **kwargs): load_namespaces here has the advantage of being able to keep track of type dependencies 
across namespaces. ''' deps = self.__ns_catalog.load_namespaces(**kwargs) + # register container types for each dependent type in each dependent namespace for new_ns, ns_deps in deps.items(): for src_ns, types in ns_deps.items(): for dt in types: @@ -529,7 +530,7 @@ def get_dt_container_cls(self, **kwargs): namespace = ns_key break if namespace is None: - raise ValueError("Namespace could not be resolved.") + raise ValueError(f"Namespace could not be resolved for data type '{data_type}'.") cls = self.__get_container_cls(namespace, data_type) @@ -549,6 +550,8 @@ def get_dt_container_cls(self, **kwargs): def __check_dependent_types(self, spec, namespace): """Ensure that classes for all types used by this type exist in this namespace and generate them if not. + + `spec` should be a GroupSpec or DatasetSpec in the `namespace` """ def __check_dependent_types_helper(spec, namespace): if isinstance(spec, (GroupSpec, DatasetSpec)): @@ -564,6 +567,16 @@ def __check_dependent_types_helper(spec, namespace): if spec.data_type_inc is not None: self.get_dt_container_cls(spec.data_type_inc, namespace) + + # handle attributes that have a reference dtype + for attr_spec in spec.attributes: + if isinstance(attr_spec.dtype, RefSpec): + self.get_dt_container_cls(attr_spec.dtype.target_type, namespace) + # handle datasets that have a reference dtype + if isinstance(spec, DatasetSpec): + if isinstance(spec.dtype, RefSpec): + self.get_dt_container_cls(spec.dtype.target_type, namespace) + # recurse into nested types if isinstance(spec, GroupSpec): for child_spec in (spec.groups + spec.datasets + spec.links): __check_dependent_types_helper(child_spec, namespace) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b5815ee2c..3e8d835f1 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -10,8 +10,11 @@ from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, 
ConstructError) from .manager import Proxy, BuildManager + from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning, IncorrectDatasetShapeBuildWarning) +from hdmf.backends.hdf5.h5_utils import H5DataIO + from ..container import AbstractContainer, Data, DataRegion from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator @@ -598,11 +601,17 @@ def __get_data_type(cls, spec): def __convert_string(self, value, spec): """Convert string types to the specified dtype.""" + def __apply_string_type(value, string_type): + if isinstance(value, (list, tuple, np.ndarray, DataIO)): + return [__apply_string_type(item, string_type) for item in value] + else: + return string_type(value) + ret = value if isinstance(spec, AttributeSpec): if 'text' in spec.dtype: if spec.shape is not None or spec.dims is not None: - ret = list(map(str, value)) + ret = __apply_string_type(value, str) else: ret = str(value) elif isinstance(spec, DatasetSpec): @@ -618,7 +627,7 @@ def string_type(x): return x.isoformat() # method works for both date and datetime if string_type is not None: if spec.shape is not None or spec.dims is not None: - ret = list(map(string_type, value)) + ret = __apply_string_type(value, string_type) else: ret = string_type(value) # copy over any I/O parameters if they were specified @@ -972,6 +981,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager): for d in container.data: target_builder = self.__get_target_builder(d, build_manager, builder) bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. 
+ bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) else: self.logger.debug("Setting %s '%s' data to reference builder" % (builder.__class__.__name__, builder.name)) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index f7883148c..7c450770a 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -629,12 +629,8 @@ def __repr__(self): template += "\nFields:\n" for k in sorted(self.fields): # sorted to enable tests v = self.fields[k] - # if isinstance(v, DataIO) or not hasattr(v, '__len__') or len(v) > 0: if hasattr(v, '__len__'): - if isinstance(v, (np.ndarray, list, tuple)): - if len(v) > 0: - template += " {}: {}\n".format(k, self.__smart_str(v, 1)) - elif v: + if isinstance(v, (np.ndarray, list, tuple)) or v: template += " {}: {}\n".format(k, self.__smart_str(v, 1)) else: template += " {}: {}\n".format(k, v) diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 835b295c5..9693b0b1c 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -163,6 +163,12 @@ def __next__(self): def next(self): return self.dataset.next() + def append(self, arg): + """ + Override this method to support appending to backend-specific datasets + """ + pass # pragma: no cover + class ReferenceResolver(metaclass=ABCMeta): """ diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index e39011d9f..2668da1ec 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -134,7 +134,7 @@ def get_type(data, builder_dtype=None): elif isinstance(data, ReferenceResolver): return data.dtype, None # Numpy nd-array data - elif isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray) and len(data.dtype) <= 1: if data.size > 0: return get_type(data[0], builder_dtype) else: @@ -147,11 +147,14 @@ def get_type(data, builder_dtype=None): # Case for h5py.Dataset and other I/O specific array types else: # Compound dtype - if builder_dtype and isinstance(builder_dtype, list): + if builder_dtype and len(builder_dtype) 
> 1: dtypes = [] string_formats = [] for i in range(len(builder_dtype)): - dtype, string_format = get_type(data[0][i]) + if len(np.shape(data)) == 0: + dtype, string_format = get_type(data[()][i]) + else: + dtype, string_format = get_type(data[0][i]) dtypes.append(dtype) string_formats.append(string_format) return dtypes, string_formats @@ -438,7 +441,9 @@ def validate(self, **kwargs): except EmptyArrayError: # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple pass - if isinstance(builder.dtype, list): + if builder.dtype is not None and len(builder.dtype) > 1 and len(np.shape(builder.data)) == 0: + shape = () # scalar compound dataset + elif isinstance(builder.dtype, list): shape = (len(builder.data), ) # only 1D datasets with compound types are supported else: shape = get_data_shape(data) diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 52fdc4839..42a55b470 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -7,7 +7,9 @@ from hdmf.build import TypeMap, CustomClassGenerator from hdmf.build.classgenerator import ClassGenerator, MCIClassGenerator from hdmf.container import Container, Data, MultiContainerInterface, AbstractContainer -from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, LinkSpec +from hdmf.spec import ( + GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, LinkSpec, RefSpec +) from hdmf.testing import TestCase from hdmf.utils import get_docval, docval @@ -180,10 +182,11 @@ def test_dynamic_container_creation(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), - AttributeSpec('attr4', 'another float attribute', 'float')]) + AttributeSpec('attr4', 'another float 
attribute', 'float'), + AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,)),]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'skip_post_init'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'skip_post_init'} received_args = set() for x in get_docval(cls.__init__): @@ -211,7 +214,7 @@ def test_dynamic_container_creation_defaults(self): AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'foo', 'skip_post_init'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'foo', 'skip_post_init'} received_args = set(map(lambda x: x['name'], get_docval(cls.__init__))) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') @@ -733,9 +736,18 @@ def _build_separate_namespaces(self): GroupSpec(data_type_inc='Bar', doc='a bar', quantity='?') ] ) + moo_spec = DatasetSpec( + doc='A test dataset that is a 1D array of object references of Baz', + data_type_def='Moo', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Baz' + ) + ) create_load_namespace_yaml( namespace_name='ndx-test', - specs=[baz_spec], + specs=[baz_spec, moo_spec], output_dir=self.test_dir, incl_types={ CORE_NAMESPACE: ['Bar'], @@ -827,6 +839,171 @@ def test_get_class_include_from_separate_ns_4(self): self._check_classes(baz_cls, bar_cls, bar_cls2, qux_cls, qux_cls2) +class TestGetClassObjectReferences(TestCase): + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + if os.path.exists(self.test_dir): # start clean + self.tearDown() + os.mkdir(self.test_dir) + self.type_map = TypeMap() + + def tearDown(self): + 
shutil.rmtree(self.test_dir) + + def test_get_class_include_dataset_of_references(self): + """Test that get_class resolves datasets of object references.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + moo_spec = DatasetSpec( + doc='A test dataset that is a 1D array of object references of Qux', + data_type_def='Moo', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Qux' + ), + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, moo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Moo', 'ndx-test') + # now, Moo and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 2 + assert "Moo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_attribute_object_reference(self): + """Test that get_class resolves data types with an attribute that is an object reference.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + woo_spec = DatasetSpec( + doc='A test dataset that has a scalar object reference to a Qux', + data_type_def='Woo', + attributes=[ + AttributeSpec( + name='attr1', + doc='a string attribute', + dtype=RefSpec(reftype='object', target_type='Qux') + ), + ] + ) + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, woo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Woo', 'ndx-test') + # now, Woo and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 2 + assert "Woo" in 
[c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_nested_object_reference(self): + """Test that get_class resolves nested datasets that are object references.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + spam_spec = DatasetSpec( + doc='A test extension', + data_type_def='Spam', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Qux' + ), + ) + goo_spec = GroupSpec( + doc='A test dataset that has a nested dataset (Spam) that has a scalar object reference to a Qux', + data_type_def='Goo', + datasets=[ + DatasetSpec( + doc='a dataset', + data_type_inc='Spam', + ), + ], + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, spam_spec, goo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Goo', 'ndx-test') + # now, Goo, Spam, and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 3 + assert "Goo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Spam" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_nested_attribute_object_reference(self): + """Test that get_class resolves nested datasets that have an attribute that is an object reference.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + bam_spec = DatasetSpec( + doc='A test extension', + data_type_def='Bam', + attributes=[ + AttributeSpec( + name='attr1', + doc='a string attribute', + dtype=RefSpec(reftype='object', target_type='Qux') + ), + ], + ) + boo_spec = GroupSpec( + doc='A test 
dataset that has a nested dataset (Spam) that has a scalar object reference to a Qux', + data_type_def='Boo', + datasets=[ + DatasetSpec( + doc='a dataset', + data_type_inc='Bam', + ), + ], + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, bam_spec, boo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Boo', 'ndx-test') + # now, Boo, Bam, and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 3 + assert "Boo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Bam" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + class EmptyBar(Container): pass diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index 01421e218..a3be47cf7 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -341,7 +341,7 @@ def test_get_dt_container_cls(self): self.assertIs(ret, Foo) def test_get_dt_container_cls_no_namespace(self): - with self.assertRaisesWith(ValueError, "Namespace could not be resolved."): + with self.assertRaisesWith(ValueError, "Namespace could not be resolved for data type 'Unknown'."): self.type_map.get_dt_container_cls(data_type="Unknown") diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index 63f397682..e095ef318 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -9,6 +9,7 @@ from hdmf.testing import TestCase from abc import ABCMeta, abstractmethod import unittest +import numpy as np from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map @@ -20,24 +21,27 @@ class Bar(Container): 
{'name': 'attr1', 'type': str, 'doc': 'an attribute'}, {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}, + {'name': 'attr_array', 'type': 'array_data', 'doc': 'another attribute', 'default': (1, 2, 3)}, {'name': 'foo', 'type': 'Foo', 'doc': 'a group', 'default': None}) def __init__(self, **kwargs): - name, data, attr1, attr2, attr3, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', 'foo', kwargs) + name, data, attr1, attr2, attr3, attr_array, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', + 'attr_array', 'foo', kwargs) super().__init__(name=name) self.__data = data self.__attr1 = attr1 self.__attr2 = attr2 self.__attr3 = attr3 + self.__attr_array = attr_array self.__foo = foo if self.__foo is not None and self.__foo.parent is None: self.__foo.parent = self def __eq__(self, other): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) @property @@ -60,6 +64,10 @@ def attr2(self): def attr3(self): return self.__attr3 + @property + def attr_array(self): + return self.__attr_array + @property def foo(self): return self.__foo @@ -333,12 +341,15 @@ def test_build_1d(self): datasets=[DatasetSpec('an example dataset', 'text', name='data', shape=(None,), attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), + AttributeSpec('attr_array', 'an example array attribute', 'text', + shape=(None,))]) type_map = self.customSetUp(bar_spec) 
type_map.register_map(Bar, BarMapper) - bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10) + bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10, attr_array=['a', 'b', 'c', 'd']) builder = type_map.build(bar_inst) - self.assertEqual(builder.get('data').data, ['a', 'b', 'c', 'd']) + np.testing.assert_array_equal(builder.get('data').data, np.array(['a', 'b', 'c', 'd'])) + np.testing.assert_array_equal(builder.get('attr_array'), np.array(['a', 'b', 'c', 'd'])) def test_build_scalar(self): bar_spec = GroupSpec('A test group specification with a data type', @@ -353,6 +364,102 @@ def test_build_scalar(self): builder = type_map.build(bar_inst) self.assertEqual(builder.get('data').data, "['a', 'b', 'c', 'd']") + def test_build_2d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_2d = [['aa', 'bb'], ['cc', 'dd']] + bar_inst = Bar('my_bar', str_lol_2d, 'value1', 10, attr_array=str_lol_2d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_2d) + self.assertEqual(builder.get('attr_array'), str_lol_2d) + + def test_build_2d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + 
shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_2d = np.array([['aa', 'bb'], ['cc', 'dd']]) + bar_inst = Bar('my_bar', str_array_2d, 'value1', 10, attr_array=str_array_2d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_2d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_2d) + + def test_build_3d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_3d = [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]] + bar_inst = Bar('my_bar', str_lol_3d, 'value1', 10, attr_array=str_lol_3d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_3d) + self.assertEqual(builder.get('attr_array'), str_lol_3d) + + def test_build_3d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_3d = np.array([[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]]) + bar_inst = Bar('my_bar', str_array_3d, 'value1', 10, 
attr_array=str_array_3d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_3d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_3d) + def test_build_dataio(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 6e5ba21df..d8e8853cd 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -24,7 +24,7 @@ from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace -from hdmf.spec.spec import GroupSpec +from hdmf.spec.spec import GroupSpec, DtypeSpec from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD from hdmf.term_set import TermSet, TermSetWrapper @@ -144,6 +144,16 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + def test_write_dataset_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + dset = self.f['test_dataset'] + self.assertTupleEqual(dset.shape, ()) + self.assertEqual(dset[()].tolist(), a.tolist()) + ########################################## # write_dataset tests: TermSetWrapper ########################################## @@ -164,6 +174,31 @@ def test_write_dataset_list(self): dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a)) + def test_write_dataset_lol_strings(self): + a = [['aa', 'bb'], ['cc', 'dd']] + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + decoded_dset = 
[[item.decode('utf-8') if isinstance(item, bytes) else item for item in sublist] + for sublist in dset[:]] + self.assertTrue(decoded_dset == a) + + def test_write_dataset_list_compound_datatype(self): + a = np.array([(1, 2, 0.5), (3, 4, 0.5)], dtype=[('x', 'int'), ('y', 'int'), ('z', 'float')]) + dset_builder = DatasetBuilder( + name='test_dataset', + data=a.tolist(), + attributes={}, + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='int'), + DtypeSpec('z', doc='z', dtype='float'), + ], + ) + self.io.write_dataset(self.f, dset_builder) + dset = self.f['test_dataset'] + for field in a.dtype.names: + self.assertTrue(np.all(dset[field][:] == a[field])) + def test_write_dataset_list_compress_gzip(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='gzip', @@ -572,6 +607,12 @@ def test_pass_through_of_chunk_shape_generic_data_chunk_iterator(self): ############################################# # H5DataIO general ############################################# + def test_pass_through_of_maxshape_on_h5dataset(self): + k = 10 + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(k), attributes={})) + dset = H5DataIO(self.f['test_dataset']) + self.assertEqual(dset.maxshape, (k,)) + def test_warning_on_non_gzip_compression(self): # Make sure no warning is issued when using gzip with warnings.catch_warnings(record=True) as w: @@ -762,6 +803,17 @@ def test_read_str(self): self.assertEqual(str(bldr['test_dataset'].data), '') + def test_read_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + self.io.close() + with HDF5IO(self.path, 'r') as io: + bldr = io.read_builder() + np.testing.assert_array_equal(bldr['test_dataset'].data[()], a) + class TestRoundTrip(TestCase): @@ -2958,6 
+3010,57 @@ def test_append_data(self): self.assertEqual(f['foofile_data'].file.filename, self.paths[1]) self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference) + def test_append_dataset_of_references(self): + """Test that appending to a written dataset of references works.""" + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + DoR = read_bucket1.baz_data.data + DoR.append(read_bucket1.bazs['new']) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + self.assertEqual(len(read_bucket1.baz_data.data), 2) + self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"]) + + def test_append_dataset_of_references_orphaned_target(self): + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + new_baz = Baz(name='new') + 
read_bucket1.add_baz(new_baz) + DoR = read_bucket1.baz_data.data + with self.assertRaises(ValueError): + DoR.append(read_bucket1.bazs['new']) + def test_append_external_link_data(self): """Test that exporting a written container after adding a link with link_data=True creates external links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -3666,6 +3769,14 @@ def test_dataio_shape_then_data(self): with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"): dataio.data = list() + def test_dataio_maxshape(self): + dataio = H5DataIO(data=np.arange(10), maxshape=(None,)) + self.assertEqual(dataio.maxshape, (None,)) + + def test_dataio_maxshape_from_data(self): + dataio = H5DataIO(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + self.assertEqual(dataio.maxshape, (10,)) + def test_hdf5io_can_read(): assert not HDF5IO.can_read("not_a_file") diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 95ff5d98e..dd79cfce5 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -501,6 +501,28 @@ def test_np_bool_for_bool(self): results = self.vmap.validate(bar_builder) self.assertEqual(len(results), 0) + def test_scalar_compound_dtype(self): + """Test that validator allows scalar compound dtype data where a compound dtype is specified.""" + spec_catalog = SpecCatalog() + dtype = [DtypeSpec('x', doc='x', dtype='int'), DtypeSpec('y', doc='y', dtype='float')] + spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec('an example dataset', dtype, name='data',)], + attributes=[AttributeSpec('attr1', 'an example attribute', 'text',)]) + spec_catalog.register_spec(spec, 'test2.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test2.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + value = 
np.array((1, 2.2), dtype=[('x', 'int'), ('y', 'float')]) + bar_builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': 'test'}, + datasets=[DatasetBuilder(name='data', + data=value, + dtype=[DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='float'),],),]) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) class Test1DArrayValidation(TestCase):