From e0bedca13f167d55a4be5657044c4c6697de95ca Mon Sep 17 00:00:00 2001
From: Matthew Avaylon
Date: Thu, 22 Aug 2024 08:45:29 -0700
Subject: [PATCH] Append a Dataset of References (#1135)

---
 CHANGELOG.md                       |  1 +
 docs/source/install_developers.rst |  2 +-
 docs/source/install_users.rst      |  2 +-
 src/hdmf/backends/hdf5/h5_utils.py | 16 +++++++++++++++-
 src/hdmf/backends/hdf5/h5tools.py  |  9 +++++++++
 src/hdmf/build/objectmapper.py     |  6 ++++++
 src/hdmf/query.py                  |  6 ++++++
 tests/unit/test_io_hdf5_h5tools.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f3c15392b..66a3474d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@
 - Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166)
 - Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165)
 - Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173)
+- Added support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135)
 
 ### Bug fixes
 - Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176)
diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst
index d043a351a..04e351c41 100644
--- a/docs/source/install_developers.rst
+++ b/docs/source/install_developers.rst
@@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command.
 For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager
 that includes conda-forge as a default channel.
 
-.. _Anaconda: https://www.anaconda.com/products/distribution
+.. _Anaconda: https://www.anaconda.com/download
 .. _Mambaforge: https://github.com/conda-forge/miniforge
 
 Install from GitHub
diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst
index 8102651ff..49fbe07b2 100644
--- a/docs/source/install_users.rst
+++ b/docs/source/install_users.rst
@@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a
 
     conda install -c conda-forge hdmf
 
-.. _Anaconda Distribution: https://www.anaconda.com/products/distribution
+.. _Anaconda Distribution: https://www.anaconda.com/download
diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py
index e484a43c2..278735fbc 100644
--- a/src/hdmf/backends/hdf5/h5_utils.py
+++ b/src/hdmf/backends/hdf5/h5_utils.py
@@ -17,7 +17,7 @@
 import logging
 
 from ...array import Array
-from ...data_utils import DataIO, AbstractDataChunkIterator
+from ...data_utils import DataIO, AbstractDataChunkIterator, append_data
 from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
 from ...region import RegionSlicer
 from ...spec import SpecWriter, SpecReader
@@ -108,6 +108,20 @@ def ref(self):
     def shape(self):
         return self.dataset.shape
 
+    def append(self, arg):
+        # Get Builder
+        builder = self.io.manager.get_builder(arg)
+        if builder is None:
+            raise ValueError(
+                "The container being appended to the dataset has not yet been built. "
+                "Please write the container to the file, then open the modified file, and "
+                "append the read container to the dataset."
+            )
+
+        # Get HDF5 Reference
+        ref = self.io._create_ref(builder)
+        append_data(self.dataset, ref)
+
 
 class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta):
     """
diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 4db6463dc..da7f78a91 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1518,6 +1518,7 @@ def __get_ref(self, **kwargs):
             self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name))
             builder = self.manager.build(container)
         path = self.__get_path(builder)
+        self.logger.debug("Getting reference at path '%s'" % path)
         if isinstance(container, RegionBuilder):
             region = container.region
         if region is not None:
@@ -1529,6 +1530,14 @@ def __get_ref(self, **kwargs):
         else:
             return self.__file[path].ref
 
+    @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference',
+             'default': None},
+            {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object',
+             'default': None},
+            returns='the reference', rtype=Reference)
+    def _create_ref(self, **kwargs):
+        return self.__get_ref(**kwargs)
+
     def __is_ref(self, dtype):
         if isinstance(dtype, DtypeSpec):
             return self.__is_ref(dtype.dtype)
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index d6e1de15a..3e8d835f1 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -10,8 +10,11 @@
 from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError,
                      ContainerConfigurationError, ConstructError)
 from .manager import Proxy, BuildManager
+
 from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning,
                        IncorrectDatasetShapeBuildWarning)
+from hdmf.backends.hdf5.h5_utils import H5DataIO
+
 from ..container import AbstractContainer, Data, DataRegion
 from ..term_set import TermSetWrapper
 from ..data_utils import DataIO, AbstractDataChunkIterator
@@ -978,6 +981,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
             for d in container.data:
                 target_builder = self.__get_target_builder(d, build_manager, builder)
                 bldr_data.append(ReferenceBuilder(target_builder))
+            if isinstance(container.data, H5DataIO):
+                # This is here to support appending a dataset of references.
+                bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
         else:
             self.logger.debug("Setting %s '%s' data to reference builder"
                               % (builder.__class__.__name__, builder.name))
diff --git a/src/hdmf/query.py b/src/hdmf/query.py
index 835b295c5..9693b0b1c 100644
--- a/src/hdmf/query.py
+++ b/src/hdmf/query.py
@@ -163,6 +163,12 @@ def __next__(self):
     def next(self):
         return self.dataset.next()
 
+    def append(self, arg):
+        """
+        Override this method to support appending to backend-specific datasets
+        """
+        pass  # pragma: no cover
+
 
 class ReferenceResolver(metaclass=ABCMeta):
     """
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 73aa89788..1f0c2eb4c 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -3004,6 +3004,57 @@ def test_append_data(self):
         self.assertEqual(f['foofile_data'].file.filename, self.paths[1])
         self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference)
 
+    def test_append_dataset_of_references(self):
+        """Test appending a container to a written dataset of references and reading it back."""
+        bazs = []
+        num_bazs = 1
+        for i in range(num_bazs):
+            bazs.append(Baz(name='baz%d' % i))
+        array_bazs = np.array(bazs)
+        wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,))
+        baz_data = BazData(name='baz_data1', data=wrapped_bazs)
+        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
+            write_io.write(bucket)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io:
+            read_bucket1 = append_io.read()
+            new_baz = Baz(name='new')
+            read_bucket1.add_baz(new_baz)
+            append_io.write(read_bucket1)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
+            read_bucket1 = ref_io.read()
+            DoR = read_bucket1.baz_data.data
+            DoR.append(read_bucket1.bazs['new'])
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io:
+            read_bucket1 = read_io.read()
+            self.assertEqual(len(read_bucket1.baz_data.data), 2)
+            self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"])
+
+    def test_append_dataset_of_references_orphaned_target(self):
+        bazs = []
+        num_bazs = 1
+        for i in range(num_bazs):
+            bazs.append(Baz(name='baz%d' % i))
+        array_bazs = np.array(bazs)
+        wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,))
+        baz_data = BazData(name='baz_data1', data=wrapped_bazs)
+        bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
+            write_io.write(bucket)
+
+        with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
+            read_bucket1 = ref_io.read()
+            new_baz = Baz(name='new')
+            read_bucket1.add_baz(new_baz)
+            DoR = read_bucket1.baz_data.data
+            with self.assertRaises(ValueError):
+                DoR.append(read_bucket1.bazs['new'])
+
     def test_append_external_link_data(self):
         """Test that exporting a written container after adding a link with link_data=True creates external links."""
         foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
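
Usage sketch: the snippet below is adapted directly from test_append_dataset_of_references
above and shows the workflow this patch enables. Baz, BazData, BazBucket, and
get_baz_buildmanager are the test helper types used in the tests above, and
'bucket.h5' is a placeholder path. The two requirements are that the dataset of
references is written through H5DataIO with maxshape=(None,) so the underlying
HDF5 dataset is resizable, and that the appended container already exists in the
file, because H5Dataset.append resolves it to a builder before creating the
reference.

    import numpy as np

    from hdmf.backends.hdf5 import HDF5IO, H5DataIO
    from tests.unit.helpers.utils import Baz, BazData, BazBucket, get_baz_buildmanager

    # Write a bucket whose baz_data is a resizable dataset of references.
    baz = Baz(name='baz0')
    baz_data = BazData(name='baz_data1', data=H5DataIO(np.array([baz]), maxshape=(None,)))
    bucket = BazBucket(name='bucket1', bazs=[baz], baz_data=baz_data)
    with HDF5IO('bucket.h5', manager=get_baz_buildmanager(), mode='w') as io:
        io.write(bucket)

    # The reference target must be written to the file before it can be
    # appended, so add the new Baz and write the bucket first.
    with HDF5IO('bucket.h5', manager=get_baz_buildmanager(), mode='a') as io:
        bucket = io.read()
        bucket.add_baz(Baz(name='new'))
        io.write(bucket)

    # Re-open and append the *read* container: append() resolves it to a
    # builder, creates an h5py reference, and grows the dataset by one element.
    with HDF5IO('bucket.h5', manager=get_baz_buildmanager(), mode='a') as io:
        bucket = io.read()
        bucket.baz_data.data.append(bucket.bazs['new'])

    # Appending a container that was added in memory but never written raises
    # the ValueError introduced in H5Dataset.append.
    with HDF5IO('bucket.h5', manager=get_baz_buildmanager(), mode='a') as io:
        bucket = io.read()
        bucket.add_baz(Baz(name='orphan'))
        bucket.baz_data.data.append(bucket.bazs['orphan'])  # raises ValueError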