Merge branch 'dev' into repack
pauladkisson authored Sep 4, 2024
2 parents 5fd3025 + 1abb8ec commit 2b2e717
Showing 16 changed files with 528 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
# hooks:
# - id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.1
rev: v0.6.3
hooks:
- id: ruff
# - repo: https://github.com/econchick/interrogate
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -1,12 +1,20 @@
# HDMF Changelog

## HDMF 3.14.4 (Upcoming)
## HDMF 3.14.4 (August 22, 2024)

### Enhancements
- Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157)
- Added support for overriding backend configurations of h5py.Dataset objects in `Container.set_data_io`. [#1172](https://github.com/hdmf-dev/hdmf/pull/1172)
- Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166)
- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165)
- Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173)
- Added support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135)

### Bug fixes
- Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets. @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176)
- Fixed H5DataIO not exposing `maxshape` on non-DataChunkIterator datasets. @cboulay [#1149](https://github.com/hdmf-dev/hdmf/pull/1149)
- Fixed generation of classes in an extension that contain attributes or datasets storing references to other types defined in the extension.
@rly [#1183](https://github.com/hdmf-dev/hdmf/pull/1183)
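
As a rough illustration of the multidimensional string array support in #1173 above, a minimal sketch, assuming the `hdmf.common` `DynamicTable`/`VectorData` API (file, table, and column names are illustrative):

```python
import numpy as np
from hdmf.common import DynamicTable, VectorData, get_manager
from hdmf.backends.hdf5 import HDF5IO

# A 2-D array of strings, which HDMF can now write (PR #1173).
data = np.array([["a", "bb"], ["ccc", "dddd"]])
table = DynamicTable(
    name="demo_table",
    description="table with a 2-D string column",
    columns=[VectorData(name="labels", description="2-D string data", data=data)],
)
with HDF5IO("demo.h5", mode="w", manager=get_manager()) as io:
    io.write(table)
```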

## HDMF 3.14.3 (July 29, 2024)

2 changes: 1 addition & 1 deletion docs/source/install_developers.rst
@@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command.
For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager
that includes conda-forge as a default channel.

.. _Anaconda: https://www.anaconda.com/products/distribution
.. _Anaconda: https://www.anaconda.com/download
.. _Mambaforge: https://github.com/conda-forge/miniforge

Install from GitHub
2 changes: 1 addition & 1 deletion docs/source/install_users.rst
@@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a
conda install -c conda-forge hdmf
.. _Anaconda Distribution: https://www.anaconda.com/products/distribution
.. _Anaconda Distribution: https://www.anaconda.com/download
29 changes: 27 additions & 2 deletions src/hdmf/backends/hdf5/h5_utils.py
@@ -17,11 +17,11 @@
import logging

from ...array import Array
from ...data_utils import DataIO, AbstractDataChunkIterator
from ...data_utils import DataIO, AbstractDataChunkIterator, append_data
from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
from ...region import RegionSlicer
from ...spec import SpecWriter, SpecReader
from ...utils import docval, getargs, popargs, get_docval
from ...utils import docval, getargs, popargs, get_docval, get_data_shape


class HDF5IODataChunkIteratorQueue(deque):
@@ -108,6 +108,20 @@ def ref(self):
def shape(self):
return self.dataset.shape

def append(self, arg):
# Get Builder
builder = self.io.manager.get_builder(arg)
if builder is None:
raise ValueError(
"The container being appended to the dataset has not yet been built. "
"Please write the container to the file, then open the modified file, and "
"append the read container to the dataset."
)

# Get HDF5 Reference
ref = self.io._create_ref(builder)
append_data(self.dataset, ref)
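
A sketch of the underlying mechanism, assuming only that `append_data` resizes h5py datasets in place (file and object names are illustrative):

```python
import h5py
from hdmf.data_utils import append_data

with h5py.File("refs.h5", "w") as f:
    target = f.create_group("target")
    # Dataset of object references with a growable first dimension.
    refs = f.create_dataset("refs", shape=(0,), maxshape=(None,), dtype=h5py.ref_dtype)
    append_data(refs, target.ref)  # resizes by one row and stores the new reference
    print(refs.shape)  # (1,)
```

In the method above, the same call is made with a reference produced by `self.io._create_ref(builder)`, the helper added to `h5tools.py` in this merge.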


class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta):
"""
@@ -658,3 +672,14 @@ def valid(self):
if isinstance(self.data, Dataset) and not self.data.id.valid:
return False
return super().valid

@property
def maxshape(self):
if 'maxshape' in self.io_settings:
return self.io_settings['maxshape']
elif hasattr(self.data, 'maxshape'):
return self.data.maxshape
elif hasattr(self, "shape"):
return self.shape
else:
return get_data_shape(self.data)
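
A short sketch of what the new property exposes (the values in comments are expectations under these assumptions, not guaranteed output):

```python
import numpy as np
from hdmf.backends.hdf5 import H5DataIO

# maxshape passed explicitly -> reported from io_settings
wrapped = H5DataIO(data=np.zeros((3, 4)), maxshape=(None, 4))
print(wrapped.maxshape)  # (None, 4)

# no maxshape and plain list data -> falls back to the data's shape
plain = H5DataIO(data=[1, 2, 3])
print(plain.maxshape)  # (3,)
```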
15 changes: 14 additions & 1 deletion src/hdmf/backends/hdf5/h5tools.py
@@ -698,6 +698,8 @@ def __read_dataset(self, h5obj, name=None):
d = ReferenceBuilder(target_builder)
kwargs['data'] = d
kwargs['dtype'] = d.dtype
elif h5obj.dtype.kind == 'V': # scalar compound data type
kwargs['data'] = np.array(scalar, dtype=h5obj.dtype)
else:
kwargs["data"] = scalar
else:
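
For context, a sketch of the scalar-compound case the new branch handles: h5py reads such a dataset back as a `numpy.void` scalar, which is re-wrapped as a 0-d compound array (file and field names are illustrative):

```python
import h5py
import numpy as np

cpd = np.dtype([("id", "i4"), ("value", "f8")])
with h5py.File("scalar_cpd.h5", "w") as f:
    f.create_dataset("d", data=np.array((1, 2.5), dtype=cpd))  # scalar dataset
with h5py.File("scalar_cpd.h5", "r") as f:
    h5obj = f["d"]
    print(h5obj.shape, h5obj.dtype.kind)  # () V
    scalar = h5obj[()]                    # numpy.void
    print(np.array(scalar, dtype=h5obj.dtype).shape)  # () -- 0-d compound array
```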
@@ -1227,6 +1229,8 @@ def _filler():

return
# If the compound data type contains only regular data (i.e., no references) then we can write it as usual
elif len(np.shape(data)) == 0:
dset = self.__scalar_fill__(parent, name, data, options)
else:
dset = self.__list_fill__(parent, name, data, options)
# Write a dataset containing references, i.e., a region or object reference.
@@ -1469,7 +1473,7 @@ def __list_fill__(cls, parent, name, data, options=None):
data_shape = io_settings.pop('shape')
elif hasattr(data, 'shape'):
data_shape = data.shape
elif isinstance(dtype, np.dtype):
elif isinstance(dtype, np.dtype) and len(dtype) > 1: # check if compound dtype
data_shape = (len(data),)
else:
data_shape = get_data_shape(data)
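
The `len(dtype) > 1` guard relies on `len()` of a NumPy dtype counting its fields, so compound dtypes are distinguished from plain ones:

```python
import numpy as np

print(len(np.dtype([("x", "i4"), ("y", "f8")])))  # 2 -- compound dtype
print(len(np.dtype("i4")))                        # 0 -- plain dtype
```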
@@ -1514,6 +1518,7 @@ def __get_ref(self, **kwargs):
self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name))
builder = self.manager.build(container)
path = self.__get_path(builder)

self.logger.debug("Getting reference at path '%s'" % path)
if isinstance(container, RegionBuilder):
region = container.region
@@ -1525,6 +1530,14 @@
else:
return self.__file[path].ref

@docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference',
'default': None},
{'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object',
'default': None},
returns='the reference', rtype=Reference)
def _create_ref(self, **kwargs):
return self.__get_ref(**kwargs)

def __is_ref(self, dtype):
if isinstance(dtype, DtypeSpec):
return self.__is_ref(dtype.dtype)
17 changes: 15 additions & 2 deletions src/hdmf/build/manager.py
@@ -7,7 +7,7 @@
from .classgenerator import ClassGenerator, CustomClassGenerator, MCIClassGenerator
from ..container import AbstractContainer, Container, Data
from ..term_set import TypeConfigurator
from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog
from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog, RefSpec
from ..spec.spec import BaseStorageSpec
from ..utils import docval, getargs, ExtenderMeta, get_docval

@@ -480,6 +480,7 @@ def load_namespaces(self, **kwargs):
load_namespaces here has the advantage of being able to keep track of type dependencies across namespaces.
'''
deps = self.__ns_catalog.load_namespaces(**kwargs)
# register container types for each dependent type in each dependent namespace
for new_ns, ns_deps in deps.items():
for src_ns, types in ns_deps.items():
for dt in types:
@@ -529,7 +530,7 @@ def get_dt_container_cls(self, **kwargs):
namespace = ns_key
break
if namespace is None:
raise ValueError("Namespace could not be resolved.")
raise ValueError(f"Namespace could not be resolved for data type '{data_type}'.")

cls = self.__get_container_cls(namespace, data_type)

@@ -549,6 +550,8 @@

def __check_dependent_types(self, spec, namespace):
"""Ensure that classes for all types used by this type exist in this namespace and generate them if not.
`spec` should be a GroupSpec or DatasetSpec defined in the given `namespace`.
"""
def __check_dependent_types_helper(spec, namespace):
if isinstance(spec, (GroupSpec, DatasetSpec)):
@@ -564,6 +567,16 @@ def __check_dependent_types_helper(spec, namespace):

if spec.data_type_inc is not None:
self.get_dt_container_cls(spec.data_type_inc, namespace)

# handle attributes that have a reference dtype
for attr_spec in spec.attributes:
if isinstance(attr_spec.dtype, RefSpec):
self.get_dt_container_cls(attr_spec.dtype.target_type, namespace)
# handle datasets that have a reference dtype
if isinstance(spec, DatasetSpec):
if isinstance(spec.dtype, RefSpec):
self.get_dt_container_cls(spec.dtype.target_type, namespace)
# recurse into nested types
if isinstance(spec, GroupSpec):
for child_spec in (spec.groups + spec.datasets + spec.links):
__check_dependent_types_helper(child_spec, namespace)
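
A sketch of the spec shape the new branches handle: an attribute (or dataset) whose dtype is a reference to another type defined in the same extension (type names are illustrative):

```python
from hdmf.spec import AttributeSpec, GroupSpec, RefSpec

target = GroupSpec(doc="a target type", data_type_def="TargetType")
holder = GroupSpec(
    doc="a type with a reference-typed attribute",
    data_type_def="HolderType",
    attributes=[
        AttributeSpec(
            name="ref_attr",
            doc="reference to a TargetType",
            dtype=RefSpec(target_type="TargetType", reftype="object"),
        )
    ],
)
print(isinstance(holder.attributes[0].dtype, RefSpec))  # True -- triggers class generation
```

With the fix, generating the class for `HolderType` also generates (or retrieves) the class for `TargetType`.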
16 changes: 14 additions & 2 deletions src/hdmf/build/objectmapper.py
@@ -10,8 +10,11 @@
from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
ConstructError)
from .manager import Proxy, BuildManager

from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning,
IncorrectDatasetShapeBuildWarning)
from hdmf.backends.hdf5.h5_utils import H5DataIO

from ..container import AbstractContainer, Data, DataRegion
from ..term_set import TermSetWrapper
from ..data_utils import DataIO, AbstractDataChunkIterator
@@ -598,11 +601,17 @@ def __get_data_type(cls, spec):

def __convert_string(self, value, spec):
"""Convert string types to the specified dtype."""
def __apply_string_type(value, string_type):
if isinstance(value, (list, tuple, np.ndarray, DataIO)):
return [__apply_string_type(item, string_type) for item in value]
else:
return string_type(value)

ret = value
if isinstance(spec, AttributeSpec):
if 'text' in spec.dtype:
if spec.shape is not None or spec.dims is not None:
ret = list(map(str, value))
ret = __apply_string_type(value, str)
else:
ret = str(value)
elif isinstance(spec, DatasetSpec):
@@ -618,7 +627,7 @@ def string_type(x):
return x.isoformat() # method works for both date and datetime
if string_type is not None:
if spec.shape is not None or spec.dims is not None:
ret = list(map(string_type, value))
ret = __apply_string_type(value, string_type)
else:
ret = string_type(value)
# copy over any I/O parameters if they were specified
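
What the recursive helper fixes: nested (multidimensional) string data is now converted element-wise instead of stringifying whole sub-lists:

```python
value = [["a", 1], ["b", 2]]

# old behavior: str() applied to each sub-list
print(list(map(str, value)))  # ["['a', 1]", "['b', 2]"]

# new behavior (element-wise, as __apply_string_type does)
print([[str(item) for item in row] for row in value])  # [['a', '1'], ['b', '2']]
```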
@@ -972,6 +981,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
for d in container.data:
target_builder = self.__get_target_builder(d, build_manager, builder)
bldr_data.append(ReferenceBuilder(target_builder))
if isinstance(container.data, H5DataIO):
# This is here to support appending a dataset of references.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
else:
self.logger.debug("Setting %s '%s' data to reference builder"
% (builder.__class__.__name__, builder.name))
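
The re-wrap above presumably exists so I/O settings such as a growable `maxshape` survive the conversion to reference builders; a minimal sketch of the parameters being carried over:

```python
from hdmf.backends.hdf5.h5_utils import H5DataIO

wrapped = H5DataIO(data=[1, 2, 3], maxshape=(None,))  # growable first dimension
print(wrapped.get_io_params())  # includes 'maxshape': (None,), re-applied to the new wrapper
```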
6 changes: 1 addition & 5 deletions src/hdmf/container.py
@@ -629,12 +629,8 @@ def __repr__(self):
template += "\nFields:\n"
for k in sorted(self.fields): # sorted to enable tests
v = self.fields[k]
# if isinstance(v, DataIO) or not hasattr(v, '__len__') or len(v) > 0:
if hasattr(v, '__len__'):
if isinstance(v, (np.ndarray, list, tuple)):
if len(v) > 0:
template += " {}: {}\n".format(k, self.__smart_str(v, 1))
elif v:
if isinstance(v, (np.ndarray, list, tuple)) or v:
template += " {}: {}\n".format(k, self.__smart_str(v, 1))
else:
template += " {}: {}\n".format(k, v)
6 changes: 6 additions & 0 deletions src/hdmf/query.py
@@ -163,6 +163,12 @@ def __next__(self):
def next(self):
return self.dataset.next()

def append(self, arg):
"""
Override this method to support appending to backend-specific datasets
"""
pass # pragma: no cover


class ReferenceResolver(metaclass=ABCMeta):
"""
13 changes: 9 additions & 4 deletions src/hdmf/validate/validator.py
@@ -134,7 +134,7 @@ def get_type(data, builder_dtype=None):
elif isinstance(data, ReferenceResolver):
return data.dtype, None
# Numpy nd-array data
elif isinstance(data, np.ndarray):
elif isinstance(data, np.ndarray) and len(data.dtype) <= 1:
if data.size > 0:
return get_type(data[0], builder_dtype)
else:
@@ -147,11 +147,14 @@ def get_type(data, builder_dtype=None):
# Case for h5py.Dataset and other I/O specific array types
else:
# Compound dtype
if builder_dtype and isinstance(builder_dtype, list):
if builder_dtype and len(builder_dtype) > 1:
dtypes = []
string_formats = []
for i in range(len(builder_dtype)):
dtype, string_format = get_type(data[0][i])
if len(np.shape(data)) == 0:
dtype, string_format = get_type(data[()][i])
else:
dtype, string_format = get_type(data[0][i])
dtypes.append(dtype)
string_formats.append(string_format)
return dtypes, string_formats
@@ -438,7 +441,9 @@ def validate(self, **kwargs):
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
pass
if isinstance(builder.dtype, list):
if builder.dtype is not None and len(builder.dtype) > 1 and len(np.shape(builder.data)) == 0:
shape = () # scalar compound dataset
elif isinstance(builder.dtype, list):
shape = (len(builder.data), ) # only 1D datasets with compound types are supported
else:
shape = get_data_shape(data)
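
A sketch of the shape logic in NumPy terms: a scalar compound dataset validates against shape `()`, while 1-D compound data validates against `(len(data),)`:

```python
import numpy as np

cpd = np.dtype([("x", "i4"), ("y", "f8")])
scalar = np.array((1, 2.0), dtype=cpd)
print(len(np.shape(scalar)))  # 0 -> scalar compound, validated with shape ()
vector = np.array([(1, 2.0), (3, 4.0)], dtype=cpd)
print((len(vector),))         # (2,) -> 1-D compound dataset shape
```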