diff --git a/CHANGELOG.md b/CHANGELOG.md index c1af6aab8..4a2f7d80d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ - Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166) - Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165) +### Bug fixes +- Fixed bug when converting string datasets from Zarr to HDF5. @oruebel [#1171](https://github.com/hdmf-dev/hdmf/pull/1171) + ## HDMF 3.14.3 (July 29, 2024) ### Enhancements @@ -22,10 +25,10 @@ is available on build (during the write process), but not on read of a dataset f - Warn when unexpected keys are present in specs. @rly [#1134](https://github.com/hdmf-dev/hdmf/pull/1134) - Support appending to zarr arrays. @mavaylon1 [#1136](https://github.com/hdmf-dev/hdmf/pull/1136) - Support specifying "value" key in DatasetSpec. @rly [#1143](https://github.com/hdmf-dev/hdmf/pull/1143) -- Add support for numpy 2. @rly [#1139](https://github.com/hdmf-dev/hdmf/pull/1139) +- Added support for numpy 2. @rly [#1139](https://github.com/hdmf-dev/hdmf/pull/1139) ### Bug fixes -- Fix iterator increment causing an extra +1 added after the end of completion. @CodyCBakerPhD [#1128](https://github.com/hdmf-dev/hdmf/pull/1128) +- Fixed iterator increment causing an extra +1 added after the end of completion. @CodyCBakerPhD [#1128](https://github.com/hdmf-dev/hdmf/pull/1128) ## HDMF 3.14.1 (June 6, 2024) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index da07a6a5c..9b06e80f1 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -923,6 +923,12 @@ def __resolve_dtype__(cls, dtype, data): # TODO: These values exist, but I haven't solved them yet # binary # number + + # Use text dtype for Zarr datasets of strings. 
Zarr stores variable-length strings + # as objects, so we need to detect this special case here + if hasattr(data, 'attrs') and 'zarr_dtype' in data.attrs and data.attrs['zarr_dtype'] == 'str': + return cls.__dtypes['text'] + dtype = cls.__resolve_dtype_helper__(dtype) if dtype is None: dtype = cls.get_type(data) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b5815ee2c..58dcf19f6 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -209,10 +209,18 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 if (isinstance(value, np.ndarray) or (hasattr(value, 'astype') and hasattr(value, 'dtype'))): if spec_dtype_type is _unicode: - ret = value.astype('U') + if hasattr(value, 'attrs') and 'zarr_dtype' in value.attrs: + # Zarr stores strings as objects, so we cannot convert to unicode dtype + ret = value + else: + ret = value.astype('U') ret_dtype = "utf8" elif spec_dtype_type is _ascii: - ret = value.astype('S') + if hasattr(value, 'attrs') and 'zarr_dtype' in value.attrs: + # Zarr stores strings as objects, so we cannot convert to ascii dtype + ret = value + else: + ret = value.astype('S') ret_dtype = "ascii" else: dtype_func, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)