diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 8df190d55..b713f4763 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -165,13 +165,12 @@ jobs: auto-update-conda: true python-version: ${{ matrix.python-ver }} channels: conda-forge - mamba-version: "*" - name: Install build dependencies run: | conda config --set always_yes yes --set changeps1 no conda info - mamba install -c conda-forge "tox>=4" + conda install -c conda-forge "tox>=4" - name: Conda reporting run: | @@ -229,7 +228,6 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index bd2eeb921..08b6c59ea 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -101,7 +101,6 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 5e0b3bff2..2e94bcb62 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -139,13 +139,12 @@ jobs: auto-update-conda: true python-version: ${{ matrix.python-ver }} channels: conda-forge - mamba-version: "*" - name: Install build dependencies run: | conda config --set always_yes yes --set changeps1 no conda info - mamba install -c conda-forge "tox>=4" + conda install -c conda-forge "tox>=4" - name: Conda reporting run: | @@ -239,7 +238,6 @@ jobs: python-version: ${{ matrix.python-ver }} channels: conda-forge auto-activate-base: false - mamba-version: "*" - name: Install run dependencies run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f486273b..c84bfaffc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.9 + rev: v0.6.8 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate diff --git a/CHANGELOG.md b/CHANGELOG.md index 74d7bd477..b72ead1c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,49 @@ # HDMF Changelog -## HDMF 3.14.2 (Upcoming) +## HDMF 3.14.6 (Upcoming) + +### Bug fixes +- Fixed mamba-related error in conda-based GitHub Actions. @rly [#1194](https://github.com/hdmf-dev/hdmf/pull/1194) + +## HDMF 3.14.5 (September 17, 2024) + +### Enhancements +- Added support for overriding backend configurations of `h5py.Dataset` objects in `Container.set_data_io`. @pauladkisson [#1172](https://github.com/hdmf-dev/hdmf/pull/1172) + +### Bug fixes +- Fixed bug in writing of string arrays to an HDF5 file that were read from an HDF5 file that was introduced in 3.14.4. @rly @stephprince + [#1189](https://github.com/hdmf-dev/hdmf/pull/1189) + +## HDMF 3.14.4 (September 4, 2024) + +### Enhancements +- Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) +- Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166) +- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165) +- Added support to write multidimensional string arrays. 
@stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173) +- Add support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135) + +### Bug fixes +- Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176) +- Fixed H5DataIO not exposing `maxshape` on non-dci dsets. @cboulay [#1149](https://github.com/hdmf-dev/hdmf/pull/1149) +- Fixed generation of classes in an extension that contain attributes or datasets storing references to other types defined in the extension. + @rly [#1183](https://github.com/hdmf-dev/hdmf/pull/1183) + +## HDMF 3.14.3 (July 29, 2024) + +### Enhancements +- Added new attribute "dimension_labels" on `DatasetBuilder` which specifies the names of the dimensions used in the +dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute +is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081) +- Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102) +- Speed up namespace loading: return a shallow copy rather than a deep copy in build_const_args. @magland [#1103](https://github.com/hdmf-dev/hdmf/pull/1103) + +## HDMF 3.14.2 (July 7, 2024) ### Enhancements - Warn when unexpected keys are present in specs. @rly [#1134](https://github.com/hdmf-dev/hdmf/pull/1134) - Support appending to zarr arrays. @mavaylon1 [#1136](https://github.com/hdmf-dev/hdmf/pull/1136) +- Support specifying "value" key in DatasetSpec. @rly [#1143](https://github.com/hdmf-dev/hdmf/pull/1143) - Add support for numpy 2. @rly [#1139](https://github.com/hdmf-dev/hdmf/pull/1139) ### Bug fixes diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index d043a351a..04e351c41 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command. For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager that includes conda-forge as a default channel. -.. _Anaconda: https://www.anaconda.com/products/distribution +.. _Anaconda: https://www.anaconda.com/download .. _Mambaforge: https://github.com/conda-forge/miniforge Install from GitHub diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 8102651ff..49fbe07b2 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a conda install -c conda-forge hdmf -.. _Anaconda Distribution: https://www.anaconda.com/products/distribution +.. 
_Anaconda Distribution: https://www.anaconda.com/download diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index 8654e2b4b..2d7187721 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -17,11 +17,11 @@ import logging from ...array import Array -from ...data_utils import DataIO, AbstractDataChunkIterator +from ...data_utils import DataIO, AbstractDataChunkIterator, append_data from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver from ...region import RegionSlicer from ...spec import SpecWriter, SpecReader -from ...utils import docval, getargs, popargs, get_docval +from ...utils import docval, getargs, popargs, get_docval, get_data_shape class HDF5IODataChunkIteratorQueue(deque): @@ -108,6 +108,20 @@ def ref(self): def shape(self): return self.dataset.shape + def append(self, arg): + # Get Builder + builder = self.io.manager.get_builder(arg) + if builder is None: + raise ValueError( + "The container being appended to the dataset has not yet been built. " + "Please write the container to the file, then open the modified file, and " + "append the read container to the dataset." + ) + + # Get HDF5 Reference + ref = self.io._create_ref(builder) + append_data(self.dataset, ref) + class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta): """ @@ -501,7 +515,7 @@ def __init__(self, **kwargs): # Check for possible collision with other parameters if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data: self.__link_data = False - warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2) + warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3) # Call the super constructor and consume the data parameter super().__init__(**kwargs) # Construct the dict with the io args, ignoring all options that were set to None @@ -525,7 +539,7 @@ def __init__(self, **kwargs): self.__iosettings.pop('compression', None) if 'compression_opts' in self.__iosettings: warnings.warn('Compression disabled by compression=False setting. ' + - 'compression_opts parameter will, therefore, be ignored.', stacklevel=2) + 'compression_opts parameter will, therefore, be ignored.', stacklevel=3) self.__iosettings.pop('compression_opts', None) # Validate the compression options used self._check_compression_options() @@ -540,16 +554,37 @@ def __init__(self, **kwargs): if isinstance(self.data, Dataset): for k in self.__iosettings.keys(): warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k, - stacklevel=2) + stacklevel=3) self.__dataset = None @property def dataset(self): + """Get the cached h5py.Dataset.""" return self.__dataset @dataset.setter def dataset(self, val): + """Cache the h5py.Dataset written with the stored IO settings. + + This attribute can be used to cache a written, empty dataset and fill it in later. + This allows users to access the handle to the dataset *without* having to close + and reopen a file. 
+ + For example:: + + dataio = H5DataIO(shape=(5,), dtype=int) + foo = Foo('foo1', dataio, "I am foo1", 17, 3.14) + bucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[bucket]) + + io = HDF5IO(self.path, manager=self.manager, mode='w') + # write the object to disk, including initializing an empty int dataset with shape (5,) + io.write(foofile) + + foo.my_data.dataset[:] = [0, 1, 2, 3, 4] + io.close() + """ if self.__dataset is not None: raise ValueError("Cannot overwrite H5DataIO.dataset") self.__dataset = val @@ -597,7 +632,7 @@ def _check_compression_options(self): if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]: warnings.warn(str(self.__iosettings['compression']) + " compression may not be available " "on all installations of HDF5. Use of gzip is recommended to ensure portability of " - "the generated HDF5 files.", stacklevel=3) + "the generated HDF5 files.", stacklevel=4) @staticmethod def filter_available(filter, allow_plugin_filters): @@ -637,3 +672,14 @@ def valid(self): if isinstance(self.data, Dataset) and not self.data.id.valid: return False return super().valid + + @property + def maxshape(self): + if 'maxshape' in self.io_settings: + return self.io_settings['maxshape'] + elif hasattr(self.data, 'maxshape'): + return self.data.maxshape + elif hasattr(self, "shape"): + return self.shape + else: + return get_data_shape(self.data) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 8135d75e7..da7f78a91 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -344,7 +344,7 @@ def copy_file(self, **kwargs): warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of " "HDMF. Please use the export method or h5py.File.copy method instead.", category=DeprecationWarning, - stacklevel=2) + stacklevel=3) source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename', 'dest_filename', @@ -698,6 +698,8 @@ def __read_dataset(self, h5obj, name=None): d = ReferenceBuilder(target_builder) kwargs['data'] = d kwargs['dtype'] = d.dtype + elif h5obj.dtype.kind == 'V': # scalar compound data type + kwargs['data'] = np.array(scalar, dtype=h5obj.dtype) else: kwargs["data"] = scalar else: @@ -1227,6 +1229,8 @@ def _filler(): return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual + elif len(np.shape(data)) == 0: + dset = self.__scalar_fill__(parent, name, data, options) else: dset = self.__list_fill__(parent, name, data, options) # Write a dataset containing references, i.e., a region or object reference. 
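The two hunks above add read and write support for scalar datasets with a compound data type: on read, a dataset whose dtype kind is ``'V'`` (a compound/void dtype) is kept as a 0-d structured array, and on write, data with an empty shape is routed to ``__scalar_fill__`` instead of ``__list_fill__``. As a rough illustration of what such a dataset looks like (a minimal h5py sketch that is not part of this patch; the file name and field names are made up)::

    import h5py
    import numpy as np

    # a 0-d structured array, i.e., one compound value with no array dimensions
    compound_dtype = np.dtype([("id", "i4"), ("score", "f8")])
    scalar_value = np.array((1, 2.5), dtype=compound_dtype)

    with h5py.File("example.h5", "w") as f:
        # creating a dataset from 0-d data yields a scalar (shape ()) dataset
        f.create_dataset("scalar_compound", data=scalar_value)

    with h5py.File("example.h5", "r") as f:
        dset = f["scalar_compound"]
        print(dset.shape)          # () -> len(np.shape(...)) == 0 on the write path
        print(dset.dtype.kind)     # 'V' -> compound dtype branch on the read path
        print(dset[()]["score"])   # 2.5 -> fields accessed on the scalar value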
@@ -1469,7 +1473,7 @@ def __list_fill__(cls, parent, name, data, options=None): data_shape = io_settings.pop('shape') elif hasattr(data, 'shape'): data_shape = data.shape - elif isinstance(dtype, np.dtype): + elif isinstance(dtype, np.dtype) and len(dtype) > 1: # check if compound dtype data_shape = (len(data),) else: data_shape = get_data_shape(data) @@ -1514,6 +1518,7 @@ def __get_ref(self, **kwargs): self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name)) builder = self.manager.build(container) path = self.__get_path(builder) + self.logger.debug("Getting reference at path '%s'" % path) if isinstance(container, RegionBuilder): region = container.region @@ -1525,6 +1530,14 @@ def __get_ref(self, **kwargs): else: return self.__file[path].ref + @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', + 'default': None}, + {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', + 'default': None}, + returns='the reference', rtype=Reference) + def _create_ref(self, **kwargs): + return self.__get_ref(**kwargs) + def __is_ref(self, dtype): if isinstance(dtype, DtypeSpec): return self.__is_ref(dtype.dtype) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 73c683bbd..cb658b6d4 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -330,6 +330,10 @@ class DatasetBuilder(BaseBuilder): 'doc': 'The datatype of this dataset.', 'default': None}, {'name': 'attributes', 'type': dict, 'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()}, + {'name': 'dimension_labels', 'type': tuple, + 'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is ' + 'supplied only on build.'), + 'default': None}, {'name': 'maxshape', 'type': (int, tuple), 'doc': 'The shape of this dataset. 
Use None for scalars.', 'default': None}, {'name': 'chunks', 'type': bool, 'doc': 'Whether or not to chunk this dataset.', 'default': False}, @@ -337,11 +341,14 @@ class DatasetBuilder(BaseBuilder): {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None}) def __init__(self, **kwargs): """ Create a Builder object for a dataset """ - name, data, dtype, attributes, maxshape, chunks, parent, source = getargs( - 'name', 'data', 'dtype', 'attributes', 'maxshape', 'chunks', 'parent', 'source', kwargs) + name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs( + 'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source', + kwargs + ) super().__init__(name, attributes, parent, source) self['data'] = data self['attributes'] = _copy.copy(attributes) + self.__dimension_labels = dimension_labels self.__chunks = chunks self.__maxshape = maxshape if isinstance(data, BaseBuilder): @@ -361,6 +368,11 @@ def data(self, val): raise AttributeError("Cannot overwrite data.") self['data'] = val + @property + def dimension_labels(self): + """Labels for each dimension of this dataset from the spec.""" + return self.__dimension_labels + @property def chunks(self): """Whether or not this dataset is chunked.""" diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 25b9b81bd..967c34010 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -7,7 +7,7 @@ from .classgenerator import ClassGenerator, CustomClassGenerator, MCIClassGenerator from ..container import AbstractContainer, Container, Data from ..term_set import TypeConfigurator -from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog +from ..spec import DatasetSpec, GroupSpec, NamespaceCatalog, RefSpec from ..spec.spec import BaseStorageSpec from ..utils import docval, getargs, ExtenderMeta, get_docval @@ -480,6 +480,7 @@ def load_namespaces(self, **kwargs): load_namespaces here has the advantage of being able to keep track of type dependencies across namespaces. ''' deps = self.__ns_catalog.load_namespaces(**kwargs) + # register container types for each dependent type in each dependent namespace for new_ns, ns_deps in deps.items(): for src_ns, types in ns_deps.items(): for dt in types: @@ -529,7 +530,7 @@ def get_dt_container_cls(self, **kwargs): namespace = ns_key break if namespace is None: - raise ValueError("Namespace could not be resolved.") + raise ValueError(f"Namespace could not be resolved for data type '{data_type}'.") cls = self.__get_container_cls(namespace, data_type) @@ -549,6 +550,8 @@ def get_dt_container_cls(self, **kwargs): def __check_dependent_types(self, spec, namespace): """Ensure that classes for all types used by this type exist in this namespace and generate them if not. 
+ + `spec` should be a GroupSpec or DatasetSpec in the `namespace` """ def __check_dependent_types_helper(spec, namespace): if isinstance(spec, (GroupSpec, DatasetSpec)): @@ -564,6 +567,16 @@ def __check_dependent_types_helper(spec, namespace): if spec.data_type_inc is not None: self.get_dt_container_cls(spec.data_type_inc, namespace) + + # handle attributes that have a reference dtype + for attr_spec in spec.attributes: + if isinstance(attr_spec.dtype, RefSpec): + self.get_dt_container_cls(attr_spec.dtype.target_type, namespace) + # handle datasets that have a reference dtype + if isinstance(spec, DatasetSpec): + if isinstance(spec.dtype, RefSpec): + self.get_dt_container_cls(spec.dtype.target_type, namespace) + # recurse into nested types if isinstance(spec, GroupSpec): for child_spec in (spec.groups + spec.datasets + spec.links): __check_dependent_types_helper(child_spec, namespace) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b0bd7d594..83df1b427 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -10,14 +10,18 @@ from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) from .manager import Proxy, BuildManager -from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning + +from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning, + IncorrectDatasetShapeBuildWarning) +from hdmf.backends.hdf5.h5_utils import H5DataIO + from ..container import AbstractContainer, Data, DataRegion from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec -from ..utils import docval, getargs, ExtenderMeta, get_docval +from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape _const_arg = '__constructor_arg' @@ -597,11 +601,20 @@ def __get_data_type(cls, spec): def __convert_string(self, value, spec): """Convert string types to the specified dtype.""" + def __apply_string_type(value, string_type): + # NOTE: if a user passes a h5py.Dataset that is not wrapped with a hdmf.utils.StrDataset, + # then this conversion may not be correct. Users should unpack their string h5py.Datasets + # into a numpy array (or wrap them in StrDataset) before passing them to a container object. 
+ if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)): + return [__apply_string_type(item, string_type) for item in value] + else: + return string_type(value) + ret = value if isinstance(spec, AttributeSpec): if 'text' in spec.dtype: if spec.shape is not None or spec.dims is not None: - ret = list(map(str, value)) + ret = __apply_string_type(value, str) else: ret = str(value) elif isinstance(spec, DatasetSpec): @@ -617,7 +630,7 @@ def string_type(x): return x.isoformat() # method works for both date and datetime if string_type is not None: if spec.shape is not None or spec.dims is not None: - ret = list(map(string_type, value)) + ret = __apply_string_type(value, string_type) else: ret = string_type(value) # copy over any I/O parameters if they were specified @@ -721,19 +734,34 @@ def build(self, **kwargs): if not isinstance(container, Data): msg = "'container' must be of type Data with DatasetSpec" raise ValueError(msg) - spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext) + spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext) + dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims) if isinstance(spec_dtype, RefSpec): self.logger.debug("Building %s '%s' as a dataset of references (source: %s)" % (container.__class__.__name__, container.name, repr(source))) # create dataset builder with data=None as a placeholder. fill in with refs later - builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype.reftype) + builder = DatasetBuilder( + name, + data=None, + parent=parent, + source=source, + dtype=spec_dtype.reftype, + dimension_labels=dimension_labels, + ) manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager)) elif isinstance(spec_dtype, list): # a compound dataset self.logger.debug("Building %s '%s' as a dataset of compound dtypes (source: %s)" % (container.__class__.__name__, container.name, repr(source))) # create dataset builder with data=None, dtype=None as a placeholder. fill in with refs later - builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype) + builder = DatasetBuilder( + name, + data=None, + parent=parent, + source=source, + dtype=spec_dtype, + dimension_labels=dimension_labels, + ) manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container, manager)) else: @@ -744,7 +772,14 @@ def build(self, **kwargs): % (container.__class__.__name__, container.name, repr(source))) # an unspecified dtype and we were given references # create dataset builder with data=None as a placeholder. 
fill in with refs later - builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype='object') + builder = DatasetBuilder( + name, + data=None, + parent=parent, + source=source, + dtype="object", + dimension_labels=dimension_labels, + ) manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager)) else: # a dataset that has no references, pass the conversion off to the convert_dtype method @@ -760,7 +795,14 @@ def build(self, **kwargs): except Exception as ex: msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) raise Exception(msg) from ex - builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) + builder = DatasetBuilder( + name, + data=bldr_data, + parent=parent, + source=source, + dtype=dtype, + dimension_labels=dimension_labels, + ) # Add attributes from the specification extension to the list of attributes all_attrs = self.__spec.attributes + getattr(spec_ext, 'attributes', tuple()) @@ -779,14 +821,67 @@ def __check_dset_spec(self, orig, ext): """ dtype = orig.dtype shape = orig.shape + dims = orig.dims spec = orig if ext is not None: if ext.dtype is not None: dtype = ext.dtype if ext.shape is not None: shape = ext.shape + dims = ext.dims spec = ext - return dtype, shape, spec + return dtype, shape, dims, spec + + def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple: + if spec_shape is None or spec_dims is None: + return None + data_shape = get_data_shape(data) + # if shape is a list of allowed shapes, find the index of the shape that matches the data + if isinstance(spec_shape[0], list): + match_shape_inds = list() + for i, s in enumerate(spec_shape): + # skip this shape if it has a different number of dimensions from the data + if len(s) != len(data_shape): + continue + # check each dimension. None means any length is allowed + match = True + for j, d in enumerate(data_shape): + if s[j] is not None and s[j] != d: + match = False + break + if match: + match_shape_inds.append(i) + # use the most specific match -- the one with the fewest Nones + if match_shape_inds: + if len(match_shape_inds) == 1: + return tuple(spec_dims[match_shape_inds[0]]) + else: + count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds] + index_min_count = count_nones.index(min(count_nones)) + best_match_ind = match_shape_inds[index_min_count] + return tuple(spec_dims[best_match_ind]) + else: + # no matches found + msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None + else: + if len(data_shape) != len(spec_shape): + msg = "Shape of data does not match shape in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None + # check each dimension. 
None means any length is allowed + match = True + for j, d in enumerate(data_shape): + if spec_shape[j] is not None and spec_shape[j] != d: + match = False + break + if not match: + msg = "Shape of data does not match shape in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None + # shape is a single list of allowed dimension lengths + return tuple(spec_dims) def __is_reftype(self, data): if (isinstance(data, AbstractDataChunkIterator) or @@ -889,6 +984,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager): for d in container.data: target_builder = self.__get_target_builder(d, build_manager, builder) bldr_data.append(ReferenceBuilder(target_builder)) + if isinstance(container.data, H5DataIO): + # This is here to support appending a dataset of references. + bldr_data = H5DataIO(bldr_data, **container.data.get_io_params()) else: self.logger.debug("Setting %s '%s' data to reference builder" % (builder.__class__.__name__, builder.name)) diff --git a/src/hdmf/build/warnings.py b/src/hdmf/build/warnings.py index 3d5f02126..6a6ea6986 100644 --- a/src/hdmf/build/warnings.py +++ b/src/hdmf/build/warnings.py @@ -15,6 +15,13 @@ class IncorrectQuantityBuildWarning(BuildWarning): pass +class IncorrectDatasetShapeBuildWarning(BuildWarning): + """ + Raised when a dataset has a shape that is not allowed by the spec. + """ + pass + + class MissingRequiredBuildWarning(BuildWarning): """ Raised when a required field is missing. diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index fdca4bb81..1fc731ef5 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -628,7 +628,7 @@ def add_ref(self, **kwargs): if entity_uri is not None: entity_uri = entity.entity_uri msg = 'This entity already exists. Ignoring new entity uri' - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) ################# # Validate Object diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 2e90b0cdf..b4530c7b7 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -717,7 +717,7 @@ def add_row(self, **kwargs): warn(("Data has elements with different lengths and therefore cannot be coerced into an " "N-dimensional array. Use the 'index' argument when creating a column to add rows " "with different lengths."), - stacklevel=2) + stacklevel=3) def __eq__(self, other): """Compare if the two DynamicTables contain the same data. @@ -776,7 +776,7 @@ def add_column(self, **kwargs): # noqa: C901 if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. This functionality will be " - "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=2) + "deprecated in a future version of HDMF.", category=FutureWarning, stacklevel=3) if name in self.__colids: # column has already been added msg = "column '%s' already exists in %s '%s'" % (name, self.__class__.__name__, self.name) @@ -793,7 +793,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." % (name, self.__class__.__name__, spec_table)) - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) index_bool = index or not isinstance(index, bool) spec_index = self.__uninit_cols[name].get('index', False) @@ -803,7 +803,7 @@ def add_column(self, **kwargs): # noqa: C901 "Please ensure the new column complies with the spec. " "This will raise an error in a future version of HDMF." 
% (name, self.__class__.__name__, spec_index)) - warn(msg, stacklevel=2) + warn(msg, stacklevel=3) spec_col_cls = self.__uninit_cols[name].get('class', VectorData) if col_cls != spec_col_cls: @@ -841,7 +841,7 @@ def add_column(self, **kwargs): # noqa: C901 warn(("Data has elements with different lengths and therefore cannot be coerced into an " "N-dimensional array. Use the 'index' argument when adding a column of data with " "different lengths."), - stacklevel=2) + stacklevel=3) # Check that we are asked to create an index if (isinstance(index, bool) or isinstance(index, int)) and index > 0 and len(data) > 0: diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 287809406..7c450770a 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -2,7 +2,7 @@ from abc import abstractmethod from collections import OrderedDict from copy import deepcopy -from typing import Type +from typing import Type, Optional from uuid import uuid4 from warnings import warn import os @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from .data_utils import DataIO, append_data, extend_data +from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict from .term_set import TermSet, TermSetWrapper @@ -629,12 +629,8 @@ def __repr__(self): template += "\nFields:\n" for k in sorted(self.fields): # sorted to enable tests v = self.fields[k] - # if isinstance(v, DataIO) or not hasattr(v, '__len__') or len(v) > 0: if hasattr(v, '__len__'): - if isinstance(v, (np.ndarray, list, tuple)): - if len(v) > 0: - template += " {}: {}\n".format(k, self.__smart_str(v, 1)) - elif v: + if isinstance(v, (np.ndarray, list, tuple)) or v: template += " {}: {}\n".format(k, self.__smart_str(v, 1)) else: template += " {}: {}\n".format(k, v) @@ -830,7 +826,14 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out - def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kwargs: dict = None, **kwargs): + def set_data_io( + self, + dataset_name: str, + data_io_class: Type[DataIO], + data_io_kwargs: dict = None, + data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None, + data_chunk_iterator_kwargs: dict = None, **kwargs + ): """ Apply DataIO object to a dataset field of the Container. @@ -842,9 +845,18 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kw Class to use for DataIO, e.g. H5DataIO or ZarrDataIO data_io_kwargs: dict keyword arguments passed to the constructor of the DataIO class. + data_chunk_iterator_class: Type[AbstractDataChunkIterator] + Class to use for DataChunkIterator. If None, no DataChunkIterator is used. + data_chunk_iterator_kwargs: dict + keyword arguments passed to the constructor of the DataChunkIterator class. **kwargs: DEPRECATED. Use data_io_kwargs instead. kwargs are passed to the constructor of the DataIO class. + + Notes + ----- + If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in + the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. 
""" if kwargs or (data_io_kwargs is None): warn( @@ -855,8 +867,11 @@ def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kw ) data_io_kwargs = kwargs data = self.fields.get(dataset_name) + data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() if data is None: raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class") + if data_chunk_iterator_class is not None: + data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs) self.fields[dataset_name] = data_io_class(data=data, **data_io_kwargs) @@ -894,13 +909,19 @@ def set_dataio(self, **kwargs): warn( "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.", DeprecationWarning, - stacklevel=2, + stacklevel=3, ) dataio = getargs('dataio', kwargs) dataio.data = self.__data self.__data = dataio - def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None: + def set_data_io( + self, + data_io_class: Type[DataIO], + data_io_kwargs: dict, + data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None, + data_chunk_iterator_kwargs: dict = None, + ) -> None: """ Apply DataIO object to the data held by this Data object. @@ -910,8 +931,21 @@ def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None The DataIO to apply to the data held by this Data. data_io_kwargs: dict The keyword arguments to pass to the DataIO. + data_chunk_iterator_class: Type[AbstractDataChunkIterator] + The DataChunkIterator to use for the DataIO. If None, no DataChunkIterator is used. + data_chunk_iterator_kwargs: dict + The keyword arguments to pass to the DataChunkIterator. + + Notes + ----- + If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in + the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. """ - self.__data = data_io_class(data=self.__data, **data_io_kwargs) + data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() + data = self.__data + if data_chunk_iterator_class is not None: + data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs) + self.__data = data_io_class(data=data, **data_io_kwargs) @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'}) def transform(self, **kwargs): @@ -1142,7 +1176,9 @@ def _func(self, **kwargs): # still need to mark self as modified self.set_modified() if tmp.name in d: - msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name) + msg = (f"Cannot add {tmp.__class__} '{tmp.name}' at 0x{id(tmp)} to dict attribute '{attr_name}' in " + f"{cls} '{self.name}'. 
{d[tmp.name].__class__} '{tmp.name}' at 0x{id(d[tmp.name])} " + f"already exists in '{attr_name}' and has the same name.") raise ValueError(msg) d[tmp.name] = tmp return container diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 798a40973..91400da84 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -18,7 +18,8 @@ from .utils import docval, getargs, popargs, docval_macro, get_data_shape def append_data(data, arg): - if isinstance(data, (list, DataIO)): + from hdmf.backends.hdf5.h5_utils import HDMFDataset + if isinstance(data, (list, DataIO, HDMFDataset)): data.append(arg) return data elif type(data).__name__ == 'TermSetWrapper': # circular import diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 835b295c5..9693b0b1c 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -163,6 +163,12 @@ def __next__(self): def next(self): return self.dataset.next() + def append(self, arg): + """ + Override this method to support appending to backend-specific datasets + """ + pass # pragma: no cover + class ReferenceResolver(metaclass=ABCMeta): """ diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index a2ae0bd37..57232bd25 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -50,13 +50,13 @@ def __init__(self, **kwargs): self['full_name'] = full_name if version == str(SpecNamespace.UNVERSIONED): # the unversioned version may be written to file as a string and read from file as a string - warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name, stacklevel=2) + warn(f"Loaded namespace '{name}' is unversioned. Please notify the extension author.") version = SpecNamespace.UNVERSIONED if version is None: # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to # be able to read older files with extensions that are missing the version key. - warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. " - "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED), stacklevel=2) + warn(f"Loaded namespace '{name}' is missing the required key 'version'. Version will be set to " + f"'{SpecNamespace.UNVERSIONED}'. 
Please notify the extension author.") version = SpecNamespace.UNVERSIONED self['version'] = version if date is not None: @@ -466,15 +466,19 @@ def __load_namespace(self, namespace, reader, resolve=True): return included_types def __register_type(self, ndt, inc_ns, catalog, registered_types): - spec = inc_ns.get_spec(ndt) - spec_file = inc_ns.catalog.get_spec_source_file(ndt) - self.__register_dependent_types(spec, inc_ns, catalog, registered_types) - if isinstance(spec, DatasetSpec): - built_spec = self.dataset_spec_cls.build_spec(spec) + if ndt in registered_types: + # already registered + pass else: - built_spec = self.group_spec_cls.build_spec(spec) - registered_types.add(ndt) - catalog.register_spec(built_spec, spec_file) + spec = inc_ns.get_spec(ndt) + spec_file = inc_ns.catalog.get_spec_source_file(ndt) + self.__register_dependent_types(spec, inc_ns, catalog, registered_types) + if isinstance(spec, DatasetSpec): + built_spec = self.dataset_spec_cls.build_spec(spec) + else: + built_spec = self.group_spec_cls.build_spec(spec) + registered_types.add(ndt) + catalog.register_spec(built_spec, spec_file) def __register_dependent_types(self, spec, inc_ns, catalog, registered_types): """Ensure that classes for all types used by this type are registered @@ -529,7 +533,7 @@ def load_namespaces(self, **kwargs): if ns['version'] != self.__namespaces.get(ns['name'])['version']: # warn if the cached namespace differs from the already loaded namespace warn("Ignoring cached namespace '%s' version %s because version %s is already loaded." - % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']), stacklevel=2) + % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version'])) else: to_load.append(ns) # now load specs into namespace diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 6d7d29e49..e10d5e43e 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1,7 +1,6 @@ import re from abc import ABCMeta from collections import OrderedDict -from copy import deepcopy from warnings import warn from ..utils import docval, getargs, popargs, get_docval @@ -84,7 +83,7 @@ class ConstructableDict(dict, metaclass=ABCMeta): def build_const_args(cls, spec_dict): ''' Build constructor arguments for this ConstructableDict class from a dictionary ''' # main use cases are when spec_dict is a ConstructableDict or a spec dict read from a file - return deepcopy(spec_dict) + return spec_dict.copy() @classmethod def build_spec(cls, spec_dict): @@ -322,7 +321,7 @@ def __init__(self, **kwargs): default_name = getargs('default_name', kwargs) if default_name: if name is not None: - warn("found 'default_name' with 'name' - ignoring 'default_name'", stacklevel=2) + warn("found 'default_name' with 'name' - ignoring 'default_name'") else: self['default_name'] = default_name self.__attributes = dict() @@ -648,6 +647,7 @@ def build_const_args(cls, spec_dict): {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'default_value', 'type': None, 'doc': 'a default value for this dataset', 'default': None}, + {'name': 'value', 'type': None, 'doc': 'a fixed value for this dataset', 'default': None}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, {'name': 'data_type_inc', 'type': (str, 'DatasetSpec'), 'doc': 'the data type this specification extends', 
'default': None}, @@ -662,7 +662,8 @@ class DatasetSpec(BaseStorageSpec): @docval(*_dataset_args) def __init__(self, **kwargs): - doc, shape, dims, dtype, default_value = popargs('doc', 'shape', 'dims', 'dtype', 'default_value', kwargs) + doc, shape, dims, dtype = popargs('doc', 'shape', 'dims', 'dtype', kwargs) + default_value, value = popargs('default_value', 'value', kwargs) if shape is not None: self['shape'] = shape if dims is not None: @@ -685,6 +686,8 @@ def __init__(self, **kwargs): super().__init__(doc, **kwargs) if default_value is not None: self['default_value'] = default_value + if value is not None: + self['value'] = value if self.name is not None: valid_quant_vals = [1, 'zero_or_one', ZERO_OR_ONE] if self.quantity not in valid_quant_vals: @@ -762,6 +765,11 @@ def default_value(self): '''The default value of the dataset or None if not specified''' return self.get('default_value', None) + @property + def value(self): + '''The fixed value of the dataset or None if not specified''' + return self.get('value', None) + @classmethod def dtype_spec_cls(cls): ''' The class to use when constructing DtypeSpec objects diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 5e0b61539..50db79c40 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -1140,7 +1140,7 @@ def update(self, other): @docval_macro('array_data') class StrDataset(h5py.Dataset): - """Wrapper to decode strings on reading the dataset""" + """Wrapper to decode strings on reading the dataset. Use only for h5py 3+.""" def __init__(self, dset, encoding, errors='strict'): self.dset = dset if encoding is None: diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index e39011d9f..2668da1ec 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -134,7 +134,7 @@ def get_type(data, builder_dtype=None): elif isinstance(data, ReferenceResolver): return data.dtype, None # Numpy nd-array data - elif isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray) and len(data.dtype) <= 1: if data.size > 0: return get_type(data[0], builder_dtype) else: @@ -147,11 +147,14 @@ def get_type(data, builder_dtype=None): # Case for h5py.Dataset and other I/O specific array types else: # Compound dtype - if builder_dtype and isinstance(builder_dtype, list): + if builder_dtype and len(builder_dtype) > 1: dtypes = [] string_formats = [] for i in range(len(builder_dtype)): - dtype, string_format = get_type(data[0][i]) + if len(np.shape(data)) == 0: + dtype, string_format = get_type(data[()][i]) + else: + dtype, string_format = get_type(data[0][i]) dtypes.append(dtype) string_formats.append(string_format) return dtypes, string_formats @@ -438,7 +441,9 @@ def validate(self, **kwargs): except EmptyArrayError: # do not validate dtype of empty array. 
HDMF does not yet set dtype when writing a list/tuple pass - if isinstance(builder.dtype, list): + if builder.dtype is not None and len(builder.dtype) > 1 and len(np.shape(builder.data)) == 0: + shape = () # scalar compound dataset + elif isinstance(builder.dtype, list): shape = (len(builder.data), ) # only 1D datasets with compound types are supported else: shape = get_data_shape(data) diff --git a/tests/unit/build_tests/mapper_tests/test_build.py b/tests/unit/build_tests/mapper_tests/test_build.py index b90ad6f1a..28cc9518e 100644 --- a/tests/unit/build_tests/mapper_tests/test_build.py +++ b/tests/unit/build_tests/mapper_tests/test_build.py @@ -4,7 +4,7 @@ from hdmf import Container, Data, TermSet, TermSetWrapper from hdmf.common import VectorData, get_type_map from hdmf.build import ObjectMapper, BuildManager, TypeMap, GroupBuilder, DatasetBuilder -from hdmf.build.warnings import DtypeConversionWarning +from hdmf.build.warnings import DtypeConversionWarning, IncorrectDatasetShapeBuildWarning from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, Spec from hdmf.testing import TestCase from hdmf.utils import docval, getargs @@ -650,3 +650,287 @@ def test_build_incorrect_dtype(self): msg = "could not resolve dtype for BarData 'my_bar'" with self.assertRaisesWith(Exception, msg): self.manager.build(bar_data_holder_inst, source='test.h5') + + +class BuildDatasetShapeMixin(TestCase, metaclass=ABCMeta): + + def setUp(self): + self.set_up_specs() + spec_catalog = SpecCatalog() + spec_catalog.register_spec(self.bar_data_spec, 'test.yaml') + spec_catalog.register_spec(self.bar_data_holder_spec, 'test.yaml') + namespace = SpecNamespace( + doc='a test namespace', + name=CORE_NAMESPACE, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(CORE_NAMESPACE, 'BarData', BarData) + type_map.register_container_type(CORE_NAMESPACE, 'BarDataHolder', BarDataHolder) + type_map.register_map(BarData, ExtBarDataMapper) + type_map.register_map(BarDataHolder, ObjectMapper) + self.manager = BuildManager(type_map) + + def set_up_specs(self): + shape, dims = self.get_base_shape_dims() + self.bar_data_spec = DatasetSpec( + doc='A test dataset specification with a data type', + data_type_def='BarData', + dtype='int', + shape=shape, + dims=dims, + ) + self.bar_data_holder_spec = GroupSpec( + doc='A container of multiple extended BarData objects', + data_type_def='BarDataHolder', + datasets=[self.get_dataset_inc_spec()], + ) + + @abstractmethod + def get_base_shape_dims(self): + pass + + @abstractmethod + def get_dataset_inc_spec(self): + pass + + +class TestBuildDatasetOneOptionBadShapeUnspecified1(BuildDatasetShapeMixin): + """Test dataset spec shape = 2D any length, data = 1D. Should raise warning and set dimension_labels to None.""" + + def get_base_shape_dims(self): + return [None, None], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. 
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[1, 2, 3], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + msg = "Shape of data does not match shape in spec 'BarData'" + with self.assertWarnsWith(IncorrectDatasetShapeBuildWarning, msg): + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetOneOptionBadShapeUnspecified2(BuildDatasetShapeMixin): + """Test dataset spec shape = (any, 2), data = (3, 1). Should raise warning and set dimension_labels to None.""" + + def get_base_shape_dims(self): + return [None, 2], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1], [2], [3]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + msg = "Shape of data does not match shape in spec 'BarData'" + with self.assertWarnsWith(IncorrectDatasetShapeBuildWarning, msg): + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetTwoOptionsBadShapeUnspecified(BuildDatasetShapeMixin): + """Test dataset spec shape = (any, 2) or (any, 3), data = (3, 1). + Should raise warning and set dimension_labels to None. + """ + + def get_base_shape_dims(self): + return [[None, 2], [None, 3]], [['a', 'b1'], ['a', 'b2']] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1], [2], [3]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + msg = "Shape of data does not match any allowed shapes in spec 'BarData'" + with self.assertWarnsWith(IncorrectDatasetShapeBuildWarning, msg): + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetDimensionLabelsUnspecified(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return None, None + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. 
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + + +class TestBuildDatasetDimensionLabelsOneOption(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [None, None], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b') + + +class TestBuildDatasetDimensionLabelsTwoOptionsOneMatch(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [[None], [None, None]], [['a'], ['a', 'b']] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b') + + +class TestBuildDatasetDimensionLabelsTwoOptionsTwoMatches(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [[None, None], [None, 3]], [['a', 'b1'], ['a', 'b2']] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. + """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b2') + + +class TestBuildDatasetDimensionLabelsOneOptionRefined(BuildDatasetShapeMixin): + + def get_base_shape_dims(self): + return [None, None], ['a', 'b1'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + shape=[None, 3], + dims=['a', 'b2'], + ) + return dataset_inc_spec + + def test_build(self): + """ + Test build of BarDataHolder which contains a BarData. 
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels == ('a', 'b2') diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 52fdc4839..42a55b470 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -7,7 +7,9 @@ from hdmf.build import TypeMap, CustomClassGenerator from hdmf.build.classgenerator import ClassGenerator, MCIClassGenerator from hdmf.container import Container, Data, MultiContainerInterface, AbstractContainer -from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, LinkSpec +from hdmf.spec import ( + GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, LinkSpec, RefSpec +) from hdmf.testing import TestCase from hdmf.utils import get_docval, docval @@ -180,10 +182,11 @@ def test_dynamic_container_creation(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), - AttributeSpec('attr4', 'another float attribute', 'float')]) + AttributeSpec('attr4', 'another float attribute', 'float'), + AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,)),]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'skip_post_init'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'skip_post_init'} received_args = set() for x in get_docval(cls.__init__): @@ -211,7 +214,7 @@ def test_dynamic_container_creation_defaults(self): AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) - expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'foo', 'skip_post_init'} + expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'foo', 'skip_post_init'} received_args = set(map(lambda x: x['name'], get_docval(cls.__init__))) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') @@ -733,9 +736,18 @@ def _build_separate_namespaces(self): GroupSpec(data_type_inc='Bar', doc='a bar', quantity='?') ] ) + moo_spec = DatasetSpec( + doc='A test dataset that is a 1D array of object references of Baz', + data_type_def='Moo', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Baz' + ) + ) create_load_namespace_yaml( namespace_name='ndx-test', - specs=[baz_spec], + specs=[baz_spec, moo_spec], output_dir=self.test_dir, incl_types={ CORE_NAMESPACE: ['Bar'], @@ -827,6 +839,171 @@ def test_get_class_include_from_separate_ns_4(self): self._check_classes(baz_cls, bar_cls, bar_cls2, qux_cls, qux_cls2) +class TestGetClassObjectReferences(TestCase): + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + if os.path.exists(self.test_dir): # start clean + self.tearDown() + os.mkdir(self.test_dir) + self.type_map = TypeMap() + + def tearDown(self): + 
shutil.rmtree(self.test_dir) + + def test_get_class_include_dataset_of_references(self): + """Test that get_class resolves datasets of object references.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + moo_spec = DatasetSpec( + doc='A test dataset that is a 1D array of object references of Qux', + data_type_def='Moo', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Qux' + ), + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, moo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Moo', 'ndx-test') + # now, Moo and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 2 + assert "Moo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_attribute_object_reference(self): + """Test that get_class resolves data types with an attribute that is an object reference.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + woo_spec = DatasetSpec( + doc='A test dataset that has a scalar object reference to a Qux', + data_type_def='Woo', + attributes=[ + AttributeSpec( + name='attr1', + doc='a string attribute', + dtype=RefSpec(reftype='object', target_type='Qux') + ), + ] + ) + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, woo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Woo', 'ndx-test') + # now, Woo and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 2 + assert "Woo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def test_get_class_include_nested_object_reference(self): + """Test that get_class resolves nested datasets that are object references.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + spam_spec = DatasetSpec( + doc='A test extension', + data_type_def='Spam', + shape=(None,), + dtype=RefSpec( + reftype='object', + target_type='Qux' + ), + ) + goo_spec = GroupSpec( + doc='A test dataset that has a nested dataset (Spam) that has a scalar object reference to a Qux', + data_type_def='Goo', + datasets=[ + DatasetSpec( + doc='a dataset', + data_type_inc='Spam', + ), + ], + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, spam_spec, goo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Goo', 'ndx-test') + # now, Goo, Spam, and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 3 + assert "Goo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Spam" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + + def 
test_get_class_include_nested_attribute_object_reference(self): + """Test that get_class resolves nested datasets that have an attribute that is an object reference.""" + qux_spec = DatasetSpec( + doc='A test extension', + data_type_def='Qux' + ) + bam_spec = DatasetSpec( + doc='A test extension', + data_type_def='Bam', + attributes=[ + AttributeSpec( + name='attr1', + doc='a string attribute', + dtype=RefSpec(reftype='object', target_type='Qux') + ), + ], + ) + boo_spec = GroupSpec( + doc='A test dataset that has a nested dataset (Spam) that has a scalar object reference to a Qux', + data_type_def='Boo', + datasets=[ + DatasetSpec( + doc='a dataset', + data_type_inc='Bam', + ), + ], + ) + + create_load_namespace_yaml( + namespace_name='ndx-test', + specs=[qux_spec, bam_spec, boo_spec], + output_dir=self.test_dir, + incl_types={}, + type_map=self.type_map + ) + # no types should be resolved to start + assert self.type_map.get_container_classes('ndx-test') == [] + + self.type_map.get_dt_container_cls('Boo', 'ndx-test') + # now, Boo, Bam, and Qux should be resolved + assert len(self.type_map.get_container_classes('ndx-test')) == 3 + assert "Boo" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Bam" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + assert "Qux" in [c.__name__ for c in self.type_map.get_container_classes('ndx-test')] + class EmptyBar(Container): pass diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index 01421e218..a3be47cf7 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -341,7 +341,7 @@ def test_get_dt_container_cls(self): self.assertIs(ret, Foo) def test_get_dt_container_cls_no_namespace(self): - with self.assertRaisesWith(ValueError, "Namespace could not be resolved."): + with self.assertRaisesWith(ValueError, "Namespace could not be resolved for data type 'Unknown'."): self.type_map.get_dt_container_cls(data_type="Unknown") diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index 63f397682..730530a5a 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -1,4 +1,4 @@ -from hdmf.utils import docval, getargs +from hdmf.utils import StrDataset, docval, getargs from hdmf import Container, Data from hdmf.backends.hdf5 import H5DataIO from hdmf.build import (GroupBuilder, DatasetBuilder, ObjectMapper, BuildManager, TypeMap, LinkBuilder, @@ -7,11 +7,15 @@ from hdmf.spec import (GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, RefSpec, LinkSpec) from hdmf.testing import TestCase +import h5py from abc import ABCMeta, abstractmethod import unittest +import numpy as np from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map +H5PY_3 = h5py.__version__.startswith('3') + class Bar(Container): @@ -20,24 +24,27 @@ class Bar(Container): {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}, + {'name': 'attr_array', 'type': 'array_data', 'doc': 'another attribute', 'default': (1, 2, 3)}, {'name': 'foo', 'type': 'Foo', 'doc': 'a group', 'default': None}) def __init__(self, **kwargs): - name, data, attr1, attr2, attr3, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', 'foo', kwargs) + name, data, attr1, attr2, attr3, attr_array, foo = getargs('name', 
'data', 'attr1', 'attr2', 'attr3', + 'attr_array', 'foo', kwargs) super().__init__(name=name) self.__data = data self.__attr1 = attr1 self.__attr2 = attr2 self.__attr3 = attr3 + self.__attr_array = attr_array self.__foo = foo if self.__foo is not None and self.__foo.parent is None: self.__foo.parent = self def __eq__(self, other): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') + attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'attr_array', 'foo') return ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) @property @@ -60,6 +67,10 @@ def attr2(self): def attr3(self): return self.__attr3 + @property + def attr_array(self): + return self.__attr_array + @property def foo(self): return self.__foo @@ -333,12 +344,15 @@ def test_build_1d(self): datasets=[DatasetSpec('an example dataset', 'text', name='data', shape=(None,), attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), + AttributeSpec('attr_array', 'an example array attribute', 'text', + shape=(None,))]) type_map = self.customSetUp(bar_spec) type_map.register_map(Bar, BarMapper) - bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10) + bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10, attr_array=['a', 'b', 'c', 'd']) builder = type_map.build(bar_inst) - self.assertEqual(builder.get('data').data, ['a', 'b', 'c', 'd']) + np.testing.assert_array_equal(builder.get('data').data, np.array(['a', 'b', 'c', 'd'])) + np.testing.assert_array_equal(builder.get('attr_array'), np.array(['a', 'b', 'c', 'd'])) def test_build_scalar(self): bar_spec = GroupSpec('A test group specification with a data type', @@ -353,6 +367,228 @@ def test_build_scalar(self): builder = type_map.build(bar_inst) self.assertEqual(builder.get('data').data, "['a', 'b', 'c', 'd']") + def test_build_2d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_2d = [['aa', 'bb'], ['cc', 'dd']] + bar_inst = Bar('my_bar', str_lol_2d, 'value1', 10, attr_array=str_lol_2d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_2d) + self.assertEqual(builder.get('attr_array'), str_lol_2d) + + def test_build_2d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_2d = 
np.array([['aa', 'bb'], ['cc', 'dd']]) + bar_inst = Bar('my_bar', str_array_2d, 'value1', 10, attr_array=str_array_2d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_2d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_2d) + + def test_build_3d_lol(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_lol_3d = [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]] + bar_inst = Bar('my_bar', str_lol_3d, 'value1', 10, attr_array=str_lol_3d) + builder = type_map.build(bar_inst) + self.assertEqual(builder.get('data').data, str_lol_3d) + self.assertEqual(builder.get('attr_array'), str_lol_3d) + + def test_build_3d_ndarray(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + str_array_3d = np.array([[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]]) + bar_inst = Bar('my_bar', str_array_3d, 'value1', 10, attr_array=str_array_3d) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, str_array_3d) + np.testing.assert_array_equal(builder.get('attr_array'), str_array_3d) + + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_build_1d_h5py_3_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, ), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, ))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_1d = np.array( + ['aa', 'bb', 'cc', 'dd'], + dtype=h5py.special_dtype(vlen=str) + ) + # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+ + dataset = StrDataset(f.create_dataset('data', data=str_array_1d), None) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+") + def test_build_3d_h5py_3_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + 
datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_3d = np.array( + [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]], + dtype=h5py.special_dtype(vlen=str) + ) + # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+ + dataset = StrDataset(f.create_dataset('data', data=str_array_3d), None) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3") + def test_build_1d_h5py_2_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, ), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, ))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_1d = np.array( + ['aa', 'bb', 'cc', 'dd'], + dtype=h5py.special_dtype(vlen=str) + ) + dataset = f.create_dataset('data', data=str_array_1d) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + + @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3") + def test_build_3d_h5py_2_dataset(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='text', + name='data', + shape=(None, None, None), + attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')], + ) + ], + attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text', + shape=(None, None, None))], + ) + type_map = self.customSetUp(bar_spec) + type_map.register_map(Bar, BarMapper) + # create in-memory hdf5 file that is discarded after closing + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + str_array_3d = np.array( + [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]], + dtype=h5py.special_dtype(vlen=str) + ) + dataset = f.create_dataset('data', data=str_array_3d) + bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset) + builder = type_map.build(bar_inst) + np.testing.assert_array_equal(builder.get('data').data, dataset[:]) + np.testing.assert_array_equal(builder.get('attr_array'), dataset[:]) + def test_build_dataio(self): bar_spec = GroupSpec('A test group 
specification with a data type', data_type_def='Bar', diff --git a/tests/unit/spec_tests/test_dataset_spec.py b/tests/unit/spec_tests/test_dataset_spec.py index 008e8c6fc..c9db14635 100644 --- a/tests/unit/spec_tests/test_dataset_spec.py +++ b/tests/unit/spec_tests/test_dataset_spec.py @@ -246,6 +246,10 @@ def test_data_type_property_value(self): data_type_inc=data_type_inc, data_type_def=data_type_def) self.assertEqual(group.data_type, data_type) + def test_constructor_value(self): + spec = DatasetSpec(doc='my first dataset', dtype='int', name='dataset1', value=42) + assert spec.value == 42 + def test_build_warn_extra_args(self): spec_dict = { 'name': 'dataset1', diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 5a4fd5a32..58119ce9b 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -24,7 +24,7 @@ from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace -from hdmf.spec.spec import GroupSpec +from hdmf.spec.spec import GroupSpec, DtypeSpec from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD from hdmf.term_set import TermSet, TermSetWrapper @@ -144,6 +144,16 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + def test_write_dataset_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + dset = self.f['test_dataset'] + self.assertTupleEqual(dset.shape, ()) + self.assertEqual(dset[()].tolist(), a.tolist()) + ########################################## # write_dataset tests: TermSetWrapper ########################################## @@ -164,6 +174,31 @@ def test_write_dataset_list(self): dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a)) + def test_write_dataset_lol_strings(self): + a = [['aa', 'bb'], ['cc', 'dd']] + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + decoded_dset = [[item.decode('utf-8') if isinstance(item, bytes) else item for item in sublist] + for sublist in dset[:]] + self.assertTrue(decoded_dset == a) + + def test_write_dataset_list_compound_datatype(self): + a = np.array([(1, 2, 0.5), (3, 4, 0.5)], dtype=[('x', 'int'), ('y', 'int'), ('z', 'float')]) + dset_builder = DatasetBuilder( + name='test_dataset', + data=a.tolist(), + attributes={}, + dtype=[ + DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='int'), + DtypeSpec('z', doc='z', dtype='float'), + ], + ) + self.io.write_dataset(self.f, dset_builder) + dset = self.f['test_dataset'] + for field in a.dtype.names: + self.assertTrue(np.all(dset[field][:] == a[field])) + def test_write_dataset_list_compress_gzip(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='gzip', @@ -572,6 +607,12 @@ def test_pass_through_of_chunk_shape_generic_data_chunk_iterator(self): ############################################# # H5DataIO general ############################################# + def test_pass_through_of_maxshape_on_h5dataset(self): + k = 10 + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(k), attributes={})) + dset = H5DataIO(self.f['test_dataset']) + 
self.assertEqual(dset.maxshape, (k,)) + def test_warning_on_non_gzip_compression(self): # Make sure no warning is issued when using gzip with warnings.catch_warnings(record=True) as w: @@ -762,6 +803,17 @@ def test_read_str(self): self.assertEqual(str(bldr['test_dataset'].data), '') + def test_read_scalar_compound(self): + cmpd_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + a = np.array((1, 0.1), dtype=cmpd_dtype) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, + dtype=[DtypeSpec('x', doc='x', dtype='int32'), + DtypeSpec('y', doc='y', dtype='float64')])) + self.io.close() + with HDF5IO(self.path, 'r') as io: + bldr = io.read_builder() + np.testing.assert_array_equal(bldr['test_dataset'].data[()], a) + class TestRoundTrip(TestCase): @@ -2958,6 +3010,57 @@ def test_append_data(self): self.assertEqual(f['foofile_data'].file.filename, self.paths[1]) self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference) + def test_append_dataset_of_references(self): + """Test that exporting a written container with a dataset of references works.""" + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io: + read_bucket1 = append_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + append_io.write(read_bucket1) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + DoR = read_bucket1.baz_data.data + DoR.append(read_bucket1.bazs['new']) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + self.assertEqual(len(read_bucket1.baz_data.data), 2) + self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"]) + + def test_append_dataset_of_references_orphaned_target(self): + bazs = [] + num_bazs = 1 + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + array_bazs=np.array(bazs) + wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,)) + baz_data = BazData(name='baz_data1', data=wrapped_bazs) + bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io: + read_bucket1 = ref_io.read() + new_baz = Baz(name='new') + read_bucket1.add_baz(new_baz) + DoR = read_bucket1.baz_data.data + with self.assertRaises(ValueError): + DoR.append(read_bucket1.bazs['new']) + def test_append_external_link_data(self): """Test that exporting a written container after adding a link with link_data=True creates external links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -3666,6 +3769,14 @@ def test_dataio_shape_then_data(self): with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"): dataio.data = list() + def test_dataio_maxshape(self): + dataio = H5DataIO(data=np.arange(10), maxshape=(None,)) + self.assertEqual(dataio.maxshape, (None,)) + + def test_dataio_maxshape_from_data(self): + dataio = H5DataIO(data=[1, 2, 3, 4, 5, 6, 7, 8, 
9, 10]) + self.assertEqual(dataio.maxshape, (10,)) + def test_hdf5io_can_read(): assert not HDF5IO.can_read("not_a_file") @@ -3690,6 +3801,11 @@ def __init__(self, **kwargs): self.data2 = kwargs["data2"] self.obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) + self.file_path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.file_path): + os.remove(self.file_path) def test_set_data_io(self): self.obj.set_data_io("data1", H5DataIO, data_io_kwargs=dict(chunks=True)) @@ -3712,6 +3828,31 @@ def test_set_data_io_old_api(self): self.assertIsInstance(self.obj.data1, H5DataIO) self.assertTrue(self.obj.data1.io_settings["chunks"]) + def test_set_data_io_h5py_dataset(self): + file = File(self.file_path, 'w') + data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,)) + class ContainerWithData(Container): + __fields__ = ('data',) + + @docval( + {"name": "name", "doc": "name", "type": str}, + {'name': 'data', 'doc': 'field1 doc', 'type': h5py.Dataset}, + ) + def __init__(self, **kwargs): + super().__init__(name=kwargs["name"]) + self.data = kwargs["data"] + + container = ContainerWithData("name", data) + container.set_data_io( + "data", + H5DataIO, + data_io_kwargs=dict(chunks=(2,)), + data_chunk_iterator_class=DataChunkIterator, + ) + + self.assertIsInstance(container.data, H5DataIO) + self.assertEqual(container.data.io_settings["chunks"], (2,)) + file.close() class TestDataSetDataIO(TestCase): @@ -3720,8 +3861,30 @@ class MyData(Data): pass self.data = MyData("my_data", [1, 2, 3]) + self.file_path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.file_path): + os.remove(self.file_path) def test_set_data_io(self): self.data.set_data_io(H5DataIO, dict(chunks=True)) assert isinstance(self.data.data, H5DataIO) assert self.data.data.io_settings["chunks"] + + def test_set_data_io_h5py_dataset(self): + file = File(self.file_path, 'w') + data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,)) + class MyData(Data): + pass + + my_data = MyData("my_data", data) + my_data.set_data_io( + H5DataIO, + data_io_kwargs=dict(chunks=(2,)), + data_chunk_iterator_class=DataChunkIterator, + ) + + self.assertIsInstance(my_data.data, H5DataIO) + self.assertEqual(my_data.data.io_settings["chunks"], (2,)) + file.close() diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py index c705d0a6e..6da81c2cc 100644 --- a/tests/unit/test_multicontainerinterface.py +++ b/tests/unit/test_multicontainerinterface.py @@ -198,7 +198,10 @@ def test_add_single_dup(self): """Test that adding a container to the attribute dict correctly adds the container.""" obj1 = Container('obj1') foo = Foo(obj1) - msg = "'obj1' already exists in Foo 'Foo'" + msg = (f"Cannot add 'obj1' at 0x{id(obj1)} to dict attribute " + "'containers' in 'Foo'. 
" + f" 'obj1' at 0x{id(obj1)} already exists in 'containers' " + "and has the same name.") with self.assertRaisesWith(ValueError, msg): foo.add_container(obj1) diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 95ff5d98e..dd79cfce5 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -501,6 +501,28 @@ def test_np_bool_for_bool(self): results = self.vmap.validate(bar_builder) self.assertEqual(len(results), 0) + def test_scalar_compound_dtype(self): + """Test that validator allows scalar compound dtype data where a compound dtype is specified.""" + spec_catalog = SpecCatalog() + dtype = [DtypeSpec('x', doc='x', dtype='int'), DtypeSpec('y', doc='y', dtype='float')] + spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec('an example dataset', dtype, name='data',)], + attributes=[AttributeSpec('attr1', 'an example attribute', 'text',)]) + spec_catalog.register_spec(spec, 'test2.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test2.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + value = np.array((1, 2.2), dtype=[('x', 'int'), ('y', 'float')]) + bar_builder = GroupBuilder('my_bar', + attributes={'data_type': 'Bar', 'attr1': 'test'}, + datasets=[DatasetBuilder(name='data', + data=value, + dtype=[DtypeSpec('x', doc='x', dtype='int'), + DtypeSpec('y', doc='y', dtype='float'),],),]) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) class Test1DArrayValidation(TestCase):