diff --git a/ci/environment.yml b/ci/environment.yml index 883463a..70a4c9c 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -13,6 +13,8 @@ dependencies: - ujson - packaging - universal_pathlib + - hdf5plugin + - numcodecs # Testing - codecov - pre-commit @@ -27,7 +29,11 @@ dependencies: - fsspec - s3fs - fastparquet + - imagecodecs>=2024.6.1 # for opening tiff files - tifffile # for opening FITS files - astropy + - pip + - pip: + - imagecodecs-numcodecs==2024.6.1 diff --git a/ci/min-deps.yml b/ci/min-deps.yml index 7ca8c0b..af4a732 100644 --- a/ci/min-deps.yml +++ b/ci/min-deps.yml @@ -10,9 +10,11 @@ dependencies: - xarray>=2024.6.0 - numpy>=2.0.0 - numcodecs + - imagecodecs>=2024.6.1 - packaging - ujson - universal_pathlib + - hdf5plugin # Testing - codecov - pre-commit @@ -24,3 +26,6 @@ dependencies: - pytest - pooch - fsspec + - pip + - pip: + - imagecodecs-numcodecs==2024.6.1 diff --git a/ci/upstream.yml b/ci/upstream.yml index 2c2680b..f6c66df 100644 --- a/ci/upstream.yml +++ b/ci/upstream.yml @@ -11,6 +11,9 @@ dependencies: - packaging - ujson - universal_pathlib + - hdf5plugin + - numcodecs + - imagecodecs>=2024.6.1 # Testing - codecov - pre-commit @@ -28,3 +31,4 @@ dependencies: - git+https://github.com/pydata/xarray@zarr-v3 # zarr-v3 compatibility branch - git+https://github.com/zarr-developers/numcodecs@zarr3-codecs # zarr-v3 compatibility branch # - git+https://github.com/fsspec/kerchunk@main # kerchunk is currently incompatible with zarr-python v3 (https://github.com/fsspec/kerchunk/pull/516) + - imagecodecs-numcodecs==2024.6.1 diff --git a/pyproject.toml b/pyproject.toml index d216b26..672398f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,11 @@ dependencies = [ "numpy>=2.0.0", "packaging", "universal-pathlib", + "h5py", + "hdf5plugin", "numcodecs", + "imagecodecs", + "imagecodecs-numcodecs==2024.6.1", "ujson", ] @@ -36,9 +40,12 @@ test = [ "fsspec", "h5netcdf", "h5py", + "hdf5plugin", "kerchunk>=0.2.5", "mypy", "netcdf4", + "numcodecs", + "imagecodecs-numcodecs", "pandas-stubs", "pooch", "pre-commit", diff --git a/virtualizarr/backend.py b/virtualizarr/backend.py index 32403d0..3ab76d1 100644 --- a/virtualizarr/backend.py +++ b/virtualizarr/backend.py @@ -19,6 +19,7 @@ TIFFVirtualBackend, ZarrV3VirtualBackend, ) +from virtualizarr.readers.common import VirtualBackend from virtualizarr.utils import _FsspecFSFromFilepath, check_for_collisions # TODO add entrypoint to allow external libraries to add to this mapping @@ -27,9 +28,9 @@ "zarr_v3": ZarrV3VirtualBackend, "dmrpp": DMRPPVirtualBackend, # all the below call one of the kerchunk backends internally (https://fsspec.github.io/kerchunk/reference.html#file-format-backends) - "netcdf3": NetCDF3VirtualBackend, "hdf5": HDF5VirtualBackend, "netcdf4": HDF5VirtualBackend, # note this is the same as for hdf5 + "netcdf3": NetCDF3VirtualBackend, "tiff": TIFFVirtualBackend, "fits": FITSVirtualBackend, } @@ -113,6 +114,7 @@ def open_virtual_dataset( indexes: Mapping[str, Index] | None = None, virtual_array_class=ManifestArray, reader_options: Optional[dict] = None, + backend: Optional[VirtualBackend] = None, ) -> Dataset: """ Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. 
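# A minimal, hedged usage sketch of the new optional `backend` parameter added to
# `open_virtual_dataset` in the hunk above. Passing a `VirtualBackend` subclass
# bypasses filetype-based dispatch (the dispatch change follows in the next hunk).
# "example.nc" is a hypothetical local netCDF4/HDF5 file, not part of this diff.
from virtualizarr import open_virtual_dataset
from virtualizarr.readers import HDFVirtualBackend

# Default: filetype detection picks a registered backend
# (the kerchunk-based HDF5 reader for a netCDF4 file).
vds = open_virtual_dataset("example.nc", indexes={})

# Explicit override: use the new h5py-based reader introduced in this PR.
vds = open_virtual_dataset("example.nc", indexes={}, backend=HDFVirtualBackend)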
@@ -182,7 +184,10 @@ def open_virtual_dataset( filepath=filepath, reader_options=reader_options ) - backend_cls = VIRTUAL_BACKENDS.get(filetype.name.lower()) + if backend: + backend_cls = backend + else: + backend_cls = VIRTUAL_BACKENDS.get(filetype.name.lower()) if backend_cls is None: raise NotImplementedError(f"Unsupported file type: {filetype.name}") diff --git a/virtualizarr/readers/__init__.py b/virtualizarr/readers/__init__.py index 0f83ba3..c776a9a 100644 --- a/virtualizarr/readers/__init__.py +++ b/virtualizarr/readers/__init__.py @@ -1,5 +1,6 @@ from virtualizarr.readers.dmrpp import DMRPPVirtualBackend from virtualizarr.readers.fits import FITSVirtualBackend +from virtualizarr.readers.hdf import HDFVirtualBackend from virtualizarr.readers.hdf5 import HDF5VirtualBackend from virtualizarr.readers.kerchunk import KerchunkVirtualBackend from virtualizarr.readers.netcdf3 import NetCDF3VirtualBackend @@ -9,6 +10,7 @@ __all__ = [ "DMRPPVirtualBackend", "FITSVirtualBackend", + "HDFVirtualBackend", "HDF5VirtualBackend", "KerchunkVirtualBackend", "NetCDF3VirtualBackend", diff --git a/virtualizarr/readers/hdf.py b/virtualizarr/readers/hdf.py new file mode 100644 index 0000000..dd67475 --- /dev/null +++ b/virtualizarr/readers/hdf.py @@ -0,0 +1,318 @@ +import math +from typing import Dict, Iterable, List, Mapping, Optional, Union + +import h5py # type: ignore +import numpy as np +from xarray import Dataset, Index, Variable + +from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray +from virtualizarr.readers.common import ( + VirtualBackend, + construct_virtual_dataset, + open_loadable_vars_and_indexes, +) +from virtualizarr.readers.hdf_filters import cfcodec_from_dataset, codecs_from_dataset +from virtualizarr.types import ChunkKey +from virtualizarr.utils import _FsspecFSFromFilepath, check_for_collisions +from virtualizarr.zarr import ZArray + + +class HDFVirtualBackend(VirtualBackend): + @staticmethod + def open_virtual_dataset( + filepath: str, + group: str | None = None, + drop_variables: Iterable[str] | None = None, + loadable_variables: Iterable[str] | None = None, + decode_times: bool | None = None, + indexes: Mapping[str, Index] | None = None, + reader_options: Optional[dict] = None, + ) -> Dataset: + drop_variables, loadable_variables = check_for_collisions( + drop_variables, + loadable_variables, + ) + + virtual_vars = HDFVirtualBackend._virtual_vars_from_hdf( + path=filepath, + group=group, + drop_variables=drop_variables + loadable_variables, + reader_options=reader_options, + ) + + loadable_vars, indexes = open_loadable_vars_and_indexes( + filepath, + loadable_variables=loadable_variables, + reader_options=reader_options, + drop_variables=drop_variables, + indexes=indexes, + group=group, + decode_times=decode_times, + ) + + attrs = HDFVirtualBackend._attrs_from_root_group( + path=filepath, reader_options=reader_options, group=group + ) + + coord_names = attrs.pop("coordinates", []) + + return construct_virtual_dataset( + virtual_vars=virtual_vars, + loadable_vars=loadable_vars, + indexes=indexes, + coord_names=coord_names, + attrs=attrs, + ) + + @staticmethod + def _dataset_chunk_manifest( + path: str, dataset: h5py.Dataset + ) -> Optional[ChunkManifest]: + """ + Generate ChunkManifest for HDF5 dataset. 
+
+        Parameters
+        ----------
+        path: str
+            The path of the HDF5 container file
+        dataset : h5py.Dataset
+            HDF5 dataset for which to create a ChunkManifest
+
+        Returns
+        -------
+        ChunkManifest
+            A VirtualiZarr ChunkManifest
+        """
+        dsid = dataset.id
+        if dataset.chunks is None:
+            if dsid.get_offset() is None:
+                return None
+            else:
+                key_list = [0] * (len(dataset.shape) or 1)
+                key = ".".join(map(str, key_list))
+                chunk_entry = ChunkEntry(
+                    path=path, offset=dsid.get_offset(), length=dsid.get_storage_size()
+                )
+                chunk_key = ChunkKey(key)
+                chunk_entries = {chunk_key: chunk_entry.dict()}
+                chunk_manifest = ChunkManifest(entries=chunk_entries)
+                return chunk_manifest
+        else:
+            num_chunks = dsid.get_num_chunks()
+            if num_chunks == 0:
+                raise ValueError("The dataset is chunked but contains no chunks")
+            shape = tuple(
+                math.ceil(a / b) for a, b in zip(dataset.shape, dataset.chunks)
+            )
+            paths = np.empty(shape, dtype=np.dtypes.StringDType)  # type: ignore
+            offsets = np.empty(shape, dtype=np.uint64)
+            lengths = np.empty(shape, dtype=np.uint64)
+
+            def get_key(blob):
+                return tuple(
+                    [a // b for a, b in zip(blob.chunk_offset, dataset.chunks)]
+                )
+
+            def add_chunk_info(blob):
+                key = get_key(blob)
+                paths[key] = path
+                offsets[key] = blob.byte_offset
+                lengths[key] = blob.size
+
+            has_chunk_iter = callable(getattr(dsid, "chunk_iter", None))
+            if has_chunk_iter:
+                dsid.chunk_iter(add_chunk_info)
+            else:
+                for index in range(num_chunks):
+                    add_chunk_info(dsid.get_chunk_info(index))
+
+            chunk_manifest = ChunkManifest.from_arrays(
+                paths=paths, offsets=offsets, lengths=lengths
+            )
+            return chunk_manifest
+
+    @staticmethod
+    def _dataset_dims(dataset: h5py.Dataset) -> Union[List[str], List[None]]:
+        """
+        Get a list of dimension scale names attached to the input HDF5 dataset.
+
+        This is required by the xarray package to work with Zarr arrays. Only
+        one dimension scale per dataset dimension is allowed. If the dataset is
+        itself a dimension scale, it is treated as its own dimension.
+
+        Parameters
+        ----------
+        dataset : h5py.Dataset
+            HDF5 dataset.
+
+        Returns
+        -------
+        list
+            List with HDF5 path names of dimension scales attached to the input
+            dataset.
+        """
+        dims = list()
+        rank = len(dataset.shape)
+        if rank:
+            for n in range(rank):
+                num_scales = len(dataset.dims[n])
+                if num_scales == 1:
+                    dims.append(dataset.dims[n][0].name[1:])
+                elif h5py.h5ds.is_scale(dataset.id):
+                    dims.append(dataset.name[1:])
+                elif num_scales > 1:
+                    raise ValueError(
+                        f"{dataset.name}: {len(dataset.dims[n])} "
+                        f"dimension scales attached to dimension #{n}"
+                    )
+                elif num_scales == 0:
+                    # Some HDF5 files do not have dimension scales.
+                    # If this is the case, `num_scales` will be 0.
+                    # In this case, we mimic netCDF4 and assign phony dimension names.
+                    # See https://github.com/fsspec/kerchunk/issues/41
+                    dims.append(f"phony_dim_{n}")
+        return dims
+
+    @staticmethod
+    def _extract_attrs(h5obj: Union[h5py.Dataset, h5py.Group]):
+        """
+        Extract attributes from an HDF5 group or dataset.
+
+        Parameters
+        ----------
+        h5obj : h5py.Group or h5py.Dataset
+            An HDF5 group or dataset.
+        """
+        _HIDDEN_ATTRS = {
+            "REFERENCE_LIST",
+            "CLASS",
+            "DIMENSION_LIST",
+            "NAME",
+            "_Netcdf4Dimid",
+            "_Netcdf4Coordinates",
+            "_nc3_strict",
+            "_NCProperties",
+        }
+        attrs = {}
+        for n, v in h5obj.attrs.items():
+            if n in _HIDDEN_ATTRS:
+                continue
+            # Fix some attribute values to avoid JSON encoding exceptions...
+ if isinstance(v, bytes): + v = v.decode("utf-8") or " " + elif isinstance(v, (np.ndarray, np.number, np.bool_)): + if v.dtype.kind == "S": + v = v.astype(str) + if n == "_FillValue": + continue + elif v.size == 1: + v = v.flatten()[0] + if isinstance(v, (np.ndarray, np.number, np.bool_)): + v = v.tolist() + else: + v = v.tolist() + elif isinstance(v, h5py._hl.base.Empty): + v = "" + if v == "DIMENSION_SCALE": + continue + + attrs[n] = v + return attrs + + @staticmethod + def _dataset_to_variable(path: str, dataset: h5py.Dataset) -> Optional[Variable]: + # This chunk determination logic mirrors zarr-python's create + # https://github.com/zarr-developers/zarr-python/blob/main/zarr/creation.py#L62-L66 + + chunks = dataset.chunks if dataset.chunks else dataset.shape + codecs = codecs_from_dataset(dataset) + cfcodec = cfcodec_from_dataset(dataset) + attrs = HDFVirtualBackend._extract_attrs(dataset) + if cfcodec: + codecs.insert(0, cfcodec["codec"]) + dtype = cfcodec["target_dtype"] + attrs.pop("scale_factor", None) + attrs.pop("add_offset", None) + fill_value = cfcodec["codec"].decode(dataset.fillvalue) + else: + dtype = dataset.dtype + fill_value = dataset.fillvalue + if isinstance(fill_value, np.ndarray): + fill_value = fill_value[0] + if np.isnan(fill_value): + fill_value = float("nan") + if isinstance(fill_value, np.generic): + fill_value = fill_value.item() + filters = [codec.get_config() for codec in codecs] + zarray = ZArray( + chunks=chunks, + compressor=None, + dtype=dtype, + fill_value=fill_value, + filters=filters, + order="C", + shape=dataset.shape, + zarr_format=2, + ) + dims = HDFVirtualBackend._dataset_dims(dataset) + manifest = HDFVirtualBackend._dataset_chunk_manifest(path, dataset) + if manifest: + marray = ManifestArray(zarray=zarray, chunkmanifest=manifest) + variable = Variable(data=marray, dims=dims, attrs=attrs) + else: + variable = Variable(data=np.empty(dataset.shape), dims=dims, attrs=attrs) + return variable + + @staticmethod + def _virtual_vars_from_hdf( + path: str, + group: Optional[str] = None, + drop_variables: Optional[List[str]] = None, + reader_options: Optional[dict] = { + "storage_options": {"key": "", "secret": "", "anon": True} + }, + ) -> Dict[str, Variable]: + if drop_variables is None: + drop_variables = [] + open_file = _FsspecFSFromFilepath( + filepath=path, reader_options=reader_options + ).open_file() + f = h5py.File(open_file, mode="r") + if group: + g = f[group] + if not isinstance(g, h5py.Group): + raise ValueError("The provided group is not an HDF group") + else: + g = f + variables = {} + for key in g.keys(): + if key not in drop_variables: + if isinstance(g[key], h5py.Dataset): + variable = HDFVirtualBackend._dataset_to_variable(path, g[key]) + if variable is not None: + variables[key] = variable + else: + raise NotImplementedError("Nested groups are not yet supported") + + return variables + + @staticmethod + def _attrs_from_root_group( + path: str, + group: Optional[str] = None, + reader_options: Optional[dict] = { + "storage_options": {"key": "", "secret": "", "anon": True} + }, + ): + open_file = _FsspecFSFromFilepath( + filepath=path, reader_options=reader_options + ).open_file() + f = h5py.File(open_file, mode="r") + if group: + g = f[group] + if not isinstance(g, h5py.Group): + raise ValueError("The provided group is not an HDF group") + else: + g = f + attrs = HDFVirtualBackend._extract_attrs(g) + return attrs diff --git a/virtualizarr/readers/hdf_filters.py b/virtualizarr/readers/hdf_filters.py new file mode 100644 index 
0000000..aedf89b --- /dev/null +++ b/virtualizarr/readers/hdf_filters.py @@ -0,0 +1,135 @@ +import dataclasses +from typing import List, Tuple, TypedDict, Union + +import h5py # type: ignore +import hdf5plugin # type: ignore +import numcodecs.registry as registry +import numpy as np +from numcodecs.abc import Codec +from numcodecs.fixedscaleoffset import FixedScaleOffset +from xarray.coding.variables import _choose_float_dtype + +_non_standard_filters = { + "gzip": "zlib", + "lzf": "imagecodecs_lzf", +} + +_hdf5plugin_imagecodecs = {"lz4": "imagecodecs_lz4h5", "bzip2": "imagecodecs_bz2"} + + +@dataclasses.dataclass +class BloscProperties: + blocksize: int + clevel: int + shuffle: int + cname: str + + def __post_init__(self): + blosc_compressor_codes = { + value: key + for key, value in hdf5plugin._filters.Blosc._Blosc__COMPRESSIONS.items() + } + self.cname = blosc_compressor_codes[self.cname] + + +@dataclasses.dataclass +class ZstdProperties: + level: int + + +@dataclasses.dataclass +class ShuffleProperties: + elementsize: int + + +@dataclasses.dataclass +class ZlibProperties: + level: int + + +class CFCodec(TypedDict): + target_dtype: np.dtype + codec: Codec + + +def _filter_to_codec( + filter_id: str, filter_properties: Union[int, None, Tuple] = None +) -> Codec: + id_int = None + id_str = None + try: + id_int = int(filter_id) + except ValueError: + id_str = filter_id + conf = {} + if id_str: + if id_str in _non_standard_filters.keys(): + id = _non_standard_filters[id_str] + else: + id = id_str + if id == "zlib": + zlib_props = ZlibProperties(level=filter_properties) # type: ignore + conf = dataclasses.asdict(zlib_props) + if id == "shuffle" and isinstance(filter_properties, tuple): + shuffle_props = ShuffleProperties(elementsize=filter_properties[0]) + conf = dataclasses.asdict(shuffle_props) + conf["id"] = id # type: ignore[assignment] + if id_int: + filter = hdf5plugin.get_filters(id_int)[0] + id = filter.filter_name + if id in _hdf5plugin_imagecodecs.keys(): + id = _hdf5plugin_imagecodecs[id] + if id == "blosc" and isinstance(filter_properties, tuple): + blosc_fields = [field.name for field in dataclasses.fields(BloscProperties)] + blosc_props = BloscProperties( + **{k: v for k, v in zip(blosc_fields, filter_properties[-4:])} + ) + conf = dataclasses.asdict(blosc_props) + if id == "zstd" and isinstance(filter_properties, tuple): + zstd_props = ZstdProperties(level=filter_properties[0]) + conf = dataclasses.asdict(zstd_props) + conf["id"] = id + codec = registry.get_codec(conf) + return codec + + +def cfcodec_from_dataset(dataset: h5py.Dataset) -> Codec | None: + attributes = {attr: dataset.attrs[attr] for attr in dataset.attrs} + mapping = {} + if "scale_factor" in attributes: + try: + scale_factor = attributes["scale_factor"][0] + except IndexError: + scale_factor = attributes["scale_factor"] + mapping["scale_factor"] = float(1 / scale_factor) + else: + mapping["scale_factor"] = 1 + if "add_offset" in attributes: + try: + offset = attributes["add_offset"][0] + except IndexError: + offset = attributes["add_offset"] + mapping["add_offset"] = float(offset) + else: + mapping["add_offset"] = 0 + if mapping["scale_factor"] != 1 or mapping["add_offset"] != 0: + float_dtype = _choose_float_dtype(dtype=dataset.dtype, mapping=mapping) + target_dtype = np.dtype(float_dtype) + codec = FixedScaleOffset( + offset=mapping["add_offset"], + scale=mapping["scale_factor"], + dtype=target_dtype, + astype=dataset.dtype, + ) + cfcodec = CFCodec(target_dtype=target_dtype, codec=codec) + return cfcodec + 
else: + return None + + +def codecs_from_dataset(dataset: h5py.Dataset) -> List[Codec]: + codecs = [] + for filter_id, filter_properties in dataset._filters.items(): + codec = _filter_to_codec(filter_id, filter_properties) + codecs.append(codec) + return codecs diff --git a/virtualizarr/tests/test_backend.py b/virtualizarr/tests/test_backend.py index 43a6bbd..2368848 100644 --- a/virtualizarr/tests/test_backend.py +++ b/virtualizarr/tests/test_backend.py @@ -11,6 +11,7 @@ from virtualizarr import open_virtual_dataset from virtualizarr.backend import FileType, automatically_determine_filetype from virtualizarr.manifests import ManifestArray +from virtualizarr.readers.hdf import HDFVirtualBackend from virtualizarr.tests import ( has_astropy, has_tifffile, @@ -82,14 +83,15 @@ def test_FileType(): @requires_kerchunk +@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) class TestOpenVirtualDatasetIndexes: - def test_no_indexes(self, netcdf4_file): - vds = open_virtual_dataset(netcdf4_file, indexes={}) + def test_no_indexes(self, netcdf4_file, hdf_backend): + vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend) assert vds.indexes == {} - def test_create_default_indexes(self, netcdf4_file): + def test_create_default_indexes(self, netcdf4_file, hdf_backend): with pytest.warns(UserWarning, match="will create in-memory pandas indexes"): - vds = open_virtual_dataset(netcdf4_file, indexes=None) + vds = open_virtual_dataset(netcdf4_file, indexes=None, backend=hdf_backend) ds = open_dataset(netcdf4_file, decode_times=True) # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812 @@ -113,7 +115,8 @@ def index_mappings_equal(indexes1: Mapping[str, Index], indexes2: Mapping[str, I @requires_kerchunk -def test_cftime_index(tmpdir): +@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) +def test_cftime_index(tmpdir, hdf_backend): """Ensure a virtual dataset contains the same indexes as an Xarray dataset""" # Note: Test was created to debug: https://github.com/zarr-developers/VirtualiZarr/issues/168 ds = xr.Dataset( @@ -129,7 +132,10 @@ def test_cftime_index(tmpdir): ) ds.to_netcdf(f"{tmpdir}/tmp.nc") vds = open_virtual_dataset( - f"{tmpdir}/tmp.nc", loadable_variables=["time", "lat", "lon"], indexes={} + f"{tmpdir}/tmp.nc", + loadable_variables=["time", "lat", "lon"], + indexes={}, + backend=hdf_backend, ) # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812 assert index_mappings_equal(vds.xindexes, ds.xindexes) @@ -139,15 +145,16 @@ def test_cftime_index(tmpdir): @requires_kerchunk +@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) class TestOpenVirtualDatasetAttrs: - def test_drop_array_dimensions(self, netcdf4_file): + def test_drop_array_dimensions(self, netcdf4_file, hdf_backend): # regression test for GH issue #150 - vds = open_virtual_dataset(netcdf4_file, indexes={}) + vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend) assert "_ARRAY_DIMENSIONS" not in vds["air"].attrs - def test_coordinate_variable_attrs_preserved(self, netcdf4_file): + def test_coordinate_variable_attrs_preserved(self, netcdf4_file, hdf_backend): # regression test for GH issue #155 - vds = open_virtual_dataset(netcdf4_file, indexes={}) + vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend) assert 
vds["lat"].attrs == { "standard_name": "latitude", "long_name": "Latitude", @@ -165,7 +172,8 @@ class TestReadFromS3: @pytest.mark.parametrize( "indexes", [None, {}], ids=["None index", "empty dict index"] ) - def test_anon_read_s3(self, filetype, indexes): + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_anon_read_s3(self, filetype, indexes, hdf_backend): """Parameterized tests for empty vs supplied indexes and filetypes.""" # TODO: Switch away from this s3 url after minIO is implemented. fpath = "s3://carbonplan-share/virtualizarr/local.nc" @@ -174,6 +182,7 @@ def test_anon_read_s3(self, filetype, indexes): filetype=filetype, indexes=indexes, reader_options={"storage_options": {"anon": True}}, + backend=hdf_backend, ) assert vds.dims == {"time": 2920, "lat": 25, "lon": 53} @@ -182,6 +191,7 @@ def test_anon_read_s3(self, filetype, indexes): @network +@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) class TestReadFromURL: @pytest.mark.parametrize( "filetype, url", @@ -228,10 +238,15 @@ class TestReadFromURL: ), ], ) - def test_read_from_url(self, filetype, url): + def test_read_from_url(self, hdf_backend, filetype, url): if filetype in ["grib", "jpg", "hdf4"]: with pytest.raises(NotImplementedError): - vds = open_virtual_dataset(url, reader_options={}, indexes={}) + vds = open_virtual_dataset( + url, + reader_options={}, + indexes={}, + backend=hdf_backend, + ) elif filetype == "hdf5": vds = open_virtual_dataset( url, @@ -239,13 +254,14 @@ def test_read_from_url(self, filetype, url): drop_variables=["listOfCovarianceTerms", "listOfPolarizations"], indexes={}, reader_options={}, + backend=hdf_backend, ) assert isinstance(vds, xr.Dataset) else: - vds = open_virtual_dataset(url, indexes={}) + vds = open_virtual_dataset(url, indexes={}, backend=hdf_backend) assert isinstance(vds, xr.Dataset) - def test_virtualizarr_vs_local_nisar(self): + def test_virtualizarr_vs_local_nisar(self, hdf_backend): import fsspec # Open group directly from locally cached file with xarray @@ -268,6 +284,7 @@ def test_virtualizarr_vs_local_nisar(self): group=hdf_group, indexes={}, drop_variables=["listOfCovarianceTerms", "listOfPolarizations"], + backend=hdf_backend, ) tmpref = "/tmp/cmip6.json" vds.virtualize.to_kerchunk(tmpref, format="json") @@ -279,10 +296,14 @@ def test_virtualizarr_vs_local_nisar(self): @requires_kerchunk class TestLoadVirtualDataset: - def test_loadable_variables(self, netcdf4_file): + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_loadable_variables(self, netcdf4_file, hdf_backend): vars_to_load = ["air", "time"] vds = open_virtual_dataset( - netcdf4_file, loadable_variables=vars_to_load, indexes={} + netcdf4_file, + loadable_variables=vars_to_load, + indexes={}, + backend=hdf_backend, ) for name in vds.variables: @@ -304,11 +325,20 @@ def test_explicit_filetype(self, netcdf4_file): with pytest.raises(NotImplementedError): open_virtual_dataset(netcdf4_file, filetype="grib") - def test_group_kwarg(self, hdf5_groups_file): - with pytest.raises(ValueError, match="Multiple HDF Groups found"): - open_virtual_dataset(hdf5_groups_file) - with pytest.raises(ValueError, match="not found in"): - open_virtual_dataset(hdf5_groups_file, group="doesnt_exist") + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_group_kwarg(self, hdf5_groups_file, hdf_backend): + if hdf_backend: + with pytest.raises(NotImplementedError, match="Nested groups"): + open_virtual_dataset(hdf5_groups_file, 
backend=hdf_backend) + with pytest.raises(KeyError, match="doesn't exist"): + open_virtual_dataset( + hdf5_groups_file, group="doesnt_exist", backend=hdf_backend + ) + else: + with pytest.raises(ValueError, match="Multiple HDF Groups found"): + open_virtual_dataset(hdf5_groups_file) + with pytest.raises(ValueError, match="not found in"): + open_virtual_dataset(hdf5_groups_file, group="doesnt_exist") vars_to_load = ["air", "time"] vds = open_virtual_dataset( @@ -316,6 +346,7 @@ def test_group_kwarg(self, hdf5_groups_file): group="test/group", loadable_variables=vars_to_load, indexes={}, + backend=hdf_backend, ) full_ds = xr.open_dataset( hdf5_groups_file, @@ -340,13 +371,15 @@ def test_open_virtual_dataset_passes_expected_args( } mock_read_kerchunk.assert_called_once_with(**args) - def test_open_dataset_with_empty(self, hdf5_empty, tmpdir): - vds = open_virtual_dataset(hdf5_empty) + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_open_dataset_with_empty(self, hdf5_empty, tmpdir, hdf_backend): + vds = open_virtual_dataset(hdf5_empty, backend=hdf_backend) assert vds.empty.dims == () assert vds.empty.attrs == {"empty": "true"} - def test_open_dataset_with_scalar(self, hdf5_scalar, tmpdir): - vds = open_virtual_dataset(hdf5_scalar) + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_open_dataset_with_scalar(self, hdf5_scalar, tmpdir, hdf_backend): + vds = open_virtual_dataset(hdf5_scalar, backend=hdf_backend) assert vds.scalar.dims == () assert vds.scalar.attrs == {"scalar": "true"} diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py index 09d0c0a..0a39eb3 100644 --- a/virtualizarr/tests/test_integration.py +++ b/virtualizarr/tests/test_integration.py @@ -5,6 +5,7 @@ from virtualizarr import open_virtual_dataset from virtualizarr.manifests import ChunkManifest, ManifestArray +from virtualizarr.readers.hdf import HDFVirtualBackend from virtualizarr.tests import requires_kerchunk from virtualizarr.translators.kerchunk import ( dataset_from_kerchunk_refs, @@ -63,8 +64,9 @@ def test_no_duplicates_find_var_names(): ), ], ) +@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) def test_numpy_arrays_to_inlined_kerchunk_refs( - netcdf4_file, inline_threshold, vars_to_inline + netcdf4_file, inline_threshold, vars_to_inline, hdf_backend ): from kerchunk.hdf import SingleHdf5ToZarr @@ -75,7 +77,7 @@ def test_numpy_arrays_to_inlined_kerchunk_refs( # loading the variables should produce same result as inlining them using kerchunk vds = open_virtual_dataset( - netcdf4_file, loadable_variables=vars_to_inline, indexes={} + netcdf4_file, loadable_variables=vars_to_inline, indexes={}, backend=hdf_backend ) refs = vds.virtualize.to_kerchunk(format="dict") @@ -90,7 +92,8 @@ def test_numpy_arrays_to_inlined_kerchunk_refs( @requires_kerchunk @pytest.mark.parametrize("format", ["dict", "json", "parquet"]) class TestKerchunkRoundtrip: - def test_kerchunk_roundtrip_no_concat(self, tmpdir, format): + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_kerchunk_roundtrip_no_concat(self, tmpdir, format, hdf_backend): # set up example xarray dataset ds = xr.tutorial.open_dataset("air_temperature", decode_times=False) @@ -98,7 +101,7 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format): ds.to_netcdf(f"{tmpdir}/air.nc") # use open_dataset_via_kerchunk to read it as references - vds = open_virtual_dataset(f"{tmpdir}/air.nc", indexes={}) + vds = 
open_virtual_dataset(f"{tmpdir}/air.nc", indexes={}, backend=hdf_backend) if format == "dict": # write those references to an in-memory kerchunk-formatted references dictionary @@ -115,11 +118,18 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format): f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False ) - # assert identical to original dataset - xrt.assert_identical(roundtrip, ds) + # assert all_close to original dataset + xrt.assert_allclose(roundtrip, ds) + # assert coordinate attributes are maintained + for coord in ds.coords: + assert ds.coords[coord].attrs == roundtrip.coords[coord].attrs + + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) @pytest.mark.parametrize("decode_times,time_vars", [(False, []), (True, ["time"])]) - def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars): + def test_kerchunk_roundtrip_concat( + self, tmpdir, format, hdf_backend, decode_times, time_vars + ): # set up example xarray dataset ds = xr.tutorial.open_dataset("air_temperature", decode_times=decode_times) @@ -135,11 +145,13 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars f"{tmpdir}/air1.nc", indexes={}, loadable_variables=time_vars, + backend=hdf_backend, ) vds2 = open_virtual_dataset( f"{tmpdir}/air2.nc", indexes={}, loadable_variables=time_vars, + backend=hdf_backend, ) if decode_times is False: @@ -168,9 +180,14 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars roundtrip = xr.open_dataset( f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=decode_times ) + if decode_times is False: - # assert identical to original dataset - xrt.assert_identical(roundtrip, ds) + # assert all_close to original dataset + xrt.assert_allclose(roundtrip, ds) + + # assert coordinate attributes are maintained + for coord in ds.coords: + assert ds.coords[coord].attrs == roundtrip.coords[coord].attrs else: # they are very very close! 
But assert_allclose doesn't seem to work on datetimes assert (roundtrip.time - ds.time).sum() == 0 @@ -178,7 +195,8 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars assert roundtrip.time.encoding["units"] == ds.time.encoding["units"] assert roundtrip.time.encoding["calendar"] == ds.time.encoding["calendar"] - def test_non_dimension_coordinates(self, tmpdir, format): + @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) + def test_non_dimension_coordinates(self, tmpdir, format, hdf_backend): # regression test for GH issue #105 # set up example xarray dataset containing non-dimension coordinate variables @@ -187,7 +205,9 @@ def test_non_dimension_coordinates(self, tmpdir, format): # save it to disk as netCDF (in temporary directory) ds.to_netcdf(f"{tmpdir}/non_dim_coords.nc") - vds = open_virtual_dataset(f"{tmpdir}/non_dim_coords.nc", indexes={}) + vds = open_virtual_dataset( + f"{tmpdir}/non_dim_coords.nc", indexes={}, backend=hdf_backend + ) assert "lat" in vds.coords assert "coordinates" not in vds.attrs @@ -208,7 +228,11 @@ def test_non_dimension_coordinates(self, tmpdir, format): ) # assert equal to original dataset - xrt.assert_identical(roundtrip, ds) + xrt.assert_allclose(roundtrip, ds) + + # assert coordinate attributes are maintained + for coord in ds.coords: + assert ds.coords[coord].attrs == roundtrip.coords[coord].attrs def test_datetime64_dtype_fill_value(self, tmpdir, format): chunks_dict = { @@ -256,11 +280,12 @@ def test_datetime64_dtype_fill_value(self, tmpdir, format): @requires_kerchunk -def test_open_scalar_variable(tmpdir): +@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend]) +def test_open_scalar_variable(tmpdir, hdf_backend): # regression test for GH issue #100 ds = xr.Dataset(data_vars={"a": 0}) ds.to_netcdf(f"{tmpdir}/scalar.nc") - vds = open_virtual_dataset(f"{tmpdir}/scalar.nc", indexes={}) + vds = open_virtual_dataset(f"{tmpdir}/scalar.nc", indexes={}, backend=hdf_backend) assert vds["a"].shape == () diff --git a/virtualizarr/tests/test_readers/conftest.py b/virtualizarr/tests/test_readers/conftest.py new file mode 100644 index 0000000..b0b7c41 --- /dev/null +++ b/virtualizarr/tests/test_readers/conftest.py @@ -0,0 +1,305 @@ +import h5py # type: ignore +import hdf5plugin # type: ignore +import numpy as np +import pytest +import xarray as xr +from packaging.version import Version +from xarray.tests.test_dataset import create_test_data +from xarray.util.print_versions import netcdf_and_hdf5_versions + + +@pytest.fixture +def empty_chunks_hdf5_file(tmpdir): + ds = xr.Dataset({"data": []}) + filepath = f"{tmpdir}/empty_chunks.nc" + ds.to_netcdf(filepath, engine="h5netcdf") + return filepath + + +@pytest.fixture +def empty_dataset_hdf5_file(tmpdir): + filepath = f"{tmpdir}/empty_dataset.nc" + f = h5py.File(filepath, "w") + f.create_dataset("data", shape=(0,), dtype="f") + return filepath + + +@pytest.fixture +def no_chunks_hdf5_file(tmpdir): + filepath = f"{tmpdir}/no_chunks.nc" + f = h5py.File(filepath, "w") + data = np.random.random((10, 10)) + f.create_dataset(name="data", data=data, chunks=None) + return filepath + + +@pytest.fixture +def chunked_hdf5_file(tmpdir): + filepath = f"{tmpdir}/chunks.nc" + f = h5py.File(filepath, "w") + data = np.random.random((100, 100)) + f.create_dataset(name="data", data=data, chunks=(50, 50)) + return filepath + + +@pytest.fixture +def single_dimension_scale_hdf5_file(tmpdir): + filepath = f"{tmpdir}/single_dimension_scale.nc" + f = h5py.File(filepath, "w") + data = 
[1, 2] + x = [0, 1] + f.create_dataset(name="data", data=data) + f.create_dataset(name="x", data=x) + f["x"].make_scale() + f["data"].dims[0].attach_scale(f["x"]) + return filepath + + +@pytest.fixture +def is_scale_hdf5_file(tmpdir): + filepath = f"{tmpdir}/is_scale.nc" + f = h5py.File(filepath, "w") + data = [1, 2] + f.create_dataset(name="data", data=data) + f["data"].make_scale() + return filepath + + +@pytest.fixture +def multiple_dimension_scales_hdf5_file(tmpdir): + filepath = f"{tmpdir}/multiple_dimension_scales.nc" + f = h5py.File(filepath, "w") + data = [1, 2] + f.create_dataset(name="data", data=data) + f.create_dataset(name="x", data=[0, 1]) + f.create_dataset(name="y", data=[0, 1]) + f["x"].make_scale() + f["y"].make_scale() + f["data"].dims[0].attach_scale(f["x"]) + f["data"].dims[0].attach_scale(f["y"]) + return filepath + + +@pytest.fixture +def chunked_dimensions_netcdf4_file(tmpdir): + filepath = f"{tmpdir}/chunks_dimension.nc" + f = h5py.File(filepath, "w") + data = np.random.random((100, 100)) + x = np.random.random((100)) + y = np.random.random((100)) + f.create_dataset(name="data", data=data, chunks=(50, 50)) + f.create_dataset(name="x", data=x) + f.create_dataset(name="y", data=y) + f["data"].dims[0].attach_scale(f["x"]) + f["data"].dims[1].attach_scale(f["y"]) + return filepath + + +@pytest.fixture +def string_attributes_hdf5_file(tmpdir): + filepath = f"{tmpdir}/attributes.nc" + f = h5py.File(filepath, "w") + data = np.random.random((10, 10)) + f.create_dataset(name="data", data=data, chunks=None) + f["data"].attrs["attribute_name"] = "attribute_name" + f["data"].attrs["attribute_name2"] = "attribute_name2" + return filepath + + +@pytest.fixture +def root_attributes_hdf5_file(tmpdir): + filepath = f"{tmpdir}/root_attributes.nc" + f = h5py.File(filepath, "w") + f.attrs["attribute_name"] = "attribute_name" + return filepath + + +@pytest.fixture +def group_hdf5_file(tmpdir): + filepath = f"{tmpdir}/group.nc" + f = h5py.File(filepath, "w") + g = f.create_group("group") + data = np.random.random((10, 10)) + g.create_dataset("data", data=data) + return filepath + + +@pytest.fixture +def nested_group_hdf5_file(tmpdir): + filepath = f"{tmpdir}/nested_group.nc" + f = h5py.File(filepath, "w") + g = f.create_group("group") + data = np.random.random((10, 10)) + g.create_dataset("data", data=data) + g.create_group("nested_group") + return filepath + + +@pytest.fixture +def multiple_datasets_hdf5_file(tmpdir): + filepath = f"{tmpdir}/multiple_datasets.nc" + f = h5py.File(filepath, "w") + data = np.random.random((10, 10)) + f.create_dataset(name="data", data=data, chunks=None) + f.create_dataset(name="data2", data=data, chunks=None) + return filepath + + +@pytest.fixture +def np_uncompressed(): + return np.arange(100) + + +@pytest.fixture(params=["gzip", "blosc_lz4", "lz4", "bzip2", "zstd", "shuffle"]) +def filter_encoded_hdf5_file(tmpdir, np_uncompressed, request): + filepath = f"{tmpdir}/{request.param}.nc" + f = h5py.File(filepath, "w") + if request.param == "gzip": + f.create_dataset( + name="data", data=np_uncompressed, compression="gzip", compression_opts=1 + ) + if request.param == "blosc_lz4": + f.create_dataset( + name="data", + data=np_uncompressed, + **hdf5plugin.Blosc(cname="lz4", clevel=9, shuffle=hdf5plugin.Blosc.SHUFFLE), + ) + if request.param == "lz4": + f.create_dataset(name="data", data=np_uncompressed, **hdf5plugin.LZ4(nbytes=0)) + if request.param == "bzip2": + f.create_dataset(name="data", data=np_uncompressed, **hdf5plugin.BZip2()) + if request.param == 
"zstd": + f.create_dataset(name="data", data=np_uncompressed, **hdf5plugin.Zstd(clevel=2)) + if request.param == "shuffle": + f.create_dataset(name="data", data=np_uncompressed, shuffle=True) + + return filepath + + +@pytest.fixture(params=["gzip"]) +def filter_encoded_roundtrip_hdf5_file(tmpdir, request): + ds = xr.tutorial.open_dataset("air_temperature") + encoding = {} + if request.param == "gzip": + encoding_config = {"zlib": True, "complevel": 1} + + for var_name in ds.variables: + encoding[var_name] = encoding_config + + filepath = f"{tmpdir}/{request.param}_xarray.nc" + ds.to_netcdf(filepath, engine="h5netcdf", encoding=encoding) + return filepath + + +@pytest.fixture() +def skip_test_for_libhdf5_version(): + versions = netcdf_and_hdf5_versions() + libhdf5_version = Version(versions[0][1]) + return libhdf5_version < Version("1.14") + + +@pytest.fixture(params=["blosc_zlib"]) +def filter_encoded_roundtrip_netcdf4_file( + tmpdir, request, skip_test_for_libhdf5_version +): + if skip_test_for_libhdf5_version: + pytest.skip("Requires libhdf5 >= 1.14") + ds = create_test_data(dim_sizes=(20, 80, 10)) + if "blosc" in request.param: + encoding_config = { + "compression": request.param, + "chunksizes": (20, 40), + "original_shape": ds.var2.shape, + "blosc_shuffle": 1, + "fletcher32": False, + } + # Check on how handle scalar dim. + ds = ds.drop_dims("dim3") + ds["var2"].encoding.update(encoding_config) + filepath = f"{tmpdir}/{request.param}_xarray.nc" + ds.to_netcdf(filepath, engine="netcdf4") + return {"filepath": filepath, "compressor": request.param} + + +@pytest.fixture +def np_uncompressed_int16(): + return np.arange(100, dtype=np.int16) + + +@pytest.fixture +def offset(): + return np.float32(5.0) + + +@pytest.fixture +def add_offset_hdf5_file(tmpdir, np_uncompressed_int16, offset): + filepath = f"{tmpdir}/offset.nc" + f = h5py.File(filepath, "w") + data = np_uncompressed_int16 - offset + f.create_dataset(name="data", data=data, chunks=True) + f["data"].attrs.create(name="add_offset", data=offset) + return filepath + + +@pytest.fixture +def scale_factor(): + return 0.01 + + +@pytest.fixture +def scale_add_offset_hdf5_file(tmpdir, np_uncompressed_int16, offset, scale_factor): + filepath = f"{tmpdir}/scale_offset.nc" + f = h5py.File(filepath, "w") + data = (np_uncompressed_int16 - offset) / scale_factor + f.create_dataset(name="data", data=data, chunks=True) + f["data"].attrs.create(name="add_offset", data=offset) + f["data"].attrs.create(name="scale_factor", data=np.array([scale_factor])) + return filepath + + +@pytest.fixture() +def chunked_roundtrip_hdf5_file(tmpdir): + ds = create_test_data(dim_sizes=(20, 80, 10)) + ds = ds.drop_dims("dim3") + filepath = f"{tmpdir}/chunked_xarray.nc" + ds.to_netcdf( + filepath, engine="netcdf4", encoding={"var2": {"chunksizes": (10, 10)}} + ) + return filepath + + +@pytest.fixture(params=["gzip", "zlib"]) +def filter_and_cf_roundtrip_hdf5_file(tmpdir, request): + x = np.arange(100) + y = np.arange(100) + fill_value = np.int16(-9999) + temperature = 0.1 * x[:, None] + 0.1 * y[None, :] + temperature[0][0] = fill_value + ds = xr.Dataset( + {"temperature": (["x", "y"], temperature)}, + coords={"x": np.arange(100), "y": np.arange(100)}, + ) + encoding = { + "temperature": { + "dtype": "int16", + "scale_factor": 0.1, + "add_offset": 273.15, + "_FillValue": fill_value, + }, + "x": {"_FillValue": fill_value}, + "y": {"_FillValue": fill_value}, + } + if request.param == "gzip": + encoding["temperature"]["compression"] = "gzip" + 
encoding["temperature"]["compression_opts"] = 7 + + if request.param == "zlib": + encoding["temperature"]["zlib"] = True + encoding["temperature"]["complevel"] = 9 + + from random import randint + + filepath = f"{tmpdir}/{request.param}_{randint(0,100)}_cf_roundtrip.nc" + ds.to_netcdf(filepath, engine="h5netcdf", encoding=encoding) + + return filepath diff --git a/virtualizarr/tests/test_readers/test_hdf.py b/virtualizarr/tests/test_readers/test_hdf.py new file mode 100644 index 0000000..0e51fe2 --- /dev/null +++ b/virtualizarr/tests/test_readers/test_hdf.py @@ -0,0 +1,148 @@ +import h5py # type: ignore +import pytest + +from virtualizarr.readers.hdf import HDFVirtualBackend + + +class TestDatasetChunkManifest: + def test_empty_chunks(self, empty_chunks_hdf5_file): + f = h5py.File(empty_chunks_hdf5_file) + ds = f["data"] + with pytest.raises(ValueError, match="chunked but contains no chunks"): + HDFVirtualBackend._dataset_chunk_manifest( + path=empty_chunks_hdf5_file, dataset=ds + ) + + @pytest.mark.skip("Need to differentiate non coordinate dimensions from empty") + def test_empty_dataset(self, empty_dataset_hdf5_file): + f = h5py.File(empty_dataset_hdf5_file) + ds = f["data"] + with pytest.raises(ValueError, match="no space allocated in the file"): + HDFVirtualBackend._dataset_chunk_manifest( + path=empty_dataset_hdf5_file, dataset=ds + ) + + def test_no_chunking(self, no_chunks_hdf5_file): + f = h5py.File(no_chunks_hdf5_file) + ds = f["data"] + manifest = HDFVirtualBackend._dataset_chunk_manifest( + path=no_chunks_hdf5_file, dataset=ds + ) + assert manifest.shape_chunk_grid == (1, 1) + + def test_chunked(self, chunked_hdf5_file): + f = h5py.File(chunked_hdf5_file) + ds = f["data"] + manifest = HDFVirtualBackend._dataset_chunk_manifest( + path=chunked_hdf5_file, dataset=ds + ) + assert manifest.shape_chunk_grid == (2, 2) + + def test_chunked_roundtrip(self, chunked_roundtrip_hdf5_file): + f = h5py.File(chunked_roundtrip_hdf5_file) + ds = f["var2"] + manifest = HDFVirtualBackend._dataset_chunk_manifest( + path=chunked_roundtrip_hdf5_file, dataset=ds + ) + assert manifest.shape_chunk_grid == (2, 8) + + +class TestDatasetDims: + def test_single_dimension_scale(self, single_dimension_scale_hdf5_file): + f = h5py.File(single_dimension_scale_hdf5_file) + ds = f["data"] + dims = HDFVirtualBackend._dataset_dims(ds) + assert dims[0] == "x" + + def test_is_dimension_scale(self, is_scale_hdf5_file): + f = h5py.File(is_scale_hdf5_file) + ds = f["data"] + dims = HDFVirtualBackend._dataset_dims(ds) + assert dims[0] == "data" + + def test_multiple_dimension_scales(self, multiple_dimension_scales_hdf5_file): + f = h5py.File(multiple_dimension_scales_hdf5_file) + ds = f["data"] + with pytest.raises(ValueError, match="dimension scales attached"): + HDFVirtualBackend._dataset_dims(ds) + + def test_no_dimension_scales(self, no_chunks_hdf5_file): + f = h5py.File(no_chunks_hdf5_file) + ds = f["data"] + dims = HDFVirtualBackend._dataset_dims(ds) + assert dims == ["phony_dim_0", "phony_dim_1"] + + +class TestDatasetToVariable: + def test_chunked_dataset(self, chunked_dimensions_netcdf4_file): + f = h5py.File(chunked_dimensions_netcdf4_file) + ds = f["data"] + var = HDFVirtualBackend._dataset_to_variable( + chunked_dimensions_netcdf4_file, ds + ) + assert var.chunks == (50, 50) + + def test_not_chunked_dataset(self, single_dimension_scale_hdf5_file): + f = h5py.File(single_dimension_scale_hdf5_file) + ds = f["data"] + var = HDFVirtualBackend._dataset_to_variable( + single_dimension_scale_hdf5_file, ds + ) + 
assert var.chunks == (2,) + + def test_dataset_attributes(self, string_attributes_hdf5_file): + f = h5py.File(string_attributes_hdf5_file) + ds = f["data"] + var = HDFVirtualBackend._dataset_to_variable(string_attributes_hdf5_file, ds) + assert var.attrs["attribute_name"] == "attribute_name" + + +class TestExtractAttributes: + def test_string_attribute(self, string_attributes_hdf5_file): + f = h5py.File(string_attributes_hdf5_file) + ds = f["data"] + attrs = HDFVirtualBackend._extract_attrs(ds) + assert attrs["attribute_name"] == "attribute_name" + + def test_root_attribute(self, root_attributes_hdf5_file): + f = h5py.File(root_attributes_hdf5_file) + attrs = HDFVirtualBackend._extract_attrs(f) + assert attrs["attribute_name"] == "attribute_name" + + def test_multiple_attributes(self, string_attributes_hdf5_file): + f = h5py.File(string_attributes_hdf5_file) + ds = f["data"] + attrs = HDFVirtualBackend._extract_attrs(ds) + assert len(attrs.keys()) == 2 + + +class TestVirtualVarsFromHDF: + def test_variable_with_dimensions(self, chunked_dimensions_netcdf4_file): + variables = HDFVirtualBackend._virtual_vars_from_hdf( + chunked_dimensions_netcdf4_file + ) + assert len(variables) == 3 + + def test_nested_groups_not_implemented(self, nested_group_hdf5_file): + with pytest.raises(NotImplementedError): + HDFVirtualBackend._virtual_vars_from_hdf( + path=nested_group_hdf5_file, group="group" + ) + + def test_drop_variables(self, multiple_datasets_hdf5_file): + variables = HDFVirtualBackend._virtual_vars_from_hdf( + path=multiple_datasets_hdf5_file, drop_variables=["data2"] + ) + assert "data2" not in variables.keys() + + def test_dataset_in_group(self, group_hdf5_file): + variables = HDFVirtualBackend._virtual_vars_from_hdf( + path=group_hdf5_file, group="group" + ) + assert len(variables) == 1 + + def test_non_group_error(self, group_hdf5_file): + with pytest.raises(ValueError): + HDFVirtualBackend._virtual_vars_from_hdf( + path=group_hdf5_file, group="group/data" + ) diff --git a/virtualizarr/tests/test_readers/test_hdf_filters.py b/virtualizarr/tests/test_readers/test_hdf_filters.py new file mode 100644 index 0000000..d0bde94 --- /dev/null +++ b/virtualizarr/tests/test_readers/test_hdf_filters.py @@ -0,0 +1,115 @@ +import h5py # type: ignore +import imagecodecs +import numcodecs +import numpy as np + +from virtualizarr.readers.hdf_filters import ( + _filter_to_codec, + cfcodec_from_dataset, + codecs_from_dataset, +) + + +class TestFilterToCodec: + def test_gzip_uses_zlib_numcodec(self): + codec = _filter_to_codec("gzip", 1) + assert isinstance(codec, numcodecs.zlib.Zlib) + + def test_lzf(self): + codec = _filter_to_codec("lzf") + assert isinstance(codec, imagecodecs.numcodecs.Lzf) + + def test_blosc(self): + codec = _filter_to_codec("32001", (2, 2, 8, 800, 9, 2, 1)) + assert isinstance(codec, numcodecs.blosc.Blosc) + expected_config = { + "id": "blosc", + "blocksize": 800, + "clevel": 9, + "shuffle": 2, + "cname": "lz4", + } + assert codec.get_config() == expected_config + + def test_zstd(self): + codec = _filter_to_codec("32015", (5,)) + assert isinstance(codec, numcodecs.zstd.Zstd) + expected_config = {"id": "zstd", "level": 5} + assert codec.get_config() == expected_config + + def test_shuffle(self): + codec = _filter_to_codec("shuffle", (7,)) + assert isinstance(codec, numcodecs.shuffle.Shuffle) + expected_config = {"id": "shuffle", "elementsize": 7} + assert codec.get_config() == expected_config + + +class TestCodecsFromDataSet: + def test_numcodec_decoding(self, np_uncompressed, 
filter_encoded_hdf5_file): + f = h5py.File(filter_encoded_hdf5_file) + ds = f["data"] + chunk_info = ds.id.get_chunk_info(0) + codecs = codecs_from_dataset(ds) + with open(filter_encoded_hdf5_file, "rb") as file: + file.seek(chunk_info.byte_offset) + bytes_read = file.read(chunk_info.size) + decoded = codecs[0].decode(bytes_read) + if isinstance(decoded, np.ndarray): + assert decoded.tobytes() == np_uncompressed.tobytes() + else: + assert decoded == np_uncompressed.tobytes() + + +class TestCFCodecFromDataset: + def test_no_cf_convention(self, filter_encoded_hdf5_file): + f = h5py.File(filter_encoded_hdf5_file) + ds = f["data"] + cf_codec = cfcodec_from_dataset(ds) + assert cf_codec is None + + def test_cf_scale_factor(self, netcdf4_file): + f = h5py.File(netcdf4_file) + ds = f["air"] + cf_codec = cfcodec_from_dataset(ds) + assert cf_codec["target_dtype"] == np.dtype(np.float64) + assert cf_codec["codec"].scale == 100.0 + assert cf_codec["codec"].offset == 0 + assert cf_codec["codec"].dtype == " str: from pathlib import Path @@ -284,15 +287,20 @@ def local_to_s3_url(old_local_path: str) -> str: == "s3://bucket/air.nc" ) - def test_invalid_type(self, netcdf4_file): - vds = open_virtual_dataset(netcdf4_file, indexes={}) + def test_invalid_type(self, netcdf4_file, hdf_backend): + vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend) with pytest.raises(TypeError): vds.virtualize.rename_paths(["file1.nc", "file2.nc"]) - def test_mixture_of_manifestarrays_and_numpy_arrays(self, netcdf4_file): + def test_mixture_of_manifestarrays_and_numpy_arrays( + self, netcdf4_file, hdf_backend + ): vds = open_virtual_dataset( - netcdf4_file, indexes={}, loadable_variables=["lat", "lon"] + netcdf4_file, + indexes={}, + loadable_variables=["lat", "lon"], + backend=hdf_backend, ) renamed_vds = vds.virtualize.rename_paths("s3://bucket/air.nc") assert (