From 7cf9de640cd1550e792ed2db20e258643f39c741 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 16 Sep 2024 12:53:58 -0400 Subject: [PATCH] setup for zarr --- src/nwbinspector/_nwb_inspection.py | 82 ++++++++++--------------- src/nwbinspector/tools/__init__.py | 3 +- src/nwbinspector/tools/_read_nwbfile.py | 22 ++++--- tests/test_inspector.py | 21 +++++-- 4 files changed, 65 insertions(+), 63 deletions(-) diff --git a/src/nwbinspector/_nwb_inspection.py b/src/nwbinspector/_nwb_inspection.py index b119c7eb..70b8744f 100644 --- a/src/nwbinspector/_nwb_inspection.py +++ b/src/nwbinspector/_nwb_inspection.py @@ -10,17 +10,17 @@ import pynwb from natsort import natsorted -from packaging.version import Version from tqdm import tqdm -from . import available_checks, configure_checks +from . import available_checks +from ._configuration import configure_checks from ._registration import Importance, InspectorMessage +from .tools._read_nwbfile import read_nwbfile from .utils import ( FilePathType, OptionalListOfStrings, PathType, calculate_number_of_cpu, - get_package_version, ) @@ -143,17 +143,16 @@ def inspect_all( # Manual identifier check over all files in the folder path identifiers = defaultdict(list) for nwbfile_path in nwbfiles: - with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: - try: - nwbfile = io.read() - identifiers[nwbfile.identifier].append(nwbfile_path) - except Exception as exception: - yield InspectorMessage( - message=traceback.format_exc(), - importance=Importance.ERROR, - check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", - file_path=nwbfile_path, - ) + try: + nwbfile = read_nwbfile(nwbfile_path=nwbfile_path) + identifiers[nwbfile.identifier].append(nwbfile_path) + except Exception as exception: + yield InspectorMessage( + message=traceback.format_exc(), + importance=Importance.ERROR, + check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", + file_path=nwbfile_path, + ) if len(identifiers) != len(nwbfiles): for identifier, nwbfiles_with_identifier in identifiers.items(): @@ -239,7 +238,7 @@ def inspect_nwbfile( config : dict Dictionary valid against our JSON configuration schema. Can specify a mapping of importance levels and list of check functions whose importance you wish to change. - Typically loaded via json.load from a valid .json file + Typically loaded via `json.load` from a valid .json file. ignore: list, optional Names of functions to skip. select: list, optional @@ -269,20 +268,10 @@ def inspect_nwbfile( filterwarnings(action="ignore", message="No cached namespaces found in .*") filterwarnings(action="ignore", message="Ignoring cached namespace .*") - if not skip_validate and get_package_version("pynwb") >= Version("2.2.0"): - validation_error_list, _ = pynwb.validate(paths=[nwbfile_path]) - for validation_namespace_errors in validation_error_list: - for validation_error in validation_namespace_errors: - yield InspectorMessage( - message=validation_error.reason, - importance=Importance.PYNWB_VALIDATION, - check_function_name=validation_error.name, - location=validation_error.location, - file_path=nwbfile_path, - ) + try: + in_memory_nwbfile, io = read_nwbfile(nwbfile_path=nwbfile_path) - with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: - if not skip_validate and get_package_version("pynwb") < Version("2.2.0"): + if not skip_validate: validation_errors = pynwb.validate(io=io) for validation_error in validation_errors: yield InspectorMessage( @@ -293,26 +282,23 @@ def inspect_nwbfile( file_path=nwbfile_path, ) - try: - in_memory_nwbfile = io.read() - - for inspector_message in inspect_nwbfile_object( - nwbfile_object=in_memory_nwbfile, - checks=checks, - config=config, - ignore=ignore, - select=select, - importance_threshold=importance_threshold, - ): - inspector_message.file_path = nwbfile_path - yield inspector_message - except Exception as exception: - yield InspectorMessage( - message=traceback.format_exc(), - importance=Importance.ERROR, - check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", - file_path=nwbfile_path, - ) + for inspector_message in inspect_nwbfile_object( + nwbfile_object=in_memory_nwbfile, + checks=checks, + config=config, + ignore=ignore, + select=select, + importance_threshold=importance_threshold, + ): + inspector_message.file_path = nwbfile_path + yield inspector_message + except Exception as exception: + yield InspectorMessage( + message=traceback.format_exc(), + importance=Importance.ERROR, + check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", + file_path=nwbfile_path, + ) # TODO: deprecate once subject types and dandi schemas have been extended diff --git a/src/nwbinspector/tools/__init__.py b/src/nwbinspector/tools/__init__.py index cac8b068..674f827a 100644 --- a/src/nwbinspector/tools/__init__.py +++ b/src/nwbinspector/tools/__init__.py @@ -1,8 +1,9 @@ from ._dandi import get_s3_urls_and_dandi_paths from ._nwb import all_of_type, get_nwbfile_path_from_internal_object -from ._read_nwbfile import read_nwbfile +from ._read_nwbfile import BACKEND_IO_CLASSES, read_nwbfile __all__ = [ + "BACKEND_IO_CLASSES", "get_s3_urls_and_dandi_paths", "all_of_type", "get_nwbfile_path_from_internal_object", diff --git a/src/nwbinspector/tools/_read_nwbfile.py b/src/nwbinspector/tools/_read_nwbfile.py index 07d61d80..f7a274c9 100644 --- a/src/nwbinspector/tools/_read_nwbfile.py +++ b/src/nwbinspector/tools/_read_nwbfile.py @@ -5,14 +5,15 @@ from warnings import filterwarnings import h5py +from hdmf.backends.io import HDMFIO from pynwb import NWBHDF5IO, NWBFile -_BACKEND_IO_CLASSES = dict(hdf5=NWBHDF5IO) +BACKEND_IO_CLASSES = dict(hdf5=NWBHDF5IO) try: from hdmf_zarr import NWBZarrIO - _BACKEND_IO_CLASSES.update(zarr=NWBZarrIO) + BACKEND_IO_CLASSES.update(zarr=NWBZarrIO) except ModuleNotFoundError as exception: if str(exception) != "No module named 'hdmf_zarr'": # not the exception we're looking for, so re-raise raise exception @@ -47,11 +48,11 @@ def _get_backend(path: str, method: Literal["local", "fsspec", "ros3"]): if method == "fsspec": fs = _init_fsspec(path=path) with fs.open(path=path, mode="rb") as file: - for backend_name, backend_class in _BACKEND_IO_CLASSES.items(): + for backend_name, backend_class in BACKEND_IO_CLASSES.items(): if backend_class.can_read(path=file): possible_backends.append(backend_name) else: - for backend_name, backend_class in _BACKEND_IO_CLASSES.items(): + for backend_name, backend_class in BACKEND_IO_CLASSES.items(): if backend_class.can_read(path): possible_backends.append(backend_name) @@ -69,7 +70,7 @@ def read_nwbfile( nwbfile_path: Union[str, Path], method: Optional[Literal["local", "fsspec", "ros3"]] = None, backend: Optional[Literal["hdf5", "zarr"]] = None, -) -> NWBFile: +) -> tuple[NWBFile, HDMFIO]: """ Read an NWB file using the specified (or auto-detected) method and specified (or auto-detected) backend. @@ -88,7 +89,10 @@ def read_nwbfile( Returns ------- - pynwb.NWBFile + nwbfile : pynwb.NWBFile + The in-memory NWBFile object. + io : hdmf.backends.io.HDMFIO + The initialized HDMFIO object used to read the file. """ nwbfile_path = str(nwbfile_path) # If pathlib.Path, cast to str; if already str, no harm done @@ -109,7 +113,7 @@ def read_nwbfile( ) backend = backend or _get_backend(nwbfile_path, method) - if method == "local" and not _BACKEND_IO_CLASSES[ # Temporary until .can_read() is able to work on streamed bytes + if method == "local" and not BACKEND_IO_CLASSES[ # Temporary until .can_read() is able to work on streamed bytes backend ].can_read(path=nwbfile_path): raise IOError(f"The chosen backend ({backend}) is unable to read the file! Please select a different backend.") @@ -127,7 +131,7 @@ def read_nwbfile( io_kwargs.update(path=nwbfile_path) if method == "ros3": io_kwargs.update(driver="ros3") - io = _BACKEND_IO_CLASSES[backend](**io_kwargs) + io = BACKEND_IO_CLASSES[backend](**io_kwargs) nwbfile = io.read() - return nwbfile + return (nwbfile, io) diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 50b7defb..acb30495 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -31,8 +31,11 @@ check_timestamps_match_first_dimension, ) from nwbinspector.testing import make_minimal_nwbfile +from nwbinspector.tools import BACKEND_IO_CLASSES from nwbinspector.utils import FilePathType +IO_CLASSES_TO_BACKEND = {v: k for k, v in BACKEND_IO_CLASSES.items()} + def add_big_dataset_no_compression(nwbfile: NWBFile): time_series = TimeSeries( @@ -85,8 +88,10 @@ def add_simple_table(nwbfile: NWBFile): nwbfile.add_acquisition(time_intervals) -class TestInspector(TestCase): - """A common helper class for testing the NWBInspector.""" +class TestInspectorOnBackend(TestCase): + """A common helper class for testing the NWBInspector on files of a specific backend (HDF5 or Zarr).""" + + BackendIOClass: HDMFIO @staticmethod def assertFileExists(path: FilePathType): @@ -124,7 +129,8 @@ def assertLogFileContentsEqual( self.assertEqual(first=test_file_lines[skip_first_n_lines:-1], second=true_file_lines) -class TestInspectorAPI(TestInspector): +class TestInspectorAPIHDF5(TestInspectorOnBackend): + BackendIOClass = BACKEND_IO_CLASSES["hdf5"] maxDiff = None @classmethod @@ -149,10 +155,11 @@ def setUpClass(cls): # Third file to be left without violations add_non_matching_timestamps_dimension(nwbfiles[3]) - cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb") for j in range(num_nwbfiles)] + suffix = IO_CLASSES_TO_BACKEND[cls.BackendIOClass] + cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb.{suffix}") for j in range(num_nwbfiles)] cls.nwbfile_paths[3] = str(cls.tempdir / f"._testing3.nwb") for nwbfile_path, nwbfile in zip(cls.nwbfile_paths, nwbfiles): - with NWBHDF5IO(path=nwbfile_path, mode="w") as io: + with cls.BackendIOClass(path=nwbfile_path, mode="w") as io: io.write(nwbfile) @classmethod @@ -581,6 +588,10 @@ def test_inspect_nwbfile_dandi_config(self): self.assertCountEqual(first=test_results, second=true_results) +class TestInspectorAPIHZarr(TestInspectorAPIHDF5): + BackendIOClass = BACKEND_IO_CLASSES["zarr"] + + class TestDANDIConfig(TestInspector): maxDiff = None