From 0a27c83d2e638a76b3626d4eb4a0408f23c725c0 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:13:13 -0400 Subject: [PATCH] Integrate and test Zarr backend (#513) * setup for zarr * fix imports * fix test class names; add more zarr * changelog * control when io is returned * debugs * debugs * debugs * debugs * debugs * down to only one * remove f strings automatically * remove some * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * override mystery * fix streaming path * Update tests/test_inspector.py Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> * PR suggestions * adjust docstring * add Zarr to caching * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rollback caching * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Ben Dichter Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- CHANGELOG.md | 3 + MANIFEST.in | 5 +- src/nwbinspector/__init__.py | 8 + src/nwbinspector/_configuration.py | 7 +- .../config.schema.json | 0 .../dandi.inspector_config.yaml | 0 src/nwbinspector/_nwb_inspection.py | 79 +++---- src/nwbinspector/_registration.py | 12 +- src/nwbinspector/checks/_nwb_containers.py | 23 +- src/nwbinspector/tools/__init__.py | 3 +- src/nwbinspector/tools/_read_nwbfile.py | 39 +-- src/nwbinspector/utils/_utils.py | 3 +- .../{ => expected_reports}/000126_report.txt | 0 ...true_nwbinspector_default_report_hdf5.txt} | 8 +- .../true_nwbinspector_default_report_zarr.txt | 34 +++ ...spector_report_with_dandi_config_hdf5.txt} | 18 +- ...nspector_report_with_dandi_config_zarr.txt | 45 ++++ tests/streaming_cli_tests.py | 9 +- tests/test_inspector.py | 222 +++++++++++++----- 19 files changed, 373 insertions(+), 145 deletions(-) rename src/nwbinspector/{ => _internal_configs}/config.schema.json (100%) rename src/nwbinspector/{internal_configs => _internal_configs}/dandi.inspector_config.yaml (100%) rename tests/{ => expected_reports}/000126_report.txt (100%) rename tests/{true_nwbinspector_default_report.txt => expected_reports/true_nwbinspector_default_report_hdf5.txt} (65%) create mode 100644 tests/expected_reports/true_nwbinspector_default_report_zarr.txt rename tests/{true_nwbinspector_report_with_dandi_config.txt => expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt} (58%) create mode 100644 tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 357e75628..7193363b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ ### Deprecation (API) * The `inspect_nwb` method has been removed. Please use `inspect_nwbfile` or `inspect_nwbfile_object` instead. [#505](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/505) +### New Features +* Added Zarr support. [#513](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/513) + ### Improvements * Removed the `robust_ros3_read` utility helper. [#506](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/506) * Simplified the `nwbinspector.testing` configuration framework. 
[#509](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/509) diff --git a/MANIFEST.in b/MANIFEST.in index f8ca9e67f..6f5bd76e7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,4 @@ graft tests global-exclude *.py[cod] -include src/nwbinspector/internal_configs/dandi.inspector_config.yaml -include src/nwbinspector/config.schema.json -include requirements.txt +include src/nwbinspector/_internal_configs/dandi.inspector_config.yaml +include src/nwbinspector/_internal_configs/config.schema.json diff --git a/src/nwbinspector/__init__.py b/src/nwbinspector/__init__.py index 4c5f6a3d6..0f21d7c29 100644 --- a/src/nwbinspector/__init__.py +++ b/src/nwbinspector/__init__.py @@ -26,6 +26,10 @@ # Still keeping the legacy magic version attribute requested by some users __version__ = importlib.metadata.version(distribution_name="nwbinspector") +# Note: this is not exposed at this outer level, but is used here to trigger the automatic submodule import +# (otherwise someone would have to import nwbinspector.testing explicitly) +from .testing import check_streaming_tests_enabled # noqa: F401 + __all__ = [ "available_checks", "default_check_registry", @@ -51,4 +55,8 @@ "FormatterOptions", "organize_messages", "__version__", + # Public submodules + "checks", + "testing", + "utils", ] diff --git a/src/nwbinspector/_configuration.py b/src/nwbinspector/_configuration.py index b8692fe64..c5d3995e6 100644 --- a/src/nwbinspector/_configuration.py +++ b/src/nwbinspector/_configuration.py @@ -13,12 +13,15 @@ from . import available_checks from ._registration import Importance -INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml") +INTERNAL_CONFIGS = dict( + dandi=Path(__file__).parent / "_internal_configs" / "dandi.inspector_config.yaml", +) def validate_config(config: dict): """Validate an instance of configuration against the official schema.""" - with open(file=Path(__file__).parent / "config.schema.json", mode="r") as fp: + config_schema_file_path = Path(__file__).parent / "_internal_configs" / "config.schema.json" + with open(file=config_schema_file_path, mode="r") as fp: schema = json.load(fp=fp) jsonschema.validate(instance=config, schema=schema) diff --git a/src/nwbinspector/config.schema.json b/src/nwbinspector/_internal_configs/config.schema.json similarity index 100% rename from src/nwbinspector/config.schema.json rename to src/nwbinspector/_internal_configs/config.schema.json diff --git a/src/nwbinspector/internal_configs/dandi.inspector_config.yaml b/src/nwbinspector/_internal_configs/dandi.inspector_config.yaml similarity index 100% rename from src/nwbinspector/internal_configs/dandi.inspector_config.yaml rename to src/nwbinspector/_internal_configs/dandi.inspector_config.yaml diff --git a/src/nwbinspector/_nwb_inspection.py b/src/nwbinspector/_nwb_inspection.py index 400f26b36..55406998e 100644 --- a/src/nwbinspector/_nwb_inspection.py +++ b/src/nwbinspector/_nwb_inspection.py @@ -12,8 +12,10 @@ from natsort import natsorted from tqdm import tqdm -from . import available_checks, configure_checks +from . 
import available_checks +from ._configuration import configure_checks from ._registration import Importance, InspectorMessage +from .tools._read_nwbfile import read_nwbfile from .utils import ( FilePathType, OptionalListOfStrings, @@ -127,7 +129,7 @@ def inspect_all( progress_bar_options = dict(position=0, leave=False) if in_path.is_dir(): - nwbfiles = list(in_path.rglob("*.nwb")) + nwbfiles = list(in_path.rglob("*.nwb*")) # Remove any macOS sidecar files nwbfiles = [nwbfile for nwbfile in nwbfiles if not nwbfile.name.startswith("._")] @@ -141,17 +143,16 @@ def inspect_all( # Manual identifier check over all files in the folder path identifiers = defaultdict(list) for nwbfile_path in nwbfiles: - with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: - try: - nwbfile = io.read() - identifiers[nwbfile.identifier].append(nwbfile_path) - except Exception as exception: - yield InspectorMessage( - message=traceback.format_exc(), - importance=Importance.ERROR, - check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", - file_path=nwbfile_path, - ) + try: + nwbfile = read_nwbfile(nwbfile_path=nwbfile_path) + identifiers[nwbfile.identifier].append(nwbfile_path) + except Exception as exception: + yield InspectorMessage( + message=traceback.format_exc(), + importance=Importance.ERROR, + check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", + file_path=nwbfile_path, + ) if len(identifiers) != len(nwbfiles): for identifier, nwbfiles_with_identifier in identifiers.items(): @@ -198,7 +199,7 @@ def inspect_all( yield message else: for nwbfile_path in nwbfiles_iterable: - for message in inspect_nwbfile(nwbfile_path=nwbfile_path, checks=checks): + for message in inspect_nwbfile(nwbfile_path=nwbfile_path, checks=checks, skip_validate=skip_validate): yield message @@ -237,7 +238,7 @@ def inspect_nwbfile( config : dict Dictionary valid against our JSON configuration schema. Can specify a mapping of importance levels and list of check functions whose importance you wish to change. - Typically loaded via json.load from a valid .json file + Typically loaded via `json.load` from a valid .json file. ignore: list, optional Names of functions to skip. 
select: list, optional @@ -267,10 +268,12 @@ filterwarnings(action="ignore", message="No cached namespaces found in .*") filterwarnings(action="ignore", message="Ignoring cached namespace .*") - if not skip_validate: - validation_error_list, _ = pynwb.validate(paths=[nwbfile_path]) - for validation_namespace_errors in validation_error_list: - for validation_error in validation_namespace_errors: + try: + in_memory_nwbfile, io = read_nwbfile(nwbfile_path=nwbfile_path, return_io=True) + + if not skip_validate: + validation_errors = pynwb.validate(io=io) + for validation_error in validation_errors: yield InspectorMessage( message=validation_error.reason, importance=Importance.PYNWB_VALIDATION, @@ -279,27 +282,23 @@ file_path=nwbfile_path, ) - with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: - try: - in_memory_nwbfile = io.read() - - for inspector_message in inspect_nwbfile_object( - nwbfile_object=in_memory_nwbfile, - checks=checks, - config=config, - ignore=ignore, - select=select, - importance_threshold=importance_threshold, - ): - inspector_message.file_path = nwbfile_path - yield inspector_message - except Exception as exception: - yield InspectorMessage( - message=traceback.format_exc(), - importance=Importance.ERROR, - check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", - file_path=nwbfile_path, - ) + for inspector_message in inspect_nwbfile_object( + nwbfile_object=in_memory_nwbfile, + checks=checks, + config=config, + ignore=ignore, + select=select, + importance_threshold=importance_threshold, + ): + inspector_message.file_path = nwbfile_path + yield inspector_message + except Exception as exception: + yield InspectorMessage( + message=traceback.format_exc(), + importance=Importance.ERROR, + check_function_name=f"During io.read() - {type(exception)}: {str(exception)}", + file_path=nwbfile_path, + ) # TODO: deprecate once subject types and dandi schemas have been extended diff --git a/src/nwbinspector/_registration.py b/src/nwbinspector/_registration.py index e772a413f..5ce7fa515 100644 --- a/src/nwbinspector/_registration.py +++ b/src/nwbinspector/_registration.py @@ -4,6 +4,7 @@ from typing import Optional import h5py +import zarr from pynwb import NWBFile from pynwb.ecephys import Device, ElectrodeGroup from pynwb.file import Subject @@ -93,7 +94,7 @@ def _auto_parse(check_function, obj, result: Optional[InspectorMessage] = None): def _parse_location(neurodata_object) -> Optional[str]: - """Grab the object location from a h5py.Dataset or a container content that is an h5py.Dataset object.""" + """Grab the object location from a dataset or a container content that is a dataset object.""" known_locations = { NWBFile: "/", Subject: "/general/subject", @@ -105,13 +106,16 @@ def _parse_location(neurodata_object) -> Optional[str]: for key, val in known_locations.items(): if isinstance(neurodata_object, key): return val - """Infer the human-readable path of the object within an NWBFile by tracing its parents.""" + + # Infer the human-readable path of the object within an NWBFile by tracing its parents if neurodata_object.parent is None: return "/" # Best solution: object is or has a HDF5 Dataset - if isinstance(neurodata_object, h5py.Dataset): + if isinstance(neurodata_object, (h5py.Dataset, zarr.Array)): return neurodata_object.name else: - for field in neurodata_object.fields.values(): + for field_name, field in neurodata_object.fields.items(): if isinstance(field, h5py.Dataset): return
field.parent.name + elif isinstance(field, zarr.Array): + return field.name.removesuffix(f"/{field_name}") diff --git a/src/nwbinspector/checks/_nwb_containers.py b/src/nwbinspector/checks/_nwb_containers.py index 8f8861d6e..2d45c39fa 100644 --- a/src/nwbinspector/checks/_nwb_containers.py +++ b/src/nwbinspector/checks/_nwb_containers.py @@ -3,6 +3,7 @@ import os import h5py +import zarr from pynwb import NWBContainer from .._registration import Importance, InspectorMessage, Severity, register_check @@ -19,9 +20,16 @@ def check_large_dataset_compression(nwb_container: NWBContainer, gb_lower_bound: Best Practice: :ref:`best_practice_compression` """ for field in getattr(nwb_container, "fields", dict()).values(): - if not isinstance(field, h5py.Dataset): + if not isinstance(field, (h5py.Dataset, zarr.Array)): continue - if field.compression is None and field.size * field.dtype.itemsize > gb_lower_bound * 1e9: + + compression_indicator = None + if isinstance(field, h5py.Dataset): + compression_indicator = field.compression + elif isinstance(field, zarr.Array): + compression_indicator = field.compressor + + if compression_indicator is None and field.size * field.dtype.itemsize > gb_lower_bound * 1e9: return InspectorMessage( severity=Severity.HIGH, message=f"{os.path.split(field.name)[1]} is a large uncompressed dataset! Please enable compression.", @@ -44,10 +52,17 @@ def check_small_dataset_compression( Best Practice: :ref:`best_practice_compression` """ for field in getattr(nwb_container, "fields", dict()).values(): - if not isinstance(field, h5py.Dataset): + if not isinstance(field, (h5py.Dataset, zarr.Array)): continue + + compression_indicator = None + if isinstance(field, h5py.Dataset): + compression_indicator = field.compression + elif isinstance(field, zarr.Array): + compression_indicator = field.compressor + if ( - field.compression is None + compression_indicator is None and mb_lower_bound * 1e6 < field.size * field.dtype.itemsize < gb_upper_bound * 1e9 ): if field.size * field.dtype.itemsize > gb_severity_threshold * 1e9: diff --git a/src/nwbinspector/tools/__init__.py b/src/nwbinspector/tools/__init__.py index cac8b0683..674f827a2 100644 --- a/src/nwbinspector/tools/__init__.py +++ b/src/nwbinspector/tools/__init__.py @@ -1,8 +1,9 @@ from ._dandi import get_s3_urls_and_dandi_paths from ._nwb import all_of_type, get_nwbfile_path_from_internal_object -from ._read_nwbfile import read_nwbfile +from ._read_nwbfile import BACKEND_IO_CLASSES, read_nwbfile __all__ = [ + "BACKEND_IO_CLASSES", "get_s3_urls_and_dandi_paths", "all_of_type", "get_nwbfile_path_from_internal_object", diff --git a/src/nwbinspector/tools/_read_nwbfile.py b/src/nwbinspector/tools/_read_nwbfile.py index 07d61d802..20fe30b08 100644 --- a/src/nwbinspector/tools/_read_nwbfile.py +++ b/src/nwbinspector/tools/_read_nwbfile.py @@ -5,17 +5,14 @@ from warnings import filterwarnings import h5py +from hdmf.backends.io import HDMFIO +from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, NWBFile -_BACKEND_IO_CLASSES = dict(hdf5=NWBHDF5IO) - -try: - from hdmf_zarr import NWBZarrIO - - _BACKEND_IO_CLASSES.update(zarr=NWBZarrIO) -except ModuleNotFoundError as exception: - if str(exception) != "No module named 'hdmf_zarr'": # not the exception we're looking for, so re-raise - raise exception +BACKEND_IO_CLASSES = dict( + hdf5=NWBHDF5IO, + zarr=NWBZarrIO, +) def _get_method(path: str): @@ -47,11 +44,11 @@ def _get_backend(path: str, method: Literal["local", "fsspec", "ros3"]): if method == "fsspec": fs =
_init_fsspec(path=path) with fs.open(path=path, mode="rb") as file: - for backend_name, backend_class in _BACKEND_IO_CLASSES.items(): + for backend_name, backend_class in BACKEND_IO_CLASSES.items(): if backend_class.can_read(path=file): possible_backends.append(backend_name) else: - for backend_name, backend_class in _BACKEND_IO_CLASSES.items(): + for backend_name, backend_class in BACKEND_IO_CLASSES.items(): if backend_class.can_read(path): possible_backends.append(backend_name) @@ -69,7 +66,8 @@ def read_nwbfile( nwbfile_path: Union[str, Path], method: Optional[Literal["local", "fsspec", "ros3"]] = None, backend: Optional[Literal["hdf5", "zarr"]] = None, -) -> NWBFile: + return_io: bool = False, +) -> Union[NWBFile, tuple[NWBFile, HDMFIO]]: """ Read an NWB file using the specified (or auto-detected) method and specified (or auto-detected) backend. @@ -85,10 +83,16 @@ def read_nwbfile( backend : "hdf5", "zarr", or None (default) Type of backend used to write the file. The default auto-detects the type of the file. + return_io : bool, default: False + Whether to return the HDMFIO object used to open the file. Returns ------- - pynwb.NWBFile + nwbfile : pynwb.NWBFile + The in-memory NWBFile object. + io : hdmf.backends.io.HDMFIO, optional + Only returned if `return_io` is True. + The initialized HDMFIO object used to read the file. """ nwbfile_path = str(nwbfile_path) # If pathlib.Path, cast to str; if already str, no harm done @@ -109,7 +113,7 @@ ) backend = backend or _get_backend(nwbfile_path, method) - if method == "local" and not _BACKEND_IO_CLASSES[ # Temporary until .can_read() is able to work on streamed bytes + if method == "local" and not BACKEND_IO_CLASSES[ # Temporary until .can_read() is able to work on streamed bytes backend ].can_read(path=nwbfile_path): raise IOError(f"The chosen backend ({backend}) is unable to read the file!
Please select a different backend.") @@ -127,7 +131,10 @@ def read_nwbfile( io_kwargs.update(path=nwbfile_path) if method == "ros3": io_kwargs.update(driver="ros3") - io = _BACKEND_IO_CLASSES[backend](**io_kwargs) + io = BACKEND_IO_CLASSES[backend](**io_kwargs) nwbfile = io.read() - return nwbfile + if return_io: + return (nwbfile, io) + else: # Note: do not be concerned about io object closing due to garbage collection here + return nwbfile # (it is attached as an attribute to the NWBFile object) diff --git a/src/nwbinspector/utils/_utils.py b/src/nwbinspector/utils/_utils.py index a0f06661f..a1fddd411 100644 --- a/src/nwbinspector/utils/_utils.py +++ b/src/nwbinspector/utils/_utils.py @@ -10,6 +10,7 @@ import h5py import numpy as np +import zarr from hdmf.backends.hdf5.h5_utils import H5Dataset from numpy.typing import ArrayLike from packaging import version @@ -25,7 +26,7 @@ @lru_cache(maxsize=MAX_CACHE_ITEMS) def _cache_data_retrieval_command( - data: h5py.Dataset, reduced_selection: tuple[tuple[Optional[int], Optional[int], Optional[int]]] + data: Union[h5py.Dataset, zarr.Array], reduced_selection: tuple[tuple[Optional[int], Optional[int], Optional[int]]] ) -> np.ndarray: """LRU caching for _cache_data_selection cannot be applied to list inputs; this expects the tuple or Dataset.""" selection = tuple([slice(*reduced_slice) for reduced_slice in reduced_selection]) # reconstitute the slices diff --git a/tests/000126_report.txt b/tests/expected_reports/000126_report.txt similarity index 100% rename from tests/000126_report.txt rename to tests/expected_reports/000126_report.txt diff --git a/tests/true_nwbinspector_default_report.txt b/tests/expected_reports/true_nwbinspector_default_report_hdf5.txt similarity index 65% rename from tests/true_nwbinspector_default_report.txt rename to tests/expected_reports/true_nwbinspector_default_report_hdf5.txt index aa0447eba..8fd1cefe5 100644 --- a/tests/true_nwbinspector_default_report.txt +++ b/tests/expected_reports/true_nwbinspector_default_report_hdf5.txt @@ -15,20 +15,20 @@ Found 5 issues over 2 files: 0 CRITICAL =========== -0.0 ./testing0.nwb: check_data_orientation - 'SpatialSeries' object at location '/processing/behavior/Position/my_spatial_series' +0.0 ./testing0.nwb.hdf5: check_data_orientation - 'SpatialSeries' object at location '/processing/behavior/Position/my_spatial_series' Message: Data may be in the wrong orientation. Time should be in the first dimension, and is usually the longest dimension. Here, another dimension is longer. -0.1 ./testing0.nwb: check_timestamps_match_first_dimension - 'TimeSeries' object at location '/acquisition/test_time_series_3' +0.1 ./testing0.nwb.hdf5: check_timestamps_match_first_dimension - 'TimeSeries' object at location '/acquisition/test_time_series_3' Message: The length of the first dimension of data (4) does not match the length of timestamps (3). 1 BEST_PRACTICE_VIOLATION ========================== -1.2 ./testing0.nwb and 1 other file: check_regular_timestamps - 'TimeSeries' object at location '/acquisition/test_time_series_2' +1.2 ./testing0.nwb.hdf5 and 1 other file: check_regular_timestamps - 'TimeSeries' object at location '/acquisition/test_time_series_2' Message: TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 and rate=0.5 instead of timestamps. 
2 BEST_PRACTICE_SUGGESTION =========================== -2.3 ./testing0.nwb: check_small_dataset_compression - 'TimeSeries' object at location '/acquisition/test_time_series_1' +2.3 ./testing0.nwb.hdf5: check_small_dataset_compression - 'TimeSeries' object at location '/acquisition/test_time_series_1' Message: data is not compressed. Consider enabling compression when writing a dataset. diff --git a/tests/expected_reports/true_nwbinspector_default_report_zarr.txt b/tests/expected_reports/true_nwbinspector_default_report_zarr.txt new file mode 100644 index 000000000..957e68adc --- /dev/null +++ b/tests/expected_reports/true_nwbinspector_default_report_zarr.txt @@ -0,0 +1,34 @@ +************************************************** +NWBInspector Report Summary + +Timestamp: 2022-04-01 13:32:13.756390-04:00 +Platform: Windows-10-10.0.19043-SP0 +NWBInspector version: 0.3.6 + +Found 5 issues over 2 files: + 2 - CRITICAL + 2 - BEST_PRACTICE_VIOLATION + 1 - BEST_PRACTICE_SUGGESTION +************************************************** + + +0 CRITICAL +=========== + +0.0 ./testing0.nwb.zarr: check_data_orientation - 'SpatialSeries' object at location '/processing/behavior/Position/my_spatial_series' + Message: Data may be in the wrong orientation. Time should be in the first dimension, and is usually the longest dimension. Here, another dimension is longer. + +0.1 ./testing0.nwb.zarr: check_timestamps_match_first_dimension - 'TimeSeries' object at location '/acquisition/test_time_series_3' + Message: The length of the first dimension of data (4) does not match the length of timestamps (3). + +1 BEST_PRACTICE_VIOLATION +========================== + +1.2 ./testing0.nwb.zarr and 1 other file: check_regular_timestamps - 'TimeSeries' object at location '/acquisition/test_time_series_2' + Message: TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 and rate=0.5 instead of timestamps. + +2 BEST_PRACTICE_SUGGESTION +=========================== + +2.3 ./testing0.nwb.zarr: check_small_dataset_compression - 'TimeSeries' object at location '/acquisition/test_time_series_1' + Message: data is not compressed. Consider enabling compression when writing a dataset. diff --git a/tests/true_nwbinspector_report_with_dandi_config.txt b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt similarity index 58% rename from tests/true_nwbinspector_report_with_dandi_config.txt rename to tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt index e6f80f0f1..4b282018b 100644 --- a/tests/true_nwbinspector_report_with_dandi_config.txt +++ b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_hdf5.txt @@ -14,32 +14,32 @@ Found 10 issues over 2 files: 0 CRITICAL =========== -0.0 ./testing0.nwb and 1 other file: check_subject_exists - 'NWBFile' object at location '/' +0.0 ./testing0.nwb.hdf5 and 1 other file: check_subject_exists - 'NWBFile' object at location '/' Message: Subject is missing. -0.1 ./testing0.nwb: check_timestamps_match_first_dimension - 'TimeSeries' object at location '/acquisition/test_time_series_3' +0.1 ./testing0.nwb.hdf5: check_timestamps_match_first_dimension - 'TimeSeries' object at location '/acquisition/test_time_series_3' Message: The length of the first dimension of data (4) does not match the length of timestamps (3). 
1 BEST_PRACTICE_VIOLATION ========================== -1.2 ./testing0.nwb: check_time_interval_time_columns - 'TimeIntervals' object with name 'test_table' +1.2 ./testing0.nwb.hdf5: check_time_interval_time_columns - 'TimeIntervals' object with name 'test_table' Message: ['start_time'] are time columns but the values are not in ascending order. All times should be in seconds with respect to the session start time. -1.3 ./testing0.nwb: check_regular_timestamps - 'TimeSeries' object at location '/acquisition/test_time_series_2' +1.3 ./testing0.nwb.hdf5: check_regular_timestamps - 'TimeSeries' object at location '/acquisition/test_time_series_2' Message: TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 and rate=0.5 instead of timestamps. -1.4 ./testing0.nwb: check_data_orientation - 'SpatialSeries' object at location '/processing/behavior/Position/my_spatial_series' +1.4 ./testing0.nwb.hdf5: check_data_orientation - 'SpatialSeries' object at location '/processing/behavior/Position/my_spatial_series' Message: Data may be in the wrong orientation. Time should be in the first dimension, and is usually the longest dimension. Here, another dimension is longer. -1.5 ./testing0.nwb: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_3' +1.5 ./testing0.nwb.hdf5: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_3' Message: Missing text for attribute 'unit'. Please specify the scientific unit of the 'data'. -1.6 ./testing0.nwb: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_2' +1.6 ./testing0.nwb.hdf5: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_2' Message: Missing text for attribute 'unit'. Please specify the scientific unit of the 'data'. -1.7 ./testing0.nwb: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_1' +1.7 ./testing0.nwb.hdf5: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_1' Message: Missing text for attribute 'unit'. Please specify the scientific unit of the 'data'. -1.8 ./testing1.nwb: check_data_orientation - 'TimeSeries' object at location '/acquisition/my_spatial_series' +1.8 ./testing1.nwb.hdf5: check_data_orientation - 'TimeSeries' object at location '/acquisition/my_spatial_series' Message: Data may be in the wrong orientation. Time should be in the first dimension, and is usually the longest dimension. Here, another dimension is longer. diff --git a/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt new file mode 100644 index 000000000..934b7e5fb --- /dev/null +++ b/tests/expected_reports/true_nwbinspector_report_with_dandi_config_zarr.txt @@ -0,0 +1,45 @@ +************************************************** +NWBInspector Report Summary + +Timestamp: 2022-04-01 13:32:13.756390-04:00 +Platform: Windows-10-10.0.19043-SP0 +NWBInspector version: 0.3.6 + +Found 10 issues over 2 files: + 3 - CRITICAL + 7 - BEST_PRACTICE_VIOLATION +************************************************** + + +0 CRITICAL +=========== + +0.0 ./testing0.nwb.zarr and 1 other file: check_subject_exists - 'NWBFile' object at location '/' + Message: Subject is missing. 
+ +0.1 ./testing0.nwb.zarr: check_timestamps_match_first_dimension - 'TimeSeries' object at location '/acquisition/test_time_series_3' + Message: The length of the first dimension of data (4) does not match the length of timestamps (3). + +1 BEST_PRACTICE_VIOLATION +========================== + +1.2 ./testing0.nwb.zarr: check_time_interval_time_columns - 'TimeIntervals' object with name 'test_table' + Message: ['start_time'] are time columns but the values are not in ascending order. All times should be in seconds with respect to the session start time. + +1.3 ./testing0.nwb.zarr: check_regular_timestamps - 'TimeSeries' object at location '/acquisition/test_time_series_2' + Message: TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 and rate=0.5 instead of timestamps. + +1.4 ./testing0.nwb.zarr: check_data_orientation - 'SpatialSeries' object at location '/processing/behavior/Position/my_spatial_series' + Message: Data may be in the wrong orientation. Time should be in the first dimension, and is usually the longest dimension. Here, another dimension is longer. + +1.5 ./testing0.nwb.zarr: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_3' + Message: Missing text for attribute 'unit'. Please specify the scientific unit of the 'data'. + +1.6 ./testing0.nwb.zarr: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_2' + Message: Missing text for attribute 'unit'. Please specify the scientific unit of the 'data'. + +1.7 ./testing0.nwb.zarr: check_missing_unit - 'TimeSeries' object at location '/acquisition/test_time_series_1' + Message: Missing text for attribute 'unit'. Please specify the scientific unit of the 'data'. + +1.8 ./testing1.nwb.zarr: check_data_orientation - 'TimeSeries' object at location '/acquisition/my_spatial_series' + Message: Data may be in the wrong orientation. Time should be in the first dimension, and is usually the longest dimension. Here, another dimension is longer. 
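For orientation, here is a minimal usage sketch of the backend-agnostic reader these fixtures exercise; the file path is hypothetical and `hdmf_zarr` must be installed:

    from nwbinspector.tools import read_nwbfile

    # The backend ("hdf5" vs. "zarr") is auto-detected via each IO class's can_read();
    # return_io=True also hands back the open HDMFIO object, e.g. for pynwb.validate(io=io)
    nwbfile, io = read_nwbfile(nwbfile_path="testing0.nwb.zarr", return_io=True)
    print(type(io).__name__)  # NWBZarrIO for a Zarr store, NWBHDF5IO for an HDF5 file
    io.close()

With the default `return_io=False` only the NWBFile is returned; the open IO object remains reachable as an attribute of the NWBFile, which is why premature closure via garbage collection is not a concern.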
diff --git a/tests/streaming_cli_tests.py b/tests/streaming_cli_tests.py index e8e91d38d..7f116d0c8 100644 --- a/tests/streaming_cli_tests.py +++ b/tests/streaming_cli_tests.py @@ -10,6 +10,7 @@ from nwbinspector.testing import check_streaming_tests_enabled STREAMING_TESTS_ENABLED, DISABLED_STREAMING_TESTS_REASON = check_streaming_tests_enabled() +EXPECTED_REPORTS_FOLDER_PATH = Path(__file__).parent / "expected_reports" @pytest.mark.skipif(not STREAMING_TESTS_ENABLED, reason=DISABLED_STREAMING_TESTS_REASON or "") @@ -24,7 +25,7 @@ def test_dandiset_streaming_cli(tmpdir: py.path.local): with open(file=console_output_file_path, mode="r") as io: test_console_output = io.readlines() - expected_output_file_path = Path(__file__).parent / "000126_report.txt" + expected_output_file_path = EXPECTED_REPORTS_FOLDER_PATH / "000126_report.txt" with open(file=expected_output_file_path, mode="r") as io: expected_console_output = io.readlines() @@ -47,7 +48,7 @@ def test_dandiset_streaming_cli_with_version(tmpdir: py.path.local): with open(file=console_output_file_path, mode="r") as io: test_console_output = io.readlines() - expected_output_file_path = Path(__file__).parent / "000126_report.txt" + expected_output_file_path = EXPECTED_REPORTS_FOLDER_PATH / "000126_report.txt" with open(file=expected_output_file_path, mode="r") as io: expected_console_output = io.readlines() @@ -77,7 +78,7 @@ def test_dandiset_streaming_cli_saved_report(tmpdir: py.path.local): with open(file=report_file_path, mode="r") as io: test_report = io.readlines() - expected_report_file_path = Path(__file__).parent / "000126_report.txt" + expected_report_file_path = EXPECTED_REPORTS_FOLDER_PATH / "000126_report.txt" with open(file=expected_report_file_path, mode="r") as io: expected_report = io.readlines() @@ -110,7 +111,7 @@ def test_dandiset_streaming_cli_with_version_saved_report(tmpdir: py.path.local) with open(file=report_file_path, mode="r") as io: test_report = io.readlines() - expected_report_file_path = Path(__file__).parent / "000126_report.txt" + expected_report_file_path = EXPECTED_REPORTS_FOLDER_PATH / "000126_report.txt" with open(file=expected_report_file_path, mode="r") as io: expected_report = io.readlines() diff --git a/tests/test_inspector.py b/tests/test_inspector.py index aa0896258..b3d86c916 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -3,12 +3,15 @@ from pathlib import Path from shutil import rmtree from tempfile import mkdtemp +from typing import Type from unittest import TestCase +import hdmf_zarr import numpy as np +from hdmf.backends.io import HDMFIO from hdmf.common import DynamicTable from natsort import natsorted -from pynwb import NWBHDF5IO, NWBFile, TimeSeries +from pynwb import NWBFile, TimeSeries from pynwb.behavior import Position, SpatialSeries from pynwb.file import Subject, TimeIntervals @@ -31,10 +34,33 @@ check_timestamps_match_first_dimension, ) from nwbinspector.testing import make_minimal_nwbfile +from nwbinspector.tools import BACKEND_IO_CLASSES from nwbinspector.utils import FilePathType +IO_CLASSES_TO_BACKEND = {v: k for k, v in BACKEND_IO_CLASSES.items()} +EXPECTED_REPORTS_FOLDER_PATH = Path(__file__).parent / "expected_reports" + + +@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=DynamicTable) +def iterable_check_function(table: DynamicTable): + for col in table.columns: + yield InspectorMessage(message=f"Column: {col.name}") + + +def add_big_dataset_no_compression(nwbfile: NWBFile, zarr: bool = False) -> None: + # Zarr 
automatically compresses by default + # So to get a test case that is not compressed, forcibly disable the compressor + if zarr: + time_series = TimeSeries( + name="test_time_series_1", + data=hdmf_zarr.ZarrDataIO(np.zeros(shape=int(1.1e9 / np.dtype("float").itemsize)), compressor=False), + rate=1.0, + unit="", + ) + nwbfile.add_acquisition(time_series) + + return -def add_big_dataset_no_compression(nwbfile: NWBFile): time_series = TimeSeries( name="test_time_series_1", data=np.zeros(shape=int(1.1e9 / np.dtype("float").itemsize)), rate=1.0, unit="" ) @@ -85,8 +111,11 @@ def add_simple_table(nwbfile: NWBFile): nwbfile.add_acquisition(time_intervals) -class TestInspector(TestCase): - """A common helper class for testing the NWBInspector.""" +class TestInspectorOnBackend(TestCase): + """A common helper class for testing the NWBInspector on files of a specific backend (HDF5 or Zarr).""" + + BackendIOClass: Type[HDMFIO] + skip_validate = False # TODO: can be removed once NWBZarrIO validation issues are resolved @staticmethod def assertFileExists(path: FilePathType): @@ -94,37 +123,49 @@ def assertFileExists(path: FilePathType): assert path.exists() def assertLogFileContentsEqual( - self, test_file_path: FilePathType, true_file_path: FilePathType, skip_first_newlines: bool = False + self, + test_file_path: FilePathType, + true_file_path: FilePathType, + skip_first_newlines: bool = True, + skip_last_newlines: bool = True, ): - skip_first_n_lines = 0 with open(file=test_file_path, mode="r") as test_file: - test_file_lines = test_file.readlines() + test_file_lines = [x.rstrip("\n") for x in test_file.readlines()] with open(file=true_file_path, mode="r") as true_file: - true_file_lines = true_file.readlines() + true_file_lines = [x.rstrip("\n") for x in true_file.readlines()] + skip_first_n_lines = 0 if skip_first_newlines: for line_number, test_line in enumerate(test_file_lines): - if test_line != "\n": + if len(test_line) > 8: # Can sometimes be a CLI specific byte such as '\x1b[0m\x1b[0m' skip_first_n_lines = line_number break - else: - skip_first_n_lines = 0 + + skip_last_n_lines = 0 + if skip_last_newlines: + for line_number, test_line in enumerate(test_file_lines[::-1]): + if len(test_line) > 4: # Can sometimes be a CLI specific byte such as '\x1b[0m' + skip_last_n_lines = line_number - 1 # Adjust for negative slicing + break for line_number, test_line in enumerate(test_file_lines): if "Timestamp: " in test_line: # Transform the test file header to match ground true example - test_file_lines[line_number] = "Timestamp: 2022-04-01 13:32:13.756390-04:00\n" - test_file_lines[line_number + 1] = "Platform: Windows-10-10.0.19043-SP0\n" - test_file_lines[line_number + 2] = "NWBInspector version: 0.3.6\n" + test_file_lines[line_number] = "Timestamp: 2022-04-01 13:32:13.756390-04:00" + test_file_lines[line_number + 1] = "Platform: Windows-10-10.0.19043-SP0" + test_file_lines[line_number + 2] = "NWBInspector version: 0.3.6" if ".nwb" in test_line: # Transform temporary testing path and formatted to hardcoded fake path str_loc = test_line.find(".nwb") correction_str = test_line.replace(test_line[5 : str_loc - 8], "./") # noqa: E203 (black) test_file_lines[line_number] = correction_str - self.assertEqual(first=test_file_lines[skip_first_n_lines:-1], second=true_file_lines) + self.assertEqual(first=test_file_lines[skip_first_n_lines : -(1 + skip_last_n_lines)], second=true_file_lines) -class TestInspectorAPI(TestInspector): +class TestInspectorAPIAndCLIHDF5(TestInspectorOnBackend): + BackendIOClass = 
BACKEND_IO_CLASSES["hdf5"] + skip_validate = False + true_report_file_path = EXPECTED_REPORTS_FOLDER_PATH / "true_nwbinspector_default_report_hdf5.txt" maxDiff = None @classmethod @@ -140,7 +181,7 @@ def setUpClass(cls): nwbfiles = list() for j in range(num_nwbfiles): nwbfiles.append(make_minimal_nwbfile()) - add_big_dataset_no_compression(nwbfiles[0]) + add_big_dataset_no_compression(nwbfiles[0], zarr=cls.BackendIOClass is BACKEND_IO_CLASSES["zarr"]) add_regular_timestamps(nwbfiles[0]) add_flipped_data_orientation_to_processing(nwbfiles[0]) add_non_matching_timestamps_dimension(nwbfiles[0]) @@ -149,18 +190,21 @@ def setUpClass(cls): # Third file to be left without violations add_non_matching_timestamps_dimension(nwbfiles[3]) - cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb") for j in range(num_nwbfiles)] + suffix = IO_CLASSES_TO_BACKEND[cls.BackendIOClass] + cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb.{suffix}") for j in range(num_nwbfiles)] cls.nwbfile_paths[3] = str(cls.tempdir / "._testing3.nwb") for nwbfile_path, nwbfile in zip(cls.nwbfile_paths, nwbfiles): - with NWBHDF5IO(path=nwbfile_path, mode="w") as io: + with cls.BackendIOClass(path=nwbfile_path, mode="w") as io: io.write(nwbfile) @classmethod def tearDownClass(cls): - rmtree(cls.tempdir) + rmtree(cls.tempdir, ignore_errors=True) def test_inspect_all(self): - test_results = list(inspect_all(path=self.tempdir, select=[x.__name__ for x in self.checks])) + test_results = list( + inspect_all(path=self.tempdir, select=[x.__name__ for x in self.checks], skip_validate=self.skip_validate) + ) true_results = [ InspectorMessage( message="data is not compressed. Consider enabling compression when writing a dataset.", @@ -294,7 +338,9 @@ def test_inspect_all_parallel(self): self.assertCountEqual(first=test_results, second=true_results) def test_inspect_nwbfile(self): - test_results = list(inspect_nwbfile(nwbfile_path=self.nwbfile_paths[0], checks=self.checks)) + test_results = list( + inspect_nwbfile(nwbfile_path=self.nwbfile_paths[0], checks=self.checks, skip_validate=self.skip_validate) + ) true_results = [ InspectorMessage( message="data is not compressed. 
Consider enabling compression when writing a dataset.", @@ -346,7 +392,10 @@ def test_inspect_nwbfile(self): def test_inspect_nwbfile_importance_threshold_as_importance(self): test_results = list( inspect_nwbfile( - nwbfile_path=self.nwbfile_paths[0], checks=self.checks, importance_threshold=Importance.CRITICAL + nwbfile_path=self.nwbfile_paths[0], + checks=self.checks, + importance_threshold=Importance.CRITICAL, + skip_validate=self.skip_validate, ) ) true_results = [ @@ -376,7 +425,12 @@ def test_inspect_nwbfile_importance_threshold_as_importance(self): def test_inspect_nwbfile_importance_threshold_as_string(self): test_results = list( - inspect_nwbfile(nwbfile_path=self.nwbfile_paths[0], checks=self.checks, importance_threshold="CRITICAL") + inspect_nwbfile( + nwbfile_path=self.nwbfile_paths[0], + checks=self.checks, + importance_threshold="CRITICAL", + skip_validate=self.skip_validate, + ) ) true_results = [ InspectorMessage( @@ -408,12 +462,13 @@ def test_command_line_runs_cli_only(self): os.system( f"nwbinspector {str(self.tempdir)} --overwrite --select check_timestamps_match_first_dimension," "check_data_orientation,check_regular_timestamps,check_small_dataset_compression " - "--modules random,math,datetime" + "--modules random,math,datetime " + "--skip-validate " f"> {console_output_file}" ) self.assertLogFileContentsEqual( test_file_path=console_output_file, - true_file_path=Path(__file__).parent / "true_nwbinspector_default_report.txt", + true_file_path=self.true_report_file_path, skip_first_newlines=True, ) @@ -421,12 +476,13 @@ def test_command_line_runs_cli_only_parallel(self): console_output_file = self.tempdir / "test_console_output_2.txt" os.system( f"nwbinspector {str(self.tempdir)} --n-jobs 2 --overwrite --select check_timestamps_match_first_dimension," - "check_data_orientation,check_regular_timestamps,check_small_dataset_compression" + "check_data_orientation,check_regular_timestamps,check_small_dataset_compression " + "--skip-validate " f"> {console_output_file}" ) self.assertLogFileContentsEqual( test_file_path=console_output_file, - true_file_path=Path(__file__).parent / "true_nwbinspector_default_report.txt", + true_file_path=self.true_report_file_path, skip_first_newlines=True, ) @@ -434,7 +490,8 @@ def test_command_line_saves_report(self): console_output_file = self.tempdir / "test_console_output_3.txt" os.system( f"nwbinspector {str(self.nwbfile_paths[0])} " - f"--report-file-path {self.tempdir / 'test_nwbinspector_report_1.txt'}" + f"--report-file-path {self.tempdir / 'test_nwbinspector_report_1.txt'} " + "--skip-validate " f"> {console_output_file}" ) self.assertFileExists(path=self.tempdir / "test_nwbinspector_report_1.txt") @@ -444,14 +501,15 @@ def test_command_line_organizes_levels(self): os.system( f"nwbinspector {str(self.nwbfile_paths[0])} " f"--report-file-path {self.tempdir / 'test_nwbinspector_report_2.txt'} " - "--levels importance,check_function_name,file_path" + "--levels importance,check_function_name,file_path " + "--skip-validate " f"> {console_output_file}" ) self.assertFileExists(path=self.tempdir / "test_nwbinspector_report_2.txt") def test_command_line_runs_saves_json(self): json_fpath = self.tempdir / "nwbinspector_results.json" - os.system(f"nwbinspector {str(self.nwbfile_paths[0])} --json-file-path {json_fpath}") + os.system(f"nwbinspector {str(self.nwbfile_paths[0])} --json-file-path {json_fpath} " f"--skip-validate ") self.assertFileExists(path=json_fpath) def test_command_line_on_directory_matches_file(self): @@ -459,22 +517,22 @@ def 
test_command_line_on_directory_matches_file(self): os.system( f"nwbinspector {str(self.tempdir)} --overwrite --select check_timestamps_match_first_dimension," "check_data_orientation,check_regular_timestamps,check_small_dataset_compression" - f" --report-file-path {self.tempdir / 'test_nwbinspector_report_3.txt'}" + f" --report-file-path {self.tempdir / 'test_nwbinspector_report_3.txt'} " + "--skip-validate " f"> {console_output_file}" ) self.assertLogFileContentsEqual( test_file_path=self.tempdir / "test_nwbinspector_report_3.txt", - true_file_path=Path(__file__).parent / "true_nwbinspector_default_report.txt", + true_file_path=self.true_report_file_path, skip_first_newlines=True, ) def test_iterable_check_function(self): - @register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=DynamicTable) - def iterable_check_function(table: DynamicTable): - for col in table.columns: - yield InspectorMessage(message=f"Column: {col.name}") - - test_results = list(inspect_nwbfile(nwbfile_path=self.nwbfile_paths[0], select=["iterable_check_function"])) + test_results = list( + inspect_nwbfile( + nwbfile_path=self.nwbfile_paths[0], select=["iterable_check_function"], skip_validate=self.skip_validate + ) + ) true_results = [ InspectorMessage( message="Column: start_time", @@ -496,7 +554,9 @@ def iterable_check_function(table: DynamicTable): self.assertCountEqual(first=test_results, second=true_results) def test_inspect_nwbfile_manual_iteration(self): - generator = inspect_nwbfile(nwbfile_path=self.nwbfile_paths[0], checks=self.checks) + generator = inspect_nwbfile( + nwbfile_path=self.nwbfile_paths[0], checks=self.checks, skip_validate=self.skip_validate + ) message = next(generator) true_result = InspectorMessage( message="data is not compressed. 
Consider enabling compression when writing a dataset.", @@ -511,7 +571,9 @@ def test_inspect_nwbfile_manual_iteration(self): self.assertEqual(message, true_result) def test_inspect_nwbfile_manual_iteration_stop(self): - generator = inspect_nwbfile(nwbfile_path=self.nwbfile_paths[2], checks=self.checks) + generator = inspect_nwbfile( + nwbfile_path=self.nwbfile_paths[2], checks=self.checks, skip_validate=self.skip_validate + ) with self.assertRaises(expected_exception=StopIteration): next(generator) @@ -522,6 +584,7 @@ def test_inspect_nwbfile_dandi_config(self): nwbfile_path=self.nwbfile_paths[0], checks=config_checks, config=load_config(filepath_or_keyword="dandi"), + skip_validate=self.skip_validate, ) ) true_results = [ @@ -581,7 +644,16 @@ def test_inspect_nwbfile_dandi_config(self): self.assertCountEqual(first=test_results, second=true_results) -class TestDANDIConfig(TestInspector): +class TestInspectorAPIAndCLIZarr(TestInspectorAPIAndCLIHDF5): + BackendIOClass = BACKEND_IO_CLASSES["zarr"] + true_report_file_path = EXPECTED_REPORTS_FOLDER_PATH / "true_nwbinspector_default_report_zarr.txt" + skip_validate = True + + +class TestDANDIConfigHDF5(TestInspectorOnBackend): + BackendIOClass = BACKEND_IO_CLASSES["hdf5"] + true_report_file_path = EXPECTED_REPORTS_FOLDER_PATH / "true_nwbinspector_report_with_dandi_config_hdf5.txt" + skip_validate = False maxDiff = None @classmethod @@ -599,14 +671,15 @@ def setUpClass(cls): add_simple_table(nwbfiles[0]) add_flipped_data_orientation_to_acquisition(nwbfiles[1]) - cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb") for j in range(num_nwbfiles)] + suffix = IO_CLASSES_TO_BACKEND[cls.BackendIOClass] + cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb.{suffix}") for j in range(num_nwbfiles)] for nwbfile_path, nwbfile in zip(cls.nwbfile_paths, nwbfiles): - with NWBHDF5IO(path=nwbfile_path, mode="w") as io: + with cls.BackendIOClass(path=nwbfile_path, mode="w") as io: io.write(nwbfile) @classmethod def tearDownClass(cls): - rmtree(cls.tempdir) + rmtree(cls.tempdir, ignore_errors=True) def test_inspect_nwbfile_dandi_config_critical_only_entire_registry(self): test_results = list( @@ -615,6 +688,7 @@ def test_inspect_nwbfile_dandi_config_critical_only_entire_registry(self): checks=available_checks, config=load_config(filepath_or_keyword="dandi"), importance_threshold=Importance.CRITICAL, + skip_validate=self.skip_validate, ) ) true_results = [ @@ -646,6 +720,7 @@ def test_inspect_nwbfile_dandi_config_violation_and_above_entire_registry(self): checks=available_checks, config=load_config(filepath_or_keyword="dandi"), importance_threshold=Importance.BEST_PRACTICE_VIOLATION, + skip_validate=self.skip_validate, ) ) true_results = [ @@ -678,18 +753,27 @@ def test_inspect_nwbfile_dandi_config_critical_only_entire_registry_cli(self): console_output_file_path = self.tempdir / "test_console_output.txt" os.system( - f"nwbinspector {str(self.tempdir)} --overwrite --config dandi --threshold BEST_PRACTICE_VIOLATION" + f"nwbinspector {str(self.tempdir)} --overwrite --config dandi --threshold BEST_PRACTICE_VIOLATION " + f"--skip-validate " f"> {console_output_file_path}" ) self.assertLogFileContentsEqual( test_file_path=console_output_file_path, - true_file_path=Path(__file__).parent / "true_nwbinspector_report_with_dandi_config.txt", + true_file_path=self.true_report_file_path, skip_first_newlines=True, ) -class TestCheckUniqueIdentifiersPass(TestCase): +class TestDANDIConfigZarr(TestDANDIConfigHDF5): + BackendIOClass = BACKEND_IO_CLASSES["zarr"] + 
true_report_file_path = EXPECTED_REPORTS_FOLDER_PATH / "true_nwbinspector_report_with_dandi_config_zarr.txt" + skip_validate = True + + +class TestCheckUniqueIdentifiersPassHDF5(TestCase): + BackendIOClass = BACKEND_IO_CLASSES["hdf5"] + skip_validate = True maxDiff = None @classmethod @@ -700,20 +784,30 @@ def setUpClass(cls): for j in range(num_nwbfiles): unique_id_nwbfiles.append(make_minimal_nwbfile()) - cls.unique_id_nwbfile_paths = [str(cls.tempdir / f"unique_id_testing{j}.nwb") for j in range(num_nwbfiles)] + suffix = IO_CLASSES_TO_BACKEND[cls.BackendIOClass] + cls.unique_id_nwbfile_paths = [ + str(cls.tempdir / f"unique_id_testing{j}.nwb.{suffix}") for j in range(num_nwbfiles) + ] for nwbfile_path, nwbfile in zip(cls.unique_id_nwbfile_paths, unique_id_nwbfiles): - with NWBHDF5IO(path=nwbfile_path, mode="w") as io: + with cls.BackendIOClass(path=nwbfile_path, mode="w") as io: io.write(nwbfile) @classmethod def tearDownClass(cls): - rmtree(cls.tempdir) + rmtree(cls.tempdir, ignore_errors=True) def test_check_unique_identifiers_pass(self): - assert list(inspect_all(path=self.tempdir, select=["check_data_orientation"])) == [] + test_message = list( + inspect_all(path=self.tempdir, select=["check_data_orientation"], skip_validate=self.skip_validate) + ) + expected_message = [] + assert test_message == expected_message -class TestCheckUniqueIdentifiersFail(TestCase): + +class TestCheckUniqueIdentifiersFailHDF5(TestCase): + BackendIOClass = BACKEND_IO_CLASSES["hdf5"] + skip_validate = True maxDiff = None @classmethod @@ -730,19 +824,23 @@ def setUpClass(cls): ) ) + suffix = IO_CLASSES_TO_BACKEND[cls.BackendIOClass] cls.non_unique_id_nwbfile_paths = [ - str(cls.tempdir / f"non_unique_id_testing{j}.nwb") for j in range(num_nwbfiles) + str(cls.tempdir / f"non_unique_id_testing{j}.nwb.{suffix}") for j in range(num_nwbfiles) ] for nwbfile_path, nwbfile in zip(cls.non_unique_id_nwbfile_paths, non_unique_id_nwbfiles): - with NWBHDF5IO(path=nwbfile_path, mode="w") as io: + with cls.BackendIOClass(path=nwbfile_path, mode="w") as io: io.write(nwbfile) @classmethod def tearDownClass(cls): - rmtree(cls.tempdir) + rmtree(cls.tempdir, ignore_errors=True) def test_check_unique_identifiers_fail(self): - assert list(inspect_all(path=self.tempdir, select=["check_data_orientation"])) == [ + test_messages = list( + inspect_all(path=self.tempdir, select=["check_data_orientation"], skip_validate=self.skip_validate) + ) + expected_messages = [ InspectorMessage( message=( "The identifier 'not a unique identifier!' is used across the .nwb files: " @@ -759,6 +857,16 @@ def test_check_unique_identifiers_fail(self): ) ] + assert test_messages == expected_messages + + +class TestCheckUniqueIdentifiersPassZarr(TestCheckUniqueIdentifiersPassHDF5): + BackendIOClass = BACKEND_IO_CLASSES["zarr"] + + +class TestCheckUniqueIdentifiersFailZarr(TestCheckUniqueIdentifiersFailHDF5): + BackendIOClass = BACKEND_IO_CLASSES["zarr"] + def test_dandi_config_in_vitro_injection(): """Test that a subject_id starting with 'protein' excludes meaningless CRITICAL-elevated subject checks."""
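To illustrate the compression-indicator convention the updated container checks rely on, here is a minimal sketch (assuming zarr v2 semantics, where an uncompressed array reports `compressor=None`, mirroring `h5py.Dataset.compression`):

    import numpy as np
    import zarr

    # An uncompressed Zarr array reports compressor=None, just as an uncompressed
    # HDF5 dataset reports compression=None, letting both backends share one check path.
    uncompressed = zarr.array(np.zeros(shape=1000), compressor=None)
    assert uncompressed.compressor is None

    compressed = zarr.array(np.zeros(shape=1000))  # default Blosc compressor
    assert compressed.compressor is not None

This is also why `add_big_dataset_no_compression` writes its Zarr fixture through `hdmf_zarr.ZarrDataIO(..., compressor=False)`: Zarr compresses by default, so the compressor must be disabled explicitly to produce an uncompressed test dataset.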