Skip to content

Commit

Permalink
setup for zarr
Browse files (browse the repository at this point in the history)
  • Loading branch information
CodyCBakerPhD committed Sep 16, 2024
1 parent c7861f4 commit 7cf9de6
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 63 deletions.
82 changes: 34 additions & 48 deletions src/nwbinspector/_nwb_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@

import pynwb
from natsort import natsorted
from packaging.version import Version
from tqdm import tqdm

from . import available_checks, configure_checks
from . import available_checks
from ._configuration import configure_checks
from ._registration import Importance, InspectorMessage
from .tools._read_nwbfile import read_nwbfile
from .utils import (
FilePathType,
OptionalListOfStrings,
PathType,
calculate_number_of_cpu,
get_package_version,
)


Expand Down Expand Up @@ -143,17 +143,16 @@ def inspect_all(
# Manual identifier check over all files in the folder path
identifiers = defaultdict(list)
for nwbfile_path in nwbfiles:
with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io:
try:
nwbfile = io.read()
identifiers[nwbfile.identifier].append(nwbfile_path)
except Exception as exception:
yield InspectorMessage(
message=traceback.format_exc(),
importance=Importance.ERROR,
check_function_name=f"During io.read() - {type(exception)}: {str(exception)}",
file_path=nwbfile_path,
)
try:
nwbfile = read_nwbfile(nwbfile_path=nwbfile_path)
identifiers[nwbfile.identifier].append(nwbfile_path)
except Exception as exception:
yield InspectorMessage(
message=traceback.format_exc(),
importance=Importance.ERROR,
check_function_name=f"During io.read() - {type(exception)}: {str(exception)}",
file_path=nwbfile_path,
)

if len(identifiers) != len(nwbfiles):
for identifier, nwbfiles_with_identifier in identifiers.items():
Expand Down Expand Up @@ -239,7 +238,7 @@ def inspect_nwbfile(
config : dict
Dictionary valid against our JSON configuration schema.
Can specify a mapping of importance levels and list of check functions whose importance you wish to change.
Typically loaded via json.load from a valid .json file
Typically loaded via `json.load` from a valid .json file.
ignore: list, optional
Names of functions to skip.
select: list, optional
Expand Down Expand Up @@ -269,20 +268,10 @@ def inspect_nwbfile(
filterwarnings(action="ignore", message="No cached namespaces found in .*")
filterwarnings(action="ignore", message="Ignoring cached namespace .*")

if not skip_validate and get_package_version("pynwb") >= Version("2.2.0"):
validation_error_list, _ = pynwb.validate(paths=[nwbfile_path])
for validation_namespace_errors in validation_error_list:
for validation_error in validation_namespace_errors:
yield InspectorMessage(
message=validation_error.reason,
importance=Importance.PYNWB_VALIDATION,
check_function_name=validation_error.name,
location=validation_error.location,
file_path=nwbfile_path,
)
try:
in_memory_nwbfile, io = read_nwbfile(nwbfile_path=nwbfile_path)

with pynwb.NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io:
if not skip_validate and get_package_version("pynwb") < Version("2.2.0"):
if not skip_validate:
validation_errors = pynwb.validate(io=io)
for validation_error in validation_errors:
yield InspectorMessage(
Expand All @@ -293,26 +282,23 @@ def inspect_nwbfile(
file_path=nwbfile_path,
)

try:
in_memory_nwbfile = io.read()

for inspector_message in inspect_nwbfile_object(
nwbfile_object=in_memory_nwbfile,
checks=checks,
config=config,
ignore=ignore,
select=select,
importance_threshold=importance_threshold,
):
inspector_message.file_path = nwbfile_path
yield inspector_message
except Exception as exception:
yield InspectorMessage(
message=traceback.format_exc(),
importance=Importance.ERROR,
check_function_name=f"During io.read() - {type(exception)}: {str(exception)}",
file_path=nwbfile_path,
)
for inspector_message in inspect_nwbfile_object(
nwbfile_object=in_memory_nwbfile,
checks=checks,
config=config,
ignore=ignore,
select=select,
importance_threshold=importance_threshold,
):
inspector_message.file_path = nwbfile_path
yield inspector_message
except Exception as exception:
yield InspectorMessage(
message=traceback.format_exc(),
importance=Importance.ERROR,
check_function_name=f"During io.read() - {type(exception)}: {str(exception)}",
file_path=nwbfile_path,
)


# TODO: deprecate once subject types and dandi schemas have been extended
Expand Down
3 changes: 2 additions & 1 deletion src/nwbinspector/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from ._dandi import get_s3_urls_and_dandi_paths
from ._nwb import all_of_type, get_nwbfile_path_from_internal_object
from ._read_nwbfile import read_nwbfile
from ._read_nwbfile import BACKEND_IO_CLASSES, read_nwbfile

__all__ = [
"BACKEND_IO_CLASSES",
"get_s3_urls_and_dandi_paths",
"all_of_type",
"get_nwbfile_path_from_internal_object",
Expand Down
22 changes: 13 additions & 9 deletions src/nwbinspector/tools/_read_nwbfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
from warnings import filterwarnings

import h5py
from hdmf.backends.io import HDMFIO
from pynwb import NWBHDF5IO, NWBFile

_BACKEND_IO_CLASSES = dict(hdf5=NWBHDF5IO)
BACKEND_IO_CLASSES = dict(hdf5=NWBHDF5IO)

try:
from hdmf_zarr import NWBZarrIO

_BACKEND_IO_CLASSES.update(zarr=NWBZarrIO)
BACKEND_IO_CLASSES.update(zarr=NWBZarrIO)
except ModuleNotFoundError as exception:
if str(exception) != "No module named 'hdmf_zarr'": # not the exception we're looking for, so re-raise
raise exception
Expand Down Expand Up @@ -47,11 +48,11 @@ def _get_backend(path: str, method: Literal["local", "fsspec", "ros3"]):
if method == "fsspec":
fs = _init_fsspec(path=path)
with fs.open(path=path, mode="rb") as file:
for backend_name, backend_class in _BACKEND_IO_CLASSES.items():
for backend_name, backend_class in BACKEND_IO_CLASSES.items():
if backend_class.can_read(path=file):
possible_backends.append(backend_name)
else:
for backend_name, backend_class in _BACKEND_IO_CLASSES.items():
for backend_name, backend_class in BACKEND_IO_CLASSES.items():
if backend_class.can_read(path):
possible_backends.append(backend_name)

Expand All @@ -69,7 +70,7 @@ def read_nwbfile(
nwbfile_path: Union[str, Path],
method: Optional[Literal["local", "fsspec", "ros3"]] = None,
backend: Optional[Literal["hdf5", "zarr"]] = None,
) -> NWBFile:
) -> tuple[NWBFile, HDMFIO]:
"""
Read an NWB file using the specified (or auto-detected) method and specified (or auto-detected) backend.
Expand All @@ -88,7 +89,10 @@ def read_nwbfile(
Returns
-------
pynwb.NWBFile
nwbfile : pynwb.NWBFile
The in-memory NWBFile object.
io : hdmf.backends.io.HDMFIO
The initialized HDMFIO object used to read the file.
"""
nwbfile_path = str(nwbfile_path) # If pathlib.Path, cast to str; if already str, no harm done

Expand All @@ -109,7 +113,7 @@ def read_nwbfile(
)

backend = backend or _get_backend(nwbfile_path, method)
if method == "local" and not _BACKEND_IO_CLASSES[ # Temporary until .can_read() is able to work on streamed bytes
if method == "local" and not BACKEND_IO_CLASSES[ # Temporary until .can_read() is able to work on streamed bytes
backend
].can_read(path=nwbfile_path):
raise IOError(f"The chosen backend ({backend}) is unable to read the file! Please select a different backend.")
Expand All @@ -127,7 +131,7 @@ def read_nwbfile(
io_kwargs.update(path=nwbfile_path)
if method == "ros3":
io_kwargs.update(driver="ros3")
io = _BACKEND_IO_CLASSES[backend](**io_kwargs)
io = BACKEND_IO_CLASSES[backend](**io_kwargs)
nwbfile = io.read()

return nwbfile
return (nwbfile, io)
21 changes: 16 additions & 5 deletions tests/test_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
check_timestamps_match_first_dimension,
)
from nwbinspector.testing import make_minimal_nwbfile
from nwbinspector.tools import BACKEND_IO_CLASSES
from nwbinspector.utils import FilePathType

# Reverse lookup of BACKEND_IO_CLASSES: maps each IO class back to its backend name
# (for example, the class registered under "hdf5" maps back to the string "hdf5").
# The test classes use it to turn their `BackendIOClass` attribute into a file-name suffix.
IO_CLASSES_TO_BACKEND = {v: k for k, v in BACKEND_IO_CLASSES.items()}


def add_big_dataset_no_compression(nwbfile: NWBFile):
time_series = TimeSeries(
Expand Down Expand Up @@ -85,8 +88,10 @@ def add_simple_table(nwbfile: NWBFile):
nwbfile.add_acquisition(time_intervals)


class TestInspector(TestCase):
"""A common helper class for testing the NWBInspector."""
class TestInspectorOnBackend(TestCase):
"""A common helper class for testing the NWBInspector on files of a specific backend (HDF5 or Zarr)."""

BackendIOClass: HDMFIO

@staticmethod
def assertFileExists(path: FilePathType):
Expand Down Expand Up @@ -124,7 +129,8 @@ def assertLogFileContentsEqual(
self.assertEqual(first=test_file_lines[skip_first_n_lines:-1], second=true_file_lines)


class TestInspectorAPI(TestInspector):
class TestInspectorAPIHDF5(TestInspectorOnBackend):
BackendIOClass = BACKEND_IO_CLASSES["hdf5"]
maxDiff = None

@classmethod
Expand All @@ -149,10 +155,11 @@ def setUpClass(cls):
# Third file to be left without violations
add_non_matching_timestamps_dimension(nwbfiles[3])

cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb") for j in range(num_nwbfiles)]
suffix = IO_CLASSES_TO_BACKEND[cls.BackendIOClass]
cls.nwbfile_paths = [str(cls.tempdir / f"testing{j}.nwb.{suffix}") for j in range(num_nwbfiles)]
cls.nwbfile_paths[3] = str(cls.tempdir / f"._testing3.nwb")
for nwbfile_path, nwbfile in zip(cls.nwbfile_paths, nwbfiles):
with NWBHDF5IO(path=nwbfile_path, mode="w") as io:
with cls.BackendIOClass(path=nwbfile_path, mode="w") as io:
io.write(nwbfile)

@classmethod
Expand Down Expand Up @@ -581,6 +588,10 @@ def test_inspect_nwbfile_dandi_config(self):
self.assertCountEqual(first=test_results, second=true_results)


# Zarr variant of the inspector API tests: inherits from TestInspectorAPIHDF5 and
# only overrides `BackendIOClass`, which the inherited `setUpClass` uses to pick
# the file suffix and the IO class that writes the test files.
# NOTE(review): the "zarr" key is only registered in BACKEND_IO_CLASSES when
# `hdmf_zarr` imports successfully, so this lookup would raise KeyError at
# class-definition time without it - confirm the test environment installs it.
# NOTE(review): the "H" in "HZarr" looks like a leftover from "HDF5"; consider
# renaming to TestInspectorAPIZarr.
class TestInspectorAPIHZarr(TestInspectorAPIHDF5):
BackendIOClass = BACKEND_IO_CLASSES["zarr"]


class TestDANDIConfig(TestInspector):
maxDiff = None

Expand Down

0 comments on commit 7cf9de6

Please sign in to comment.