From 245c3db4d7df8910878fb9b0c95b27a2825715f8 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 3 May 2024 08:27:22 -0700 Subject: [PATCH] Enable explicit use of key tuples (instead of *Indexer objects) in indexing adapters and explicitly indexed arrays (#8870) * pass key tuple to indexing adapters and explicitly indexed arrays * update indexing in StackedBytesArray * Update indexing in StackedBytesArray * Add _IndexerKey type to _typing.py * Update indexing in StackedBytesArray * use tuple indexing in test_backend_array_deprecation_warning * Add support for CompatIndexedTuple in explicit indexing adapter This commit updates the `explicit_indexing_adapter` function to accept both `ExplicitIndexer` and the new `CompatIndexedTuple`. The `CompatIndexedTuple` is designed to facilitate the transition towards using raw tuples by carrying additional metadata about the indexing type (basic, vectorized, or outer). * remove unused code * type hint fixes * fix docstrings * fix tests * fix docstrings * Apply suggestions from code review Co-authored-by: Deepak Cherian * update docstrings and pass tuples directly * Some test cleanup * update docstring * use `BasicIndexer` instead of `CompatIndexedTuple` * support explicit indexing with tuples * fix mypy errors * remove unused IndexerMaker * Update LazilyIndexedArray._updated_key to support explicit indexing with tuples --------- Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- xarray/coding/strings.py | 20 +- xarray/coding/variables.py | 6 +- xarray/core/indexing.py | 280 ++++++++++++++++------------ xarray/namedarray/_typing.py | 1 + xarray/tests/__init__.py | 10 - xarray/tests/test_backends.py | 2 +- xarray/tests/test_coding_strings.py | 15 +- xarray/tests/test_dataset.py | 32 ++-- xarray/tests/test_indexing.py | 45 +++-- 9 files changed, 212 insertions(+), 199 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index db95286f6aa..6df92c256b9 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -17,6 +17,7 @@ from xarray.core import indexing from xarray.core.utils import module_available from xarray.core.variable import Variable +from xarray.namedarray._typing import _IndexerKey from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -220,8 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. 
- >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[(slice(None),)] array(b'abc', dtype='|S3') """ @@ -240,7 +240,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype("S" + str(self.array.shape[-1])) + return np.dtype(f"S{str(self.array.shape[-1])}") @property def shape(self) -> tuple[int, ...]: @@ -249,15 +249,17 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): + def _vindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key): + def _oindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): + def __getitem__(self, key: _IndexerKey): + from xarray.core.indexing import BasicIndexer + # require slicing the last dimension completely - key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) - if key.tuple[-1] != slice(None): + indexer = indexing.expanded_indexer(key, self.array.ndim) + if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return _numpy_char_to_bytes(self.array[BasicIndexer(indexer)]) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..98bbbbaeb2c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -99,8 +99,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> NativeEndiannessArray(x).dtype dtype('int16') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer].dtype + >>> NativeEndiannessArray(x)[(slice(None),)].dtype dtype('int16') """ @@ -137,8 +136,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> BoolTypeArray(x).dtype dtype('bool') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer].dtype + >>> BoolTypeArray(x)[(slice(None),)].dtype dtype('bool') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 7d6191883e1..2b8cd202e4e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Callable, overload +from typing import TYPE_CHECKING, Any, Callable, Literal, overload import numpy as np import pandas as pd @@ -36,7 +36,7 @@ from xarray.core.indexes import Index from xarray.core.variable import Variable - from xarray.namedarray._typing import _Shape, duckarray + from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -324,13 +324,13 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key: tuple[Any, ...]): + def __init__(self, key: _IndexerKey): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self) -> tuple[Any, ...]: + def tuple(self) -> _IndexerKey: return self._key def __repr__(self) -> str: @@ -516,30 +516,29 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () def get_duck_array(self): - key = BasicIndexer((slice(None),) * self.ndim) - return self[key] + return self[(slice(None),) * self.ndim] def __array__(self, dtype: np.typing.DTypeLike = None) -> 
np.ndarray: # This is necessary because we apply the indexing key in self.get_duck_array() # Note this is the base class for all lazy indexing classes return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -575,9 +574,9 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key: Any): - key = expanded_indexer(key, self.ndim) - indexer = self.indexer_cls(key) + def __getitem__(self, key: _IndexerKey | slice): + _key = expanded_indexer(key, self.ndim) + indexer = self.indexer_cls(_key) result = apply_indexer(self.array, indexer) @@ -623,8 +622,13 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): self.array = as_indexable(array) self.key = key - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) full_key = [] for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): @@ -673,31 +677,29 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, key: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." 
) - def _oindex_set(self, key: OuterIndexer, value: Any) -> None: - full_key = self._updated_key(key) - self.array.oindex[full_key] = value + def _oindex_set(self, key: _IndexerKey, value: Any) -> None: + full_key = self._updated_key(OuterIndexer(key)) + self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: BasicIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(key) - full_key = self._updated_key(key) - self.array[full_key] = value + def __setitem__(self, key: _IndexerKey, value: Any) -> None: + full_key = self._updated_key(BasicIndexer(key)) + self.array[full_key.tuple] = value def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -756,25 +758,25 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: _IndexerKey): + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _vindex_get(self, indexer: _IndexerKey): + return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) + + def __getitem__(self, indexer: _IndexerKey): - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray - if all(isinstance(ind, integer_types) for ind in indexer.tuple): - key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) + if all(isinstance(ind, integer_types) for ind in indexer): + key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) - return type(self)(self.array, self._updated_key(indexer)) + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) def transpose(self, order): key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." 
@@ -807,29 +809,27 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array[indexer] = value @@ -857,27 +857,25 @@ def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value @@ -1040,29 +1038,63 @@ def explicit_indexing_adapter( return result +class CompatIndexedTuple(tuple): + """ + A tuple subclass used to transition existing backend implementations towards the use of raw tuples + for indexing by carrying additional metadata about the type of indexing being + performed ('basic', 'vectorized', or 'outer'). This class serves as a bridge, allowing + backend arrays that currently expect this metadata to function correctly while + maintaining the outward behavior of a regular tuple. + + This class is particularly useful during the phase where the backend implementations are + not yet capable of directly accepting raw tuples without additional context about + the indexing type. It ensures that these backends can still correctly interpret and + process indexing operations by providing them with the necessary contextual information. 
+ """ + + def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): + obj = super().__new__(cls, iterable) + obj.indexer_type = indexer_type # type: ignore[attr-defined] + return obj + + def __repr__(self): + return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" + + def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] + return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] + return indexable.oindex[CompatIndexedTuple(indexer.tuple, "outer")] else: - return indexable[indexer] + return indexable[CompatIndexedTuple(indexer.tuple, "basic")] def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): - indexable.vindex[indexer] = value + indexable.vindex[indexer.tuple] = value elif isinstance(indexer, OuterIndexer): - indexable.oindex[indexer] = value + indexable.oindex[indexer.tuple] = value else: - indexable[indexer] = value + indexable[indexer.tuple] = value def decompose_indexer( - indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport + indexer: ExplicitIndexer | CompatIndexedTuple, + shape: _Shape, + indexing_support: IndexingSupport, ) -> tuple[ExplicitIndexer, ExplicitIndexer]: + if isinstance(indexer, CompatIndexedTuple): + # recreate the indexer object from the tuple and the type of indexing. + # This is necessary to ensure that the backend array can correctly interpret the indexing operation. + if indexer.indexer_type == "vectorized": # type: ignore[attr-defined] + indexer = VectorizedIndexer(indexer) + elif indexer.indexer_type == "outer": # type: ignore[attr-defined] + indexer = OuterIndexer(indexer) + else: + indexer = BasicIndexer(indexer) if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -1131,10 +1163,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer.tuple] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).vindex[np_indexer] + ... NumpyIndexingAdapter(array).vindex[np_indexer.tuple] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) @@ -1213,10 +1245,10 @@ def _decompose_outer_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = BasicIndexer((slice(0, 3), slice(2, 4))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... array = NumpyIndexingAdapter(array)[backend_indexer.tuple] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).oindex[np_indexer] + ... 
NumpyIndexingAdapter(array).oindex[np_indexer.tuple] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) @@ -1520,25 +1552,28 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_get(self, indexer: _IndexerKey): + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): array = NumpyVIndexAdapter(self.array) - return array[indexer.tuple] + return array[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) return array[key] - def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: + def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1551,21 +1586,24 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, indexer.tuple, value) + self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey | ExplicitIndexer, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) self._safe_setitem(array, key, value) @@ -1594,30 +1632,28 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = indexer.tuple + value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _IndexerKey): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - self.array[indexer.tuple] = value + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1635,38 +1671,35 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): - key = indexer.tuple + def _oindex_get(self, indexer: _IndexerKey): try: - return self.array[key] + return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: VectorizedIndexer): - return self.array.vindex[indexer.tuple] + def _vindex_get(self, indexer: _IndexerKey): + return self.array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _IndexerKey): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple " "array indices to dask yet." 
) - self.array[indexer.tuple] = value + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: - self.array.vindex[indexer.tuple] = value + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + self.array.vindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): return self.array.transpose(order) @@ -1728,13 +1761,14 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - (key,) = key + (_key,) = _key - return key + return _key def _handle_result( self, result: Any @@ -1751,7 +1785,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1759,7 +1793,7 @@ def _oindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1770,7 +1804,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1778,7 +1812,7 @@ def _vindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1789,7 +1823,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: ExplicitIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1797,7 +1831,7 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1862,7 +1896,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1876,7 +1910,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1889,7 +1923,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: ExplicitIndexer): + def __getitem__(self, indexer: _IndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level @@ -1911,7 +1945,7 @@ def _get_array_subset(self) -> np.ndarray: if self.size > threshold: pos = threshold // 2 indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) - subset = self[OuterIndexer((indices,))] + subset = 
self[(indices,)] else: subset = self diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b715973814f..243c2382472 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,7 @@ def dtype(self) -> _DType_co: ... _IndexKey = Union[int, slice, "ellipsis"] _IndexKeys = tuple[Union[_IndexKey], ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] +_IndexerKey = tuple[Any, ...] _AttrsLike = Union[Mapping[Any, Any], None] diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index c202e191293..f2d95ece8fb 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -264,16 +264,6 @@ def __getitem__(self, key): return key -class IndexerMaker: - def __init__(self, indexer_cls): - self._indexer_cls = indexer_cls - - def __getitem__(self, key): - if not isinstance(key, tuple): - key = (key,) - return self._indexer_cls(key) - - def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 89ebdb9fb82..df5b174c0ef 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5811,7 +5811,7 @@ def _getitem(self, key): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - la.vindex[indexer].get_duck_array() + la.vindex[indexer.tuple].get_duck_array() captured = capsys.readouterr() assert len(w) == 1 diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 51f63ea72dd..0feac5b15eb 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -7,9 +7,7 @@ from xarray import Variable from xarray.coding import strings -from xarray.core import indexing from xarray.tests import ( - IndexerMaker, assert_array_equal, assert_identical, requires_dask, @@ -150,10 +148,9 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - B = IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1]]) + assert_array_equal(expected[:1], actual[(slice(1),)]) with pytest.raises(IndexError): - actual[B[:, :2]] + actual[slice(None), slice(2)] def test_StackedBytesArray_scalar() -> None: @@ -168,10 +165,8 @@ def test_StackedBytesArray_scalar() -> None: with pytest.raises(TypeError): len(actual) np.testing.assert_array_equal(expected, actual) - - B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2]] + actual[(slice(2),)] def test_StackedBytesArray_vectorized_indexing() -> None: @@ -179,9 +174,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: stacked = strings.StackedBytesArray(array) expected = np.array([[b"abc", b"def"], [b"def", b"abc"]]) - V = IndexerMaker(indexing.VectorizedIndexer) - indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked.vindex[(np.array([[0, 1], [1, 0]]),)] assert_array_equal(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 301596e032f..75ae4d67574 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -244,7 +244,7 @@ def get_array(self): return self.array def __getitem__(self, key): - return self.array[key.tuple] + return self.array[(key if isinstance(key, tuple) else key.tuple)] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): @@ -5070,28 +5070,26 @@ def test_lazy_load(self) -> None: 
ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - def test_lazy_load_duck_array(self) -> None: + @pytest.mark.parametrize("decode_cf", [True, False]) + def test_lazy_load_duck_array(self, decode_cf) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) - for decode_cf in [True, False]: - ds = open_dataset(store, decode_cf=decode_cf) - with pytest.raises(UnexpectedDataAccess): - ds["var1"].values + ds = open_dataset(store, decode_cf=decode_cf) + with pytest.raises(UnexpectedDataAccess): + ds["var1"].values - # these should not raise UnexpectedDataAccess: - ds.var1.data - ds.isel(time=10) - ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - repr(ds) + # these should not raise UnexpectedDataAccess: + ds.var1.data + ds.isel(time=10) + ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) + repr(ds) - # preserve the duck array type and don't cast to array - assert isinstance(ds["var1"].load().data, DuckArrayWrapper) - assert isinstance( - ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper - ) + # preserve the duck array type and don't cast to array + assert isinstance(ds["var1"].load().data, DuckArrayWrapper) + assert isinstance(ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper) - ds.close() + ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index f019d3c789c..b5da4a75439 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,6 @@ from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import T_Xarray from xarray.tests import ( - IndexerMaker, ReturnItem, assert_array_equal, assert_identical, @@ -20,8 +19,6 @@ requires_dask, ) -B = IndexerMaker(indexing.BasicIndexer) - class TestIndexCallable: def test_getitem(self): @@ -433,7 +430,7 @@ def test_lazily_indexed_array_vindex_setitem(self) -> None: NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): - lazy.vindex[indexer] = 0 + lazy.vindex[indexer.tuple] = 0 @pytest.mark.parametrize( "indexer_class, key, value", @@ -449,10 +446,10 @@ def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) - lazy[indexer] = value + lazy[indexer.tuple] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) - lazy.oindex[indexer] = value + lazy.oindex[indexer.tuple] = value assert_array_equal(original[key], value) @@ -461,16 +458,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:]] = 0 + child[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -478,7 +475,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" class TestMemoryCachedArray: 
@@ -491,7 +488,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -500,13 +497,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" def test_base_explicit_indexer() -> None: @@ -615,7 +612,7 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex, self.data.shape ) np.testing.assert_array_equal( - self.data.vindex[vindex], self.data.vindex[vindex_array] + self.data.vindex[vindex.tuple], self.data.vindex[vindex_array.tuple] ) actual = indexing._arrayize_vectorized_indexer( @@ -731,35 +728,35 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): - expected = indexing_adapter.vindex[indexer] + expected = indexing_adapter.vindex[indexer.tuple] elif indexer_mode.startswith("outer"): - expected = indexing_adapter.oindex[indexer] + expected = indexing_adapter.oindex[indexer.tuple] else: - expected = indexing_adapter[indexer] # Basic indexing + expected = indexing_adapter[indexer.tuple] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): - array = indexing_adapter.vindex[backend_ind] + array = indexing_adapter.vindex[backend_ind.tuple] elif isinstance(backend_ind, indexing.OuterIndexer): - array = indexing_adapter.oindex[backend_ind] + array = indexing_adapter.oindex[backend_ind.tuple] else: - array = indexing_adapter[backend_ind] + array = indexing_adapter[backend_ind.tuple] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): - array = array_indexing_adapter.vindex[np_ind] + array = array_indexing_adapter.vindex[np_ind.tuple] elif isinstance(np_ind, indexing.OuterIndexer): - array = array_indexing_adapter.oindex[np_ind] + array = array_indexing_adapter.oindex[np_ind.tuple] else: - array = array_indexing_adapter[np_ind] + array = array_indexing_adapter[np_ind.tuple] np.testing.assert_array_equal(expected, array) if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) - array = indexing_adapter.vindex[combined_ind] + array = indexing_adapter.vindex[combined_ind.tuple] np.testing.assert_array_equal(expected, array)
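
For reference, a minimal sketch of the calling convention this patch enables: explicitly indexed adapters now accept raw key tuples directly, through __getitem__ for basic indexing and through the .oindex / .vindex properties for outer and vectorized indexing. This mirrors the updated doctests in the hunks above; it is illustrative only and not part of the patch.

    import numpy as np
    from xarray.core import indexing

    adapter = indexing.NumpyIndexingAdapter(np.arange(36).reshape(6, 6))

    # Basic indexing: pass a plain key tuple, no BasicIndexer wrapper needed.
    sub = adapter[(slice(0, 3), slice(2, 4))]

    # Outer (orthogonal) indexing via the .oindex property, again with a tuple.
    outer = adapter.oindex[(np.array([0, 2, 1]), np.array([0, 1, 0]))]

    # Vectorized (pointwise) indexing via the .vindex property.
    vec = adapter.vindex[(np.array([0, 2, 1]), np.array([0, 1, 0]))]

    print(sub.shape, outer.shape, vec.shape)  # (3, 2) (3, 3) (3,)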
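
The transitional CompatIndexedTuple added in xarray/core/indexing.py behaves like an ordinary tuple but also records the indexing mode, so backends that still need that context keep working. A small sketch of its behavior, based on the class definition in the diff above:

    from xarray.core.indexing import CompatIndexedTuple

    key = CompatIndexedTuple((slice(None), 0), indexer_type="outer")

    # It is still a regular tuple as far as equality and unpacking go ...
    assert isinstance(key, tuple)
    assert key == (slice(None), 0)

    # ... but it also carries which kind of indexing it represents, which
    # decompose_indexer() uses to rebuild the matching ExplicitIndexer.
    print(key.indexer_type)  # 'outer'
    print(key)  # CompatIndexedTuple((slice(None, None, None), 0), indexer_type='outer')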
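
The lazy wrappers follow the same convention. A sketch under the assumption that the surrounding LazilyIndexedArray machinery (e.g. its get_duck_array, not fully shown in this diff) is otherwise unchanged:

    import numpy as np
    from xarray.core import indexing

    lazy = indexing.LazilyIndexedArray(np.arange(36).reshape(6, 6))

    # Basic indexing with a raw tuple stays lazy; nothing is materialized yet.
    sub = lazy[(slice(0, 3), slice(2, 4))]
    print(type(sub).__name__)  # LazilyIndexedArray

    # Outer indexing through .oindex, also with a plain tuple.
    outer = lazy.oindex[(np.array([0, 2]), slice(None))]

    # Data is only pulled through the wrapped adapter on materialization.
    print(np.asarray(sub).shape)    # (3, 2)
    print(np.asarray(outer).shape)  # (2, 6)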