Numpy 2 compatibility (#1499)

scverse · May 17, 2024 · 105f354
1 parent 1461fec
commit 105f354
Show file tree

Hide file tree

Showing 12 changed files with 36 additions and 20 deletions.
diff --git a/docs/release-notes/0.10.8.md b/docs/release-notes/0.10.8.md
@@ -4,7 +4,7 @@
 ```
 
 * Write out `64bit` indptr when appropriate for {func}`~anndata.experimental.concat_on_disk` {pr}`1493` {user}`ilan-gold`
-
+* Support for Numpy 2 {pr}`1499` {user}`flying-sheep`
 
 ```{rubric} Documentation
 ```

diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py
@@ -443,7 +443,7 @@ def _init_as_actual(
                 elif isinstance(X, (ZarrArray, DaskArray)):
                     X = X.astype(dtype)
                 else:  # is np.ndarray or a subclass, convert to true np.ndarray
-                    X = np.array(X, dtype, copy=False)
+                    X = np.asarray(X, dtype)
             # data matrix and shape
             self._X = X
             n_obs, n_vars = X.shape
@@ -1114,7 +1114,7 @@ def _remove_unused_categories(
                 # Reset colors
                 del uns[color_key]
             else:
-                idx = np.where(np.in1d(all_categories, df_sub[k].cat.categories))[0]
+                idx = np.where(np.isin(all_categories, df_sub[k].cat.categories))[0]
                 uns[color_key] = np.array(color_vec)[(idx,)]
 
     def rename_categories(self, key: str, categories: Sequence[Any]):

diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py
@@ -115,7 +115,11 @@ def not_missing(v) -> bool:
 # TODO: Hopefully this will stop being an issue in the future and this code can be removed.
 @singledispatch
 def equal(a, b) -> bool:
-    return np.array_equal(a, asarray(b))
+    a = asarray(a)
+    b = asarray(b)
+    if a.ndim == b.ndim == 0:
+        return bool(a == b)
+    return np.array_equal(a, b)
 
 
 @equal.register(pd.DataFrame)
@@ -549,7 +553,7 @@ def apply(self, el, *, axis, fill_value=None):
 
     def _apply_to_df(self, el: pd.DataFrame, *, axis, fill_value=None):
         if fill_value is None:
-            fill_value = np.NaN
+            fill_value = np.nan
         return el.reindex(self.new_idx, axis=axis, fill_value=fill_value)
 
     def _apply_to_dask_array(self, el: DaskArray, *, axis, fill_value=None):

diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py
@@ -403,7 +403,7 @@ def shape(self) -> tuple[int, int]:
         if shape is None:
             # TODO warn
             shape = self.group.attrs.get("h5sparse_shape")
-        return tuple(shape)
+        return tuple(map(int, shape))
 
     @property
     def value(self) -> ss.spmatrix:

diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py
@@ -331,7 +331,7 @@ def read_dataset(dataset: h5py.Dataset):
         return value
     elif isinstance(value.dtype, str):
         pass
-    elif issubclass(value.dtype.type, np.string_):
+    elif issubclass(value.dtype.type, np.bytes_):
         value = value.astype(str)
         # Backwards compat, old datasets have strings as one element 1d arrays
         if len(value) == 1:

diff --git a/src/anndata/_io/zarr.py b/src/anndata/_io/zarr.py
@@ -108,7 +108,7 @@ def read_dataset(dataset: zarr.Array):
         pass
     elif issubclass(value.dtype.type, np.str_):
         value = value.astype(object)
-    elif issubclass(value.dtype.type, np.string_):
+    elif issubclass(value.dtype.type, np.bytes_):
         value = value.astype(str).astype(object)  # bytestring -> unicode -> str
     elif len(value.dtype.descr) > 1:  # Compound dtype
         # For backwards compat, now strings are written as variable length

diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py
@@ -226,7 +226,7 @@ def _from_fixed_length_strings(value):
             dt_type = dt_type[0]
         # Fixing issue introduced with h5py v2.10.0, see:
         # https://github.com/h5py/h5py/issues/1307
-        if issubclass(np.dtype(dt_type).type, np.string_):
+        if issubclass(np.dtype(dt_type).type, np.bytes_):
             dt_list[1] = f"U{int(dt_type[2:])}"
         elif is_annotated or np.issubdtype(np.dtype(dt_type), np.str_):
             dt_list[1] = "O"  # Assumption that it’s a vlen str

diff --git a/src/anndata/experimental/merge.py b/src/anndata/experimental/merge.py
@@ -5,11 +5,7 @@
 from collections.abc import Collection, Iterable, Mapping, Sequence
 from functools import singledispatch
 from pathlib import Path
-from typing import (
-    Any,
-    Callable,
-    Literal,
-)
+from typing import Any, Callable, Literal
 
 import numpy as np
 import pandas as pd

diff --git a/src/anndata/tests/_helpers.py b/src/anndata/tests/_helpers.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import os
+
+import pytest
+
+xfail_if_numpy2_loompy = pytest.mark.xfail(
+    os.environ.get("DEPENDENCIES_VERSION", "latest") == "pre-release",
+    reason="loompy still uses `np.string_`, removed in `numpy==2.0`",
+    raises=AttributeError,
+)
diff --git a/tests/test_layers.py b/tests/test_layers.py
@@ -9,6 +9,7 @@
 from numba.core.errors import NumbaDeprecationWarning
 
 from anndata import AnnData, read_h5ad, read_loom
+from anndata.tests._helpers import xfail_if_numpy2_loompy
 from anndata.tests.helpers import gen_typed_df_t2_size
 
 X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
@@ -75,13 +76,16 @@ def test_readwrite(backing_h5ad):
     assert (adata.layers["L"] == adata_read.layers["L"]).all()
 
 
+@xfail_if_numpy2_loompy
 @pytest.mark.skipif(find_spec("loompy") is None, reason="loompy not installed")
 def test_readwrite_loom(tmp_path):
     loom_path = tmp_path / "test.loom"
     adata = AnnData(X=X, layers=dict(L=L.copy()))
 
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=NumbaDeprecationWarning)
+        # loompy uses “is” for ints
+        warnings.filterwarnings("ignore", category=SyntaxWarning)
         warnings.filterwarnings(
             "ignore",
             message=r"datetime.datetime.utcnow\(\) is deprecated",

diff --git a/tests/test_obspvarp.py b/tests/test_obspvarp.py
@@ -91,7 +91,7 @@ def test_setting_sparse(adata: AnnData):
     "df,homogenous,dtype",
     [
         (lambda dim: gen_typed_df_t2_size(dim, dim), True, np.object_),
-        (lambda dim: pd.DataFrame(np.random.randn(dim, dim)), False, np.float_),
+        (lambda dim: pd.DataFrame(np.random.randn(dim, dim)), False, np.floating),
     ],
     ids=["heterogeneous", "homogeneous"],
 )

diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py
@@ -20,11 +20,8 @@
 import anndata as ad
 from anndata._io.specs.registry import IORegistryError
 from anndata.compat import DaskArray, SpArray, _read_attr
-from anndata.tests.helpers import (
-    as_dense_dask_array,
-    assert_equal,
-    gen_adata,
-)
+from anndata.tests._helpers import xfail_if_numpy2_loompy
+from anndata.tests.helpers import as_dense_dask_array, assert_equal, gen_adata
 
 if TYPE_CHECKING:
     from os import PathLike
@@ -382,6 +379,7 @@ def test_changed_obs_var_names(tmp_path, diskfmt):
         assert_equal(read, modified, exact=True)
 
 
+@xfail_if_numpy2_loompy
 @pytest.mark.skipif(not find_spec("loompy"), reason="Loompy is not installed")
 @pytest.mark.parametrize("typ", [np.array, csr_matrix])
 @pytest.mark.parametrize("obsm_mapping", [{}, dict(X_composed=["oanno3", "oanno4"])])
@@ -398,6 +396,8 @@ def test_readwrite_loom(typ, obsm_mapping, varm_mapping, tmp_path):
 
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=NumbaDeprecationWarning)
+        # loompy uses “is” for ints
+        warnings.filterwarnings("ignore", category=SyntaxWarning)
         warnings.filterwarnings(
             "ignore",
             message=r"datetime.datetime.utcnow\(\) is deprecated",
@@ -433,6 +433,7 @@ def test_readwrite_loom(typ, obsm_mapping, varm_mapping, tmp_path):
     assert adata.var_names.name == var_dim
 
 
+@xfail_if_numpy2_loompy
 @pytest.mark.skipif(not find_spec("loompy"), reason="Loompy is not installed")
 def test_readloom_deprecations(tmp_path):
     loom_pth = tmp_path / "test.loom"