Merge remote-tracking branch 'origin/main' into backend-indexing

* origin/main: call `np.cross` with 3D vectors only (#8993) Mark `test_use_cftime_false_standard_calendar_in_range` as an expected failure (#8996) Migration of datatree/ops.py -> datatree_ops.py (#8976) avoid a couple of warnings in `polyfit` (#8939)
pydata · May 3, 2024 · 2fd3b8b · 2fd3b8b
2 parents 245c3db + aaa778c
commit 2fd3b8b
Show file tree

Hide file tree

Showing 13 changed files with 175 additions and 43 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -73,6 +73,8 @@ Internal Changes
   ``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`)
   By `Owen Littlejohns <https://github.com/owenlittlejohns>`_ and
   `Tom Nicholas <https://github.com/TomNicholas>`_.
+- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`)
+  By `Matt Savoie <https://github.com/flamingbear>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
 - ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg
   rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods
   consistent with their use of ``dim``. Using the existing kwarg will raise a

diff --git a/pyproject.toml b/pyproject.toml
@@ -330,6 +330,8 @@ filterwarnings = [
   "default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
   "default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
   "default:::xarray.tests.test_strategies",
+  # TODO: remove once we know how to deal with a changed signature in protocols
+  "ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
 ]
 
 log_cli_level = "INFO"

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -5269,7 +5269,7 @@ def differentiate(
         edge_order: Literal[1, 2] = 1,
         datetime_unit: DatetimeUnitOptions = None,
     ) -> Self:
-        """ Differentiate the array with the second order accurate central
+        """Differentiate the array with the second order accurate central
         differences.
 
         .. note::

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -1524,7 +1524,7 @@ def __iter__(self) -> Iterator[Hashable]:
 
     else:
 
-        def __array__(self, dtype=None):
+        def __array__(self, dtype=None, copy=None):
             raise TypeError(
                 "cannot directly convert an xarray.Dataset into a "
                 "numpy array. Instead, create an xarray.DataArray "
@@ -8354,7 +8354,7 @@ def differentiate(
         edge_order: Literal[1, 2] = 1,
         datetime_unit: DatetimeUnitOptions | None = None,
     ) -> Self:
-        """ Differentiate with the second order accurate central
+        """Differentiate with the second order accurate central
         differences.
 
         .. note::
@@ -8937,9 +8937,7 @@ def polyfit(
         lhs = np.vander(x, order)
 
         if rcond is None:
-            rcond = (
-                x.shape[0] * np.core.finfo(x.dtype).eps  # type: ignore[attr-defined]
-            )
+            rcond = x.shape[0] * np.finfo(x.dtype).eps
 
         # Weights:
         if w is not None:

diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
@@ -23,6 +23,11 @@
     check_isomorphic,
     map_over_subtree,
 )
+from xarray.core.datatree_ops import (
+    DataTreeArithmeticMixin,
+    MappedDatasetMethodsMixin,
+    MappedDataWithCoords,
+)
 from xarray.core.datatree_render import RenderDataTree
 from xarray.core.formatting import datatree_repr
 from xarray.core.formatting_html import (
@@ -42,11 +47,6 @@
 )
 from xarray.core.variable import Variable
 from xarray.datatree_.datatree.common import TreeAttrAccessMixin
-from xarray.datatree_.datatree.ops import (
-    DataTreeArithmeticMixin,
-    MappedDatasetMethodsMixin,
-    MappedDataWithCoords,
-)
 
 try:
     from xarray.core.variable import calculate_dimensions
@@ -624,7 +624,7 @@ def __bool__(self) -> bool:
     def __iter__(self) -> Iterator[Hashable]:
         return itertools.chain(self.ds.data_vars, self.children)
 
-    def __array__(self, dtype=None):
+    def __array__(self, dtype=None, copy=None):
         raise TypeError(
             "cannot directly convert a DataTree into a "
             "numpy array. Instead, create an xarray.DataArray "

diff --git a/xarray/core/datatree_mapping.py b/xarray/core/datatree_mapping.py
@@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable:
         Function will not be applied to any nodes without datasets.
     *args : tuple, optional
         Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
-        via .ds .
+        via `.ds`.
     **kwargs : Any
         Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
-        via .ds .
+        via `.ds`.
 
     Returns
     -------

diff --git a/xarray/datatree_/datatree/ops.py → xarray/core/datatree_ops.py b/xarray/datatree_/datatree/ops.py → xarray/core/datatree_ops.py
@@ -1,7 +1,9 @@
+from __future__ import annotations
+
+import re
 import textwrap
 
 from xarray.core.dataset import Dataset
-
 from xarray.core.datatree_mapping import map_over_subtree
 
 """
@@ -12,11 +14,10 @@
 """
 
 
-_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
+_MAPPED_DOCSTRING_ADDENDUM = (
     "This method was copied from xarray.Dataset, but has been altered to "
     "call the method on the Datasets stored in every node of the subtree. "
-    "See the `map_over_subtree` function for more details.",
-    width=117,
+    "See the `map_over_subtree` function for more details."
 )
 
 # TODO equals, broadcast_equals etc.
@@ -173,7 +174,7 @@ def _wrap_then_attach_to_cls(
     target_cls_dict, source_cls, methods_to_set, wrap_func=None
 ):
     """
-    Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
+    Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree).
 
     Result is like having written this in the classes' definition:
     ```
@@ -208,16 +209,62 @@ def method_name(self, *args, **kwargs):
         if wrap_func is map_over_subtree:
             # Add a paragraph to the method's docstring explaining how it's been mapped
             orig_method_docstring = orig_method.__doc__
-            # if orig_method_docstring is not None:
-            #     if "\n" in orig_method_docstring:
-            #         new_method_docstring = orig_method_docstring.replace(
-            #             "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
-            #         )
-            #     else:
-            #         new_method_docstring = (
-            #             orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
-            #         )
-            setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)
+
+            if orig_method_docstring is not None:
+                new_method_docstring = insert_doc_addendum(
+                    orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM
+                )
+                setattr(target_cls_dict[method_name], "__doc__", new_method_docstring)
+
+
+def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None:
+    """Insert addendum after first paragraph or at the end of the docstring.
+
+    There are a number of Dataset's functions that are wrapped. These come from
+    Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin.
+
+    The majority of the docstrings fall into a parseable pattern. Those that
+    don't, just have the addendum appeneded after. None values are returned.
+
+    """
+    if docstring is None:
+        return None
+
+    pattern = re.compile(
+        r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)",
+        re.DOTALL,
+    )
+    capture = re.match(pattern, docstring)
+    if capture is None:
+        ### single line docstring.
+        return (
+            docstring
+            + "\n\n"
+            + textwrap.fill(
+                addendum,
+                subsequent_indent="    ",
+                width=79,
+            )
+        )
+
+    if len(capture.groups()) == 6:
+        return (
+            capture["start"]
+            + capture["paragraph_break"]
+            + capture["whitespace"]
+            + ".. note::\n"
+            + textwrap.fill(
+                addendum,
+                initial_indent=capture["whitespace"] + "    ",
+                subsequent_indent=capture["whitespace"] + "    ",
+                width=79,
+            )
+            + capture["paragraph_break"]
+            + capture["whitespace"]
+            + capture["rest"]
+        )
+    else:
+        return docstring
 
 
 class MappedDatasetMethodsMixin:

diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
@@ -142,6 +142,7 @@ def _importorskip(
     not has_scipy_or_netCDF4, reason="requires scipy or netCDF4"
 )
 has_numpy_array_api, requires_numpy_array_api = _importorskip("numpy", "1.26.0")
+has_numpy_2, requires_numpy_2 = _importorskip("numpy", "2.0.0")
 
 
 def _importorskip_h5netcdf_ros3():

diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py
@@ -149,7 +149,7 @@ def dims(self):
             warnings.warn("warning in test")
             return super().dims
 
-        def __array__(self):
+        def __array__(self, dtype=None, copy=None):
             warnings.warn("warning in test")
             return super().__array__()
 

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -63,6 +63,7 @@
     assert_no_warnings,
     has_dask,
     has_netCDF4,
+    has_numpy_2,
     has_scipy,
     mock,
     network,
@@ -5088,6 +5089,9 @@ def test_use_cftime_true(calendar, units_year) -> None:
 
 @requires_scipy_or_netCDF4
 @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
+@pytest.mark.xfail(
+    has_numpy_2, reason="https://github.com/pandas-dev/pandas/issues/56996"
+)
 def test_use_cftime_false_standard_calendar_in_range(calendar) -> None:
     x = [0, 1]
     time = [0, 720]

diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
@@ -2500,32 +2500,32 @@ def test_polyfit_polyval_integration(
         [
             xr.DataArray([1, 2, 3]),
             xr.DataArray([4, 5, 6]),
-            [1, 2, 3],
-            [4, 5, 6],
+            np.array([1, 2, 3]),
+            np.array([4, 5, 6]),
             "dim_0",
             -1,
         ],
         [
             xr.DataArray([1, 2]),
             xr.DataArray([4, 5, 6]),
-            [1, 2],
-            [4, 5, 6],
+            np.array([1, 2, 0]),
+            np.array([4, 5, 6]),
             "dim_0",
             -1,
         ],
         [
             xr.Variable(dims=["dim_0"], data=[1, 2, 3]),
             xr.Variable(dims=["dim_0"], data=[4, 5, 6]),
-            [1, 2, 3],
-            [4, 5, 6],
+            np.array([1, 2, 3]),
+            np.array([4, 5, 6]),
             "dim_0",
             -1,
         ],
         [
             xr.Variable(dims=["dim_0"], data=[1, 2]),
             xr.Variable(dims=["dim_0"], data=[4, 5, 6]),
-            [1, 2],
-            [4, 5, 6],
+            np.array([1, 2, 0]),
+            np.array([4, 5, 6]),
             "dim_0",
             -1,
         ],
@@ -2564,8 +2564,8 @@ def test_polyfit_polyval_integration(
                 dims=["cartesian"],
                 coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])),
             ),
-            [0, 0, 1],
-            [4, 5, 6],
+            np.array([0, 0, 1]),
+            np.array([4, 5, 6]),
             "cartesian",
             -1,
         ],
@@ -2580,8 +2580,8 @@ def test_polyfit_polyval_integration(
                 dims=["cartesian"],
                 coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])),
             ),
-            [1, 0, 2],
-            [4, 5, 6],
+            np.array([1, 0, 2]),
+            np.array([4, 5, 6]),
             "cartesian",
             -1,
         ],

diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py
@@ -1,10 +1,12 @@
 from copy import copy, deepcopy
+from textwrap import dedent
 
 import numpy as np
 import pytest
 
 import xarray as xr
 from xarray.core.datatree import DataTree
+from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum
 from xarray.core.treenode import NotFoundInTreeError
 from xarray.testing import assert_equal, assert_identical
 from xarray.tests import create_test_data, source_ndarray
@@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree):
         expected = create_test_datatree(modify=lambda ds: np.sin(ds))
         result_tree = np.sin(dt)
         assert_equal(result_tree, expected)
+
+
+class TestDocInsertion:
+    """Tests map_over_subtree docstring injection."""
+
+    def test_standard_doc(self):
+
+        dataset_doc = dedent(
+            """\
+            Manually trigger loading and/or computation of this dataset's data
+                    from disk or a remote source into memory and return this dataset.
+                    Unlike compute, the original dataset is modified and returned.
+
+                    Normally, it should not be necessary to call this method in user code,
+                    because all xarray functions should either work on deferred data or
+                    load data automatically. However, this method can be necessary when
+                    working with many file objects on disk.
+
+                    Parameters
+                    ----------
+                    **kwargs : dict
+                        Additional keyword arguments passed on to ``dask.compute``.
+
+                    See Also
+                    --------
+                    dask.compute"""
+        )
+
+        expected_doc = dedent(
+            """\
+            Manually trigger loading and/or computation of this dataset's data
+                    from disk or a remote source into memory and return this dataset.
+                    Unlike compute, the original dataset is modified and returned.
+
+                    .. note::
+                        This method was copied from xarray.Dataset, but has been altered to
+                        call the method on the Datasets stored in every node of the
+                        subtree. See the `map_over_subtree` function for more details.
+
+                    Normally, it should not be necessary to call this method in user code,
+                    because all xarray functions should either work on deferred data or
+                    load data automatically. However, this method can be necessary when
+                    working with many file objects on disk.
+
+                    Parameters
+                    ----------
+                    **kwargs : dict
+                        Additional keyword arguments passed on to ``dask.compute``.
+
+                    See Also
+                    --------
+                    dask.compute"""
+        )
+
+        wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM)
+
+        assert expected_doc == wrapped_doc
+
+    def test_one_liner(self):
+        mixin_doc = "Same as abs(a)."
+
+        expected_doc = dedent(
+            """\
+            Same as abs(a).
+
+            This method was copied from xarray.Dataset, but has been altered to call the
+                method on the Datasets stored in every node of the subtree. See the
+                `map_over_subtree` function for more details."""
+        )
+
+        actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM)
+        assert expected_doc == actual_doc
+
+    def test_none(self):
+        actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM)
+        assert actual_doc is None