Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into backend-indexing
Browse files Browse the repository at this point in the history
* origin/main:
  call `np.cross` with 3D vectors only (#8993)
  Mark `test_use_cftime_false_standard_calendar_in_range` as an expected failure (#8996)
  Migration of datatree/ops.py -> datatree_ops.py (#8976)
  avoid a couple of warnings in `polyfit` (#8939)
  • Loading branch information
andersy005 committed May 3, 2024
2 parents 245c3db + aaa778c commit 2fd3b8b
Show file tree
Hide file tree
Showing 13 changed files with 175 additions and 43 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ Internal Changes
``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`)
By `Owen Littlejohns <https://github.com/owenlittlejohns>`_ and
`Tom Nicholas <https://github.com/TomNicholas>`_.
- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`)
By `Matt Savoie <https://github.com/flamingbear>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
- ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg
rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods
consistent with their use of ``dim``. Using the existing kwarg will raise a
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ filterwarnings = [
"default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
"default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
"default:::xarray.tests.test_strategies",
# TODO: remove once we know how to deal with a changed signature in protocols
"ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
]

log_cli_level = "INFO"
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -5269,7 +5269,7 @@ def differentiate(
edge_order: Literal[1, 2] = 1,
datetime_unit: DatetimeUnitOptions = None,
) -> Self:
""" Differentiate the array with the second order accurate central
"""Differentiate the array with the second order accurate central
differences.
.. note::
Expand Down
8 changes: 3 additions & 5 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1524,7 +1524,7 @@ def __iter__(self) -> Iterator[Hashable]:

else:

def __array__(self, dtype=None):
def __array__(self, dtype=None, copy=None):
raise TypeError(
"cannot directly convert an xarray.Dataset into a "
"numpy array. Instead, create an xarray.DataArray "
Expand Down Expand Up @@ -8354,7 +8354,7 @@ def differentiate(
edge_order: Literal[1, 2] = 1,
datetime_unit: DatetimeUnitOptions | None = None,
) -> Self:
""" Differentiate with the second order accurate central
"""Differentiate with the second order accurate central
differences.
.. note::
Expand Down Expand Up @@ -8937,9 +8937,7 @@ def polyfit(
lhs = np.vander(x, order)

if rcond is None:
rcond = (
x.shape[0] * np.core.finfo(x.dtype).eps # type: ignore[attr-defined]
)
rcond = x.shape[0] * np.finfo(x.dtype).eps

# Weights:
if w is not None:
Expand Down
12 changes: 6 additions & 6 deletions xarray/core/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
check_isomorphic,
map_over_subtree,
)
from xarray.core.datatree_ops import (
DataTreeArithmeticMixin,
MappedDatasetMethodsMixin,
MappedDataWithCoords,
)
from xarray.core.datatree_render import RenderDataTree
from xarray.core.formatting import datatree_repr
from xarray.core.formatting_html import (
Expand All @@ -42,11 +47,6 @@
)
from xarray.core.variable import Variable
from xarray.datatree_.datatree.common import TreeAttrAccessMixin
from xarray.datatree_.datatree.ops import (
DataTreeArithmeticMixin,
MappedDatasetMethodsMixin,
MappedDataWithCoords,
)

try:
from xarray.core.variable import calculate_dimensions
Expand Down Expand Up @@ -624,7 +624,7 @@ def __bool__(self) -> bool:
def __iter__(self) -> Iterator[Hashable]:
return itertools.chain(self.ds.data_vars, self.children)

def __array__(self, dtype=None):
def __array__(self, dtype=None, copy=None):
raise TypeError(
"cannot directly convert a DataTree into a "
"numpy array. Instead, create an xarray.DataArray "
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/datatree_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable:
Function will not be applied to any nodes without datasets.
*args : tuple, optional
Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
via .ds .
via `.ds`.
**kwargs : Any
Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
via .ds .
via `.ds`.
Returns
-------
Expand Down
77 changes: 62 additions & 15 deletions xarray/datatree_/datatree/ops.py → xarray/core/datatree_ops.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import re
import textwrap

from xarray.core.dataset import Dataset

from xarray.core.datatree_mapping import map_over_subtree

"""
Expand All @@ -12,11 +14,10 @@
"""


_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
_MAPPED_DOCSTRING_ADDENDUM = (
"This method was copied from xarray.Dataset, but has been altered to "
"call the method on the Datasets stored in every node of the subtree. "
"See the `map_over_subtree` function for more details.",
width=117,
"See the `map_over_subtree` function for more details."
)

# TODO equals, broadcast_equals etc.
Expand Down Expand Up @@ -173,7 +174,7 @@ def _wrap_then_attach_to_cls(
target_cls_dict, source_cls, methods_to_set, wrap_func=None
):
"""
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree).
Result is like having written this in the classes' definition:
```
Expand Down Expand Up @@ -208,16 +209,62 @@ def method_name(self, *args, **kwargs):
if wrap_func is map_over_subtree:
# Add a paragraph to the method's docstring explaining how it's been mapped
orig_method_docstring = orig_method.__doc__
# if orig_method_docstring is not None:
# if "\n" in orig_method_docstring:
# new_method_docstring = orig_method_docstring.replace(
# "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
# )
# else:
# new_method_docstring = (
# orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
# )
setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)

if orig_method_docstring is not None:
new_method_docstring = insert_doc_addendum(
orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM
)
setattr(target_cls_dict[method_name], "__doc__", new_method_docstring)


def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None:
"""Insert addendum after first paragraph or at the end of the docstring.
There are a number of Dataset's functions that are wrapped. These come from
Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin.
The majority of the docstrings fall into a parseable pattern. Those that
don't, just have the addendum appeneded after. None values are returned.
"""
if docstring is None:
return None

pattern = re.compile(
r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)",
re.DOTALL,
)
capture = re.match(pattern, docstring)
if capture is None:
### single line docstring.
return (
docstring
+ "\n\n"
+ textwrap.fill(
addendum,
subsequent_indent=" ",
width=79,
)
)

if len(capture.groups()) == 6:
return (
capture["start"]
+ capture["paragraph_break"]
+ capture["whitespace"]
+ ".. note::\n"
+ textwrap.fill(
addendum,
initial_indent=capture["whitespace"] + " ",
subsequent_indent=capture["whitespace"] + " ",
width=79,
)
+ capture["paragraph_break"]
+ capture["whitespace"]
+ capture["rest"]
)
else:
return docstring


class MappedDatasetMethodsMixin:
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def _importorskip(
not has_scipy_or_netCDF4, reason="requires scipy or netCDF4"
)
has_numpy_array_api, requires_numpy_array_api = _importorskip("numpy", "1.26.0")
has_numpy_2, requires_numpy_2 = _importorskip("numpy", "2.0.0")


def _importorskip_h5netcdf_ros3():
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def dims(self):
warnings.warn("warning in test")
return super().dims

def __array__(self):
def __array__(self, dtype=None, copy=None):
warnings.warn("warning in test")
return super().__array__()

Expand Down
4 changes: 4 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
assert_no_warnings,
has_dask,
has_netCDF4,
has_numpy_2,
has_scipy,
mock,
network,
Expand Down Expand Up @@ -5088,6 +5089,9 @@ def test_use_cftime_true(calendar, units_year) -> None:

@requires_scipy_or_netCDF4
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
@pytest.mark.xfail(
has_numpy_2, reason="https://github.com/pandas-dev/pandas/issues/56996"
)
def test_use_cftime_false_standard_calendar_in_range(calendar) -> None:
x = [0, 1]
time = [0, 720]
Expand Down
24 changes: 12 additions & 12 deletions xarray/tests/test_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2500,32 +2500,32 @@ def test_polyfit_polyval_integration(
[
xr.DataArray([1, 2, 3]),
xr.DataArray([4, 5, 6]),
[1, 2, 3],
[4, 5, 6],
np.array([1, 2, 3]),
np.array([4, 5, 6]),
"dim_0",
-1,
],
[
xr.DataArray([1, 2]),
xr.DataArray([4, 5, 6]),
[1, 2],
[4, 5, 6],
np.array([1, 2, 0]),
np.array([4, 5, 6]),
"dim_0",
-1,
],
[
xr.Variable(dims=["dim_0"], data=[1, 2, 3]),
xr.Variable(dims=["dim_0"], data=[4, 5, 6]),
[1, 2, 3],
[4, 5, 6],
np.array([1, 2, 3]),
np.array([4, 5, 6]),
"dim_0",
-1,
],
[
xr.Variable(dims=["dim_0"], data=[1, 2]),
xr.Variable(dims=["dim_0"], data=[4, 5, 6]),
[1, 2],
[4, 5, 6],
np.array([1, 2, 0]),
np.array([4, 5, 6]),
"dim_0",
-1,
],
Expand Down Expand Up @@ -2564,8 +2564,8 @@ def test_polyfit_polyval_integration(
dims=["cartesian"],
coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])),
),
[0, 0, 1],
[4, 5, 6],
np.array([0, 0, 1]),
np.array([4, 5, 6]),
"cartesian",
-1,
],
Expand All @@ -2580,8 +2580,8 @@ def test_polyfit_polyval_integration(
dims=["cartesian"],
coords=dict(cartesian=(["cartesian"], ["x", "y", "z"])),
),
[1, 0, 2],
[4, 5, 6],
np.array([1, 0, 2]),
np.array([4, 5, 6]),
"cartesian",
-1,
],
Expand Down
78 changes: 78 additions & 0 deletions xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from copy import copy, deepcopy
from textwrap import dedent

import numpy as np
import pytest

import xarray as xr
from xarray.core.datatree import DataTree
from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum
from xarray.core.treenode import NotFoundInTreeError
from xarray.testing import assert_equal, assert_identical
from xarray.tests import create_test_data, source_ndarray
Expand Down Expand Up @@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree):
expected = create_test_datatree(modify=lambda ds: np.sin(ds))
result_tree = np.sin(dt)
assert_equal(result_tree, expected)


class TestDocInsertion:
"""Tests map_over_subtree docstring injection."""

def test_standard_doc(self):

dataset_doc = dedent(
"""\
Manually trigger loading and/or computation of this dataset's data
from disk or a remote source into memory and return this dataset.
Unlike compute, the original dataset is modified and returned.
Normally, it should not be necessary to call this method in user code,
because all xarray functions should either work on deferred data or
load data automatically. However, this method can be necessary when
working with many file objects on disk.
Parameters
----------
**kwargs : dict
Additional keyword arguments passed on to ``dask.compute``.
See Also
--------
dask.compute"""
)

expected_doc = dedent(
"""\
Manually trigger loading and/or computation of this dataset's data
from disk or a remote source into memory and return this dataset.
Unlike compute, the original dataset is modified and returned.
.. note::
This method was copied from xarray.Dataset, but has been altered to
call the method on the Datasets stored in every node of the
subtree. See the `map_over_subtree` function for more details.
Normally, it should not be necessary to call this method in user code,
because all xarray functions should either work on deferred data or
load data automatically. However, this method can be necessary when
working with many file objects on disk.
Parameters
----------
**kwargs : dict
Additional keyword arguments passed on to ``dask.compute``.
See Also
--------
dask.compute"""
)

wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM)

assert expected_doc == wrapped_doc

def test_one_liner(self):
mixin_doc = "Same as abs(a)."

expected_doc = dedent(
"""\
Same as abs(a).
This method was copied from xarray.Dataset, but has been altered to call the
method on the Datasets stored in every node of the subtree. See the
`map_over_subtree` function for more details."""
)

actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM)
assert expected_doc == actual_doc

def test_none(self):
actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM)
assert actual_doc is None
Loading

0 comments on commit 2fd3b8b

Please sign in to comment.