Skip to content

Commit

Permalink
Migration of datatree/ops.py -> datatree_ops.py (#8976)
Browse files Browse the repository at this point in the history
* DAS-2065: direct migration of datatree/ops.py -> datatree_ops.py

I considered wedging this into core/ops.py, but the datatree/ops.py stuff is
kind of spread into core/ops.py and generated_aggregations.py.

* DAS-2065: doc tweak

* DAS-2065: Fix leading space in docstrings

These are the only docstrings that have a leading space, and that was causing
problems injecting the map_over_subtree information into the DataTree docstrings.

* DAS-2065: Puts the docstring addendum as second paragraph

This works on most of the docstrings.

The DatasetOpsMixin functions (round, argsort, conj and conjugate) have a
different format, so the addendum gets inserted after the name (which is
non-standard in most docs) but before the description.

* DAS-2065: Change doc search to named captures

just for clarity.

* DAS-2065: Additional update to make the addendum a Note

Just syntactic sugar to make that work

* DAS-2065: Adds tests to doc_addendum

* DAS-2065: Add credits

* DAS-2065: Adds types
  • Loading branch information
flamingbear authored May 2, 2024
1 parent 748bb3a commit f5ae623
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 24 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ Internal Changes
``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`)
By `Owen Littlejohns <https://github.com/owenlittlejohns>`_ and
`Tom Nicholas <https://github.com/TomNicholas>`_.
- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`)
By `Matt Savoie <https://github.com/flamingbear>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
- ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg
rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods
consistent with their use of ``dim``. Using the existing kwarg will raise a
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -5269,7 +5269,7 @@ def differentiate(
edge_order: Literal[1, 2] = 1,
datetime_unit: DatetimeUnitOptions = None,
) -> Self:
""" Differentiate the array with the second order accurate central
"""Differentiate the array with the second order accurate central
differences.
.. note::
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8354,7 +8354,7 @@ def differentiate(
edge_order: Literal[1, 2] = 1,
datetime_unit: DatetimeUnitOptions | None = None,
) -> Self:
""" Differentiate with the second order accurate central
"""Differentiate with the second order accurate central
differences.
.. note::
Expand Down
10 changes: 5 additions & 5 deletions xarray/core/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
check_isomorphic,
map_over_subtree,
)
from xarray.core.datatree_ops import (
DataTreeArithmeticMixin,
MappedDatasetMethodsMixin,
MappedDataWithCoords,
)
from xarray.core.datatree_render import RenderDataTree
from xarray.core.formatting import datatree_repr
from xarray.core.formatting_html import (
Expand All @@ -42,11 +47,6 @@
)
from xarray.core.variable import Variable
from xarray.datatree_.datatree.common import TreeAttrAccessMixin
from xarray.datatree_.datatree.ops import (
DataTreeArithmeticMixin,
MappedDatasetMethodsMixin,
MappedDataWithCoords,
)

try:
from xarray.core.variable import calculate_dimensions
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/datatree_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable:
Function will not be applied to any nodes without datasets.
*args : tuple, optional
Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
via .ds .
via `.ds`.
**kwargs : Any
Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
via .ds .
via `.ds`.
Returns
-------
Expand Down
77 changes: 62 additions & 15 deletions xarray/datatree_/datatree/ops.py → xarray/core/datatree_ops.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import re
import textwrap

from xarray.core.dataset import Dataset

from xarray.core.datatree_mapping import map_over_subtree

"""
Expand All @@ -12,11 +14,10 @@
"""


_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
_MAPPED_DOCSTRING_ADDENDUM = (
"This method was copied from xarray.Dataset, but has been altered to "
"call the method on the Datasets stored in every node of the subtree. "
"See the `map_over_subtree` function for more details.",
width=117,
"See the `map_over_subtree` function for more details."
)

# TODO equals, broadcast_equals etc.
Expand Down Expand Up @@ -173,7 +174,7 @@ def _wrap_then_attach_to_cls(
target_cls_dict, source_cls, methods_to_set, wrap_func=None
):
"""
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree).
Result is like having written this in the classes' definition:
```
Expand Down Expand Up @@ -208,16 +209,62 @@ def method_name(self, *args, **kwargs):
if wrap_func is map_over_subtree:
# Add a paragraph to the method's docstring explaining how it's been mapped
orig_method_docstring = orig_method.__doc__
# if orig_method_docstring is not None:
# if "\n" in orig_method_docstring:
# new_method_docstring = orig_method_docstring.replace(
# "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
# )
# else:
# new_method_docstring = (
# orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
# )
setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)

if orig_method_docstring is not None:
new_method_docstring = insert_doc_addendum(
orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM
)
setattr(target_cls_dict[method_name], "__doc__", new_method_docstring)


# Splits a docstring at its first paragraph break (a blank line). Compiled once
# at import time because insert_doc_addendum is called once per wrapped method.
_DOC_PARAGRAPH_PATTERN = re.compile(
    r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)",
    re.DOTALL,
)


def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None:
    """Insert addendum after first paragraph or at the end of the docstring.

    There are a number of Dataset's functions that are wrapped. These come from
    Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations,
    and DatasetOpsMixin.

    The majority of the docstrings fall into a parseable pattern. Those that
    don't, just have the addendum appended after. None values are returned.

    Parameters
    ----------
    docstring : str or None
        The original docstring. ``None`` is passed through unchanged.
    addendum : str
        Text to insert. It is re-wrapped to a width of 79 columns.

    Returns
    -------
    str or None
        ``None`` if ``docstring`` is ``None``. If the docstring contains a
        blank line, the addendum is inserted as a ``.. note::`` block directly
        after the first paragraph, indented to match the paragraph that
        follows. Otherwise the addendum is appended as a new final paragraph.
    """
    if docstring is None:
        return None

    capture = _DOC_PARAGRAPH_PATTERN.match(docstring)
    if capture is None:
        # No blank line anywhere: a single-paragraph (typically one-line)
        # docstring, so the addendum becomes a new trailing paragraph.
        return (
            docstring
            + "\n\n"
            + textwrap.fill(addendum, subsequent_indent="    ", width=79)
        )

    # Indentation of the paragraph that follows the summary. The directive is
    # aligned with that paragraph and its body sits one level (four spaces)
    # deeper.
    whitespace = capture["whitespace"]
    note_indent = whitespace + "    "
    note_body = textwrap.fill(
        addendum,
        initial_indent=note_indent,
        subsequent_indent=note_indent,
        width=79,
    )

    # Reuse the captured paragraph break so the original blank-line spacing is
    # mirrored on both sides of the inserted note.
    return (
        capture["start"]
        + capture["paragraph_break"]
        + whitespace
        + ".. note::\n"
        + note_body
        + capture["paragraph_break"]
        + whitespace
        + capture["rest"]
    )


class MappedDatasetMethodsMixin:
Expand Down
78 changes: 78 additions & 0 deletions xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from copy import copy, deepcopy
from textwrap import dedent

import numpy as np
import pytest

import xarray as xr
from xarray.core.datatree import DataTree
from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum
from xarray.core.treenode import NotFoundInTreeError
from xarray.testing import assert_equal, assert_identical
from xarray.tests import create_test_data, source_ndarray
Expand Down Expand Up @@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree):
expected = create_test_datatree(modify=lambda ds: np.sin(ds))
result_tree = np.sin(dt)
assert_equal(result_tree, expected)


class TestDocInsertion:
"""Tests map_over_subtree docstring injection."""

def test_standard_doc(self):

dataset_doc = dedent(
"""\
Manually trigger loading and/or computation of this dataset's data
from disk or a remote source into memory and return this dataset.
Unlike compute, the original dataset is modified and returned.
Normally, it should not be necessary to call this method in user code,
because all xarray functions should either work on deferred data or
load data automatically. However, this method can be necessary when
working with many file objects on disk.
Parameters
----------
**kwargs : dict
Additional keyword arguments passed on to ``dask.compute``.
See Also
--------
dask.compute"""
)

expected_doc = dedent(
"""\
Manually trigger loading and/or computation of this dataset's data
from disk or a remote source into memory and return this dataset.
Unlike compute, the original dataset is modified and returned.
.. note::
This method was copied from xarray.Dataset, but has been altered to
call the method on the Datasets stored in every node of the
subtree. See the `map_over_subtree` function for more details.
Normally, it should not be necessary to call this method in user code,
because all xarray functions should either work on deferred data or
load data automatically. However, this method can be necessary when
working with many file objects on disk.
Parameters
----------
**kwargs : dict
Additional keyword arguments passed on to ``dask.compute``.
See Also
--------
dask.compute"""
)

wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM)

assert expected_doc == wrapped_doc

def test_one_liner(self):
mixin_doc = "Same as abs(a)."

expected_doc = dedent(
"""\
Same as abs(a).
This method was copied from xarray.Dataset, but has been altered to call the
method on the Datasets stored in every node of the subtree. See the
`map_over_subtree` function for more details."""
)

actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM)
assert expected_doc == actual_doc

def test_none(self):
actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM)
assert actual_doc is None

0 comments on commit f5ae623

Please sign in to comment.