Merge branch 'main' into flox-preserve-dtype

* main: (26 commits) Forbid modifying names of DataTree objects with parents (pydata#9494) DAS-2155 - Merge datatree documentation into main docs. (pydata#9033) Make illegal path-like variable names when constructing a DataTree from a Dataset (pydata#9378) Ensure TreeNode doesn't copy in-place (pydata#9482) `open_groups` for zarr backends (pydata#9469) Update pyproject.toml (pydata#9484) New whatsnew section (pydata#9483) Release notes for v2024.09.0 (pydata#9480) Fix `DataTree.coords.__setitem__` by adding `DataTreeCoordinates` class (pydata#9451) Rename DataTree's "ds" and "data" to "dataset" (pydata#9476) Update DataTree repr to indicate inheritance (pydata#9470) Bump pypa/gh-action-pypi-publish in the actions group (pydata#9460) Repo checker (pydata#9450) Add days_in_year and decimal_year to dt accessor (pydata#9105) remove parent argument from DataTree.__init__ (pydata#9465) Fix inheritance in DataTree.copy() (pydata#9457) Implement `DataTree.__delitem__` (pydata#9453) Add ASV for datatree.from_dict (pydata#9459) Make the first argument in DataTree.from_dict positional only (pydata#9446) Fix typos across the code, doc and comments (pydata#9443) ...
dcherian · Sep 15, 2024 · 5c9f291 · 5c9f291
2 parents eebb067 + 8db6bc9
commit 5c9f291
Show file tree

Hide file tree

Showing 150 changed files with 3,342 additions and 4,385 deletions.
diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
@@ -81,8 +81,7 @@ jobs:
           #
           # If dependencies emit warnings we can't do anything about, add ignores to
           # `xarray/tests/__init__.py`.
-          # [MHS, 01/25/2024] Skip datatree_ documentation remove after #8572
-          python -m pytest --doctest-modules xarray --ignore xarray/tests --ignore xarray/datatree_ -Werror
+          python -m pytest --doctest-modules xarray --ignore xarray/tests -Werror
 
   mypy:
     name: Mypy

diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml
@@ -88,7 +88,7 @@ jobs:
           path: dist
       - name: Publish package to TestPyPI
         if: github.event_name == 'push'
-        uses: pypa/gh-action-pypi-publish@v1.10.0
+        uses: pypa/gh-action-pypi-publish@v1.10.1
         with:
           repository_url: https://test.pypi.org/legacy/
           verbose: true
@@ -111,6 +111,6 @@ jobs:
           name: releases
           path: dist
       - name: Publish package to PyPI
-        uses: pypa/gh-action-pypi-publish@v1.10.0
+        uses: pypa/gh-action-pypi-publish@v1.10.1
         with:
           verbose: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 # https://pre-commit.com/
 ci:
     autoupdate_schedule: monthly
-exclude: 'xarray/datatree_.*'
+    autoupdate_commit_msg: 'Update pre-commit hooks'
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.6.0

diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py
@@ -7,8 +7,6 @@
 import pandas as pd
 
 import xarray as xr
-from xarray.backends.api import open_datatree
-from xarray.core.datatree import DataTree
 
 from . import _skip_slow, parameterized, randint, randn, requires_dask
 
@@ -556,7 +554,7 @@ def make_datatree(self, nchildren=10):
             for group in range(self.nchildren)
         }
         dtree = root | nested_tree1 | nested_tree2 | nested_tree3
-        self.dtree = DataTree.from_dict(dtree)
+        self.dtree = xr.DataTree.from_dict(dtree)
 
 
 class IOReadDataTreeNetCDF4(IONestedDataTree):
@@ -574,10 +572,10 @@ def setup(self):
         dtree.to_netcdf(filepath=self.filepath)
 
     def time_load_datatree_netcdf4(self):
-        open_datatree(self.filepath, engine="netcdf4").load()
+        xr.open_datatree(self.filepath, engine="netcdf4").load()
 
     def time_open_datatree_netcdf4(self):
-        open_datatree(self.filepath, engine="netcdf4")
+        xr.open_datatree(self.filepath, engine="netcdf4")
 
 
 class IOWriteNetCDFDask:
@@ -724,7 +722,7 @@ class PerformanceBackend(xr.backends.BackendEntrypoint):
             def open_dataset(
                 self,
                 filename_or_obj: str | os.PathLike | None,
-                drop_variables: tuple[str] = None,
+                drop_variables: tuple[str, ...] = None,
                 *,
                 mask_and_scale=True,
                 decode_times=True,

diff --git a/asv_bench/benchmarks/datatree.py b/asv_bench/benchmarks/datatree.py
@@ -0,0 +1,15 @@
+import xarray as xr
+from xarray.core.datatree import DataTree
+
+
+class Datatree:
+    def setup(self):
+        run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})})
+        self.d_few = {"run1": run1}
+        self.d_many = {f"run{i}": xr.Dataset({"a": 1}) for i in range(100)}
+
+    def time_from_dict_few(self):
+        DataTree.from_dict(self.d_few)
+
+    def time_from_dict_many(self):
+        DataTree.from_dict(self.d_many)
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
@@ -174,7 +174,7 @@ def setup(self, use_cftime, use_flox):
         # GH9426 - deep-copying CFTime object arrays is weirdly slow
         asda = xr.DataArray(time)
         labeled_time = []
-        for year, month in zip(asda.dt.year, asda.dt.month):
+        for year, month in zip(asda.dt.year, asda.dt.month, strict=True):
             labeled_time.append(cftime.datetime(year, month, 1))
 
         self.da = xr.DataArray(

diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -64,7 +64,7 @@ def time_rolling_long(self, func, pandas, use_bottleneck):
     def time_rolling_np(self, window_, min_periods, use_bottleneck):
         with xr.set_options(use_bottleneck=use_bottleneck):
             self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
-                getattr(np, "nansum")
+                np.nansum
             ).load()
 
     @parameterized(

diff --git a/ci/requirements/bare-minimum.yml b/ci/requirements/bare-minimum.yml
@@ -11,6 +11,6 @@ dependencies:
   - pytest-env
   - pytest-xdist
   - pytest-timeout
-  - numpy=1.23
+  - numpy=1.24
   - packaging=23.1
-  - pandas=2.0
+  - pandas=2.1
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
@@ -39,7 +39,8 @@ dependencies:
   - sphinx-copybutton
   - sphinx-design
   - sphinx-inline-tabs
-  - sphinx>=5.0
+  - sphinx>=5.0,<7.0 # https://github.com/executablebooks/sphinx-book-theme/issues/749
+  - sphinxcontrib-srclinks
   - sphinx-remove-toctrees
   - sphinxext-opengraph
   - sphinxext-rediraffe

diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
@@ -9,37 +9,37 @@ dependencies:
   # doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py.
   - python=3.10
   - array-api-strict=1.0  # dependency for testing the array api compat
-  - boto3=1.26
+  - boto3=1.28
   - bottleneck=1.3
-  - cartopy=0.21
+  - cartopy=0.22
   - cftime=1.6
   - coveralls
-  - dask-core=2023.4
-  - distributed=2023.4
+  - dask-core=2023.9
+  - distributed=2023.9
   # Flox > 0.8 has a bug with numbagg versions
   # It will require numbagg > 0.6
   # so we should just skip that series eventually
   # or keep flox pinned for longer than necessary
   - flox=0.7
-  - h5netcdf=1.1
+  - h5netcdf=1.2
   # h5py and hdf5 tend to cause conflicts
   # for e.g. hdf5 1.12 conflicts with h5py=3.1
   # prioritize bumping other packages instead
   - h5py=3.8
   - hdf5=1.12
   - hypothesis
-  - iris=3.4
+  - iris=3.7
   - lxml=4.9  # Optional dep of pydap
   - matplotlib-base=3.7
   - nc-time-axis=1.4
   # netcdf follows a 1.major.minor[.patch] convention
   # (see https://github.com/Unidata/netcdf4-python/issues/1090)
   - netcdf4=1.6.0
-  - numba=0.56
+  - numba=0.57
   - numbagg=0.2.1
-  - numpy=1.23
+  - numpy=1.24
   - packaging=23.1
-  - pandas=2.0
+  - pandas=2.1
   - pint=0.22
   - pip
   - pydap=3.4
@@ -49,9 +49,9 @@ dependencies:
   - pytest-xdist
   - pytest-timeout
   - rasterio=1.3
-  - scipy=1.10
+  - scipy=1.11
   - seaborn=0.12
   - sparse=0.14
   - toolz=0.12
-  - typing_extensions=4.5
-  - zarr=2.14
+  - typing_extensions=4.7
+  - zarr=2.16
diff --git a/design_notes/flexible_indexes_notes.md b/design_notes/flexible_indexes_notes.md
@@ -71,7 +71,7 @@ An `XarrayIndex` subclass must/should/may implement the following properties/met
 - a `data` property to access index's data and map it to coordinate data (see [Section 4](#4-indexvariable))
 - a `__getitem__()` implementation to propagate the index through DataArray/Dataset indexing operations
 - `equals()`, `union()` and `intersection()` methods for data alignment (see [Section 2.6](#26-using-indexes-for-data-alignment))
-- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coodinates))
+- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coordinates))
 - a method that may return a new index and that will be called when one of the corresponding coordinates is dropped from the Dataset/DataArray (multi-coordinate indexes)
 - `encode()`/`decode()` methods that would allow storage-agnostic serialization and fast-path reconstruction of the underlying index object(s) (see [Section 2.8](#28-index-encoding))
 - one or more "non-standard" methods or properties that could be leveraged in Xarray 3rd-party extensions like Dataset/DataArray accessors (see [Section 2.7](#27-using-indexes-for-other-purposes))

diff --git a/design_notes/grouper_objects.md b/design_notes/grouper_objects.md
@@ -166,7 +166,7 @@ where `|` represents chunk boundaries. A simple rechunking to
 ```
 000|111122|3333
 ```
-would make this resampling reduction an embarassingly parallel blockwise problem.
+would make this resampling reduction an embarrassingly parallel blockwise problem.
 
 Similarly consider monthly-mean climatologies for which the month numbers might be
 ```

diff --git a/design_notes/named_array_design_doc.md b/design_notes/named_array_design_doc.md
@@ -258,7 +258,7 @@ Questions:
    Variable.coarsen_reshape
    Variable.rolling_window
 
-   Variable.set_dims # split this into broadcas_to and expand_dims
+   Variable.set_dims # split this into broadcast_to and expand_dims
 
 
 # Reordering/Reshaping