fix minimum versions of dependencies #503

Merged · 10 commits · Aug 30, 2024
29 changes: 29 additions & 0 deletions .github/workflows/ci-workflow.yml
@@ -86,3 +86,32 @@ jobs:
          env_vars: RUNNER_OS,PYTHON_VERSION
          name: codecov-umbrella
          fail_ci_if_error: false

  # ====================================================================================

  min-version-policy:
    name: Minimum Version Policy
    runs-on: "ubuntu-latest"
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch all history for all branches and tags.

      - name: Create conda environment
        uses: mamba-org/setup-micromamba@v1
        with:
          environment-name: min-version-policy
          micromamba-version: 'latest'
          create-args: >-
            python=3.10
            pyyaml
            conda
            python-dateutil
          channels: conda-forge

      - name: minimum versions policy
        run: |
          python ci/min_deps_check.py ci/requirements/min-all-deps.yml
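The job boils down to that one command. A minimal sketch of reproducing it locally from the repository root, assuming an environment that provides the same packages as `create-args` above (pyyaml, conda, python-dateutil):

```python
# Sketch: run the same policy check the CI job runs; assumes the working
# directory is the repository root and the environment provides pyyaml,
# conda, and python-dateutil (as in the workflow's create-args).
import subprocess
import sys

subprocess.run(
    [sys.executable, "ci/min_deps_check.py", "ci/requirements/min-all-deps.yml"],
    check=True,  # propagate a non-zero exit as CalledProcessError
)
```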
33 changes: 25 additions & 8 deletions CHANGELOG.rst
@@ -16,14 +16,31 @@ Breaking changes
   (`#495 <https://github.com/MESMER-group/mesmer/pull/495>`_). By `Victoria Bauer`_.
 - Using Cholesky decomposition for finding covariance localization radius and drawing from the multivariate normal distribution (`#408 <https://github.com/MESMER-group/mesmer/pull/408>`_)
   By `Victoria Bauer`_.
-- The supported versions of some dependencies were changed (`#399 <https://github.com/MESMER-group/mesmer/pull/399>`_, `#405 <https://github.com/MESMER-group/mesmer/pull/405>`_):
-
-  ============ ============= =========
-  Package      Old           New
-  ============ ============= =========
-  regionmask   >=0.8         >=0.9
-  statsmodels  not specified >=0.13
-  ============ ============= =========
+- The supported versions of some dependencies were changed
+  (`#399 <https://github.com/MESMER-group/mesmer/pull/399>`_,
+  `#405 <https://github.com/MESMER-group/mesmer/pull/405>`_, and
+  `#503 <https://github.com/MESMER-group/mesmer/pull/503>`_):
+
+  ================= ============= =========
+  Package           Old           New
+  ================= ============= =========
+  **cartopy**       not specified 0.22
+  **dask**          not specified 2023.8
+  **joblib**        not specified 1.3
+  **netcdf4**       not specified 1.6
+  **numpy**         not specified 1.24
+  **packaging**     not specified 23.1
+  **pandas**        2.0           no change
+  **pooch**         not specified 1.7
+  **properscoring** not specified 0.1
+  **pyproj**        not specified 3.6
+  **regionmask**    0.8           0.10
+  **scikit-learn**  not specified 1.3
+  **scipy**         not specified 1.11
+  **shapely**       not specified 2.0
+  **statsmodels**   not specified 0.14
+  **xarray**        2023.04       2023.7
+  ================= ============= =========

Deprecations
^^^^^^^^^^^^
221 changes: 221 additions & 0 deletions ci/min_deps_check.py
@@ -0,0 +1,221 @@
#!/usr/bin/env python
"""Fetch from conda database all available versions of the xarray dependencies and their
publication date. Compare it against requirements/*-min-all-deps.yml to verify the
policy on obsolete dependencies is being followed. Print a pretty report :)
"""

# min_deps_check.py is copied from xarray:
# https://github.com/pydata/xarray/blob/main/ci/min_deps_check.py
# Used under the terms of xarray's license, see licenses/XARRAY_LICENSE.

import itertools
import sys
from collections.abc import Iterator
from datetime import datetime

import conda.api # type: ignore[import]
import yaml
from dateutil.relativedelta import relativedelta

CHANNELS = ["conda-forge", "defaults"]
IGNORE_DEPS = {
    "black",
    "coveralls",
    "flake8",
    "hypothesis",
    "isort",
    "mypy",
    "pip",
    "pytest-cov",
    "pytest-xdist",
    "pytest",
    "setuptools",
}

POLICY_MONTHS = {"python": 30, "numpy": 18}
POLICY_MONTHS_DEFAULT = 12
POLICY_OVERRIDE: dict[str, tuple[int, int]] = {}
errors = []
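For orientation (an illustration, not part of the script): taking this PR's merge date of Aug 30, 2024 as "today", the policy windows above translate into these cutoff dates:

```python
# Illustration only: cutoff dates implied by the policy windows, taking
# 2024-08-30 (this PR's merge date) as "today".
from datetime import datetime
from dateutil.relativedelta import relativedelta

today = datetime(2024, 8, 30)
print(today - relativedelta(months=12))  # 2023-08-30, default window
print(today - relativedelta(months=18))  # 2023-02-28, numpy
print(today - relativedelta(months=30))  # 2022-02-28, python
```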


def error(msg: str) -> None:
    global errors
    errors.append(msg)
    print("ERROR:", msg)


def warning(msg: str) -> None:
    print("WARNING:", msg)


def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]:
    """Load requirements/py*-min-all-deps.yml

    Yield (package name, major version, minor version, [patch version])
    """
    global errors

    with open(fname) as fh:
        contents = yaml.safe_load(fh)
    for row in contents["dependencies"]:
        if isinstance(row, dict) and list(row) == ["pip"]:
            continue
        pkg, eq, version = row.partition("=")
        if pkg.rstrip("<>") in IGNORE_DEPS:
            continue
        if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
            error("package should be pinned with exact version: " + row)
            continue

        try:
            version_tup = tuple(int(x) for x in version.split("."))
        except ValueError:
            raise ValueError("non-numerical version: " + row)

        if len(version_tup) == 2:
            yield (pkg, *version_tup, None)  # type: ignore[misc]
        elif len(version_tup) == 3:
            yield (pkg, *version_tup)  # type: ignore[misc]
        else:
            raise ValueError("expected major.minor or major.minor.patch: " + row)


def query_conda(pkg: str) -> dict[tuple[int, int], datetime]:
    """Query the conda repository for a specific package

    Return map of {(major version, minor version): publication date}
    """

    def metadata(entry):
        version = entry.version

        time = datetime.fromtimestamp(entry.timestamp)
        major, minor = map(int, version.split(".")[:2])

        return (major, minor), time

    raw_data = conda.api.SubdirData.query_all(pkg, channels=CHANNELS)
    data = sorted(metadata(entry) for entry in raw_data if entry.timestamp != 0)

    release_dates = {
        version: [time for _, time in group if time is not None]
        for version, group in itertools.groupby(data, key=lambda x: x[0])
    }
    out = {version: min(dates) for version, dates in release_dates.items() if dates}

    # Hardcoded fix to work around incorrect dates in conda
    if pkg == "python":
        out.update(
            {
                (2, 7): datetime(2010, 6, 3),
                (3, 5): datetime(2015, 9, 13),
                (3, 6): datetime(2016, 12, 23),
                (3, 7): datetime(2018, 6, 27),
                (3, 8): datetime(2019, 10, 14),
                (3, 9): datetime(2020, 10, 5),
                (3, 10): datetime(2021, 10, 4),
                (3, 11): datetime(2022, 10, 24),
            }
        )

    return out
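The groupby/min step can be seen in isolation; a sketch with made-up timestamps (not real release dates):

```python
# Illustration only, with made-up timestamps: reduce all builds of a minor
# version to the earliest upload date, as the dict comprehension above does.
import itertools
from datetime import datetime

data = sorted(
    [
        ((1, 24), datetime(2023, 6, 1)),   # later rebuild of 1.24
        ((1, 24), datetime(2023, 1, 15)),  # first 1.24 upload
        ((1, 25), datetime(2023, 7, 2)),
    ]
)
earliest = {
    version: min(time for _, time in group)
    for version, group in itertools.groupby(data, key=lambda x: x[0])
}
assert earliest == {(1, 24): datetime(2023, 1, 15), (1, 25): datetime(2023, 7, 2)}
```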


def process_pkg(
    pkg: str, req_major: int, req_minor: int, req_patch: int | None
) -> tuple[str, str, str, str, str, str]:
    """Compare package version from requirements file to available versions in conda.
    Return row to build pandas dataframe:

    - package name
    - major.minor.[patch] version in requirements file
    - publication date of version in requirements file (YYYY-MM-DD)
    - major.minor version suggested by policy
    - publication date of version suggested by policy (YYYY-MM-DD)
    - status ("<", "=", "> (!)")
    """
    print(f"Analyzing {pkg}...")
    versions = query_conda(pkg)

    try:
        req_published = versions[req_major, req_minor]
    except KeyError:
        error("not found in conda: " + pkg)
        return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)"

    policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
    policy_published = datetime.now() - relativedelta(months=policy_months)

    filtered_versions = [
        version
        for version, published in versions.items()
        if published < policy_published
    ]
    policy_major, policy_minor = max(filtered_versions, default=(req_major, req_minor))

    try:
        policy_major, policy_minor = POLICY_OVERRIDE[pkg]
    except KeyError:
        pass
    policy_published_actual = versions[policy_major, policy_minor]

    if (req_major, req_minor) < (policy_major, policy_minor):
        status = "<"
    elif (req_major, req_minor) > (policy_major, policy_minor):
        status = "> (!)"
        delta = relativedelta(datetime.now(), req_published).normalized()
        n_months = delta.years * 12 + delta.months
        warning(
            f"Package is too new: {pkg}={req_major}.{req_minor} was "
            f"published on {req_published:%Y-%m-%d} "
            f"which was {n_months} months ago (policy is {policy_months} months)"
        )
    else:
        status = "="

    if req_patch is not None:
        warning("patch version should not appear in requirements file: " + pkg)
        status += " (w)"

    return (
        pkg,
        fmt_version(req_major, req_minor, req_patch),
        req_published.strftime("%Y-%m-%d"),
        fmt_version(policy_major, policy_minor),
        policy_published_actual.strftime("%Y-%m-%d"),
        status,
    )
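The status column comes down to lexicographic tuple comparison; a sketch with hypothetical version numbers:

```python
# Illustration only: how (major, minor) tuples decide the status column.
req, policy = (1, 24), (1, 26)  # hypothetical pin vs. policy suggestion
if req < policy:
    status = "<"      # pin is older than the policy requires: allowed
elif req > policy:
    status = "> (!)"  # pin is newer than the policy allows: flagged
else:
    status = "="
assert status == "<"
```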


def fmt_version(major: int, minor: int, patch: int | None = None) -> str:
    if patch is None:
        return f"{major}.{minor}"
    else:
        return f"{major}.{minor}.{patch}"


def main(fname) -> None:
    rows = [
        process_pkg(pkg, major, minor, patch)
        for pkg, major, minor, patch in parse_requirements(fname)
    ]

    print("\nPackage           Required             Policy               Status")
    print("----------------- -------------------- -------------------- ------")
    fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}"
    for row in rows:
        print(fmt.format(*row))

    if errors:
        print("\nErrors:")
        print("-------")
        for i, e in enumerate(errors):
            print(f"{i + 1}. {e}")
        sys.exit(1)


if __name__ == "__main__":
    fnames = sys.argv[1:]

    for fname in fnames:
        main(fname)
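For a feel for the report, one formatted row with hypothetical values:

```python
# Illustration only: one report row, using the same format string as main().
fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}"
print(fmt.format("numpy", "1.24", "2023-01-15", "1.24", "2023-01-15", "="))
# numpy             1.24    (2023-01-15) 1.24    (2023-01-15) =
```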
30 changes: 15 additions & 15 deletions ci/requirements/min-all-deps.yml
@@ -6,22 +6,22 @@ channels:

 dependencies:
   - python=3.9
-  - cartopy
-  - dask
-  - joblib
-  - netcdf4
-  - numpy
-  - packaging
+  - cartopy=0.22
+  - dask=2023.8
+  - joblib=1.3
+  - netcdf4=1.6
+  - numpy=1.24
+  - packaging=23.1
   - pandas=2.0
-  - pooch
-  - properscoring
-  - pyproj
-  - regionmask=0.9
-  - scikit-learn
-  - scipy
-  - shapely=1.8 # required by regionmask < 0.10
-  - statsmodels=0.13
-  - xarray=2023.04
+  - pooch=1.7
+  - properscoring=0.1
+  - pyproj=3.6
+  - regionmask=0.10
+  - scikit-learn=1.3
+  - scipy=1.11
+  - shapely=2.0
+  - statsmodels=0.14
+  - xarray=2023.7
   # for testing
   - pytest
   - pytest-cov
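For context (not part of the PR): after `yaml.safe_load`, the file's `dependencies` entry is a plain list of pinned strings, which is exactly what `parse_requirements` iterates over:

```python
# Illustration only: the structure min_deps_check.py reads from the YAML file.
import yaml

snippet = """
dependencies:
  - python=3.9
  - numpy=1.24
"""
assert yaml.safe_load(snippet)["dependencies"] == ["python=3.9", "numpy=1.24"]
```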
2 changes: 1 addition & 1 deletion docs/source/development.rst
@@ -119,7 +119,7 @@ Formatting

 To help us focus on what the code does, not how it looks, we use a couple of automatic formatting tools.
 We use the following tools:
-- `ruff check<https://docs.astral.sh/ruff/>`_ to check and fix small code errors
+- `ruff check <https://docs.astral.sh/ruff/>`_ to check and fix small code errors
 - `black <https://github.com/psf/black>`_ to auto-format the code
 
 These automatically format the code for us and tell us where the errors are.
Expand Down
5 changes: 0 additions & 5 deletions mesmer/core/grid.py
@@ -1,6 +1,5 @@
 import pandas as pd
 import xarray as xr
-from packaging.version import Version
 
 
 def _lon_to_180(lon):
@@ -112,10 +112,6 @@ def stack_lat_lon(
     data = data.stack(dims)
 
     if not multiindex:
-        # there is a bug in xarray v2022.06 (Index refactor)
-        if Version(xr.__version__) == Version("2022.6"):
-            raise TypeError("There is a bug in xarray v2022.06. Please update xarray.")
-
         data = data.reset_index(stack_dim)
 
     if dropna:
24 changes: 12 additions & 12 deletions setup.cfg
@@ -32,20 +32,20 @@ zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.htm
 include_package_data = True
 python_requires = >=3.9
 install_requires =
-    dask[array,distributed]
-    joblib
-    netcdf4
-    numpy
-    packaging
+    dask[array,distributed] >=2023.8
+    joblib >=1.3
+    netcdf4 >=1.6
+    numpy >=1.24
+    packaging >=23.1
     pandas >=2.0
-    pooch
-    properscoring
-    pyproj
+    pooch >=1.7
+    properscoring >=0.1
+    pyproj >=3.6
     regionmask >=0.9
-    scikit-learn # only for the tests
-    scipy
-    statsmodels >=0.13
-    xarray >=2023.04 # because pandas 2 is required
+    scikit-learn >=1.3 # only for the tests
+    scipy >=1.11
+    statsmodels >=0.14
+    xarray >=2023.07
 
 [options.extras_require]
 complete =
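Not part of the PR, but a quick sanity check that an installed environment meets the new floors, using only `packaging` (which mesmer already requires); the three pins below are a subset of the table above:

```python
# Illustration only: verify a few of the new lower bounds at runtime.
from importlib.metadata import version
from packaging.version import Version

floors = {"numpy": "1.24", "pandas": "2.0", "xarray": "2023.07"}
for pkg, minimum in floors.items():
    installed = Version(version(pkg))
    assert installed >= Version(minimum), f"{pkg} {installed} < {minimum}"
```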