From c8057e94978ef9ce2260ec8f03cfa396c50c7dc0 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 20 Aug 2024 18:18:15 +0200 Subject: [PATCH 1/6] fix minimum versions of dependencies --- .github/workflows/ci-additional.yml | 44 ++++++ ci/min_deps_check.py | 221 ++++++++++++++++++++++++++++ ci/requirements/min-all-deps.yml | 30 ++-- setup.cfg | 24 +-- 4 files changed, 292 insertions(+), 27 deletions(-) create mode 100644 .github/workflows/ci-additional.yml create mode 100644 ci/min_deps_check.py diff --git a/.github/workflows/ci-additional.yml b/.github/workflows/ci-additional.yml new file mode 100644 index 00000000..e70d479f --- /dev/null +++ b/.github/workflows/ci-additional.yml @@ -0,0 +1,44 @@ +name: CI Additional +on: + push: + branches: + - "*" + pull_request: + branches: + - "*" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 + +jobs: + + min-version-policy: + name: Minimum Version Policy + runs-on: "ubuntu-latest" + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: Create conda environment + uses: mamba-org/setup-micromamba@v1 + with: + environment-name: min-version-policy + micromamba-version: 'latest' + create-args: >- + python=3.10 + pyyaml + conda + python-dateutil + channels: conda-forge + + - name: minimum versions policy + run: | + python ci/min_deps_check.py ci/requirements/min-all-deps.yml diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py new file mode 100644 index 00000000..0d319e65 --- /dev/null +++ b/ci/min_deps_check.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +"""Fetch from conda database all available versions of the xarray dependencies and their +publication date. Compare it against requirements/*-min-all-deps.yml to verify the +policy on obsolete dependencies is being followed. Print a pretty report :) +""" + +# min_deps_check.py is copied from xarray: +# https://github.com/pydata/xarray/blob/main/ci/min_deps_check.py +# Used under the terms of xarray's license, see licenses/XARRAY_LICENSE. + +import itertools +import sys +from collections.abc import Iterator +from datetime import datetime + +import conda.api # type: ignore[import] +import yaml +from dateutil.relativedelta import relativedelta + +CHANNELS = ["conda-forge", "defaults"] +IGNORE_DEPS = { + "black", + "coveralls", + "flake8", + "hypothesis", + "isort", + "mypy", + "pip", + "pytest-cov", + "pytest-xdist", + "pytest", + "setuptools", +} + +POLICY_MONTHS = {"python": 30, "numpy": 18} +POLICY_MONTHS_DEFAULT = 12 +POLICY_OVERRIDE: dict[str, tuple[int, int]] = {} +errors = [] + + +def error(msg: str) -> None: + global errors + errors.append(msg) + print("ERROR:", msg) + + +def warning(msg: str) -> None: + print("WARNING:", msg) + + +def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]: + """Load requirements/py*-min-all-deps.yml + + Yield (package name, major version, minor version, [patch version]) + """ + global errors + + with open(fname) as fh: + contents = yaml.safe_load(fh) + for row in contents["dependencies"]: + if isinstance(row, dict) and list(row) == ["pip"]: + continue + pkg, eq, version = row.partition("=") + if pkg.rstrip("<>") in IGNORE_DEPS: + continue + if pkg.endswith("<") or pkg.endswith(">") or eq != "=": + error("package should be pinned with exact version: " + row) + continue + + try: + version_tup = tuple(int(x) for x in version.split(".")) + except ValueError: + raise ValueError("non-numerical version: " + row) + + if len(version_tup) == 2: + yield (pkg, *version_tup, None) # type: ignore[misc] + elif len(version_tup) == 3: + yield (pkg, *version_tup) # type: ignore[misc] + else: + raise ValueError("expected major.minor or major.minor.patch: " + row) + + +def query_conda(pkg: str) -> dict[tuple[int, int], datetime]: + """Query the conda repository for a specific package + + Return map of {(major version, minor version): publication date} + """ + + def metadata(entry): + version = entry.version + + time = datetime.fromtimestamp(entry.timestamp) + major, minor = map(int, version.split(".")[:2]) + + return (major, minor), time + + raw_data = conda.api.SubdirData.query_all(pkg, channels=CHANNELS) + data = sorted(metadata(entry) for entry in raw_data if entry.timestamp != 0) + + release_dates = { + version: [time for _, time in group if time is not None] + for version, group in itertools.groupby(data, key=lambda x: x[0]) + } + out = {version: min(dates) for version, dates in release_dates.items() if dates} + + # Hardcoded fix to work around incorrect dates in conda + if pkg == "python": + out.update( + { + (2, 7): datetime(2010, 6, 3), + (3, 5): datetime(2015, 9, 13), + (3, 6): datetime(2016, 12, 23), + (3, 7): datetime(2018, 6, 27), + (3, 8): datetime(2019, 10, 14), + (3, 9): datetime(2020, 10, 5), + (3, 10): datetime(2021, 10, 4), + (3, 11): datetime(2022, 10, 24), + } + ) + + return out + + +def process_pkg( + pkg: str, req_major: int, req_minor: int, req_patch: int | None +) -> tuple[str, str, str, str, str, str]: + """Compare package version from requirements file to available versions in conda. + Return row to build pandas dataframe: + + - package name + - major.minor.[patch] version in requirements file + - publication date of version in requirements file (YYYY-MM-DD) + - major.minor version suggested by policy + - publication date of version suggested by policy (YYYY-MM-DD) + - status ("<", "=", "> (!)") + """ + print(f"Analyzing {pkg}...") + versions = query_conda(pkg) + + try: + req_published = versions[req_major, req_minor] + except KeyError: + error("not found in conda: " + pkg) + return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)" + + policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT) + policy_published = datetime.now() - relativedelta(months=policy_months) + + filtered_versions = [ + version + for version, published in versions.items() + if published < policy_published + ] + policy_major, policy_minor = max(filtered_versions, default=(req_major, req_minor)) + + try: + policy_major, policy_minor = POLICY_OVERRIDE[pkg] + except KeyError: + pass + policy_published_actual = versions[policy_major, policy_minor] + + if (req_major, req_minor) < (policy_major, policy_minor): + status = "<" + elif (req_major, req_minor) > (policy_major, policy_minor): + status = "> (!)" + delta = relativedelta(datetime.now(), req_published).normalized() + n_months = delta.years * 12 + delta.months + warning( + f"Package is too new: {pkg}={req_major}.{req_minor} was " + f"published on {req_published:%Y-%m-%d} " + f"which was {n_months} months ago (policy is {policy_months} months)" + ) + else: + status = "=" + + if req_patch is not None: + warning("patch version should not appear in requirements file: " + pkg) + status += " (w)" + + return ( + pkg, + fmt_version(req_major, req_minor, req_patch), + req_published.strftime("%Y-%m-%d"), + fmt_version(policy_major, policy_minor), + policy_published_actual.strftime("%Y-%m-%d"), + status, + ) + + +def fmt_version(major: int, minor: int, patch: int = None) -> str: + if patch is None: + return f"{major}.{minor}" + else: + return f"{major}.{minor}.{patch}" + + +def main(fname) -> None: + rows = [ + process_pkg(pkg, major, minor, patch) + for pkg, major, minor, patch in parse_requirements(fname) + ] + + print("\nPackage Required Policy Status") + print("----------------- -------------------- -------------------- ------") + fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}" + for row in rows: + print(fmt.format(*row)) + + if errors: + print("\nErrors:") + print("-------") + for i, e in enumerate(errors): + print(f"{i + 1}. {e}") + sys.exit(1) + + +if __name__ == "__main__": + fnames = sys.argv[1:] + + for fname in fnames: + main(fname) diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index c44165ef..d396988e 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -6,22 +6,22 @@ channels: dependencies: - python=3.9 - - cartopy - - dask - - joblib - - netcdf4 - - numpy - - packaging + - cartopy=0.22 + - dask=2023.8 + - joblib=1.3 + - netcdf4=1.6 + - numpy=1.24 + - packaging=23.1 - pandas=2.0 - - pooch - - properscoring - - pyproj - - regionmask=0.9 - - scikit-learn - - scipy - - shapely=1.8 # required by regionmask < 0.10 - - statsmodels=0.13 - - xarray=2023.04 + - pooch=1.7 + - properscoring=0.1 + - pyproj=3.6 + - regionmask=0.10 + - scikit-learn=1.3 + - scipy=1.11 + - shapely=2.0 + - statsmodels=0.14 + - xarray=2023.7 # for testing - pytest - pytest-cov diff --git a/setup.cfg b/setup.cfg index 130f88d1..cb855416 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,20 +32,20 @@ zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.htm include_package_data = True python_requires = >=3.9 install_requires = - dask[array,distributed] - joblib - netcdf4 - numpy - packaging + dask[array,distributed] >=2023.8 + joblib >=1.3 + netcdf4 >=1.6 + numpy >=1.24 + packaging >=23.1 pandas >=2.0 - pooch - properscoring - pyproj + pooch >=1.7 + properscoring >=0.1 + pyproj >=3.6 regionmask >=0.9 - scikit-learn # only for the tests - scipy - statsmodels >=0.13 - xarray >=2023.04 # because pandas 2 is required + scikit-learn >=1.3 # only for the tests + scipy >=1.11 + statsmodels >=0.14 + xarray >=2023.07 [options.extras_require] complete = From 2698d870fe0ffdbaf61144633b715377cb439500 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 20 Aug 2024 18:39:11 +0200 Subject: [PATCH 2/6] changelog --- CHANGELOG.rst | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 354c42a5..a9696d8d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,14 +16,31 @@ Breaking changes (`#495 `_). By `Victoria Bauer`_. - Using Cholesky decomposition for finding covariance localization radius and drawing from the multivariate normal distribution (`#408 `_) By `Victoria Bauer`_. -- The supported versions of some dependencies were changed (`#399 `_, `#405 `_): - - ============ ============= ========= - Package Old New - ============ ============= ========= - regionmask >=0.8 >=0.9 - statsmodels not specified >=0.13 - ============ ============= ========= +- The supported versions of some dependencies were changed + (`#399 `_, + `#405 `_, and + `#503 `_): + + ================= ============= ========= + Package Old New + ================= ============= ========= + **cartopy** not specified 0.22 + **dask** not specified 2023.8 + **joblib** not specified 1.3 + **netcdf4** not specified 1.6 + **numpy** not specified 1.24 + **packaging** not specified 23.1 + **pandas** 2.0 no change + **pooch** not specified 1.7 + **properscoring** not specified 0.1 + **pyproj** not specified 3.6 + **regionmask** 0.8 0.10 + **scikit-learn** not specified 1.3 + **scipy** not specified 1.11 + **shapely** not specified 2.0 + **statsmodels** not specified 0.14 + **xarray** 2023.04 2023.7 + ================= ============= ========= Deprecations ^^^^^^^^^^^^ From 4411c13a73813212583c4ad2e5c7b252d6c547c8 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 20 Aug 2024 18:39:24 +0200 Subject: [PATCH 3/6] fix link (unrelated) --- docs/source/development.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/development.rst b/docs/source/development.rst index 8bf9042e..3fe14745 100644 --- a/docs/source/development.rst +++ b/docs/source/development.rst @@ -119,7 +119,7 @@ Formatting To help us focus on what the code does, not how it looks, we use a couple of automatic formatting tools. We use the following tools: -- `ruff check`_ to check and fix small code errors +- `ruff check `_ to check and fix small code errors - `black `_ to auto-format the code These automatically format the code for us and tell use where the errors are. From 0b87d032783d093aee3c299c7eb051bc677f1ebe Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 20 Aug 2024 19:07:04 +0200 Subject: [PATCH 4/6] remove no-longer needed version checks --- mesmer/core/grid.py | 6 ------ tests/unit/test_linear_regression.py | 8 +------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/mesmer/core/grid.py b/mesmer/core/grid.py index e1c6889f..004a04cf 100644 --- a/mesmer/core/grid.py +++ b/mesmer/core/grid.py @@ -1,7 +1,5 @@ import pandas as pd import xarray as xr -from packaging.version import Version - def _lon_to_180(lon): @@ -112,10 +110,6 @@ def stack_lat_lon( data = data.stack(dims) if not multiindex: - # there is a bug in xarray v2022.06 (Index refactor) - if Version(xr.__version__) == Version("2022.6"): - raise TypeError("There is a bug in xarray v2022.06. Please update xarray.") - data = data.reset_index(stack_dim) if dropna: diff --git a/tests/unit/test_linear_regression.py b/tests/unit/test_linear_regression.py index 8ab0d30e..1ec43a0b 100644 --- a/tests/unit/test_linear_regression.py +++ b/tests/unit/test_linear_regression.py @@ -225,13 +225,7 @@ def test_linear_regression_errors(lr_method_or_function): def test_unequal_coords(pred0, pred1, tgt, weights): - # updated error message with the indexing refactor - if Version(xr.__version__) >= Version("2022.06"): - match = "cannot align objects" - else: - match = "indexes along dimension 'time' are not equal" - - with pytest.raises(ValueError, match=match): + with pytest.raises(ValueError, match="cannot align objects"): lr_method_or_function( {"pred0": pred0, "pred1": pred1}, tgt, dim="time", weights=weights ) From 3b26d73c9e8a014ed940248107cc13273c490cf2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Aug 2024 17:07:19 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mesmer/core/grid.py | 1 + tests/unit/test_linear_regression.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mesmer/core/grid.py b/mesmer/core/grid.py index 004a04cf..1c7d11f5 100644 --- a/mesmer/core/grid.py +++ b/mesmer/core/grid.py @@ -1,6 +1,7 @@ import pandas as pd import xarray as xr + def _lon_to_180(lon): with xr.set_options(keep_attrs=True): diff --git a/tests/unit/test_linear_regression.py b/tests/unit/test_linear_regression.py index 1ec43a0b..46bc43e0 100644 --- a/tests/unit/test_linear_regression.py +++ b/tests/unit/test_linear_regression.py @@ -4,7 +4,6 @@ import numpy.testing as npt import pytest import xarray as xr -from packaging.version import Version import mesmer from mesmer.testing import trend_data_1D, trend_data_2D @@ -225,7 +224,7 @@ def test_linear_regression_errors(lr_method_or_function): def test_unequal_coords(pred0, pred1, tgt, weights): - with pytest.raises(ValueError, match="cannot align objects"): + with pytest.raises(ValueError, match="cannot align objects"): lr_method_or_function( {"pred0": pred0, "pred1": pred1}, tgt, dim="time", weights=weights ) From 1f69af0348f52f220e3dba53e528bdae89e13888 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 23 Aug 2024 10:28:52 +0200 Subject: [PATCH 6/6] move ci job --- .github/workflows/ci-additional.yml | 44 ----------------------------- .github/workflows/ci-workflow.yml | 29 +++++++++++++++++++ 2 files changed, 29 insertions(+), 44 deletions(-) delete mode 100644 .github/workflows/ci-additional.yml diff --git a/.github/workflows/ci-additional.yml b/.github/workflows/ci-additional.yml deleted file mode 100644 index e70d479f..00000000 --- a/.github/workflows/ci-additional.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: CI Additional -on: - push: - branches: - - "*" - pull_request: - branches: - - "*" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - FORCE_COLOR: 3 - -jobs: - - min-version-policy: - name: Minimum Version Policy - runs-on: "ubuntu-latest" - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all branches and tags. - - - name: Create conda environment - uses: mamba-org/setup-micromamba@v1 - with: - environment-name: min-version-policy - micromamba-version: 'latest' - create-args: >- - python=3.10 - pyyaml - conda - python-dateutil - channels: conda-forge - - - name: minimum versions policy - run: | - python ci/min_deps_check.py ci/requirements/min-all-deps.yml diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 540f165e..925034c6 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -86,3 +86,32 @@ jobs: env_vars: RUNNER_OS,PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false + + # ==================================================================================== + + min-version-policy: + name: Minimum Version Policy + runs-on: "ubuntu-latest" + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: Create conda environment + uses: mamba-org/setup-micromamba@v1 + with: + environment-name: min-version-policy + micromamba-version: 'latest' + create-args: >- + python=3.10 + pyyaml + conda + python-dateutil + channels: conda-forge + + - name: minimum versions policy + run: | + python ci/min_deps_check.py ci/requirements/min-all-deps.yml