From b609c6ea2663fc37fefc5c2e798e9544c36ef563 Mon Sep 17 00:00:00 2001 From: "Benjamin T. Schwertfeger" Date: Mon, 20 May 2024 10:04:41 +0200 Subject: [PATCH] Resolve "QDM not working with longer simp length" (#102) --- .../workflows/dependabot_auto_approve.yaml | 39 +++ cmethods/distribution.py | 10 - cmethods/utils.py | 15 +- doc/methods.rst | 4 +- tests/helper.py | 10 +- tests/test_methods.py | 12 +- tests/test_methods_different_input_shape.py | 259 ++++++++++++++++++ tests/test_utils.py | 2 +- tests/test_zarr_dask_compatibility.py | 4 +- 9 files changed, 322 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/dependabot_auto_approve.yaml create mode 100644 tests/test_methods_different_input_shape.py diff --git a/.github/workflows/dependabot_auto_approve.yaml b/.github/workflows/dependabot_auto_approve.yaml new file mode 100644 index 0000000..db5f69e --- /dev/null +++ b/.github/workflows/dependabot_auto_approve.yaml @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2024 Benjamin Thomas Schwertfeger +# GitHub: https://github.com/btschwertfeger +# +# Workflow that approves and merges all pull requests from the dependabot[bot] +# author. +# +# Source (May, 2024): +# - https://blog.somewhatabstract.com/2021/10/11/setting-up-dependabot-with-github-actions-to-approve-and-merge/ + +name: Dependabot auto-merge +on: pull_request_target + +permissions: + pull-requests: write + contents: write + +jobs: + dependabot: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - name: Dependabot metadata + id: dependabot-metadata + uses: dependabot/fetch-metadata@v1.1.1 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + - name: Approve a PR + if: ${{ steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major' }} + run: gh pr review --approve "$PR_URL" + env: + PR_URL: ${{ github.event.pull_request.html_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Enable auto-merge for Dependabot PRs + if: ${{ steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major' }} + run: gh pr merge --auto --squash "$PR_URL" + env: + PR_URL: ${{ github.event.pull_request.html_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/cmethods/distribution.py b/cmethods/distribution.py index ca7565c..2c63856 100644 --- a/cmethods/distribution.py +++ b/cmethods/distribution.py @@ -62,11 +62,6 @@ def quantile_mapping( cdf_obs = get_cdf(obs, xbins) cdf_simh = get_cdf(simh, xbins) - cdf_simh = np.interp( - cdf_simh, - (cdf_simh.min(), cdf_simh.max()), - (cdf_obs.min(), cdf_obs.max()), - ) if kind in ADDITIVE: epsilon = np.interp(simp, xbins, cdf_simh) # Eq. 1 @@ -129,11 +124,6 @@ def detrended_quantile_mapping( cdf_obs = get_cdf(obs, xbins) cdf_simh = get_cdf(simh, xbins) - cdf_simh = np.interp( - cdf_simh, - (cdf_simh.min(), cdf_simh.max()), - (cdf_obs.min(), cdf_obs.max()), - ) # detrended => shift mean of $X_{sim,p}$ to range of $X_{sim,h}$ to adjust extremes res = np.zeros(len(simp.values)) diff --git a/cmethods/utils.py b/cmethods/utils.py index 52073f0..fdca6c8 100644 --- a/cmethods/utils.py +++ b/cmethods/utils.py @@ -149,11 +149,11 @@ def get_pdf( :linenos: :caption: Compute the probability density function :math:`P(x)` - >>> from cmethods import CMethods as cm + >>> from cmethods get_pdf >>> x = [1, 2, 3, 4, 5, 5, 5, 6, 7, 8, 9, 10] >>> xbins = [0, 3, 6, 10] - >>> print(cm.get_pdf(x=x, xbins=xbins)) + >>> print(get_pdf(x=x, xbins=xbins)) [2, 5, 5] """ pdf, _ = np.histogram(x, xbins) @@ -178,17 +178,18 @@ def get_cdf( .. code-block:: python :linenos: - :caption: Compute the cmmulative distribution function :math:`F(x)` + :caption: Compute the cumulative distribution function :math:`F(x)` - >>> from cmethods import CMethods as cm + >>> from cmethods.utils import get_cdf >>> x = [1, 2, 3, 4, 5, 5, 5, 6, 7, 8, 9, 10] >>> xbins = [0, 3, 6, 10] - >>> print(cm.get_cdf(x=x, xbins=xbins)) - [0, 2, 7, 12] + >>> print(get_cdf(x=x, xbins=xbins)) + [0.0, 0.16666667, 0.58333333, 1.] """ pdf, _ = np.histogram(x, xbins) - return np.insert(np.cumsum(pdf), 0, 0.0) + cdf = np.insert(np.cumsum(pdf), 0, 0.0) + return cdf / cdf[-1] def get_inverse_of_cdf( diff --git a/doc/methods.rst b/doc/methods.rst index a6ee684..1e088f0 100644 --- a/doc/methods.rst +++ b/doc/methods.rst @@ -171,7 +171,9 @@ The Delta Method bias correction technique can be applied on stochastic and non-stochastic climate variables to minimize deviations in the mean values between predicted and observed time-series of past and future time periods. -This method requires that the time series can be grouped by ``time.month``. +This method requires that the time series can be grouped by ``time.month`` while +the reference data of the control period must have the same temporal resolution +as the data that is going to be adjusted. Since the multiplicative scaling can result in very high scaling factors, a maximum scaling factor of 10 is set. This can be changed by passing the desired diff --git a/tests/helper.py b/tests/helper.py index 2f1e6ff..ef63b70 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -8,8 +8,6 @@ from __future__ import annotations -from typing import List - import numpy as np import xarray as xr from sklearn.metrics import mean_squared_error @@ -62,7 +60,7 @@ def get_datasets(kind: str) -> tuple[xr.Dataset, xr.Dataset, xr.Dataset, xr.Data ) latitudes = np.arange(23, 27, 1) - def get_hist_temp_for_lat(lat: int) -> List[float]: + def get_hist_temp_for_lat(lat: int) -> list[float]: """Returns a fake interval time series by latitude value""" return 273.15 - ( lat * np.cos(2 * np.pi * historical_time.dayofyear / 365) @@ -71,7 +69,7 @@ def get_hist_temp_for_lat(lat: int) -> List[float]: + 0.1 * (historical_time - historical_time[0]).days / 365 ) - def get_fake_hist_precipitation_data() -> List[float]: + def get_fake_hist_precipitation_data() -> list[float]: """Returns ratio based fake time series""" pr = ( np.cos(2 * np.pi * historical_time.dayofyear / 365) @@ -120,7 +118,7 @@ def get_dataset(data, time, kind: str) -> xr.Dataset: ) obsh = get_dataset(data, historical_time, kind=kind) obsp = get_dataset(data * 1.02, historical_time, kind=kind) - simh = get_dataset(data * 0.98, historical_time, kind=kind) - simp = get_dataset(data * 0.09, future_time, kind=kind) + simh = get_dataset(data * 0.95, historical_time, kind=kind) + simp = get_dataset(data * 0.965, future_time, kind=kind) return obsh, obsp, simh, simp diff --git a/tests/test_methods.py b/tests/test_methods.py index 315c09c..315e823 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -27,9 +27,9 @@ ("method", "kind"), [ ("linear_scaling", "+"), + ("linear_scaling", "*"), ("variance_scaling", "+"), ("delta_method", "+"), - ("linear_scaling", "*"), ("delta_method", "*"), ], ) @@ -65,9 +65,9 @@ def test_1d_scaling( ("method", "kind"), [ ("linear_scaling", "+"), + ("linear_scaling", "*"), ("variance_scaling", "+"), ("delta_method", "+"), - ("linear_scaling", "*"), ("delta_method", "*"), ], ) @@ -111,8 +111,8 @@ def test_3d_scaling( ("method", "kind"), [ ("linear_scaling", "+"), - ("variance_scaling", "+"), ("linear_scaling", "*"), + ("variance_scaling", "+"), ], ) def test_3d_scaling_different_time_span( @@ -160,8 +160,8 @@ def test_3d_scaling_different_time_span( ("method", "kind"), [ ("quantile_mapping", "+"), - ("quantile_delta_mapping", "+"), ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), ("quantile_delta_mapping", "*"), ], ) @@ -192,8 +192,8 @@ def test_1d_distribution( ("method", "kind"), [ ("quantile_mapping", "+"), - ("quantile_delta_mapping", "+"), ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), ("quantile_delta_mapping", "*"), ], ) @@ -224,8 +224,8 @@ def test_3d_distribution( ("method", "kind"), [ ("quantile_mapping", "+"), - ("quantile_delta_mapping", "+"), ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), ("quantile_delta_mapping", "*"), ], ) diff --git a/tests/test_methods_different_input_shape.py b/tests/test_methods_different_input_shape.py new file mode 100644 index 0000000..cf7aec7 --- /dev/null +++ b/tests/test_methods_different_input_shape.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2024 Benjamin Thomas Schwertfeger +# GitHub: https://github.com/btschwertfeger +# + +""" +Module implementing the unit tests that check if the input data sets can have +different shapes. + +TODO: Remove the copy-paste stuff here. That could be done way simpler. +""" + +from __future__ import annotations + +import pytest + +from cmethods import adjust +from cmethods.types import XRData_t + +from .helper import is_1d_rmse_better + +N_QUANTILES = 100 + + +@pytest.mark.parametrize( + ("method", "kind"), + [ + ("linear_scaling", "+"), + ("linear_scaling", "*"), + ("variance_scaling", "+"), + ], +) +def test_1d_scaling_obs_shorter( + datasets: dict, + method: str, + kind: str, +) -> None: + obsh: XRData_t = datasets[kind]["obsh"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0] + simh: XRData_t = datasets[kind]["simh"][:, 0, 0] + simp: XRData_t = datasets[kind]["simp"][:, 0, 0] + + # not group + result: XRData_t = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + input_core_dims={"obs": "t_time", "simh": "time", "simp": "time"}, + ) + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + # grouped + result = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + group={"obs": "t_time.month", "simh": "time.month", "simp": "time.month"}, + input_core_dims={"obs": "t_time", "simh": "time", "simp": "time"}, + ) + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + +@pytest.mark.parametrize( + ("method", "kind"), + [ + ("linear_scaling", "+"), + ("linear_scaling", "*"), + ("delta_method", "+"), + ("delta_method", "*"), + ("variance_scaling", "+"), + ], +) +def test_1d_scaling_simh_shorter( + datasets: dict, + method: str, + kind: str, +) -> None: + obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0] + obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0] + simh: XRData_t = datasets[kind]["simh"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + simp: XRData_t = datasets[kind]["simp"][:, 0, 0] + + # not group + result: XRData_t = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + input_core_dims={"obs": "time", "simh": "t_time", "simp": "time"}, + ) + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + # grouped + result = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + group={"obs": "time.month", "simh": "t_time.month", "simp": "time.month"}, + input_core_dims={"obs": "time", "simh": "t_time", "simp": "time"}, + ) + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + +@pytest.mark.parametrize( + ("method", "kind"), + [ + ("linear_scaling", "+"), + ("linear_scaling", "*"), + ("variance_scaling", "+"), + ], +) +def test_1d_scaling_simp_shorter( + datasets: dict, + method: str, + kind: str, +) -> None: + obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0] + obsp: XRData_t = datasets[kind]["obsp"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + simh: XRData_t = datasets[kind]["simh"][:, 0, 0] + simp: XRData_t = datasets[kind]["simp"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + + # not group + result: XRData_t = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + input_core_dims={"obs": "time", "simh": "time", "simp": "t_time"}, + ) + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + # grouped + result = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + group={"obs": "time.month", "simh": "time.month", "simp": "t_time.month"}, + input_core_dims={"obs": "time", "simh": "time", "simp": "t_time"}, + ) + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + ("method", "kind"), + [ + ("quantile_mapping", "+"), + ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), + ("quantile_delta_mapping", "*"), + ], +) +def test_1d_distribution_obs_shorter( + datasets: dict, + method: str, + kind: str, +) -> None: + obsh: XRData_t = datasets[kind]["obsh"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0] + simh: XRData_t = datasets[kind]["simh"][:, 0, 0] + simp: XRData_t = datasets[kind]["simp"][:, 0, 0] + + result: XRData_t = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + n_quantiles=N_QUANTILES, + input_core_dims={"obs": "t_time", "simh": "time", "simp": "time"}, + ) + + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + +@pytest.mark.parametrize( + ("method", "kind"), + [ + ("quantile_mapping", "+"), + ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), + ("quantile_delta_mapping", "*"), + ], +) +def test_1d_distribution_simh_shorter( + datasets: dict, + method: str, + kind: str, +) -> None: + obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0] + obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0] + simh: XRData_t = datasets[kind]["simh"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + simp: XRData_t = datasets[kind]["simp"][:, 0, 0] + + result: XRData_t = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + n_quantiles=N_QUANTILES, + input_core_dims={"obs": "time", "simh": "t_time", "simp": "time"}, + ) + + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) + + +@pytest.mark.parametrize( + ("method", "kind"), + [ + ("quantile_mapping", "+"), + ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), + ("quantile_delta_mapping", "*"), + ], +) +def test_1d_distribution_simp_shorter( + datasets: dict, + method: str, + kind: str, +) -> None: + obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0] + obsp: XRData_t = datasets[kind]["obsp"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + simh: XRData_t = datasets[kind]["simh"][:, 0, 0] + simp: XRData_t = datasets[kind]["simp"][:7300, 0, 0].rename({"time": "t_time"}) # 20/30 years + + result: XRData_t = adjust( + method=method, + obs=obsh, + simh=simh, + simp=simp, + kind=kind, + n_quantiles=N_QUANTILES, + input_core_dims={"obs": "time", "simh": "time", "simp": "t_time"}, + ) + + assert isinstance(result, XRData_t) + assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp) diff --git a/tests/test_utils.py b/tests/test_utils.py index 6212aca..822bfc0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -61,7 +61,7 @@ def test_quantile_mapping_all_nan() -> None: def test_quantile_delta_mapping_single_nan() -> None: obs, simh, simp = list(np.arange(10)), list(np.arange(10)), list(np.arange(10)) obs[0] = np.nan - expected = np.array([0.0, 1.9, 2.9, 3.9, 4.9, 5.9, 6.9, 7.9, 8.9, 9.0]) + expected = np.array([0.0, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9.0]) res = quantile_delta_mapping(obs=obs, simh=simh, simp=simp, n_quantiles=5) assert np.allclose(res, expected) diff --git a/tests/test_zarr_dask_compatibility.py b/tests/test_zarr_dask_compatibility.py index 7e5fbf5..5f691df 100644 --- a/tests/test_zarr_dask_compatibility.py +++ b/tests/test_zarr_dask_compatibility.py @@ -22,9 +22,9 @@ ("method", "kind"), [ ("linear_scaling", "+"), + ("linear_scaling", "*"), ("variance_scaling", "+"), ("delta_method", "+"), - ("linear_scaling", "*"), ("delta_method", "*"), ], ) @@ -67,8 +67,8 @@ def test_3d_scaling_zarr( ("method", "kind"), [ ("quantile_mapping", "+"), - ("quantile_delta_mapping", "+"), ("quantile_mapping", "*"), + ("quantile_delta_mapping", "+"), ("quantile_delta_mapping", "*"), ], )