From b609c6ea2663fc37fefc5c2e798e9544c36ef563 Mon Sep 17 00:00:00 2001
From: "Benjamin T. Schwertfeger" <contact@b-schwertfeger.de>
Date: Mon, 20 May 2024 10:04:41 +0200
Subject: [PATCH] Resolve "QDM not working with longer simp length" (#102)

---
 .../workflows/dependabot_auto_approve.yaml    |  39 +++
 cmethods/distribution.py                      |  10 -
 cmethods/utils.py                             |  15 +-
 doc/methods.rst                               |   4 +-
 tests/helper.py                               |  10 +-
 tests/test_methods.py                         |  12 +-
 tests/test_methods_different_input_shape.py   | 259 ++++++++++++++++++
 tests/test_utils.py                           |   2 +-
 tests/test_zarr_dask_compatibility.py         |   4 +-
 9 files changed, 322 insertions(+), 33 deletions(-)
 create mode 100644 .github/workflows/dependabot_auto_approve.yaml
 create mode 100644 tests/test_methods_different_input_shape.py

diff --git a/.github/workflows/dependabot_auto_approve.yaml b/.github/workflows/dependabot_auto_approve.yaml
new file mode 100644
index 0000000..db5f69e
--- /dev/null
+++ b/.github/workflows/dependabot_auto_approve.yaml
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2024 Benjamin Thomas Schwertfeger
+# GitHub: https://github.com/btschwertfeger
+#
+# Workflow that approves and merges all pull requests from the dependabot[bot]
+# author.
+#
+# Source (May, 2024):
+# - https://blog.somewhatabstract.com/2021/10/11/setting-up-dependabot-with-github-actions-to-approve-and-merge/
+
+name: Dependabot auto-merge
+on: pull_request_target
+
+permissions:
+  pull-requests: write
+  contents: write
+
+jobs:
+  dependabot:
+    runs-on: ubuntu-latest
+    if: ${{ github.actor == 'dependabot[bot]' }}
+    steps:
+      - name: Dependabot metadata
+        id: dependabot-metadata
+        uses: dependabot/fetch-metadata@v1.1.1
+        with:
+          github-token: "${{ secrets.GITHUB_TOKEN }}"
+      - name: Approve a PR
+        if: ${{ steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major' }}
+        run: gh pr review --approve "$PR_URL"
+        env:
+          PR_URL: ${{ github.event.pull_request.html_url }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Enable auto-merge for Dependabot PRs
+        if: ${{ steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major' }}
+        run: gh pr merge --auto --squash "$PR_URL"
+        env:
+          PR_URL: ${{ github.event.pull_request.html_url }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/cmethods/distribution.py b/cmethods/distribution.py
index ca7565c..2c63856 100644
--- a/cmethods/distribution.py
+++ b/cmethods/distribution.py
@@ -62,11 +62,6 @@ def quantile_mapping(
 
     cdf_obs = get_cdf(obs, xbins)
     cdf_simh = get_cdf(simh, xbins)
-    cdf_simh = np.interp(
-        cdf_simh,
-        (cdf_simh.min(), cdf_simh.max()),
-        (cdf_obs.min(), cdf_obs.max()),
-    )
 
     if kind in ADDITIVE:
         epsilon = np.interp(simp, xbins, cdf_simh)  # Eq. 1
@@ -129,11 +124,6 @@ def detrended_quantile_mapping(
 
     cdf_obs = get_cdf(obs, xbins)
     cdf_simh = get_cdf(simh, xbins)
-    cdf_simh = np.interp(
-        cdf_simh,
-        (cdf_simh.min(), cdf_simh.max()),
-        (cdf_obs.min(), cdf_obs.max()),
-    )
 
     # detrended => shift mean of $X_{sim,p}$ to range of $X_{sim,h}$ to adjust extremes
     res = np.zeros(len(simp.values))
diff --git a/cmethods/utils.py b/cmethods/utils.py
index 52073f0..fdca6c8 100644
--- a/cmethods/utils.py
+++ b/cmethods/utils.py
@@ -149,11 +149,11 @@ def get_pdf(
         :linenos:
         :caption: Compute the probability density function :math:`P(x)`
 
-        >>> from cmethods import CMethods as cm
+        >>> from cmethods get_pdf
 
         >>> x = [1, 2, 3, 4, 5, 5, 5, 6, 7, 8, 9, 10]
         >>> xbins = [0, 3, 6, 10]
-        >>> print(cm.get_pdf(x=x, xbins=xbins))
+        >>> print(get_pdf(x=x, xbins=xbins))
         [2, 5, 5]
     """
     pdf, _ = np.histogram(x, xbins)
@@ -178,17 +178,18 @@ def get_cdf(
 
     .. code-block:: python
         :linenos:
-        :caption: Compute the cmmulative distribution function :math:`F(x)`
+        :caption: Compute the cumulative distribution function :math:`F(x)`
 
-        >>> from cmethods import CMethods as cm
+        >>> from cmethods.utils import get_cdf
 
         >>> x = [1, 2, 3, 4, 5, 5, 5, 6, 7, 8, 9, 10]
         >>> xbins = [0, 3, 6, 10]
-        >>> print(cm.get_cdf(x=x, xbins=xbins))
-        [0, 2, 7, 12]
+        >>> print(get_cdf(x=x, xbins=xbins))
+        [0.0, 0.16666667, 0.58333333, 1.]
     """
     pdf, _ = np.histogram(x, xbins)
-    return np.insert(np.cumsum(pdf), 0, 0.0)
+    cdf = np.insert(np.cumsum(pdf), 0, 0.0)
+    return cdf / cdf[-1]
 
 
 def get_inverse_of_cdf(
diff --git a/doc/methods.rst b/doc/methods.rst
index a6ee684..1e088f0 100644
--- a/doc/methods.rst
+++ b/doc/methods.rst
@@ -171,7 +171,9 @@ The Delta Method bias correction technique can be applied on stochastic and
 non-stochastic climate variables to minimize deviations in the mean values
 between predicted and observed time-series of past and future time periods.
 
-This method requires that the time series can be grouped by ``time.month``.
+This method requires that the time series can be grouped by ``time.month`` while
+the reference data of the control period must have the same temporal resolution
+as the data that is going to be adjusted.
 
 Since the multiplicative scaling can result in very high scaling factors, a
 maximum scaling factor of 10 is set. This can be changed by passing the desired
diff --git a/tests/helper.py b/tests/helper.py
index 2f1e6ff..ef63b70 100644
--- a/tests/helper.py
+++ b/tests/helper.py
@@ -8,8 +8,6 @@
 
 from __future__ import annotations
 
-from typing import List
-
 import numpy as np
 import xarray as xr
 from sklearn.metrics import mean_squared_error
@@ -62,7 +60,7 @@ def get_datasets(kind: str) -> tuple[xr.Dataset, xr.Dataset, xr.Dataset, xr.Data
     )
     latitudes = np.arange(23, 27, 1)
 
-    def get_hist_temp_for_lat(lat: int) -> List[float]:
+    def get_hist_temp_for_lat(lat: int) -> list[float]:
         """Returns a fake interval time series by latitude value"""
         return 273.15 - (
             lat * np.cos(2 * np.pi * historical_time.dayofyear / 365)
@@ -71,7 +69,7 @@ def get_hist_temp_for_lat(lat: int) -> List[float]:
             + 0.1 * (historical_time - historical_time[0]).days / 365
         )
 
-    def get_fake_hist_precipitation_data() -> List[float]:
+    def get_fake_hist_precipitation_data() -> list[float]:
         """Returns ratio based fake time series"""
         pr = (
             np.cos(2 * np.pi * historical_time.dayofyear / 365)
@@ -120,7 +118,7 @@ def get_dataset(data, time, kind: str) -> xr.Dataset:
         )
         obsh = get_dataset(data, historical_time, kind=kind)
         obsp = get_dataset(data * 1.02, historical_time, kind=kind)
-        simh = get_dataset(data * 0.98, historical_time, kind=kind)
-        simp = get_dataset(data * 0.09, future_time, kind=kind)
+        simh = get_dataset(data * 0.95, historical_time, kind=kind)
+        simp = get_dataset(data * 0.965, future_time, kind=kind)
 
     return obsh, obsp, simh, simp
diff --git a/tests/test_methods.py b/tests/test_methods.py
index 315c09c..315e823 100644
--- a/tests/test_methods.py
+++ b/tests/test_methods.py
@@ -27,9 +27,9 @@
     ("method", "kind"),
     [
         ("linear_scaling", "+"),
+        ("linear_scaling", "*"),
         ("variance_scaling", "+"),
         ("delta_method", "+"),
-        ("linear_scaling", "*"),
         ("delta_method", "*"),
     ],
 )
@@ -65,9 +65,9 @@ def test_1d_scaling(
     ("method", "kind"),
     [
         ("linear_scaling", "+"),
+        ("linear_scaling", "*"),
         ("variance_scaling", "+"),
         ("delta_method", "+"),
-        ("linear_scaling", "*"),
         ("delta_method", "*"),
     ],
 )
@@ -111,8 +111,8 @@ def test_3d_scaling(
     ("method", "kind"),
     [
         ("linear_scaling", "+"),
-        ("variance_scaling", "+"),
         ("linear_scaling", "*"),
+        ("variance_scaling", "+"),
     ],
 )
 def test_3d_scaling_different_time_span(
@@ -160,8 +160,8 @@ def test_3d_scaling_different_time_span(
     ("method", "kind"),
     [
         ("quantile_mapping", "+"),
-        ("quantile_delta_mapping", "+"),
         ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
         ("quantile_delta_mapping", "*"),
     ],
 )
@@ -192,8 +192,8 @@ def test_1d_distribution(
     ("method", "kind"),
     [
         ("quantile_mapping", "+"),
-        ("quantile_delta_mapping", "+"),
         ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
         ("quantile_delta_mapping", "*"),
     ],
 )
@@ -224,8 +224,8 @@ def test_3d_distribution(
     ("method", "kind"),
     [
         ("quantile_mapping", "+"),
-        ("quantile_delta_mapping", "+"),
         ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
         ("quantile_delta_mapping", "*"),
     ],
 )
diff --git a/tests/test_methods_different_input_shape.py b/tests/test_methods_different_input_shape.py
new file mode 100644
index 0000000..cf7aec7
--- /dev/null
+++ b/tests/test_methods_different_input_shape.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2024 Benjamin Thomas Schwertfeger
+# GitHub: https://github.com/btschwertfeger
+#
+
+"""
+Module implementing the unit tests that check if the input data sets can have
+different shapes.
+
+TODO: Remove the copy-paste stuff here. That could be done way simpler.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from cmethods import adjust
+from cmethods.types import XRData_t
+
+from .helper import is_1d_rmse_better
+
+N_QUANTILES = 100
+
+
+@pytest.mark.parametrize(
+    ("method", "kind"),
+    [
+        ("linear_scaling", "+"),
+        ("linear_scaling", "*"),
+        ("variance_scaling", "+"),
+    ],
+)
+def test_1d_scaling_obs_shorter(
+    datasets: dict,
+    method: str,
+    kind: str,
+) -> None:
+    obsh: XRData_t = datasets[kind]["obsh"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+    obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0]
+    simh: XRData_t = datasets[kind]["simh"][:, 0, 0]
+    simp: XRData_t = datasets[kind]["simp"][:, 0, 0]
+
+    # not group
+    result: XRData_t = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        input_core_dims={"obs": "t_time", "simh": "time", "simp": "time"},
+    )
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+    # grouped
+    result = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        group={"obs": "t_time.month", "simh": "time.month", "simp": "time.month"},
+        input_core_dims={"obs": "t_time", "simh": "time", "simp": "time"},
+    )
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+
+@pytest.mark.parametrize(
+    ("method", "kind"),
+    [
+        ("linear_scaling", "+"),
+        ("linear_scaling", "*"),
+        ("delta_method", "+"),
+        ("delta_method", "*"),
+        ("variance_scaling", "+"),
+    ],
+)
+def test_1d_scaling_simh_shorter(
+    datasets: dict,
+    method: str,
+    kind: str,
+) -> None:
+    obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0]
+    obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0]
+    simh: XRData_t = datasets[kind]["simh"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+    simp: XRData_t = datasets[kind]["simp"][:, 0, 0]
+
+    # not group
+    result: XRData_t = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        input_core_dims={"obs": "time", "simh": "t_time", "simp": "time"},
+    )
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+    # grouped
+    result = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        group={"obs": "time.month", "simh": "t_time.month", "simp": "time.month"},
+        input_core_dims={"obs": "time", "simh": "t_time", "simp": "time"},
+    )
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+
+@pytest.mark.parametrize(
+    ("method", "kind"),
+    [
+        ("linear_scaling", "+"),
+        ("linear_scaling", "*"),
+        ("variance_scaling", "+"),
+    ],
+)
+def test_1d_scaling_simp_shorter(
+    datasets: dict,
+    method: str,
+    kind: str,
+) -> None:
+    obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0]
+    obsp: XRData_t = datasets[kind]["obsp"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+    simh: XRData_t = datasets[kind]["simh"][:, 0, 0]
+    simp: XRData_t = datasets[kind]["simp"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+
+    # not group
+    result: XRData_t = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        input_core_dims={"obs": "time", "simh": "time", "simp": "t_time"},
+    )
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+    # grouped
+    result = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        group={"obs": "time.month", "simh": "time.month", "simp": "t_time.month"},
+        input_core_dims={"obs": "time", "simh": "time", "simp": "t_time"},
+    )
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    ("method", "kind"),
+    [
+        ("quantile_mapping", "+"),
+        ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
+        ("quantile_delta_mapping", "*"),
+    ],
+)
+def test_1d_distribution_obs_shorter(
+    datasets: dict,
+    method: str,
+    kind: str,
+) -> None:
+    obsh: XRData_t = datasets[kind]["obsh"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+    obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0]
+    simh: XRData_t = datasets[kind]["simh"][:, 0, 0]
+    simp: XRData_t = datasets[kind]["simp"][:, 0, 0]
+
+    result: XRData_t = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        n_quantiles=N_QUANTILES,
+        input_core_dims={"obs": "t_time", "simh": "time", "simp": "time"},
+    )
+
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+
+@pytest.mark.parametrize(
+    ("method", "kind"),
+    [
+        ("quantile_mapping", "+"),
+        ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
+        ("quantile_delta_mapping", "*"),
+    ],
+)
+def test_1d_distribution_simh_shorter(
+    datasets: dict,
+    method: str,
+    kind: str,
+) -> None:
+    obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0]
+    obsp: XRData_t = datasets[kind]["obsp"][:, 0, 0]
+    simh: XRData_t = datasets[kind]["simh"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+    simp: XRData_t = datasets[kind]["simp"][:, 0, 0]
+
+    result: XRData_t = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        n_quantiles=N_QUANTILES,
+        input_core_dims={"obs": "time", "simh": "t_time", "simp": "time"},
+    )
+
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
+
+
+@pytest.mark.parametrize(
+    ("method", "kind"),
+    [
+        ("quantile_mapping", "+"),
+        ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
+        ("quantile_delta_mapping", "*"),
+    ],
+)
+def test_1d_distribution_simp_shorter(
+    datasets: dict,
+    method: str,
+    kind: str,
+) -> None:
+    obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0]
+    obsp: XRData_t = datasets[kind]["obsp"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+    simh: XRData_t = datasets[kind]["simh"][:, 0, 0]
+    simp: XRData_t = datasets[kind]["simp"][:7300, 0, 0].rename({"time": "t_time"})  # 20/30 years
+
+    result: XRData_t = adjust(
+        method=method,
+        obs=obsh,
+        simh=simh,
+        simp=simp,
+        kind=kind,
+        n_quantiles=N_QUANTILES,
+        input_core_dims={"obs": "time", "simh": "time", "simp": "t_time"},
+    )
+
+    assert isinstance(result, XRData_t)
+    assert is_1d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6212aca..822bfc0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -61,7 +61,7 @@ def test_quantile_mapping_all_nan() -> None:
 def test_quantile_delta_mapping_single_nan() -> None:
     obs, simh, simp = list(np.arange(10)), list(np.arange(10)), list(np.arange(10))
     obs[0] = np.nan
-    expected = np.array([0.0, 1.9, 2.9, 3.9, 4.9, 5.9, 6.9, 7.9, 8.9, 9.0])
+    expected = np.array([0.0, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9.0])
 
     res = quantile_delta_mapping(obs=obs, simh=simh, simp=simp, n_quantiles=5)
     assert np.allclose(res, expected)
diff --git a/tests/test_zarr_dask_compatibility.py b/tests/test_zarr_dask_compatibility.py
index 7e5fbf5..5f691df 100644
--- a/tests/test_zarr_dask_compatibility.py
+++ b/tests/test_zarr_dask_compatibility.py
@@ -22,9 +22,9 @@
     ("method", "kind"),
     [
         ("linear_scaling", "+"),
+        ("linear_scaling", "*"),
         ("variance_scaling", "+"),
         ("delta_method", "+"),
-        ("linear_scaling", "*"),
         ("delta_method", "*"),
     ],
 )
@@ -67,8 +67,8 @@ def test_3d_scaling_zarr(
     ("method", "kind"),
     [
         ("quantile_mapping", "+"),
-        ("quantile_delta_mapping", "+"),
         ("quantile_mapping", "*"),
+        ("quantile_delta_mapping", "+"),
         ("quantile_delta_mapping", "*"),
     ],
 )