Skip to content

Commit

Permalink
auto_regression: add multi ens & scen functions (MESMER-group#344)
Browse files Browse the repository at this point in the history
* auto_regression: add multi ens & scen functions

* convert to scalar

* undo unneeded changes
  • Loading branch information
mathause authored Dec 5, 2023
1 parent 8aa99a9 commit 35ad227
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 76 deletions.
61 changes: 16 additions & 45 deletions mesmer/calibrate_mesmer/train_gv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@

import numpy as np
import xarray as xr
from packaging.version import Version

from mesmer.io.save_mesmer_bundle import save_mesmer_data
from mesmer.stats.auto_regression import fit_auto_regression, select_ar_order
from mesmer.stats.auto_regression import (
_fit_auto_regression_scen_ens,
_select_ar_order_scen_ens,
)


def train_gv(gv, targ, esm, cfg, save_params=True, **kwargs):
Expand Down Expand Up @@ -170,52 +172,21 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit):
params_gv["max_lag"] = max_lag
params_gv["sel_crit"] = sel_crit

if Version(xr.__version__) >= Version("2022.03.0"):
method = "method"
else:
method = "interpolation"

# select the AR Order
AR_order_scen = list()
for scen in gv.keys():

# create temporary DataArray
data = xr.DataArray(gv[scen], dims=["run", "time"])

AR_order = select_ar_order(data, dim="time", maxlag=max_lag, ic=sel_crit)

# median over all ensemble members ("nearest" ensures an 'existing' lag is selected)
AR_order = AR_order.quantile(q=0.5, **{method: "nearest"})
AR_order_scen.append(AR_order)

# median over all scenarios
AR_order_scen = xr.concat(AR_order_scen, dim="scen")
AR_order_sel = int(AR_order_scen.quantile(q=0.5, **{method: "nearest"}).item())

# determine the AR params for the selected AR order
params_scen = list()
for scen in gv.keys():
data = gv[scen]

# create temporary DataArray
data = xr.DataArray(data, dims=("run", "time"))

params = fit_auto_regression(data, dim="time", lags=AR_order_sel)
# BUG/ TODO: we wrongfully average over the standard deviation
# see https://github.com/MESMER-group/mesmer/issues/307
params["standard_deviation"] = np.sqrt(params.variance)
params = params.mean("run")

params_scen.append(params)
# create temporary DataArray objects
data = [xr.DataArray(data, dims=["run", "time"]) for data in gv.values()]

params_scen = xr.concat(params_scen, dim="scen")
params_scen = params_scen.mean("scen")
AR_order = _select_ar_order_scen_ens(
*data, dim="time", ens_dim="run", maxlag=max_lag, ic=sel_crit
)
params = _fit_auto_regression_scen_ens(
*data, dim="time", ens_dim="run", lags=AR_order
)

# TODO: remove np.float64(...) (only here so the tests pass)
params_gv["AR_order_sel"] = AR_order_sel
params_gv["AR_int"] = np.float64(params_scen.intercept.values)
params_gv["AR_coefs"] = params_scen.coeffs.values.squeeze()
params_gv["AR_std_innovs"] = np.float64(params_scen.standard_deviation.values)
params_gv["AR_order_sel"] = AR_order.item()
params_gv["AR_int"] = np.float64(params.intercept.values)
params_gv["AR_coefs"] = params.coeffs.values.squeeze()
params_gv["AR_std_innovs"] = np.float64(params.standard_deviation.values)

# check if fitted AR process is stationary
# (highly unlikely this test will ever fail but better safe than sorry)
Expand Down
36 changes: 9 additions & 27 deletions mesmer/calibrate_mesmer/train_lv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
Functions to train local variability module of MESMER.
"""

import numpy as np
import xarray as xr

from mesmer.io.save_mesmer_bundle import save_mesmer_data
from mesmer.stats.auto_regression import fit_auto_regression
from mesmer.stats.auto_regression import _fit_auto_regression_scen_ens
from mesmer.stats.localized_covariance import (
adjust_covariance_ar1,
find_localized_empirical_covariance,
Expand Down Expand Up @@ -230,26 +229,15 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg):
# fit parameters for each target individually
for targ_name, targ in targs.items():

params_scen = list()
for scen, data in targ.items():
# create temporary DataArray
dims = ("run", "time", "cell")
data = [xr.DataArray(data, dims=dims) for data in targ.values()]

# create temporary DataArray
data = xr.DataArray(data, dims=("run", "time", "cell"))
params = _fit_auto_regression_scen_ens(*data, dim="time", ens_dim="run", lags=1)

params = fit_auto_regression(data, dim="time", lags=1)
# BUG/ TODO: we wrongfully average over the standard deviation
# see https://github.com/MESMER-group/mesmer/issues/307
params["standard_deviation"] = np.sqrt(params.variance)
params = params.mean("run")

params_scen.append(params)

params_scen = xr.concat(params_scen, dim="scen")
params_scen = params_scen.mean("scen")

params_lv["AR1_int"][targ_name] = params_scen.intercept.values
params_lv["AR1_coef"][targ_name] = params_scen.coeffs.values.squeeze()
params_lv["AR1_std_innovs"][targ_name] = params_scen.standard_deviation.values
params_lv["AR1_int"][targ_name] = params.intercept.values
params_lv["AR1_coef"][targ_name] = params.coeffs.values.squeeze()
params_lv["AR1_std_innovs"][targ_name] = params.standard_deviation.values

# determine localization radius, empirical cov matrix, and localized ecov matrix

Expand All @@ -264,13 +252,7 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg):
params_lv["loc_ecov"][targ_name] = res.localized_covariance.values

# adjust localized cov matrix with the coefficients of the AR(1) process
loc_cov = params_lv["loc_ecov"][targ_name]
loc_cov = xr.DataArray(loc_cov, dims=("cell_i", "cell_j"))

ar_coefs = params_lv["AR1_coef"][targ_name]
ar_coefs = xr.DataArray(ar_coefs, dims="cell")

loc_cov_ar1 = adjust_covariance_ar1(loc_cov, ar_coefs)
loc_cov_ar1 = adjust_covariance_ar1(res.localized_covariance, params.coeffs)

params_lv["loc_ecov_AR1_innovs"][targ_name] = loc_cov_ar1.values

Expand Down
113 changes: 109 additions & 4 deletions mesmer/stats/auto_regression.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,118 @@
import numpy as np
import pandas as pd
import xarray as xr
from packaging.version import Version

from mesmer.core.utils import _check_dataarray_form, _check_dataset_form


def _select_ar_order_scen_ens(*objs, dim, ens_dim, maxlag, ic="bic"):
"""
Select the order of an autoregressive process and potentially calculate the median
over ensemble members and scenarios
Parameters
----------
objs : iterable of DataArray
A list of ``xr.DataArray`` to estimate the auto regression order over.
dim : str
Dimension along which to determine the order.
ens_dim : str
Dimension name of the ensemble members.
maxlag : int
The maximum lag to consider.
ic : {'aic', 'hqic', 'bic'}, default 'bic'
The information criterion to use in the selection.
Returns
-------
selected_ar_order : DataArray
Array indicating the selected order with the same size as the input but ``dim``
removed.
Notes
-----
Calculates the median auto regression order, first over the ensemble members,
then over all scenarios.
"""

if Version(xr.__version__) >= Version("2022.03.0"):
method = "method"
else:
method = "interpolation"

ar_order_scen = list()
for obj in objs:
res = select_ar_order(obj, dim=dim, maxlag=maxlag, ic=ic)

if ens_dim in res.dims:
res = res.quantile(dim=ens_dim, q=0.5, **{method: "nearest"})

ar_order_scen.append(res)

ar_order_scen = xr.concat(ar_order_scen, dim="scen")

ar_order = ar_order_scen.quantile(0.5, dim="scen", **{method: "nearest"})

if not np.isnan(ar_order).any():
ar_order = ar_order.astype(int)

return ar_order


def _fit_auto_regression_scen_ens(*objs, dim, ens_dim, lags):
"""
fit an auto regression and potentially calculate the mean over ensemble members
and scenarios
Parameters
----------
objs : iterable of DataArray
A list of ``xr.DataArray`` to estimate the auto regression over.
dim : str
Dimension along which to fit the auto regression.
ens_dim : str
Dimension name of the ensemble members.
lags : int
The number of lags to include in the model.
Returns
-------
:obj:`xr.Dataset`
Dataset containing the estimated parameters of the ``intercept``, the AR
``coeffs`` and the ``variance`` of the residuals.
Notes
-----
Calculates the mean auto regression, first over the ensemble members, then over all
scenarios.
"""

ar_params_scen = list()
for obj in objs:
ar_params = fit_auto_regression(obj, dim=dim, lags=int(lags))

# BUG/ TODO: fix for v1, see https://github.com/MESMER-group/mesmer/issues/307
ar_params["standard_deviation"] = np.sqrt(ar_params.variance)

if ens_dim in ar_params.dims:
ar_params = ar_params.mean(ens_dim)

ar_params_scen.append(ar_params)

ar_params_scen = xr.concat(ar_params_scen, dim="scen")

# return the mean over all scenarios
ar_params = ar_params_scen.mean("scen")

return ar_params


# ======================================================================================


def select_ar_order(data, dim, maxlag, ic="bic"):
"""Select the order of an autoregressive process - xarray wrapper
"""Select the order of an autoregressive process
Parameters
----------
Expand Down Expand Up @@ -396,15 +502,14 @@ def _draw_auto_regression_correlated_np(


def fit_auto_regression(data, dim, lags):
"""
fit an auto regression - xarray wrapper
"""fit an auto regression
Parameters
----------
data : xr.DataArray
A ``xr.DataArray`` to estimate the auto regression over.
dim : str
Dimension along which to fit the auto regression over.
Dimension along which to fit the auto regression.
lags : int
The number of lags to include in the model.
Expand Down
110 changes: 110 additions & 0 deletions tests/unit/test_auto_regression_scen_ens.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import numpy as np
import xarray as xr
from statsmodels.tsa.arima_process import ArmaProcess

import mesmer.stats.auto_regression


def generate_ar_samples(ar, n_timesteps=100, n_ens=4):

np.random.seed(0)

data = ArmaProcess(ar, 0.1).generate_sample([n_timesteps, n_ens])

ens = np.arange(n_ens)

da = xr.DataArray(data, dims=("time", "ens"), coords={"ens": ens})

return da


def test_select_ar_order_scen_ens_one_scen():

da = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)

result = mesmer.stats.auto_regression._select_ar_order_scen_ens(
da, dim="time", ens_dim="ens", maxlag=5
)

expected = xr.DataArray(3, coords={"quantile": 0.5})

xr.testing.assert_equal(result, expected)


def test_select_ar_order_scen_ens_multi_scen():

da1 = generate_ar_samples([1, 0.5, 0.3], n_timesteps=100, n_ens=4)
da2 = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)

result = mesmer.stats.auto_regression._select_ar_order_scen_ens(
da1, da2, dim="time", ens_dim="ens", maxlag=5
)

expected = xr.DataArray(2, coords={"quantile": 0.5})

xr.testing.assert_equal(result, expected)


def test_select_ar_order_scen_ens_no_ens_dim():

da = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)

result = mesmer.stats.auto_regression._select_ar_order_scen_ens(
da, dim="time", ens_dim=None, maxlag=5
)

ens = [0, 1, 2, 3]
expected = xr.DataArray(
[3, 1, 3, 3], dims="ens", coords={"quantile": 0.5, "ens": ens}
)

xr.testing.assert_equal(result, expected)


def test_fit_auto_regression_scen_ens_one_scen():

da = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)

result = mesmer.stats.auto_regression._fit_auto_regression_scen_ens(
da, dim="time", ens_dim="ens", lags=3
)

expected = mesmer.stats.auto_regression.fit_auto_regression(da, dim="time", lags=3)
expected["standard_deviation"] = np.sqrt(expected.variance)
expected = expected.mean("ens")

xr.testing.assert_equal(result, expected)


def test_fit_auto_regression_scen_ens_multi_scen():

da1 = generate_ar_samples([1, 0.5, 0.3], n_timesteps=100, n_ens=4)
da2 = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=5)

result = mesmer.stats.auto_regression._fit_auto_regression_scen_ens(
da1, da2, dim="time", ens_dim="ens", lags=3
)

da = xr.concat([da1, da2], dim="scen")
da = da.stack(scen_ens=("scen", "ens")).dropna("scen_ens")
expected = mesmer.stats.auto_regression.fit_auto_regression(da, dim="time", lags=3)
expected = expected.unstack()
expected["standard_deviation"] = np.sqrt(expected.variance)
expected = expected.mean("ens").mean("scen")

xr.testing.assert_equal(result, expected)


def test_fit_auto_regression_scen_ens_no_ens_dim():

da = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)

result = mesmer.stats.auto_regression._fit_auto_regression_scen_ens(
da, dim="time", ens_dim=None, lags=3
)

expected = mesmer.stats.auto_regression.fit_auto_regression(da, dim="time", lags=3)

expected["standard_deviation"] = np.sqrt(expected.variance)

xr.testing.assert_allclose(result, expected)

0 comments on commit 35ad227

Please sign in to comment.