Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement eigh() fallback #493

Open
wants to merge 40 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
68daea4
remove note 90% -> 25% in crossval
veni-vidi-vici-dormivi Aug 6, 2024
1dd681d
NOTE
veni-vidi-vici-dormivi Aug 6, 2024
1c0fdfa
switch to rng for drawing innovations
veni-vidi-vici-dormivi Aug 7, 2024
e39e775
implement fallback eigh in ecov crossval
veni-vidi-vici-dormivi Aug 7, 2024
e3e653d
developing
veni-vidi-vici-dormivi Aug 8, 2024
ce30336
more developing
veni-vidi-vici-dormivi Aug 9, 2024
7182d9c
Merge branch 'main' into cleanupcov
veni-vidi-vici-dormivi Aug 9, 2024
15af307
commenting
veni-vidi-vici-dormivi Aug 9, 2024
806ba64
remove debug print statement
veni-vidi-vici-dormivi Aug 9, 2024
86ebb94
fix bug first element inf
veni-vidi-vici-dormivi Aug 9, 2024
74376b7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 9, 2024
ab2488b
switch to scipy multivariate drawing again
veni-vidi-vici-dormivi Aug 13, 2024
c28324e
switch to Ecov class
veni-vidi-vici-dormivi Aug 14, 2024
7822dc5
fix local minimize
veni-vidi-vici-dormivi Aug 14, 2024
a304edb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 14, 2024
960c900
raise for eigh
veni-vidi-vici-dormivi Aug 16, 2024
e30876d
Merge branch 'main' into cleanupcov
veni-vidi-vici-dormivi Aug 19, 2024
1e87e8f
remove superfluous variables
veni-vidi-vici-dormivi Aug 19, 2024
c26ea02
linting
veni-vidi-vici-dormivi Aug 19, 2024
eb63431
changelog
veni-vidi-vici-dormivi Aug 19, 2024
6c2091a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 19, 2024
1aa1ba7
more folds for test
veni-vidi-vici-dormivi Aug 20, 2024
f123be8
Update mesmer/core/utils.py
veni-vidi-vici-dormivi Aug 21, 2024
8e33bea
Update mesmer/stats/_localized_covariance.py
veni-vidi-vici-dormivi Aug 21, 2024
f6d6144
refactor
veni-vidi-vici-dormivi Aug 24, 2024
3306cd3
handle test warnings
veni-vidi-vici-dormivi Aug 24, 2024
7be6d7c
date_range: update freq string (#504)
mathause Aug 21, 2024
386d86f
reference: only year as link (#505)
mathause Aug 21, 2024
d689445
GHA: use CODECOV_TOKEN (#507)
mathause Aug 21, 2024
7f5a165
add mesmer_m example script (#491)
veni-vidi-vici-dormivi Aug 22, 2024
90533ba
another warning
veni-vidi-vici-dormivi Aug 24, 2024
35c5d1a
rewrite test
veni-vidi-vici-dormivi Aug 24, 2024
4dbc927
refine warning
veni-vidi-vici-dormivi Aug 24, 2024
a5a9957
Merge branch 'main' into cleanupcov
veni-vidi-vici-dormivi Aug 24, 2024
948a844
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 24, 2024
9198c4e
fix bug in development.rst
veni-vidi-vici-dormivi Aug 24, 2024
fbd4228
Merge branch 'main' into cleanupcov
veni-vidi-vici-dormivi Sep 19, 2024
c7c355c
implement allow_singluar switch
veni-vidi-vici-dormivi Sep 19, 2024
5e59000
documentation
veni-vidi-vici-dormivi Sep 19, 2024
234890e
CHANGELOG
veni-vidi-vici-dormivi Sep 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions mesmer/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def create_equal_dim_names(dim, suffixes):
return tuple(f"{dim}{suffix}" for suffix in suffixes)


def _minimize_local_discrete(func, sequence, **kwargs):
def _minimize_local_discrete(func, sequence, changable, **kwargs):
"""find the local minimum for a function that consumes discrete input

Parameters
Expand All @@ -33,6 +33,9 @@ def _minimize_local_discrete(func, sequence, **kwargs):
as input and return a float that is to be minimized.
sequence : iterable
An iterable with discrete values to evaluate func for.
changable : str
changable parameter for the function, enables to feed back function
output at input
**kwargs : Mapping
Keyword arguments passed to `func`.

Expand All @@ -57,7 +60,7 @@ def _minimize_local_discrete(func, sequence, **kwargs):

for i, element in enumerate(sequence):

res = func(element, **kwargs)
res, changable = func(element, changable, **kwargs)

if np.isneginf(res):
raise ValueError("`fun` returned `-inf`")
Expand All @@ -72,6 +75,9 @@ def _minimize_local_discrete(func, sequence, **kwargs):
sel = i - 1
if sel == 0:
warnings.warn("First element is local minimum.", OptimizeWarning)
elif sel == -1:
raise ValueError("First radius already inf.")

veni-vidi-vici-dormivi marked this conversation as resolved.
Show resolved Hide resolved
return sequence[sel]

warnings.warn("No local minimum found, returning the last element", OptimizeWarning)
Expand Down
49 changes: 25 additions & 24 deletions mesmer/stats/_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,11 +475,8 @@ def _draw_auto_regression_correlated_np(
# arbitrary lags? no, see: https://github.com/MESMER-group/mesmer/issues/164
ar_lags = np.arange(1, ar_order + 1, dtype=int)

# ensure reproducibility (TODO: https://github.com/MESMER-group/mesmer/issues/35)
np.random.seed(seed)

innovations = _draw_innovations_correlated_np(
covariance, n_coeffs, n_samples, n_ts, buffer
seed, covariance, n_coeffs, n_samples, n_ts, buffer
)

out = np.zeros([n_samples, n_ts + buffer, n_coeffs])
Expand All @@ -492,25 +489,32 @@ def _draw_auto_regression_correlated_np(
return out[:, buffer:, :]


def _draw_innovations_correlated_np(covariance, n_gridcells, n_samples, n_ts, buffer):
def _draw_innovations_correlated_np(
seed, covariance, n_gridcells, n_samples, n_ts, buffer
):
# NOTE: 'innovations' is the error or noise term.
# innovations has shape (n_samples, n_ts + buffer, n_coeffs)
# ensure reproducibility (TODO: https://github.com/MESMER-group/mesmer/issues/35)
rng = np.random.default_rng(seed=seed)
try:
cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
except np.linalg.LinAlgError as e:
if "Matrix is not positive definite" in str(e):
w, v = np.linalg.eigh(covariance)
cov = scipy.stats.Covariance.from_eigendecomposition((w, v))
warnings.warn(
"Covariance matrix is not positive definite, using eigh instead of cholesky.",
LinAlgWarning,
)

innovations = scipy.stats.multivariate_normal.rvs(
mean=np.zeros(n_gridcells),
cov=cov,
size=[n_samples, n_ts + buffer],
).reshape(n_samples, n_ts + buffer, n_gridcells)
innovations = rng.multivariate_normal(
veni-vidi-vici-dormivi marked this conversation as resolved.
Show resolved Hide resolved
mean=np.zeros(n_gridcells),
cov=covariance,
size=[n_samples, n_ts + buffer],
method="cholesky",
).reshape(n_samples, n_ts + buffer, n_gridcells)

except np.linalg.LinAlgError:
warnings.warn(
"Covariance matrix is not positive definite, using eigh instead of cholesky.",
LinAlgWarning,
)
innovations = rng.multivariate_normal(
mean=np.zeros(n_gridcells),
cov=covariance,
size=[n_samples, n_ts + buffer],
method="eigh",
).reshape(n_samples, n_ts + buffer, n_gridcells)

return innovations

Expand Down Expand Up @@ -879,16 +883,13 @@ def _draw_auto_regression_monthly_np(

_, n_gridcells = intercept.shape

# ensure reproducibility (TODO: https://github.com/MESMER-group/mesmer/issues/35)
np.random.seed(seed)

# draw innovations for each month
innovations = np.zeros([n_samples, n_ts // 12 + buffer, 12, n_gridcells])
if covariance is not None:
for month in range(12):
cov_month = covariance[month, :, :]
innovations[:, :, month, :] = _draw_innovations_correlated_np(
cov_month, n_gridcells, n_samples, n_ts // 12, buffer
seed, cov_month, n_gridcells, n_samples, n_ts // 12, buffer
)
# reshape innovations into continuous time series
innovations = innovations.reshape(n_samples, n_ts + buffer * 12, n_gridcells)
Expand Down
33 changes: 23 additions & 10 deletions mesmer/stats/_localized_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,13 @@ def _find_localized_empirical_covariance_np(data, weights, localizer, k_folds):
# start again. Better to stop once min is reached (to limit computational effort
# and singular matrices).

# start with cholesky decomposition
# function returns method and can switch to eigh() if cov is singular
method = "cholesky"
localization_radius = _minimize_local_discrete(
_ecov_crossvalidation,
localization_radii,
method,
data=data,
weights=weights,
localizer=localizer,
Expand All @@ -265,7 +269,9 @@ def _find_localized_empirical_covariance_np(data, weights, localizer, k_folds):
return localization_radius, covariance, localized_covariance


def _ecov_crossvalidation(localization_radius, *, data, weights, localizer, k_folds):
def _ecov_crossvalidation(
veni-vidi-vici-dormivi marked this conversation as resolved.
Show resolved Hide resolved
localization_radius, method, *, data, weights, localizer, k_folds
):
"""k-fold crossvalidation for a single localization radius"""

n_samples, __ = data.shape
Expand All @@ -291,19 +297,22 @@ def _ecov_crossvalidation(localization_radius, *, data, weights, localizer, k_fo

try:
# sum log likelihood of all crossvalidation folds
nll += _get_neg_loglikelihood(data_cv, localized_cov, weights_cv)
nll += _get_neg_loglikelihood(data_cv, localized_cov, weights_cv, method)
except np.linalg.LinAlgError:
# NOTE: this error is thrown by np.linalg.cholesky not by the logpdf anymore
veni-vidi-vici-dormivi marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn(
f"Singular matrix for localization_radius of {localization_radius}."
" Skipping this radius.",
"\n Switching to eigh().",
LinAlgWarning,
)
return float("inf")
# switch to eigh from now on
method = "eigh"
nll += _get_neg_loglikelihood(data_cv, localized_cov, weights_cv, method)

return nll
return nll, method


def _get_neg_loglikelihood(data, covariance, weights):
def _get_neg_loglikelihood(data, covariance, weights, method):
"""calculate weighted log likelihood for multivariate normal distribution

Parameters
Expand All @@ -330,11 +339,15 @@ def _get_neg_loglikelihood(data, covariance, weights):
The mean is assumed to be zero for all points.
"""

# NOTE: 90 % of time is spent in multivariate_normal.logpdf - not much point
# optimizing the rest
if method == "cholesky":
cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
veni-vidi-vici-dormivi marked this conversation as resolved.
Show resolved Hide resolved
else:
w, v = np.linalg.eigh(covariance)
cov = scipy.stats.Covariance.from_eigendecomposition((w, v))

cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
log_likelihood = scipy.stats.multivariate_normal.logpdf(data, cov=cov)
log_likelihood = scipy.stats.multivariate_normal.logpdf(
data, cov=cov, allow_singular=True
)

# logpdf can return a scalar, which np.average does not like
log_likelihood = np.atleast_1d(log_likelihood)
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def test_draw_auto_regression_deterministic_coefs_buffer():
np.testing.assert_allclose(result, expected[:, i : i + 4])


def test_draw_auto_regression_random():
def test_draw_auto_regression_numeric():

result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
intercept=1,
Expand All @@ -444,7 +444,7 @@ def test_draw_auto_regression_random():
buffer=3,
)

expected = np.array([2.58455078, 3.28976946, 1.86569258, 2.78266986])
expected = np.array([1.07417558, 1.0240404, 1.77397341, 2.71531235])
expected = expected.reshape(1, 4, 1)

np.testing.assert_allclose(result, expected)
Expand Down
Loading
Loading