-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MESMER-X: Test distrib_cov
#540
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -38,6 +38,7 @@ def wrapper(*args, **kwargs): | |||||||||
return wrapper | ||||||||||
|
||||||||||
|
||||||||||
# TODO: would want to switch this, have a distrib class that takes xarrays and have a training func that potentially works on xarrays | ||||||||||
def xr_train_distrib( | ||||||||||
predictors, | ||||||||||
target, | ||||||||||
|
@@ -293,11 +294,13 @@ def __init__( | |||||||||
---------- | ||||||||||
data_targ : numpy array 1D | ||||||||||
Sample of the target for fit of a conditional distribution | ||||||||||
Normally the timeseries of the target at one gridpoint. | ||||||||||
|
||||||||||
data_pred : dict of 1D vectors | ||||||||||
Covariates for the conditional distribution. Each key must be the exact name | ||||||||||
of the inputs used in 'expr_fit', and the values must be aligned with the | ||||||||||
values in 'data_targ'. | ||||||||||
Normally the timeseries of the global mean predictor. | ||||||||||
|
||||||||||
expr_fit : class 'expression' | ||||||||||
Expression to train. The string provided to the class can be found in | ||||||||||
|
@@ -353,7 +356,8 @@ def __init__( | |||||||||
* type_fun_optim: string, default: "NLL" | ||||||||||
If 'NLL', will optimize using the negative log likelihood. If 'fcNLL', | ||||||||||
will use the full conditional negative log likelihood based on the | ||||||||||
stopping rule. | ||||||||||
stopping rule. The arguments `threshold_stopping_rule`, `ind_year_thres` | ||||||||||
and `exclude_trigger` only apply to 'fcNLL'. | ||||||||||
|
||||||||||
* weighted_NLL: boolean, default: False | ||||||||||
If True, the optimization function will based on the weighted sum of the | ||||||||||
|
@@ -540,21 +544,19 @@ def __init__( | |||||||||
elif isinstance(options_solver, dict): | ||||||||||
default_options_solver.update(options_solver) | ||||||||||
else: | ||||||||||
raise ValueError("options_solver must be a dictionary") | ||||||||||
raise ValueError("`options_solver` must be a dictionary") | ||||||||||
self.xtol_req = default_options_solver["xtol_req"] | ||||||||||
self.ftol_req = default_options_solver["ftol_req"] | ||||||||||
self.maxiter = default_options_solver["maxiter"] | ||||||||||
self.maxfev = default_options_solver["maxfev"] | ||||||||||
self.method_fit = default_options_solver["method_fit"] | ||||||||||
if self.method_fit in [ | ||||||||||
"dogleg", | ||||||||||
"trust-ncg", | ||||||||||
"trust-krylov", | ||||||||||
"trust-exact", | ||||||||||
"COBYLA", | ||||||||||
"SLSQP", | ||||||||||
"CG", | ||||||||||
"Newton-CG", | ||||||||||
if self.method_fit not in [ | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nice |
||||||||||
"BFGS", | ||||||||||
"L-BFGS-B", | ||||||||||
"Nelder-Mead", | ||||||||||
"Powell", | ||||||||||
"TNC", | ||||||||||
"trust-constr", | ||||||||||
]: | ||||||||||
raise ValueError("method for this fit not prepared, to avoid") | ||||||||||
else: | ||||||||||
|
@@ -614,8 +616,8 @@ def __init__( | |||||||||
): | ||||||||||
raise ValueError( | ||||||||||
"Lack of consistency on the options 'type_fun_optim'," | ||||||||||
" 'threshold_stopping_rule' and 'ind_year_thres', not sure if the" | ||||||||||
" stopping rule will be employed" | ||||||||||
" 'threshold_stopping_rule' and 'ind_year_thres', threshold_stopping_rule", | ||||||||||
"and 'ind_year_thres' must be used together, and only for 'fcNLL'", | ||||||||||
Comment on lines
+619
to
+620
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
) | ||||||||||
|
||||||||||
def get_weights(self, n_bins_density=40): | ||||||||||
|
@@ -649,7 +651,7 @@ def _get_weights_nll(self, n_bins_density=40): | |||||||||
# interpolating over whole region | ||||||||||
gmt_hist, edges = np.histogramdd(sample=tmp, bins=bins.T) | ||||||||||
|
||||||||||
gmt_bins_center = [0.5 * (edge[1:] + edges[:-1]) for edge in edges] | ||||||||||
gmt_bins_center = [0.5 * (edge[1:] + edge[:-1]) for edge in edges] | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nice find |
||||||||||
interp = RegularGridInterpolator(points=gmt_bins_center, values=gmt_hist) | ||||||||||
weights_driver = 1 / interp(tmp) # inverse of density | ||||||||||
|
||||||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,234 @@ | ||||||
import numpy as np | ||||||
import pytest | ||||||
|
||||||
from mesmer.mesmer_x import Expression, distrib_cov | ||||||
|
||||||
|
||||||
def test_distrib_cov_init_all_default(): | ||||||
rng = np.random.default_rng(0) | ||||||
n = 250 | ||||||
pred = np.linspace(0, 1, n) | ||||||
targ = rng.normal(loc=2 * pred, scale=0.1, size=n) | ||||||
|
||||||
expression = Expression("norm(loc=c1 * __tas__, scale=c2)", expr_name="exp1") | ||||||
|
||||||
dist = distrib_cov(targ, {"tas": pred}, expression) | ||||||
|
||||||
np.testing.assert_equal(dist.data_targ, targ) | ||||||
np.testing.assert_equal(dist.data_pred, {"tas": pred}) | ||||||
np.testing.assert_equal(dist.weights_driver, np.ones(n) / n) | ||||||
assert dist.n_sample == n | ||||||
assert dist.expr_fit == expression | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does that actually work? Ah, it does, but I think it checks that it's the same object and thus better to use
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah thanks! |
||||||
assert not dist.add_test | ||||||
assert dist.data_targ_addtest is None | ||||||
assert dist.data_preds_addtest is None | ||||||
assert dist.threshold_min_proba == 1e-09 | ||||||
assert dist.boundaries_params == expression.boundaries_parameters | ||||||
assert dist.boundaries_coeffs == {} | ||||||
assert dist.first_guess is None | ||||||
assert dist.func_first_guess is None | ||||||
assert dist.n_coeffs == 2 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that should be a property of |
||||||
assert dist.scores_fit == ["func_optim", "NLL", "BIC"] | ||||||
assert dist.xtol_req == 1e-06 | ||||||
assert dist.ftol_req == 1e-06 | ||||||
assert dist.maxiter == 1000 * dist.n_coeffs * np.log(dist.n_coeffs) | ||||||
assert dist.maxfev == 1000 * dist.n_coeffs * np.log(dist.n_coeffs) | ||||||
assert dist.method_fit == "Powell" | ||||||
assert dist.name_ftol == "ftol" | ||||||
assert dist.name_xtol == "xtol" | ||||||
assert not dist.error_failedfit | ||||||
assert not dist.fg_with_global_opti | ||||||
assert not dist.weighted_NLL | ||||||
assert dist.type_fun_optim == "NLL" | ||||||
assert dist.threshold_stopping_rule is None | ||||||
assert dist.exclude_trigger is None | ||||||
assert dist.ind_year_thres is None | ||||||
|
||||||
|
||||||
def test_distrib_cov_init(): | ||||||
rng = np.random.default_rng(0) | ||||||
n = 250 | ||||||
pred = np.linspace(0, 1, n) | ||||||
targ = rng.normal(loc=2 * pred, scale=0.1, size=n) | ||||||
|
||||||
expression = Expression("norm(loc=c1 * __tas__, scale=c2)", expr_name="exp1") | ||||||
|
||||||
data_targ_addtest = rng.normal(loc=2 * pred, scale=0.1, size=n) | ||||||
data_preds_addtest = {"tas": np.linspace(0, 0.9, n)} | ||||||
threshold_min_proba = 0.1 | ||||||
boundaries_params = {"loc": [-10, 10], "scale": [0, 1]} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Could try -1 here to check we end up with 0 again |
||||||
boundaries_coeffs = {"c1": [0, 5], "c2": [0, 1]} | ||||||
first_guess = np.array([1, 0.1]) | ||||||
func_first_guess = None | ||||||
scores_fit = ["func_optim", "NLL"] | ||||||
options_optim = { | ||||||
"type_fun_optim": "fcNLL", | ||||||
"weighted_NLL": True, | ||||||
"threshold_stopping_rule": 0.1, | ||||||
"ind_year_thres": 10, | ||||||
"exclude_trigger": True, | ||||||
} | ||||||
options_solver = { | ||||||
"method_fit": "Nelder-Mead", | ||||||
"xtol_req": 0.1, | ||||||
"ftol_req": 0.01, | ||||||
"maxiter": 10_000, | ||||||
"maxfev": 12_000, | ||||||
"error_failedfit": True, | ||||||
"fg_with_global_opti": True, | ||||||
} | ||||||
|
||||||
dist = distrib_cov( | ||||||
targ, | ||||||
{"tas": pred}, | ||||||
expression, | ||||||
data_targ_addtest=data_targ_addtest, | ||||||
data_preds_addtest=data_preds_addtest, | ||||||
threshold_min_proba=threshold_min_proba, | ||||||
boundaries_params=boundaries_params, | ||||||
boundaries_coeffs=boundaries_coeffs, | ||||||
first_guess=first_guess, | ||||||
func_first_guess=func_first_guess, | ||||||
scores_fit=scores_fit, | ||||||
options_optim=options_optim, | ||||||
options_solver=options_solver, | ||||||
) | ||||||
|
||||||
np.testing.assert_equal(dist.data_targ, targ) | ||||||
np.testing.assert_equal(dist.data_pred, {"tas": pred}) | ||||||
np.testing.assert_equal(dist.weights_driver, dist.get_weights()) | ||||||
# np.testing.assert_equal(dist.weights_driver, dist._get_weights_nll()) # WHY NOT??? | ||||||
np.testing.assert_equal(dist.first_guess, first_guess) | ||||||
np.testing.assert_equal(dist.data_targ_addtest, data_targ_addtest) | ||||||
np.testing.assert_equal(dist.data_preds_addtest, data_preds_addtest) | ||||||
assert dist.n_sample == n | ||||||
assert dist.expr_fit == expression | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
assert dist.add_test # is True | ||||||
assert dist.threshold_min_proba == threshold_min_proba | ||||||
assert dist.boundaries_params == boundaries_params | ||||||
assert dist.boundaries_coeffs == boundaries_coeffs | ||||||
assert dist.func_first_guess is None | ||||||
assert dist.n_coeffs == 2 | ||||||
assert dist.scores_fit == scores_fit | ||||||
assert dist.xtol_req == 0.1 | ||||||
assert dist.ftol_req == 0.01 | ||||||
assert dist.maxiter == 10_000 | ||||||
assert dist.maxfev == 12_000 | ||||||
assert dist.method_fit == "Nelder-Mead" | ||||||
assert dist.name_ftol == "fatol" | ||||||
assert dist.name_xtol == "xatol" | ||||||
assert dist.error_failedfit # is True | ||||||
assert dist.fg_with_global_opti # is True | ||||||
assert dist.weighted_NLL # is True | ||||||
assert dist.type_fun_optim == "fcNLL" | ||||||
assert dist.threshold_stopping_rule == 0.1 | ||||||
assert dist.exclude_trigger # is True | ||||||
assert dist.ind_year_thres == 10 | ||||||
|
||||||
|
||||||
def test_distrib_cov_init_errors(): | ||||||
expression = Expression("norm(loc=c1 * __tas__, scale=c2)", expr_name="exp1") | ||||||
|
||||||
with pytest.raises(ValueError, match="NaN or infinite values in target of fit"): | ||||||
distrib_cov(np.array([1, 2, np.nan]), {"tas": np.array([1, 2, 3])}, expression) | ||||||
|
||||||
with pytest.raises(ValueError, match="NaN or infinite values in target of fit"): | ||||||
distrib_cov(np.array([1, 2, np.inf]), {"tas": np.array([1, 2, 3])}, expression) | ||||||
|
||||||
with pytest.raises(ValueError, match="NaN or infinite values in predictors of fit"): | ||||||
distrib_cov(np.array([1, 2, 3]), {"tas": np.array([1, 2, np.nan])}, expression) | ||||||
|
||||||
with pytest.raises(ValueError, match="NaN or infinite values in predictors of fit"): | ||||||
distrib_cov(np.array([1, 2, 3]), {"tas": np.array([1, 2, np.inf])}, expression) | ||||||
|
||||||
with pytest.raises(ValueError, match="NaN or infinite values in predictors of fit"): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, np.inf]), "tas2": np.array([1, 2, np.nan])}, | ||||||
expression, | ||||||
) | ||||||
|
||||||
with pytest.raises(ValueError, match="Only one of "): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
data_targ_addtest=np.array([1, 2, 3]), | ||||||
) | ||||||
|
||||||
with pytest.raises(ValueError, match="Only one of "): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
data_preds_addtest={"tas": np.array([1, 2, 3])}, | ||||||
) | ||||||
|
||||||
with pytest.raises(ValueError, match="`threshold_min_proba` must be in"): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
threshold_min_proba=-1, | ||||||
) | ||||||
with pytest.raises(ValueError, match="`threshold_min_proba` must be in"): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
threshold_min_proba=2, | ||||||
) | ||||||
|
||||||
with pytest.raises( | ||||||
ValueError, match="The provided first guess does not have the correct shape:" | ||||||
): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
first_guess=np.array([1, 2, 3]), | ||||||
) | ||||||
|
||||||
with pytest.raises(ValueError, match="`options_solver` must be a dictionary"): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
options_solver="this is not a dictionary", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 😄 |
||||||
) | ||||||
|
||||||
with pytest.raises(ValueError, match="`options_optim` must be a dictionary"): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
options_optim="this is not a dictionary", | ||||||
) | ||||||
|
||||||
with pytest.raises(ValueError, match="method for this fit not prepared, to avoid"): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
options_solver={"method_fit": "this is not a method"}, | ||||||
) | ||||||
|
||||||
with pytest.raises( | ||||||
ValueError, match="Lack of consistency on the options 'type_fun_optim'" | ||||||
): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
options_optim={"type_fun_optim": "NLL", "threshold_stopping_rule": 0.1}, | ||||||
) | ||||||
|
||||||
with pytest.raises( | ||||||
ValueError, match="Lack of consistency on the options 'type_fun_optim'" | ||||||
): | ||||||
distrib_cov( | ||||||
np.array([1, 2, 3]), | ||||||
{"tas": np.array([1, 2, 3])}, | ||||||
expression, | ||||||
options_optim={"type_fun_optim": "fcNLL", "threshold_stopping_rule": None}, | ||||||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.