Skip to content

Commit

Permalink
MESMER-X: reflow and fix comments and docstrings (MESMER-group#465)
Browse files Browse the repository at this point in the history
* MESMER-X: more formatting

* MESMER-X: reflow and fix comments and docstrings

* fixed patch add

* another line

* Expression: docstring to __init__

* more velin fixes
  • Loading branch information
mathause authored Jun 13, 2024
1 parent ea98d40 commit 9fcb6be
Show file tree
Hide file tree
Showing 6 changed files with 588 additions and 398 deletions.
14 changes: 4 additions & 10 deletions mesmer/mesmer_x/OLD_train_l_distrib.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,21 +83,16 @@ def train_l_distrib(preds, targs, cfg, form_fit_distrib, save_params=True, **kwa
----------
preds : dict
dictionary[target][name_covariant] = covariant. The covariants must be dictionaries with scenarios as keys.
targs : dict
nested dictionary of targets with keys
- [targ][scen] (3d array (run, time, gp) of target for specific scenario)
cfg : module
config file containing metadata
form_fit_distrib : str
string containing information on which fit to do: transformation and evolution of parameters
Examples: transfo-_loc-gttasL-gthfdsL_scale-_shape-gttasS; transfo-logistic_loc-gttasL_scale-
save_params : bool, optional
determines if parameters are saved or not, default = True
**kwargs : additional parameters that will be fed to the class 'distrib_cov' for fit of a distribution
Returns
Expand Down Expand Up @@ -307,14 +302,11 @@ def transf_distrib2normal(preds, targs, params_l_distrib, threshold_sigma=6.0):
preds : dict
nested dictionary with 3 keys: cov_loc, cov_scale, cov_shape. Each one may be empty for no variation of the parameter of the distribution. If not empty, the variables will be used as covariants.
- [targ][cov_...][covariant][scen] (1d array (time) of predictor for specific scenario)
targs : dict
nested dictionary of targets with keys
- [targ][scen] (3d array (run, time, gp) of target for specific scenario)
params_l_distrib : dict
nested dictionary of local variability parameters. Result of the function 'train_l_distrib'.
threshold_sigma : float
If a distribution is not correctly fitted, some values to transform may lead to unlikely values for a standard normal distribution. If above, they will be set to the threshold. Default : 6 (~happens once every 1e9 times)
Expand Down Expand Up @@ -511,7 +503,8 @@ def transf_distrib2normal(preds, targs, params_l_distrib, threshold_sigma=6.0):
print(
"WARNING: some transformed values of "
+ var_targ
+ " are very unlikely, a possible cause is a fit missing strong signals. Action taken: blocking them at a limit."
+ " are very unlikely, a possible cause is a fit missing strong"
" signals. Action taken: blocking them at a limit."
)

return transf_inputs
Expand Down Expand Up @@ -1785,7 +1778,8 @@ def fit(self):
# checking if that one failed as well
if self.error_failedfit and not m.success:
raise Exception(
"The fast detrend provides with a valid first guess, but not good enough."
"The fast detrend provides with a valid first guess, but not"
" good enough."
)

return self.translate_m_sol(m.x)
Expand Down
10 changes: 5 additions & 5 deletions mesmer/mesmer_x/create_emus_l_distrib.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,11 @@ def backtransf_normal2distrib(transf_emus_lv, preds, params_distrib, force_scen=
transf_emus_lv : dict
nested dictionary for transformed emulators, result of 'create_emus_lv'
- [targ][scen] (3d array (emus, time, gp) of target for specific scenario)
preds : dict
nested dictionary with 3 keys: cov_loc, cov_scale, cov_shape. Each one may be empty for no variation of the parameter of the distribution. If not empty, the variables will be used as covariants.
- [targ][cov_...][covariant][scen] (1d array (time) of predictor for specific scenario)
params_distrib : dict
nested dictionary of local variability parameters. Result of the function 'train_l_distrib'.
force_scen : None or iterable (list, set, 1d array)
Used to prescribe a specific list of scenarios. If None, they will be deduced from the covariants. Important if no covariants, otherwise the parameters would not depend on scenarios, but using 'force_scen', we can have the desired scenarios.
Expand All @@ -63,7 +60,8 @@ def backtransf_normal2distrib(transf_emus_lv, preds, params_distrib, force_scen=
# checking that the provided inputs are transformed.
if list(transf_emus_lv.keys()) != ["all"]:
raise Exception(
"Data to backtransform must be emulations from 'create_emus_lv' with only the key 'all'"
"Data to backtransform must be emulations from 'create_emus_lv' with only"
" the key 'all'"
)

# creating the dictionary that will be filled in
Expand Down Expand Up @@ -111,7 +109,9 @@ def backtransf_normal2distrib(transf_emus_lv, preds, params_distrib, force_scen=
# checking if different scenarios are provided
elif np.any(list_scens != maybe_scens):
raise Exception(
"The different covariants for the parameters have different list of scenarios, please provide the same ones. NB: 'all' applies to all other scenarios, thus did not cause this issue."
"The different covariants for the parameters have different list of"
" scenarios, please provide the same ones. NB: 'all' applies to all"
" other scenarios, thus did not cause this issue."
)

if force_scen is not None:
Expand Down
78 changes: 42 additions & 36 deletions mesmer/mesmer_x/temporary_config_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,24 @@


class ConfigMesmerX:
"""
This class defines the full configuration of MESMER.
Inputs:
- paths: information on what paths to use.
If nothing is provided in 'paths', default is to assume paths for tests.
If any known keyword is provided in 'paths', they will be used instead of default.
Unknown keywords in 'paths' cause an error.
- gen: generation of CMIP data (default: 6). If some paths are provided in 'paths', it MUST be consistent with 'gen'.
- esms: list of the ESMs used. The default is 'all', BUT if no paths are provided, it is assumed that only data for tests are used, then only ["IPSL-CM6A-LR"].
"""

def __init__(self, paths={}, gen=6, esms="all"):
"""full configuration of MESMER.
Parameters
----------
paths : dict
Information on what paths to use. If nothing is provided in 'paths', default
is to assume paths for tests. If any known keyword is provided in 'paths',
they will be used instead of default.
Unknown keywords in 'paths' cause an error.
gen : int, default: 6
Generation of CMIP data. If some paths are provided in 'paths', it MUST be
consistent with 'gen'.
esms : list of str, default: "all"
The ESMs used. The default is 'all', BUT if no paths are provided, it is
assumed that only data for tests are used, then only ["IPSL-CM6A-LR"].
"""

# preparing some parameters
self.paths = paths
Expand All @@ -39,7 +44,6 @@ def __init__(self, paths={}, gen=6, esms="all"):

def paths_directories(self):

# ---------------------------------------------------------------------------------
# PATHS

# path to mesmer root directory can be found in a slightly sneaky way
Expand All @@ -53,7 +57,6 @@ def paths_directories(self):
MESMER_ROOT, "tests", "test-data", "calibrate-coarse-grid"
)

# ---------------------------------------------------------------------------------
# DIRECTORIES
# checking if any is unknown:
for key_path in self.paths:
Expand All @@ -68,7 +71,8 @@ def paths_directories(self):
"dir_plots",
]:
raise Exception(
'Unknown type of directory provided in "paths", please check available options.'
'Unknown type of directory provided in "paths", please check'
" available options."
)

# cmip-ng
Expand Down Expand Up @@ -123,14 +127,11 @@ def paths_directories(self):
else:
self.dir_plots = "/net/exo/landclim/yquilcaille/across_scen_T/plots/"

# ---------------------------------------------------------------------------------
return

def flex_config(self):

# ---------------------------------------------------------------------------------
# ESMs

if self.paths == {}:
# running in test mode, only using this ESM
self.esms = ["IPSL-CM6A-LR"]
Expand Down Expand Up @@ -163,21 +164,23 @@ def flex_config(self):
"NorESM2-MM",
"UKESM1-0-LL",
]
# tmp removed (need to investigate stms soon how can get them in too!):
# -CAMS-CSM1-0 (train_lt did not work: nans?!)
# -CIESM (sth wrong with GHFDS)
# -"EC-Earth3" (sth wrong when reading in files, index issue)

# temporarily removed
# - "CAMS-CSM1-0" (train_lt did not work: nans?!)
# - "CIESM" (sth wrong with GHFDS)
# - "EC-Earth3" (sth wrong when reading in files, index issue)
# - "EC-Earth3-Veg" (probably sth wrong with GHFDS)
# - "EC-Earth3-Veg-LR" (didn't even try. Just assumed same problem)
# - "GFDL-CM4" (probably sth wrong with GHFDS)
# - "GFDL-ESM4" (didn't even try. Just assumed same problem)
# - "GISS-E2-1-G" (sth wrong when reading in files, index issue)

# TODO: need to investigate sometime soon how we can get them in too!
# Check update on this aspect on slack Yann-Lea

else:
pass # nothing to change, esms is used as provided.

# ---------------------------------------------------------------------------------
# Variables, ensembles, regions
self.targs = ["tas"] # emulated variables

Expand All @@ -186,7 +189,6 @@ def flex_config(self):

self.reg_type = "ar6.land"

# ---------------------------------------------------------------------------------
# Time
self.ref = {}
self.ref["type"] = "individ" # alternatives: 'first','all'
Expand All @@ -198,26 +200,27 @@ def flex_config(self):
self.time = {}
# first included year
self.time["start"] = "1850"
# last included year #TODO: check if even used anywhere??
# last included year
# TODO: check if even used anywhere??
self.time["end"] = "2100"

# ---------------------------------------------------------------------------------
# Parameters
self.threshold_land = 1 / 3

# if True weigh each scenario equally (ie less weight to individ runs of scens
# with more ic members)
self.wgt_scen_tr_eq = True
# if True weigh each scenario equally (ie less weight to individ runs of scens with more ic members)

# temporarily made smaller for testing purposes. Normally 6000.
self.nr_emus_v = 1000
# tmp made smaller for testing purposes. Normally 6000.

# 0 meaning same emulations drawn for each scen, if put a number will have
# different ones for each scen
self.scen_seed_offset_v = 0
# 0 meaning same emulations drawn for each scen, if put a number will have different ones for each scen

# max. nr of iterations in cross validation, will increase later
self.max_iter_cv = 15

# ---------------------------------------------------------------------------------
# predictors (for global module)
self.preds = {}
self.preds["tas"] = {}
Expand All @@ -229,7 +232,6 @@ def flex_config(self):
self.preds["tas"]["gv"] = []
self.preds["tas"]["g_all"] = self.preds["tas"]["gt"] + self.preds["tas"]["gv"]

# ---------------------------------------------------------------------------------
# methods (for all modules)
self.methods = {}

Expand Down Expand Up @@ -304,8 +306,10 @@ def flex_config(self):

def nonflex_config(self):

# ---------------------------------------------------------------------------------
# list of scenarios that could be considered. Right now, complying to previous scripts of configurations, passing scenarios as set configuration, but could be passed in flexible configurations.
# list of scenarios that could be considered. Right now, complying to previous
# scripts of configurations, passing scenarios as set configuration, but could
# be passed in flexible configurations.

if self.paths == {}:
# running in test mode, only using this ESM
self.scenarios = ["h-ssp126"]
Expand All @@ -322,7 +326,6 @@ def nonflex_config(self):
"h-ssp119",
]

# ---------------------------------------------------------------------------------
# Emulations of scenarios and seeds
if self.scen_seed_offset_v == 0:
self.scenarios_emus_v = ["all"]
Expand Down Expand Up @@ -351,6 +354,9 @@ def nonflex_config(self):
# information about loaded data:

# cmip-ng
# Data downloaded from ESGF (https://esgf-node.llnl.gov/projects/esgf-llnl/) and pre-processed according to Brunner et al. 2020 (https://doi.org/10.5281/zenodo.3734128)
# assumes folder structure / file name as in cmip-ng archives at ETHZ -> see mesmer.io.load_cmipng.file_finder_cmipng() for details
# - global mean stratospheric AOD, monthly, 1850-"2020" (0 after 2012), downloaded from KNMI climate explorer in August 2020, no pre-processing
# Data downloaded from ESGF (https://esgf-node.llnl.gov/projects/esgf-llnl/) and
# pre-processed according to Brunner et al. [2020](https://doi.org/10.5281/zenodo.3734128)
# assumes folder structure / file name as in cmip-ng archives at ETHZ
# -> see mesmer.io.load_cmipng.file_finder_cmipng() for details
# - global mean stratospheric AOD, monthly, 1850-"2020" (0 after 2012),
# downloaded from KNMI climate explorer in August 2020, no pre-processing
6 changes: 4 additions & 2 deletions mesmer/mesmer_x/temporary_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ def load_inputs_MESMERx(cfg, variables, esms):
pred_g, reg_dict, wgt_g, ls, threshold_land=cfg.threshold_land
)

# prepare the auxiliary files. better results with default values L, but like this much faster + less space needed
# prepare the auxiliary files. better results with default values L, but like this
# much faster + less space needed
phi_gc = load_phi_gc(lon, lat, ls, cfg, L_start=1750, L_end=10000, L_interval=250)

lon_mesh, lat_mesh = np.meshgrid(lon["c"], lat["c"])
Expand All @@ -108,7 +109,8 @@ def load_inputs_MESMERx(cfg, variables, esms):
gp2reg = reg_dict["grids"][:, ind[0], ind[1]] # grid points to regions
ww_reg = np.nansum((ls["wgt_gp_l"] * gp2reg).T, axis=0)

# Just checking what ESMs are actually used. Some are removed because not having all drivers
# Just checking what ESMs are actually used. Some are removed because not having all
# drivers
used_esms = [esm for esm in esms if len(PRED[esm].keys()) > 0]

# adding few lines for SMA, because some values in NaN or inf!
Expand Down
Loading

0 comments on commit 9fcb6be

Please sign in to comment.