MESMER-X: reflow and fix comments and docstrings (MESMER-group#465)

* MESMER-X: more formatting
* MESMER-X: reflow and fix comments and docstrings
* fixed patch add
* another line
* Expression: docstring to __init__
* more velin fixes
mathause · Jun 13, 2024 · 9fcb6be · 9fcb6be
1 parent ea98d40
commit 9fcb6be
Show file tree

Hide file tree

Showing 6 changed files with 588 additions and 398 deletions.
diff --git a/mesmer/mesmer_x/OLD_train_l_distrib.py b/mesmer/mesmer_x/OLD_train_l_distrib.py
@@ -83,21 +83,16 @@ def train_l_distrib(preds, targs, cfg, form_fit_distrib, save_params=True, **kwa
     ----------
     preds : dict
         dictionary[target][name_covariant] = covariant. The covariants must be dictionaries with scenarios as keys.
-
     targs : dict
         nested dictionary of targets with keys
         - [targ][scen] (3d array (run, time, gp) of target for specific scenario)
-
     cfg : module
         config file containing metadata
-
     form_fit_distrib : str
         string containing information on which fit to do: transformation and evolution of parameters
         Examples: transfo-_loc-gttasL-gthfdsL_scale-_shape-gttasS; transfo-logistic_loc-gttasL_scale-
-
     save_params : bool, optional
         determines if parameters are saved or not, default = True
-
     **kwargs : additional parameters that will be fed to the class 'distrib_cov' for fit of a distribution
 
     Returns
@@ -307,14 +302,11 @@ def transf_distrib2normal(preds, targs, params_l_distrib, threshold_sigma=6.0):
     preds : dict
         nested dictionary with 3 keys: cov_loc, cov_scale, cov_shape. Each one may be empty for no variation of the parameter of the distribution. If not empty, the variables will be used as covariants.
         - [targ][cov_...][covariant][scen]  (1d array (time) of predictor for specific scenario)
-
     targs : dict
         nested dictionary of targets with keys
         - [targ][scen] (3d array (run, time, gp) of target for specific scenario)
-
     params_l_distrib : dict
         nested dictionary of local variability parameters. Result of the function 'train_l_distrib'.
-
     threshold_sigma : float
         If a distribution is not correctly fitted, some values to transform may lead to unlikely values for a standard normal distribution. If above, they will be set to the threshold. Default : 6 (~happens once every 1e9 times)
 
@@ -511,7 +503,8 @@ def transf_distrib2normal(preds, targs, params_l_distrib, threshold_sigma=6.0):
             print(
                 "WARNING: some transformed values of "
                 + var_targ
-                + " are very unlikely, a possible cause is a fit missing strong signals. Action taken: blocking them at a limit."
+                + " are very unlikely, a possible cause is a fit missing strong"
+                " signals. Action taken: blocking them at a limit."
             )
 
     return transf_inputs
@@ -1785,7 +1778,8 @@ def fit(self):
                 # checking if that one failed as well
                 if self.error_failedfit and not m.success:
                     raise Exception(
-                        "The fast detrend provides with a valid first guess, but not good enough."
+                        "The fast detrend provides with a valid first guess, but not"
+                        " good enough."
                     )
 
             return self.translate_m_sol(m.x)

diff --git a/mesmer/mesmer_x/create_emus_l_distrib.py b/mesmer/mesmer_x/create_emus_l_distrib.py
@@ -34,14 +34,11 @@ def backtransf_normal2distrib(transf_emus_lv, preds, params_distrib, force_scen=
     transf_emus_lv : dict
         nested dictionary for transformed emulators, result of 'create_emus_lv'
         - [targ][scen] (3d array (emus, time, gp) of target for specific scenario)
-
     preds : dict
         nested dictionary with 3 keys: cov_loc, cov_scale, cov_shape. Each one may be empty for no variation of the parameter of the distribution. If not empty, the variables will be used as covariants.
         - [targ][cov_...][covariant][scen]  (1d array (time) of predictor for specific scenario)
-
     params_distrib : dict
         nested dictionary of local variability parameters. Result of the function 'train_l_distrib'.
-
     force_scen : None or iterable (list, set, 1d array)
         Used to prescribe a specific list of scenarios. If None, they will be deduced from the covariants. Important if no covariants, otherwise the parameters would not depend on scenarios, but using 'force_scen', we can have the desired scenarios.
 
@@ -63,7 +60,8 @@ def backtransf_normal2distrib(transf_emus_lv, preds, params_distrib, force_scen=
     # checking that the provided inputs are transformed.
     if list(transf_emus_lv.keys()) != ["all"]:
         raise Exception(
-            "Data to backtransform must be emulations from 'create_emus_lv' with only the key 'all'"
+            "Data to backtransform must be emulations from 'create_emus_lv' with only"
+            " the key 'all'"
         )
 
     # creating the dictionary that will be filled in
@@ -111,7 +109,9 @@ def backtransf_normal2distrib(transf_emus_lv, preds, params_distrib, force_scen=
             # checking if different scenarios are provided
             elif np.any(list_scens != maybe_scens):
                 raise Exception(
-                    "The different covariants for the parameters have different list of scenarios, please provide the same ones. NB: 'all' applies to all other scenarios, thus did not cause this issue."
+                    "The different covariants for the parameters have different list of"
+                    " scenarios, please provide the same ones. NB: 'all' applies to all"
+                    " other scenarios, thus did not cause this issue."
                 )
 
         if force_scen is not None:

diff --git a/mesmer/mesmer_x/temporary_config_all.py b/mesmer/mesmer_x/temporary_config_all.py
@@ -7,19 +7,24 @@
 
 
 class ConfigMesmerX:
-    """
-    This class defines the full configuration of MESMER.
-
-    Inputs:
-     - paths: information on what paths to use.
-        If nothing is provided in 'paths', default is to assume paths for tests.
-        If any known keyword is provided in 'paths', they will be used instead of default.
-        Unknown keywords in 'paths' cause an error.
-     - gen: generation of CMIP data (default: 6). If some paths are provided in 'paths', it MUST be consistent with 'gen'.
-     - esms: list of the ESMs used. The default is 'all', BUT if no paths are provided, it is assumed that only data for tests are used, then only ["IPSL-CM6A-LR"].
-    """
 
     def __init__(self, paths={}, gen=6, esms="all"):
+        """full configuration of MESMER.
+
+        Parameters
+        ----------
+        paths : dict
+            Information on what paths to use. If nothing is provided in 'paths', default
+            is to assume paths for tests. If any known keyword is provided in 'paths',
+            they will be used instead of default.
+            Unknown keywords in 'paths' cause an error.
+        gen : int, default: 6
+            Generation of CMIP data. If some paths are provided in 'paths', it MUST be
+            consistent with 'gen'.
+        esms : list of str, default: "all"
+            The ESMs used. The default is 'all', BUT if no paths are provided, it is
+            assumed that only data for tests are used, then only ["IPSL-CM6A-LR"].
+        """
 
         # preparing some parameters
         self.paths = paths
@@ -39,7 +44,6 @@ def __init__(self, paths={}, gen=6, esms="all"):
 
     def paths_directories(self):
 
-        # ---------------------------------------------------------------------------------
         # PATHS
 
         # path to mesmer root directory can be found in a slightly sneaky way
@@ -53,7 +57,6 @@ def paths_directories(self):
             MESMER_ROOT, "tests", "test-data", "calibrate-coarse-grid"
         )
 
-        # ---------------------------------------------------------------------------------
         # DIRECTORIES
         # checking if any is unknown:
         for key_path in self.paths:
@@ -68,7 +71,8 @@ def paths_directories(self):
                 "dir_plots",
             ]:
                 raise Exception(
-                    'Unknown type of directory provided in "paths", please check available options.'
+                    'Unknown type of directory provided in "paths", please check'
+                    " available options."
                 )
 
         # cmip-ng
@@ -123,14 +127,11 @@ def paths_directories(self):
         else:
             self.dir_plots = "/net/exo/landclim/yquilcaille/across_scen_T/plots/"
 
-        # ---------------------------------------------------------------------------------
         return
 
     def flex_config(self):
 
-        # ---------------------------------------------------------------------------------
         # ESMs
-
         if self.paths == {}:
             # running in test mode, only using this ESM
             self.esms = ["IPSL-CM6A-LR"]
@@ -163,21 +164,23 @@ def flex_config(self):
                 "NorESM2-MM",
                 "UKESM1-0-LL",
             ]
-            # tmp removed (need to investigate stms soon how can get them in too!):
-            # -CAMS-CSM1-0 (train_lt did not work: nans?!)
-            # -CIESM (sth wrong with GHFDS)
-            # -"EC-Earth3" (sth wrong when reading in files, index issue)
+
+            # temporarily removed
+            # - "CAMS-CSM1-0" (train_lt did not work: nans?!)
+            # - "CIESM" (sth wrong with GHFDS)
+            # - "EC-Earth3" (sth wrong when reading in files, index issue)
             # - "EC-Earth3-Veg" (probably sth wrong with GHFDS)
             # - "EC-Earth3-Veg-LR" (didn't even try. Just assumed same problem)
             # - "GFDL-CM4" (probably sth wrong with GHFDS)
             # - "GFDL-ESM4" (didn't even try. Just assumed same problem)
             # - "GISS-E2-1-G" (sth wrong when reading in files, index issue)
+
+            # TODO: investigate soon how these ESMs can be included as well.
             # Check update on this aspect on slack Yann-Lea
 
         else:
             pass  # nothing to change, esms is used as provided.
 
-        # ---------------------------------------------------------------------------------
         # Variables, ensembles, regions
         self.targs = ["tas"]  # emulated variables
 
@@ -186,7 +189,6 @@ def flex_config(self):
 
         self.reg_type = "ar6.land"
 
-        # ---------------------------------------------------------------------------------
         # Time
         self.ref = {}
         self.ref["type"] = "individ"  # alternatives: 'first','all'
@@ -198,26 +200,27 @@ def flex_config(self):
         self.time = {}
         # first included year
         self.time["start"] = "1850"
-        # last included year #TODO: check if even used anywhere??
+        # last included year
+        # TODO: check if even used anywhere??
         self.time["end"] = "2100"
 
-        # ---------------------------------------------------------------------------------
         # Parameters
         self.threshold_land = 1 / 3
 
+        # if True weigh each scenario equally (ie less weight to individ runs of scens
+        # with more ic members)
         self.wgt_scen_tr_eq = True
-        # if True weigh each scenario equally (ie less weight to individ runs of scens with more ic members)
 
+        # temporarily made smaller for testing purposes. Normally 6000.
         self.nr_emus_v = 1000
-        # tmp made smaller for testing purposes. Normally 6000.
 
+        # 0 meaning same emulations drawn for each scen, if put a number will have
+        # different ones for each scen
         self.scen_seed_offset_v = 0
-        # 0 meaning same emulations drawn for each scen, if put a number will have different ones for each scen
 
         # max. nr of iterations in cross validation, will increase later
         self.max_iter_cv = 15
 
-        # ---------------------------------------------------------------------------------
         # predictors (for global module)
         self.preds = {}
         self.preds["tas"] = {}
@@ -229,7 +232,6 @@ def flex_config(self):
         self.preds["tas"]["gv"] = []
         self.preds["tas"]["g_all"] = self.preds["tas"]["gt"] + self.preds["tas"]["gv"]
 
-        # ---------------------------------------------------------------------------------
         # methods (for all modules)
         self.methods = {}
 
@@ -304,8 +306,10 @@ def flex_config(self):
 
     def nonflex_config(self):
 
-        # ---------------------------------------------------------------------------------
-        # list of scenarios that could be considered. Right now, complying to previous scripts of configurations, passing scenarios as set configuration, but could be passed in flexible configurations.
+        # list of scenarios that could be considered. Right now, complying to previous
+        # scripts of configurations, passing scenarios as set configuration, but could
+        # be passed in flexible configurations.
+
         if self.paths == {}:
             # running in test mode, only using this ESM
             self.scenarios = ["h-ssp126"]
@@ -322,7 +326,6 @@ def nonflex_config(self):
                 "h-ssp119",
             ]
 
-        # ---------------------------------------------------------------------------------
         # Emulations of scenarios and seeds
         if self.scen_seed_offset_v == 0:
             self.scenarios_emus_v = ["all"]
@@ -351,6 +354,9 @@ def nonflex_config(self):
 # information about loaded data:
 
 # cmip-ng
-# Data downloaded from ESGF (https://esgf-node.llnl.gov/projects/esgf-llnl/) and pre-processed according to Brunner et al. 2020 (https://doi.org/10.5281/zenodo.3734128)
-# assumes folder structure / file name as in cmip-ng archives at ETHZ -> see mesmer.io.load_cmipng.file_finder_cmipng() for details
-# - global mean stratospheric AOD, monthly, 1850-"2020" (0 after 2012), downloaded from KNMI climate explorer in August 2020, no pre-processing
+# Data downloaded from ESGF (https://esgf-node.llnl.gov/projects/esgf-llnl/) and
+# pre-processed according to Brunner et al. [2020](https://doi.org/10.5281/zenodo.3734128)
+# assumes folder structure / file name as in cmip-ng archives at ETHZ
+# -> see mesmer.io.load_cmipng.file_finder_cmipng() for details
+# - global mean stratospheric AOD, monthly, 1850-"2020" (0 after 2012),
+# downloaded from KNMI climate explorer in August 2020, no pre-processing
diff --git a/mesmer/mesmer_x/temporary_support.py b/mesmer/mesmer_x/temporary_support.py
@@ -98,7 +98,8 @@ def load_inputs_MESMERx(cfg, variables, esms):
         pred_g, reg_dict, wgt_g, ls, threshold_land=cfg.threshold_land
     )
 
-    # prepare the auxiliary files. better results with default values L, but like this much faster + less space needed
+    # prepare the auxiliary files. better results with default values L, but like this
+    # much faster + less space needed
     phi_gc = load_phi_gc(lon, lat, ls, cfg, L_start=1750, L_end=10000, L_interval=250)
 
     lon_mesh, lat_mesh = np.meshgrid(lon["c"], lat["c"])
@@ -108,7 +109,8 @@ def load_inputs_MESMERx(cfg, variables, esms):
     gp2reg = reg_dict["grids"][:, ind[0], ind[1]]  # grid points to regions
     ww_reg = np.nansum((ls["wgt_gp_l"] * gp2reg).T, axis=0)
 
-    # Just checking what ESMs are actually used. Some are removed because not having all drivers
+    # Just checking what ESMs are actually used. Some are removed because not having all
+    # drivers
     used_esms = [esm for esm in esms if len(PRED[esm].keys()) > 0]
 
     # adding few lines for SMA, because some values in NaN  or inf!