Merge branch 'master' of github.com:pangeo-data/climpred

pangeo-data · Feb 22, 2021 · 77237b5 · 77237b5
2 parents 36d128b + 14b7d90
commit 77237b5
Show file tree

Hide file tree

Showing 5 changed files with 93 additions and 34 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -24,6 +24,8 @@ Bug fixes
   (:issue:`532`, :pr:`533`) `Aaron Spring`_.
 - Adapt to ``xesmf>=0.5.2`` for spatial xesmf smoothing. (:issue:`543`, :pr:`548`)
   `Aaron Spring`_.
+- :py:meth:`~climpred.classes.HindcastEnsemble.remove_bias` now carries attributes.
+  (:issue:`531`, :pr:`551`) `Aaron Spring`_.
 
 
 climpred v2.1.1 (2020-10-13)

diff --git a/climpred/bias_removal.py b/climpred/bias_removal.py
@@ -17,9 +17,11 @@ def _mean_bias_removal_quick(hind, bias, dim):
         xr.object: bias removed hind
 
     """
-    bias_removed_hind = (
-        hind.groupby(f"{dim}.dayofyear") - bias.groupby(f"{dim}.dayofyear").mean()
-    )
+    with xr.set_options(keep_attrs=True):
+        bias_removed_hind = (
+            hind.groupby(f"{dim}.dayofyear") - bias.groupby(f"{dim}.dayofyear").mean()
+        )
+    bias_removed_hind.attrs = hind.attrs
     return bias_removed_hind
 
 
@@ -56,11 +58,16 @@ def _mean_bias_removal_cross_validate(hind, bias, dim):
             f"{hind_drop_init_where_bias.min().values}-"
             f"{hind_drop_init_where_bias.max().values}"
         )
-        bias_removed_hind.append(
-            hind.sel(init=init)
-            - bias.sel(init=hind_drop_init_where_bias).groupby("init.dayofyear").mean()
-        )
+        with xr.set_options(keep_attrs=True):
+            init_bias_removed = (
+                hind.sel(init=init)
+                - bias.sel(init=hind_drop_init_where_bias)
+                .groupby("init.dayofyear")
+                .mean()
+            )
+        bias_removed_hind.append(init_bias_removed)
     bias_removed_hind = xr.concat(bias_removed_hind, "init")
+    bias_removed_hind.attrs = hind.attrs
     return bias_removed_hind
 
 

diff --git a/climpred/tests/test_HindcastEnsemble_class.py b/climpred/tests/test_HindcastEnsemble_class.py
@@ -105,25 +105,62 @@ def test_inplace(
 
 @pytest.mark.parametrize("alignment", ["same_inits", "same_verifs", "maximize"])
 def test_mean_remove_bias(hindcast_hist_obs_1d, alignment):
-    """Test remove mean bias."""
+    """Test remove mean bias, ensure that skill doesn't degrade and keeps attrs."""
     how = "mean"
     metric = "rmse"
     dim = "init"
     comparison = "e2o"
     hindcast = hindcast_hist_obs_1d
-    biased_skill = hindcast.verify(
-        metric=metric, alignment=alignment, dim=dim, comparison=comparison
+    hindcast._datasets["initialized"].attrs["test"] = "test"
+    hindcast._datasets["initialized"]["SST"].attrs["units"] = "test_unit"
+    verify_kwargs = dict(
+        metric=metric,
+        alignment=alignment,
+        dim=dim,
+        comparison=comparison,
+        keep_attrs=True,
     )
-    bias_removed_skill = hindcast.remove_bias(
+
+    biased_skill = hindcast.verify(**verify_kwargs)
+
+    hindcast_bias_removed = hindcast.remove_bias(
         how=how, alignment=alignment, cross_validate=False
-    ).verify(metric=metric, alignment=alignment, dim=dim, comparison=comparison)
-    bias_removed_skill_properly = hindcast.remove_bias(
+    )
+    bias_removed_skill = hindcast_bias_removed.verify(**verify_kwargs)
+
+    hindcast_bias_removed_properly = hindcast.remove_bias(
         how=how, cross_validate=True, alignment=alignment
-    ).verify(metric=metric, alignment=alignment, dim=dim, comparison=comparison)
+    )
+    bias_removed_skill_properly = hindcast_bias_removed_properly.verify(**verify_kwargs)
+
     assert "dayofyear" not in bias_removed_skill_properly.coords
     assert biased_skill > bias_removed_skill
     assert biased_skill > bias_removed_skill_properly
     assert bias_removed_skill_properly >= bias_removed_skill
+    # keeps data_vars attrs
+    for v in hindcast_bias_removed.get_initialized().data_vars:
+        assert (
+            hindcast_bias_removed_properly.get_initialized()[v].attrs
+            == hindcast.get_initialized()[v].attrs
+        )
+        assert (
+            hindcast_bias_removed.get_initialized()[v].attrs
+            == hindcast.get_initialized()[v].attrs
+        )
+    # keeps dataset attrs
+    assert (
+        hindcast_bias_removed_properly.get_initialized().attrs
+        == hindcast.get_initialized().attrs
+    )
+    assert (
+        hindcast_bias_removed.get_initialized().attrs
+        == hindcast.get_initialized().attrs
+    )
+    # keep lead attrs
+    assert (
+        hindcast_bias_removed.get_initialized().lead.attrs
+        == hindcast.get_initialized().lead.attrs
+    )
 
 
 def test_verify_metric_kwargs(hindcast_hist_obs_1d):

diff --git a/joss/paper.bib b/joss/paper.bib
@@ -69,23 +69,24 @@ @article{DelSole:2016
   number = {2},
 }
 
-@misc{Eynard:2019,
-type = "Proceedings paper",
-year = "2019",
-title = "The {Pangeo} Big Data Ecosystem and its use at {CNES}",
-journal = "",
-editor = "",
-volume = "",
-number = "",
-pages = "",
-author = "Eynard-Bontemps Guillaume, Abernathey Ryan, Hamman Joseph, Ponte Aurelien, Rath Willi",
-url = "https://archimer.ifremer.fr/doc/00503/61441/",
-organization = "",
-address = "France, USA, Germany",
-abstract = "<p>Pangeo[1] is a community-driven effort for open-source big data initially focused on the Earth System Sciences. One of its primary goals is to enable scientists in analyzing petascale datasets both on classical high-performance computing (HPC) and on public cloud infrastructure. In only a few years, Pangeo has grown into a very productive community collaborating on the development of open-source analysis tools for science. It provides a set of example deployments based on open-source Scientific Python packages like Jupyter[2], Dask[3], and Xarray[4] that bring together scientists and developer with their actual use-cases. In this paper, we first describe Pangeo ecosystem and community. We then present its impact on the work of scientists from CNES on the HPC deployment there. We conclude with a future outlook for Pangeo in this agency and beyond.</p>",
-key = ""
+@article{Abernathey:2017,
+  title = {Pangeo {{NSF Earthcube Proposal}}},
+  author = {Abernathey, Ryan and Paul, Kevin and Hamman, Joe and Rocklin, Matthew and Lepore, Chiara and Tippett, Michael and Henderson, Naomi and Seager, Richard and May, Ryan and Del Vento, Davide},
+  year = {2017},
+  publisher = {{figshare}},
+  doi = {10/gh3ts4},
+  language = {en}
 }
 
+@misc{Eynard:2019,
+  type = "Proceedings paper",
+  year = "2019",
+  title = "The {Pangeo} Big Data Ecosystem and its use at {CNES}",
+  author = "Eynard-Bontemps, Guillaume and Abernathey, Ryan and Hamman, Joseph and Ponte, Aurelien and Rath, Willi",
+  url = "https://archimer.ifremer.fr/doc/00503/61441/",
+  doi = "10.2760/848593",
+  isbn = "978-92-76-00034-1",
+}
 
 @article{Eyring:2016,
   title = {Overview of the {{Coupled Model Intercomparison Project Phase}} 6 ({{CMIP6}}) Experimental Design and Organization},
@@ -357,6 +358,18 @@ @article{Spring:2020
   number = {9},
 }
 
+
+@article{Spring:2021,
+  title = {Trivial Improvements of Predictive Skill Due to Direct Reconstruction of Global Carbon Cycle},
+  author = {Spring, Aaron and Dunkl, Istv{\'a}n and Li, Hongmei and Brovkin, Victor and Ilyina, Tatiana},
+  year = {2021},
+  pages = {1--36},
+  issn = {2190-4979},
+  doi = {10/gh3tn3},
+  journal = {Earth System Dynamics Discussions},
+  language = {English}
+}
+
 @article{Yeager:2018,
   title = {Predicting Near-Term Changes in the {{Earth System}}: {{A}} Large Ensemble of Initialized Decadal Prediction Simulations Using the {{Community Earth System Model}}},
   shorttitle = {Predicting Near-Term Changes in the {{Earth System}}},

diff --git a/joss/paper.md b/joss/paper.md
@@ -78,7 +78,7 @@ that always required pre-processing. This time scale of seconds allows for a tru
 interactive analysis experience.
 
 5. `climpred` is part of the wider scientific python community, `pangeo`
-[@Eynard:2019]. A wide adoption of `climpred` could standardize prediction model
+[@Abernathey:2017; @Eynard:2019]. A wide adoption of `climpred` could standardize prediction model
 evaluation and make verification reproducible [@Irving:2015].
 
 <!-- 207 words -->
@@ -155,7 +155,7 @@ are not held constant for each lead.
 2. Use the identical set of initializations that can verify over the given observational
 window at all leads. However, the verification dates change at each lead.
 
-(3) Use the identical verification window at each lead, while allowing the set of
+3. Use the identical verification window at each lead, while allowing the set of
 initializations used at each lead to change.
 
 These strategies are shown graphically and explained in more
@@ -168,9 +168,9 @@ and a graphics library.
 # Use in Academic Literature
 `climpred` has been used to drive analysis in three academic papers so far. @Brady:2020
 used the `HindcastEnsemble` class to highlight multi-year predictability of ocean
-acidification in the California Current; @Spring:2020 used the `PerfectModelEnsemble`
-class to highlight predictability horizons in the global carbon cycle; and
-@Krumhardt:2020 used the `HindcastEnsemble` class to illuminate multi-year
+acidification in the California Current; @Spring:2020 and @Spring:2021 used the
+`PerfectModelEnsemble` class to highlight predictability horizons in the global carbon
+cycle; and @Krumhardt:2020 used the `HindcastEnsemble` class to illuminate multi-year
 predictability in marine Net Primary Productivity.
 
 # Acknowledgements