From 670707a07221edcd28a74219c70f64815421eb99 Mon Sep 17 00:00:00 2001
From: Mattia Almansi <m.almansi@bopen.eu>
Date: Mon, 15 Apr 2024 16:32:45 +0200
Subject: [PATCH] add energy

---
 notebooks/wp4/cmip6_energy_indices.ipynb | 427 +++++++++++++++++++++++
 1 file changed, 427 insertions(+)
 create mode 100644 notebooks/wp4/cmip6_energy_indices.ipynb

diff --git a/notebooks/wp4/cmip6_energy_indices.ipynb b/notebooks/wp4/cmip6_energy_indices.ipynb
new file mode 100644
index 0000000..82c8606
--- /dev/null
+++ b/notebooks/wp4/cmip6_energy_indices.ipynb
@@ -0,0 +1,427 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0",
+   "metadata": {},
+   "source": [
+    "# Energy-consumption-related indices from CMIP6 Global Climate Models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1",
+   "metadata": {},
+   "source": [
+    "## Import packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tempfile\n",
+    "\n",
+    "import icclim\n",
+    "import matplotlib.pyplot as plt\n",
+    "import xarray as xr\n",
+    "from c3s_eqc_automatic_quality_control import diagnostics, download\n",
+    "from xarrayMannKendall import Mann_Kendall_test\n",
+    "\n",
+    "plt.style.use(\"seaborn-v0_8-notebook\")\n",
+    "plt.rcParams[\"hatch.linewidth\"] = 0.5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3",
+   "metadata": {},
+   "source": [
+    "## Define parameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Time period\n",
+    "year_start = 1971\n",
+    "year_stop = 2000\n",
+    "assert year_start >= 1971\n",
+    "\n",
+    "# Choose annual or seasonal timeseries\n",
+    "index_timeseries = {\n",
+    "    \"HDD15.5\": \"DJF\",\n",
+    "    \"CDD22\": \"JJA\",\n",
+    "}\n",
+    "if \"annual\" in index_timeseries.values():\n",
+    "    assert set(index_timeseries.values()) == {\"annual\"}\n",
+    "\n",
+    "# Interpolation method\n",
+    "interpolation_method = \"bilinear\"\n",
+    "\n",
+    "# Area to show\n",
+    "area = [72, -22, 27, 45]\n",
+    "\n",
+    "# Chunks for download\n",
+    "chunks = {\"year\": 1}\n",
+    "\n",
+    "# Define models\n",
+    "models_cmip6 = (\n",
+    "    \"access_cm2\",\n",
+    "    \"awi_cm_1_1_mr\",\n",
+    "    \"cmcc_esm2\",\n",
+    "    \"cnrm_cm6_1_hr\",\n",
+    "    \"ec_earth3_cc\",\n",
+    "    \"gfdl_esm4\",\n",
+    "    \"inm_cm5_0\",\n",
+    "    \"miroc6\",\n",
+    "    \"mpi_esm1_2_lr\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5",
+   "metadata": {},
+   "source": [
+    "## Define ERA5 request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "request_era = (\n",
+    "    \"reanalysis-era5-single-levels\",\n",
+    "    {\n",
+    "        \"product_type\": \"reanalysis\",\n",
+    "        \"format\": \"netcdf\",\n",
+    "        \"time\": [f\"{hour:02d}:00\" for hour in range(24)],\n",
+    "        \"variable\": \"2m_temperature\",\n",
+    "        \"year\": [\n",
+    "            str(year) for year in range(year_start - 1, year_stop + 1)\n",
+    "        ],  # Include D(year-1)\n",
+    "        \"month\": [f\"{month:02d}\" for month in range(1, 13)],\n",
+    "        \"day\": [f\"{day:02d}\" for day in range(1, 32)],\n",
+    "        \"area\": area,\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "request_lsm = (\n",
+    "    request_era[0],\n",
+    "    request_era[1]\n",
+    "    | {\n",
+    "        \"year\": \"1940\",\n",
+    "        \"month\": \"01\",\n",
+    "        \"day\": \"01\",\n",
+    "        \"time\": \"00:00\",\n",
+    "        \"variable\": \"land_sea_mask\",\n",
+    "    },\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7",
+   "metadata": {},
+   "source": [
+    "## Define CMIP6 request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "request_cmip6 = {\n",
+    "    \"format\": \"zip\",\n",
+    "    \"temporal_resolution\": \"daily\",\n",
+    "    \"experiment\": \"historical\",\n",
+    "    \"variable\": \"near_surface_air_temperature\",\n",
+    "    \"year\": [\n",
+    "        str(year) for year in range(year_start - 1, year_stop + 1)\n",
+    "    ],  # Include D(year-1)\n",
+    "    \"month\": [f\"{month:02d}\" for month in range(1, 13)],\n",
+    "    \"day\": [f\"{day:02d}\" for day in range(1, 32)],\n",
+    "    \"area\": area,\n",
+    "}\n",
+    "\n",
+    "model_requests = {}\n",
+    "for model in models_cmip6:\n",
+    "    model_requests[model] = (\n",
+    "        \"projections-cmip6\",\n",
+    "        download.split_request(request_cmip6 | {\"model\": model}, chunks=chunks),\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9",
+   "metadata": {},
+   "source": [
+    "## Functions to cache"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def select_timeseries(ds, index_timeseries, year_start, year_stop):\n",
+    "    timeseries = set(index_timeseries.values())\n",
+    "    if timeseries == {\"annual\"}:\n",
+    "        return ds.sel(time=slice(str(year_start), str(year_stop)))\n",
+    "    assert \"annual\" not in timeseries\n",
+    "    return ds.sel(time=slice(f\"{year_start-1}-12\", f\"{year_stop}-11\"))\n",
+    "\n",
+    "\n",
+    "def compute_indices(ds, index_timeseries, tmpdir):\n",
+    "    labels, datasets = zip(*ds.groupby(\"time.year\"))\n",
+    "    paths = [f\"{tmpdir}/{label}.nc\" for label in labels]\n",
+    "    datasets = [ds.chunk(-1) for ds in datasets]\n",
+    "    xr.save_mfdataset(datasets, paths)\n",
+    "\n",
+    "    ds = xr.open_mfdataset(paths)\n",
+    "    in_files = f\"{tmpdir}/rechunked.zarr\"\n",
+    "    chunks = {dim: -1 if dim == \"time\" else \"auto\" for dim in ds.dims}\n",
+    "    ds.chunk(chunks).to_zarr(in_files)\n",
+    "\n",
+    "    datasets = []\n",
+    "    for index_name, timeseries in index_timeseries.items():\n",
+    "        kwargs = {\n",
+    "            \"in_files\": in_files,\n",
+    "            \"out_file\": f\"{tmpdir}/{index_name}.nc\",\n",
+    "            \"slice_mode\": \"year\" if timeseries == \"annual\" else timeseries,\n",
+    "        }\n",
+    "        if index_name == \"HDD15.5\":\n",
+    "            ds_index = icclim.index(\n",
+    "                **kwargs,\n",
+    "                index_name=\"deficit\",\n",
+    "                threshold=icclim.build_threshold(\"15.5 degC\"),\n",
+    "            )\n",
+    "        elif index_name == \"CDD22\":\n",
+    "            ds_index = icclim.excess(\n",
+    "                **kwargs,\n",
+    "                threshold=icclim.build_threshold(\"22 degC\"),\n",
+    "            )\n",
+    "        else:\n",
+    "            raise NotImplementedError(f\"{index_name=}\")\n",
+    "\n",
+    "        ds_index = ds_index.drop_dims(\"bounds\")\n",
+    "        num_days = {\"DJF\": 90, \"MAM\": 92, \"JJA\": 92, \"SON\": 91}\n",
+    "        with xr.set_options(keep_attrs=True):\n",
+    "            ds_index /= (\n",
+    "                num_days[timeseries]\n",
+    "                if timeseries != \"annual\"\n",
+    "                else sum(num_days.values())\n",
+    "            )\n",
+    "        datasets.append(ds_index)\n",
+    "\n",
+    "    ds = xr.merge(datasets)\n",
+    "    for da in ds.data_vars.values():\n",
+    "        da.attrs[\"units\"] = da.attrs[\"units\"].replace(\" d\", \"\")\n",
+    "    return ds\n",
+    "\n",
+    "\n",
+    "def compute_trends(ds):\n",
+    "    datasets = []\n",
+    "    (lat,) = set(ds.dims) & set(ds.cf.axes[\"Y\"])\n",
+    "    (lon,) = set(ds.dims) & set(ds.cf.axes[\"X\"])\n",
+    "    coords_name = {\n",
+    "        \"time\": \"time\",\n",
+    "        \"y\": lat,\n",
+    "        \"x\": lon,\n",
+    "    }\n",
+    "    for index, da in ds.data_vars.items():\n",
+    "        ds = Mann_Kendall_test(\n",
+    "            da - da.mean(\"time\"),\n",
+    "            alpha=0.05,\n",
+    "            method=\"theilslopes\",\n",
+    "            coords_name=coords_name,\n",
+    "        ).compute()\n",
+    "        ds = ds.rename({k: v for k, v in coords_name.items() if k in ds.dims})\n",
+    "        ds = ds.assign_coords({dim: da[dim] for dim in ds.dims})\n",
+    "        datasets.append(ds.expand_dims(index=[index]))\n",
+    "    ds = xr.concat(datasets, \"index\")\n",
+    "    return ds\n",
+    "\n",
+    "\n",
+    "def add_bounds(ds):\n",
+    "    for coord in {\"latitude\", \"longitude\"} - set(ds.cf.bounds):\n",
+    "        ds = ds.cf.add_bounds(coord)\n",
+    "    return ds\n",
+    "\n",
+    "\n",
+    "def get_grid_out(request_grid_out, method):\n",
+    "    ds_regrid = download.download_and_transform(*request_grid_out)\n",
+    "    coords = [\"latitude\", \"longitude\"]\n",
+    "    if method == \"conservative\":\n",
+    "        ds_regrid = add_bounds(ds_regrid)\n",
+    "        for coord in list(coords):\n",
+    "            coords.extend(ds_regrid.cf.bounds[coord])\n",
+    "    grid_out = ds_regrid[coords]\n",
+    "    coords_to_drop = set(grid_out.coords) - set(coords) - set(grid_out.dims)\n",
+    "    grid_out = ds_regrid[coords].reset_coords(coords_to_drop, drop=True)\n",
+    "    grid_out.attrs = {}\n",
+    "    return grid_out\n",
+    "\n",
+    "\n",
+    "def compute_indices_and_trends(\n",
+    "    ds,\n",
+    "    index_timeseries,\n",
+    "    year_start,\n",
+    "    year_stop,\n",
+    "    resample_reduction=None,\n",
+    "    request_grid_out=None,\n",
+    "    **regrid_kwargs,\n",
+    "):\n",
+    "    assert (request_grid_out and regrid_kwargs) or not (\n",
+    "        request_grid_out or regrid_kwargs\n",
+    "    )\n",
+    "    ds = ds.drop_vars([var for var, da in ds.data_vars.items() if len(da.dims) != 3])\n",
+    "    ds = ds[list(ds.data_vars)]\n",
+    "\n",
+    "    # Original bounds for conservative interpolation\n",
+    "    if regrid_kwargs.get(\"method\") == \"conservative\":\n",
+    "        ds = add_bounds(ds)\n",
+    "        bounds = [\n",
+    "            ds.cf.get_bounds(coord).reset_coords(drop=True)\n",
+    "            for coord in (\"latitude\", \"longitude\")\n",
+    "        ]\n",
+    "    else:\n",
+    "        bounds = []\n",
+    "\n",
+    "    ds = select_timeseries(ds, index_timeseries, year_start, year_stop)\n",
+    "    if resample_reduction:\n",
+    "        resampled = ds.resample(time=\"1D\")\n",
+    "        ds = getattr(resampled, resample_reduction)(keep_attrs=True)\n",
+    "        if resample_reduction == \"sum\":\n",
+    "            for da in ds.data_vars.values():\n",
+    "                da.attrs[\"units\"] = f\"{da.attrs['units']} / day\"\n",
+    "    with tempfile.TemporaryDirectory() as tmpdir:\n",
+    "        ds_indices = compute_indices(ds, index_timeseries, tmpdir).compute()\n",
+    "        ds_trends = compute_trends(ds_indices)\n",
+    "        ds = ds_indices.mean(\"time\", keep_attrs=True)\n",
+    "        ds = ds.merge(ds_trends)\n",
+    "        if request_grid_out:\n",
+    "            ds = diagnostics.regrid(\n",
+    "                ds.merge({da.name: da for da in bounds}),\n",
+    "                grid_out=get_grid_out(request_grid_out, regrid_kwargs[\"method\"]),\n",
+    "                **regrid_kwargs,\n",
+    "            )\n",
+    "        return ds"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "11",
+   "metadata": {},
+   "source": [
+    "## Download and transform ERA5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transform_func_kwargs = {\n",
+    "    \"index_timeseries\": dict(sorted(index_timeseries.items())),\n",
+    "    \"year_start\": year_start,\n",
+    "    \"year_stop\": year_stop,\n",
+    "}\n",
+    "ds_era5 = download.download_and_transform(\n",
+    "    *request_era,\n",
+    "    chunks=chunks,\n",
+    "    transform_chunks=False,\n",
+    "    transform_func=compute_indices_and_trends,\n",
+    "    transform_func_kwargs=transform_func_kwargs | {\"resample_reduction\": \"mean\"},\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "13",
+   "metadata": {},
+   "source": [
+    "## Download and transform models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "interpolated_datasets = []\n",
+    "model_datasets = {}\n",
+    "for model, requests in model_requests.items():\n",
+    "    print(f\"{model=}\")\n",
+    "    model_kwargs = {\n",
+    "        \"chunks\": chunks,\n",
+    "        \"transform_chunks\": False,\n",
+    "        \"transform_func\": compute_indices_and_trends,\n",
+    "    }\n",
+    "    # Original model\n",
+    "    model_datasets[model] = download.download_and_transform(\n",
+    "        *requests,\n",
+    "        **model_kwargs,\n",
+    "        transform_func_kwargs=transform_func_kwargs,\n",
+    "    )\n",
+    "\n",
+    "    # Interpolated model\n",
+    "    ds = download.download_and_transform(\n",
+    "        *requests,\n",
+    "        **model_kwargs,\n",
+    "        transform_func_kwargs=transform_func_kwargs\n",
+    "        | {\n",
+    "            \"request_grid_out\": request_lsm,\n",
+    "            \"method\": interpolation_method,\n",
+    "            \"skipna\": True,\n",
+    "        },\n",
+    "    )\n",
+    "    interpolated_datasets.append(ds.expand_dims(model=[model]))\n",
+    "\n",
+    "ds_interpolated = xr.concat(interpolated_datasets, \"model\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}