From 670707a07221edcd28a74219c70f64815421eb99 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Mon, 15 Apr 2024 16:32:45 +0200 Subject: [PATCH] add energy --- notebooks/wp4/cmip6_energy_indices.ipynb | 427 +++++++++++++++++++++++ 1 file changed, 427 insertions(+) create mode 100644 notebooks/wp4/cmip6_energy_indices.ipynb diff --git a/notebooks/wp4/cmip6_energy_indices.ipynb b/notebooks/wp4/cmip6_energy_indices.ipynb new file mode 100644 index 0000000..82c8606 --- /dev/null +++ b/notebooks/wp4/cmip6_energy_indices.ipynb @@ -0,0 +1,427 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Energy-consumption-related indices from CMIP6 Global Climate Models" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "import icclim\n", + "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", + "from c3s_eqc_automatic_quality_control import diagnostics, download\n", + "from xarrayMannKendall import Mann_Kendall_test\n", + "\n", + "plt.style.use(\"seaborn-v0_8-notebook\")\n", + "plt.rcParams[\"hatch.linewidth\"] = 0.5" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "## Define parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "# Time period\n", + "year_start = 1971\n", + "year_stop = 2000\n", + "assert year_start >= 1971\n", + "\n", + "# Choose annual or seasonal timeseries\n", + "index_timeseries = {\n", + " \"HDD15.5\": \"DJF\",\n", + " \"CDD22\": \"JJA\",\n", + "}\n", + "if \"annual\" in index_timeseries.values():\n", + " assert set(index_timeseries.values()) == {\"annual\"}\n", + "\n", + "# Interpolation method\n", + "interpolation_method = \"bilinear\"\n", + "\n", + "# Area to show\n", + "area = [72, -22, 27, 45]\n", + "\n", + "# Chunks for download\n", + "chunks = {\"year\": 1}\n", + "\n", + "# Define models\n", + "models_cmip6 = (\n", + " \"access_cm2\",\n", + " \"awi_cm_1_1_mr\",\n", + " \"cmcc_esm2\",\n", + " \"cnrm_cm6_1_hr\",\n", + " \"ec_earth3_cc\",\n", + " \"gfdl_esm4\",\n", + " \"inm_cm5_0\",\n", + " \"miroc6\",\n", + " \"mpi_esm1_2_lr\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## Define ERA5 request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "request_era = (\n", + " \"reanalysis-era5-single-levels\",\n", + " {\n", + " \"product_type\": \"reanalysis\",\n", + " \"format\": \"netcdf\",\n", + " \"time\": [f\"{hour:02d}:00\" for hour in range(24)],\n", + " \"variable\": \"2m_temperature\",\n", + " \"year\": [\n", + " str(year) for year in range(year_start - 1, year_stop + 1)\n", + " ], # Include D(year-1)\n", + " \"month\": [f\"{month:02d}\" for month in range(1, 13)],\n", + " \"day\": [f\"{day:02d}\" for day in range(1, 32)],\n", + " \"area\": area,\n", + " },\n", + ")\n", + "\n", + "request_lsm = (\n", + " request_era[0],\n", + " request_era[1]\n", + " | {\n", + " \"year\": \"1940\",\n", + " \"month\": \"01\",\n", + " \"day\": \"01\",\n", + " \"time\": \"00:00\",\n", + " \"variable\": \"land_sea_mask\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Define CMIP6 request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "request_cmip6 = {\n", + " \"format\": \"zip\",\n", + " \"temporal_resolution\": \"daily\",\n", + " \"experiment\": \"historical\",\n", + " \"variable\": \"near_surface_air_temperature\",\n", + " \"year\": [\n", + " str(year) for year in range(year_start - 1, year_stop + 1)\n", + " ], # Include D(year-1)\n", + " \"month\": [f\"{month:02d}\" for month in range(1, 13)],\n", + " \"day\": [f\"{day:02d}\" for day in range(1, 32)],\n", + " \"area\": area,\n", + "}\n", + "\n", + "model_requests = {}\n", + "for model in models_cmip6:\n", + " model_requests[model] = (\n", + " \"projections-cmip6\",\n", + " download.split_request(request_cmip6 | {\"model\": model}, chunks=chunks),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## Functions to cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "def select_timeseries(ds, index_timeseries, year_start, year_stop):\n", + " timeseries = set(index_timeseries.values())\n", + " if timeseries == {\"annual\"}:\n", + " return ds.sel(time=slice(str(year_start), str(year_stop)))\n", + " assert \"annual\" not in timeseries\n", + " return ds.sel(time=slice(f\"{year_start-1}-12\", f\"{year_stop}-11\"))\n", + "\n", + "\n", + "def compute_indices(ds, index_timeseries, tmpdir):\n", + " labels, datasets = zip(*ds.groupby(\"time.year\"))\n", + " paths = [f\"{tmpdir}/{label}.nc\" for label in labels]\n", + " datasets = [ds.chunk(-1) for ds in datasets]\n", + " xr.save_mfdataset(datasets, paths)\n", + "\n", + " ds = xr.open_mfdataset(paths)\n", + " in_files = f\"{tmpdir}/rechunked.zarr\"\n", + " chunks = {dim: -1 if dim == \"time\" else \"auto\" for dim in ds.dims}\n", + " ds.chunk(chunks).to_zarr(in_files)\n", + "\n", + " datasets = []\n", + " for index_name, timeseries in index_timeseries.items():\n", + " kwargs = {\n", + " \"in_files\": in_files,\n", + " \"out_file\": f\"{tmpdir}/{index_name}.nc\",\n", + " \"slice_mode\": \"year\" if timeseries == \"annual\" else timeseries,\n", + " }\n", + " if index_name == \"HDD15.5\":\n", + " ds_index = icclim.index(\n", + " **kwargs,\n", + " index_name=\"deficit\",\n", + " threshold=icclim.build_threshold(\"15.5 degC\"),\n", + " )\n", + " elif index_name == \"CDD22\":\n", + " ds_index = icclim.excess(\n", + " **kwargs,\n", + " threshold=icclim.build_threshold(\"22 degC\"),\n", + " )\n", + " else:\n", + " raise NotImplementedError(f\"{index_name=}\")\n", + "\n", + " ds_index = ds_index.drop_dims(\"bounds\")\n", + " num_days = {\"DJF\": 90, \"MAM\": 92, \"JJA\": 92, \"SON\": 91}\n", + " with xr.set_options(keep_attrs=True):\n", + " ds_index /= (\n", + " num_days[timeseries]\n", + " if timeseries != \"annual\"\n", + " else sum(num_days.values())\n", + " )\n", + " datasets.append(ds_index)\n", + "\n", + " ds = xr.merge(datasets)\n", + " for da in ds.data_vars.values():\n", + " da.attrs[\"units\"] = da.attrs[\"units\"].replace(\" d\", \"\")\n", + " return ds\n", + "\n", + "\n", + "def compute_trends(ds):\n", + " datasets = []\n", + " (lat,) = set(ds.dims) & set(ds.cf.axes[\"Y\"])\n", + " (lon,) = set(ds.dims) & set(ds.cf.axes[\"X\"])\n", + " coords_name = {\n", + " \"time\": \"time\",\n", + " \"y\": lat,\n", + " \"x\": lon,\n", + " }\n", + " for index, da in ds.data_vars.items():\n", + " ds = Mann_Kendall_test(\n", + " da - da.mean(\"time\"),\n", + " alpha=0.05,\n", + " method=\"theilslopes\",\n", + " coords_name=coords_name,\n", + " ).compute()\n", + " ds = ds.rename({k: v for k, v in coords_name.items() if k in ds.dims})\n", + " ds = ds.assign_coords({dim: da[dim] for dim in ds.dims})\n", + " datasets.append(ds.expand_dims(index=[index]))\n", + " ds = xr.concat(datasets, \"index\")\n", + " return ds\n", + "\n", + "\n", + "def add_bounds(ds):\n", + " for coord in {\"latitude\", \"longitude\"} - set(ds.cf.bounds):\n", + " ds = ds.cf.add_bounds(coord)\n", + " return ds\n", + "\n", + "\n", + "def get_grid_out(request_grid_out, method):\n", + " ds_regrid = download.download_and_transform(*request_grid_out)\n", + " coords = [\"latitude\", \"longitude\"]\n", + " if method == \"conservative\":\n", + " ds_regrid = add_bounds(ds_regrid)\n", + " for coord in list(coords):\n", + " coords.extend(ds_regrid.cf.bounds[coord])\n", + " grid_out = ds_regrid[coords]\n", + " coords_to_drop = set(grid_out.coords) - set(coords) - set(grid_out.dims)\n", + " grid_out = ds_regrid[coords].reset_coords(coords_to_drop, drop=True)\n", + " grid_out.attrs = {}\n", + " return grid_out\n", + "\n", + "\n", + "def compute_indices_and_trends(\n", + " ds,\n", + " index_timeseries,\n", + " year_start,\n", + " year_stop,\n", + " resample_reduction=None,\n", + " request_grid_out=None,\n", + " **regrid_kwargs,\n", + "):\n", + " assert (request_grid_out and regrid_kwargs) or not (\n", + " request_grid_out or regrid_kwargs\n", + " )\n", + " ds = ds.drop_vars([var for var, da in ds.data_vars.items() if len(da.dims) != 3])\n", + " ds = ds[list(ds.data_vars)]\n", + "\n", + " # Original bounds for conservative interpolation\n", + " if regrid_kwargs.get(\"method\") == \"conservative\":\n", + " ds = add_bounds(ds)\n", + " bounds = [\n", + " ds.cf.get_bounds(coord).reset_coords(drop=True)\n", + " for coord in (\"latitude\", \"longitude\")\n", + " ]\n", + " else:\n", + " bounds = []\n", + "\n", + " ds = select_timeseries(ds, index_timeseries, year_start, year_stop)\n", + " if resample_reduction:\n", + " resampled = ds.resample(time=\"1D\")\n", + " ds = getattr(resampled, resample_reduction)(keep_attrs=True)\n", + " if resample_reduction == \"sum\":\n", + " for da in ds.data_vars.values():\n", + " da.attrs[\"units\"] = f\"{da.attrs['units']} / day\"\n", + " with tempfile.TemporaryDirectory() as tmpdir:\n", + " ds_indices = compute_indices(ds, index_timeseries, tmpdir).compute()\n", + " ds_trends = compute_trends(ds_indices)\n", + " ds = ds_indices.mean(\"time\", keep_attrs=True)\n", + " ds = ds.merge(ds_trends)\n", + " if request_grid_out:\n", + " ds = diagnostics.regrid(\n", + " ds.merge({da.name: da for da in bounds}),\n", + " grid_out=get_grid_out(request_grid_out, regrid_kwargs[\"method\"]),\n", + " **regrid_kwargs,\n", + " )\n", + " return ds" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## Download and transform ERA5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "transform_func_kwargs = {\n", + " \"index_timeseries\": dict(sorted(index_timeseries.items())),\n", + " \"year_start\": year_start,\n", + " \"year_stop\": year_stop,\n", + "}\n", + "ds_era5 = download.download_and_transform(\n", + " *request_era,\n", + " chunks=chunks,\n", + " transform_chunks=False,\n", + " transform_func=compute_indices_and_trends,\n", + " transform_func_kwargs=transform_func_kwargs | {\"resample_reduction\": \"mean\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "## Download and transform models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "interpolated_datasets = []\n", + "model_datasets = {}\n", + "for model, requests in model_requests.items():\n", + " print(f\"{model=}\")\n", + " model_kwargs = {\n", + " \"chunks\": chunks,\n", + " \"transform_chunks\": False,\n", + " \"transform_func\": compute_indices_and_trends,\n", + " }\n", + " # Original model\n", + " model_datasets[model] = download.download_and_transform(\n", + " *requests,\n", + " **model_kwargs,\n", + " transform_func_kwargs=transform_func_kwargs,\n", + " )\n", + "\n", + " # Interpolated model\n", + " ds = download.download_and_transform(\n", + " *requests,\n", + " **model_kwargs,\n", + " transform_func_kwargs=transform_func_kwargs\n", + " | {\n", + " \"request_grid_out\": request_lsm,\n", + " \"method\": interpolation_method,\n", + " \"skipna\": True,\n", + " },\n", + " )\n", + " interpolated_datasets.append(ds.expand_dims(model=[model]))\n", + "\n", + "ds_interpolated = xr.concat(interpolated_datasets, \"model\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}