From f01f3aa87af17790f42a7a88b31c58106dcf5284 Mon Sep 17 00:00:00 2001
From: Tom Vo <tomvothecoder@gmail.com>
Date: Thu, 26 Oct 2023 13:44:29 -0700
Subject: [PATCH] Delete performance metric scripts

---
 .../xcdat-cdat-output-comparison.py           | 211 ----------
 .../xcdat-cdat-runtime-comparison.py          | 396 ------------------
 2 files changed, 607 deletions(-)
 delete mode 100644 docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-output-comparison.py
 delete mode 100644 docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-runtime-comparison.py

diff --git a/docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-output-comparison.py b/docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-output-comparison.py
deleted file mode 100644
index 4c7a9f38..00000000
--- a/docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-output-comparison.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# %%
-import time
-import warnings
-from typing import Dict, Tuple
-
-import cdms2
-import cdutil
-import numpy as np
-import xarray as xr
-import xcdat as xc
-
-# Silence Xarray warning: `SerializationWarning: variable 'ta' has multiple fill
-# values {1e+20, 1e+20}, decoding all values to NaN.`
-warnings.filterwarnings(
-    action="ignore", category=xr.SerializationWarning, module=".*conventions"
-)
-
-# Input data configurations
-# -------------------------
-# Only test 7 GB and 12 GB because 22 GB + crashes CDAT (memory allocation)
-FILES_DICT: Dict[str, Dict[str, str]] = {
-    "7_gb": {
-        "var_key": "tas",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/tas/gn/v20190308/",
-        "xml_path": "/p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/day/tas/CMIP6.CMIP.historical.NCAR.CESM2.r1i1p1f1.day.tas.atmos.glb-p8-gn.v20190308.0000000.0.xml",
-    },
-    "12_gb": {
-        "var_key": "tas",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/MRI/MRI-ESM2-0/amip/r1i1p1f1/3hr/tas/gn/v20190829/",
-        "xml_path": "/p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/3hr/tas/CMIP6.CMIP.historical.MRI.MRI-ESM2-0.r1i1p1f1.3hr.tas.gn.v20190829.0000000.0.xml",
-    },
-}
-
-
-# %%
-def main(
-    fsize: str, finfo: Dict[str, str]
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    var_key = finfo["var_key"]
-    dir_path = finfo["dir_path"]
-    xml_path = finfo["xml_path"]
-
-    print(
-        f"Variable: '{var_key}', File size: {fsize}\n"
-        f"Dir Path: {dir_path}, XML Path: {xml_path} "
-    )
-
-    print("1. xCDAT Serial Spatial Average")
-    xc_sa_ser = _get_xc_spatial_avg(var_key, dir_path, chunks=None, parallel=False)
-    xc_sa_ser_arr = xc_sa_ser[var_key].values
-
-    print("2. xCDAT Parallel Spatial Average")
-    xc_sa_par = _get_xc_spatial_avg(
-        var_key, dir_path, chunks={"time": "auto"}, parallel=True
-    )
-    # Make sure to load the data into memory before doing floating point
-    # comparison. Otherwise it will be loaded during that operation instead.
-    xc_sa_par_arr = xc_sa_par[var_key].values
-
-    print("3. CDAT Spatial Average (Serial-Only)")
-    cdat_sa = _get_cdat_spatial_avg(var_key, xml_path)
-
-    return xc_sa_ser_arr, xc_sa_par_arr, cdat_sa
-
-
-def _get_xc_spatial_avg(
-    var_key: str, dir_path: str, chunks: None | Dict[str, str], parallel: bool
-):
-    time_start_io = time.perf_counter(), time.process_time()
-    ds = xc.open_mfdataset(dir_path, chunks=chunks, parallel=parallel)  # type: ignore
-    time_end_io = time.perf_counter(), time.process_time()
-
-    print(f"  * Real I/O time: {time_end_io[0] - time_start_io[0]:.4f} seconds")
-    print(f"  * CPU I/O time: {time_end_io[1] - time_start_io[1]:.4f} seconds")
-
-    time_start = time.perf_counter(), time.process_time()
-    result = ds.spatial.average(var_key, axis=["X", "Y"])
-    time_end = time.perf_counter(), time.process_time()
-
-    print(f"  * Real compute time: {time_end[0] - time_start[0]:.4f} seconds")
-    print(f"  * CPU compute time: {time_end[1] - time_start[1]:.4f} seconds")
-
-    # Just making sure the dataset is for sure closed.
-    ds.close()
-
-    return result
-
-
-def _get_cdat_spatial_avg(var_key: str, xml_path: str):
-    time_start_io = time.perf_counter(), time.process_time()
-    ds = cdms2.open(xml_path)
-    tvar = ds(var_key)
-    time_end_io = time.perf_counter(), time.process_time()
-
-    print(f"  * Real I/O time: {time_end_io[0] - time_start_io[0]:.4f} seconds")
-    print(f"  * CPU I/O time: {time_end_io[1] - time_start_io[1]:.4f} seconds")
-
-    time_start = time.perf_counter(), time.process_time()
-    result = cdutil.averager(tvar, axis="xy")
-    time_end = time.perf_counter(), time.process_time()
-
-    print(f"  * Real compute time: {time_end[0] - time_start[0]:.4f} seconds")
-    print(f"  * CPU compute time: {time_end[1] - time_start[1]:.4f} seconds")
-
-    # # Just making sure the dataset is for sure closed.
-    ds.close()
-
-    return result
-
-
-# %%
-# Get spatial averaging outputs.
-xc_7gb_s, xc_7gb_p, cdat_7gb = main("7 GB", FILES_DICT["7_gb"])
-xc_12gb_s, xc_12gb_p, cdat_12gb = main("12 GB", FILES_DICT["12_gb"])
-
-
-# %%
-def _compare_outputs(arr_a: np.ndarray, arr_b: np.ndarray):
-    np.testing.assert_allclose(arr_a, arr_b, rtol=0, atol=0)
-
-
-# %% Test case 1: xCDAT serial vs. xCDAT Parallel
-# Both are identical.
-_compare_outputs(xc_7gb_s, xc_7gb_p)
-_compare_outputs(xc_12gb_s, xc_12gb_p)
-
-# Test Case 2: 7 GB dataset
-# --------------------------------------
-# NOTE: For some reason the first value of CDAT's spatial averager is missing (inf).
-# We skip this value to make sure it doesn't influence the results.
-_compare_outputs(xc_7gb_s[1:], cdat_7gb.data[1:])  # type: ignore
-_compare_outputs(xc_7gb_p[1:], cdat_7gb.data[1:])  # type: ignore
-
-"""
-AssertionError:
-Not equal to tolerance rtol=0, atol=0
-
-Mismatched elements: 60225 / 60225 (100%)
-Max absolute difference: 0.00642914
-Max relative difference: 2.2326587e-05
- x: array([285.212858, 285.194082, 285.187531, ..., 286.16203 , 286.127491,
-       286.190507])
- y: array([285.21893, 285.2002 , 285.1936 , ..., 286.16824, 286.13358,
-       286.19662], dtype=float32)
-"""
-# %%
-# Test Case 2: 12 GB dataset
-# --------------------------------------
-_compare_outputs(xc_12gb_s, cdat_12gb.data)  # type: ignore
-_compare_outputs(xc_12gb_p, cdat_12gb.data)  # type: ignore
-
-# AssertionError:
-# Not equal to tolerance rtol=0, atol=0
-
-# Mismatched elements: 105192 / 105192 (100%)
-# Max absolute difference: 7.44648787e-12
-# Max relative difference: 2.57343965e-14
-#  x: array([285.027095, 285.062351, 285.270414, ..., 287.040131, 286.85913 ,
-#        286.67638 ])
-#  y: array([285.027095, 285.062351, 285.270414, ..., 287.040131, 286.85913 ,
-#        286.67638 ])
-
-
-"""
-Variable: 'tas', File size: 7 GB
-Dir Path: /p/css03/esgf_publish/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/tas/gn/v20190308/, XML Path: /p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/day/tas/CMIP6.CMIP.historical.NCAR.CESM2.r1i1p1f1.day.tas.atmos.glb-p8-gn.v20190308.0000000.0.xml
-1. xCDAT Serial Spatial Average
-  * Real I/O time: 1.5270 seconds
-  * CPU I/O time: 1.5098 seconds
-  * Real compute time: 1.4385 seconds
-  * CPU compute time: 1.4395 seconds
-2. xCDAT Parallel Spatial Average
-  * Real I/O time: 2.4733 seconds
-  * CPU I/O time: 3.3816 seconds
-  * Real compute time: 0.2422 seconds
-  * CPU compute time: 0.2434 seconds
-3. CDAT Spatial Average (Serial-Only)
-  * Real I/O time: 73.6501 seconds
-  * CPU I/O time: 73.6546 seconds
-  * Real compute time: 413.0872 seconds
-  * CPU compute time: 413.1232 seconds
-Variable: 'tas', File size: 12 GB
-Dir Path: /p/css03/esgf_publish/CMIP6/CMIP/MRI/MRI-ESM2-0/amip/r1i1p1f1/3hr/tas/gn/v20190829/, XML Path: /p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/3hr/tas/CMIP6.CMIP.historical.MRI.MRI-ESM2-0.r1i1p1f1.3hr.tas.gn.v20190829.0000000.0.xml
-1. xCDAT Serial Spatial Average
-  * Real I/O time: 16.6844 seconds
-  * CPU I/O time: 3.9275 seconds
-  * Real compute time: 2.5575 seconds
-  * CPU compute time: 2.5589 seconds
-2. xCDAT Parallel Spatial Average
-  * Real I/O time: 16.3017 seconds
-  * CPU I/O time: 4.6564 seconds
-  * Real compute time: 0.3206 seconds
-  * CPU compute time: 0.3224 seconds
-3. CDAT Spatial Average (Serial-Only)
-  * Real I/O time: 95.0600 seconds
-  * CPU I/O time: 94.8878 seconds
-  * Real compute time: 664.6169 seconds
-  * CPU compute time: 664.6510 seconds
-"""
-
-
-# %%
-"""
-Info on CDML files:
-  - They are stored here for data on climate machines: /p/user_pub/xclim
-  - You can also generate them from the command line cdscan -x myxml.xml /full/path/to/file/*nc
-  - `/p/user_pub/xclim/$MIP_ERA/$ACTIVITY/$EXPERIMENT/$REALM/$FREQUENCY/$VARIABLE/`
-  - `filename: MIP_ERA.ACTIVITY.EXPERIMENT.INSTITUTION.MODEL.MEMBER.FREQUENCY.VARIABLE.REALM.GRID.VERSION.FLAGS.LATEST.xml`
-
-cdscan -x /p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/day/tas/CMIP6.CMIP.historical.NCAR.CESM2.r1i1p1f1.day.tas.atmos.glb-p8-gn.v20190308.0000000.0.xml /p/css03/esgf_publish/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/tas/gn/v20190308/*.nc && cdscan -x /p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/3hr/tas/CMIP6.CMIP.historical.MRI.MRI-ESM2-0.r1i1p1f1.3hr.tas.gn.v20190829.0000000.0.xml /p/css03/esgf_publish/CMIP6/CMIP/MRI/MRI-ESM2-0/amip/r1i1p1f1/3hr/tas/gn/v20190829/*.nc && cdscan -x /p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/day/ta/CMIP6.CMIP.historical.CCCma.CanESM5.r1i1p2f1.CFday.ta.atmos.glb-p80-gn.v20190429.0000000.0.xml /p/css03/esgf_publish/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p2f1/CFday/ta/gn/v20190429/*.nc
-"""
diff --git a/docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-runtime-comparison.py b/docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-runtime-comparison.py
deleted file mode 100644
index 00537315..00000000
--- a/docs/examples/xcdat-cdat-perf-metrics/xcdat-cdat-runtime-comparison.py
+++ /dev/null
@@ -1,396 +0,0 @@
-# %%
-"""
-A script that compares the API runtimes of xCDAT against CDAT using multi-file
-time series datasets with varying sizes. The default number of samples taken
-for each API runtime is 5, and the minimum value is recorded. Runtimes only
-include computation and exclude I/O.  xCDAT can operate in serial or parallel,
-while CDAT can only operate in serial.
-
-xCDAT's parallel configuration:
-  - datasets are chunked using the "time" axis with Dask's auto chunking option.
-  - datasets are also opened in parallel using the `parallel=True`
-    (uses `dask.delayed`).
-  - The `flox` package is used for map-reduce grouping instead of the native
-    Xarray serial grouping logic for temporal averaging APIs that use Xarray's
-    groupby() under the hood. This includes `group_average()`, `climatology()`,
-    and `departures()`)
-
-How to use:
-   1. Must have direct access to LLNL Climate Program filesystem with CMIP data.
-   2. Create the conda/mamba environment:
-      - `mamba create -n xcdat-cdat-runtime -c conda-forge python<3.12 numpy pandas xcdat=0.6.0 xesmf cdms2 cdutil
-      - `mamba activate xcdat-cdat-runtime`
-   3. Run the script
-      - `python xcdat-cdat-runtime-comparison.py`
-"""
-from __future__ import annotations
-
-import time
-import timeit
-import warnings
-from typing import Dict, List, Tuple
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-
-from xcdat._logger import _setup_custom_logger
-
-warnings.filterwarnings(
-    action="ignore", category=xr.SerializationWarning, module=".*conventions"
-)
-
-# FIXME: I can't get the logger to not print out two messages.
-# I already tried logger.propagate=False and using the root logger.
-logger = _setup_custom_logger(__name__, propagate=True)
-
-# Output file configurations
-# --------------------------
-TIME_STR = time.strftime("%Y%m%d-%H%M%S")
-XC_FILENAME = f"{TIME_STR}-xcdat-runtimes"
-CD_FILENAME = f"{TIME_STR}-cdat-runtimes"
-
-# Plot Configurations
-# -------------------
-# The base plot configuration passed to Panda's DataFrame plotting API.
-PLOT_CONFIG: pd.DataFrame.plot.__init__ = {
-    "kind": "bar",
-    "legend": True,
-    "rot": 0,
-    "x": "gb",
-    "xlabel": "File Size (GB)",
-    "ylabel": "Runtime (secs)",
-    "figsize": (6, 4),
-}
-# The base bar label configuration passed to axis containers to add
-# the floating point labels above the bars.
-BAR_LABEL_CONFIG = {"fmt": "{:10.2f}", "label_type": "edge", "padding": 3}
-
-
-# Input data configurations
-# -------------------------
-FILES_DICT: Dict[str, Dict[str, str]] = {
-    "7_gb": {
-        "var_key": "tas",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/tas/gn/v20190308/",
-        "xml_path": "/p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/day/tas/CMIP6.CMIP.historical.NCAR.CESM2.r1i1p1f1.day.tas.atmos.glb-p8-gn.v20190308.0000000.0.xml",
-    },
-    "12_gb": {
-        "var_key": "tas",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/MRI/MRI-ESM2-0/amip/r1i1p1f1/3hr/tas/gn/v20190829/",
-        "xml_path": "/p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/3hr/tas/CMIP6.CMIP.historical.MRI.MRI-ESM2-0.r1i1p1f1.3hr.tas.gn.v20190829.0000000.0.xml",
-    },
-    "22_gb": {
-        "var_key": "ta",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/MOHC/UKESM1-0-LL/historical/r5i1p1f3/day/ta/gn/v20191115/",
-        "xml_path": "/p/user_pub/xclim/CMIP6/CMIP/historical/atmos/day/ta/CMIP6.CMIP.historical.MOHC.UKESM1-0-LL.r5i1p1f3.day.ta.atmos.glb-p8-gn.v20191115.0000000.0.xml",
-    },
-    "50_gb": {
-        "var_key": "ta",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/ta/gn/v20190308/",
-        "xml_path": "/p/user_pub/xclim/CMIP6/CMIP/historical/atmos/day/ta/CMIP6.CMIP.historical.NCAR.CESM2.r1i1p1f1.day.ta.atmos.glb-p8-gn.v20190308.0000000.0.xml",
-    },
-    "74_gb": {
-        "var_key": "ta",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p2f1/CFday/ta/gn/v20190429/",
-        "xml_path": "/p/user_pub/e3sm/vo13/xclim/CMIP6/CMIP/historical/atmos/day/ta/CMIP6.CMIP.historical.CCCma.CanESM5.r1i1p2f1.CFday.ta.atmos.glb-p80-gn.v20190429.0000000.0.xml",
-    },
-    "105_gb": {
-        "var_key": "ta",
-        "dir_path": "/p/css03/esgf_publish/CMIP6/CMIP/MOHC/HadGEM3-GC31-MM/historical/r2i1p1f3/day/ta/gn/v20191218",
-        "xml_path": "/p/user_pub/xclim/CMIP6/CMIP/historical/atmos/day/ta/CMIP6.CMIP.historical.MOHC.HadGEM3-GC31-MM.r2i1p1f3.day.ta.atmos.glb-p8-gn.v20191218.0000000.0.xml",
-    },
-}
-
-
-def main(repeat: int):
-    """Get the API runtimes for xCDAT and CDAT.
-
-    APIs tested include:
-      - Spatial averaging
-      - Time averaging (single snap-shot)
-      - Climatology
-      - Depatures
-
-    Parameters
-    ----------
-    repeat : int
-        Number of samples to take for each API call. The minimum runtime is
-        taken as the final runtime (refer to Notes).
-
-    Notes
-    -----
-    According to `Timer.repeat()`:
-
-        "In a typical case, the lowest value gives a lower bound for how fast
-        your machine can run the given code snippet; higher values in the result
-        vector are typically not caused by variability in Python's speed, but by
-        other processes interfering with your timing accuracy. So the min() of
-        the result is probably the only number you should be interested in."
-
-    Source: https://github.com/python/cpython/blob/2587b9f64eefde803a5e0b050171ad5f6654f31b/Lib/timeit.py#L193-L203
-    """
-    # 1. Get xCDAT runtimes and plot results.
-    df_xc_serial = _get_xcdat_runtimes(parallel=False, repeat=repeat)
-    df_xc_parallel = _get_xcdat_runtimes(parallel=True, repeat=repeat)
-
-    df_xc_times = pd.merge(df_xc_serial, df_xc_parallel, on=["pkg", "gb", "api"])
-    df_xc_times = df_xc_times.sort_values(by=["pkg", "api", "gb"])
-    df_xc_times.to_csv(f"{XC_FILENAME}.csv", index=False)
-
-    # 2. Get CDAT runtimes and plot results.
-    df_cdat_times = _get_cdat_runtimes(repeat=repeat)
-    df_cdat_times = df_cdat_times.sort_values(by=["pkg", "api", "gb"])
-    df_cdat_times.to_csv(f"{CD_FILENAME}.csv", index=False)
-
-    # 3. Plot the results.
-    # TODO: Update plots to dynamically fit larger floating point values.
-    plot_xcdat_runtimes(df_xc_times)
-    plot_cdat_runtimes(df_cdat_times)
-
-
-def _get_xcdat_runtimes(
-    parallel: bool,
-    repeat: int,
-) -> pd.DataFrame:
-    """Get the xCDAT API runtimes for spatial and temporal averaging.
-
-    Parameters
-    ----------
-    parallel : bool
-        Whether to run the APIs using Dask parallelism (True) or in serial
-        (False). If in parallel, datasets are chunked on the time axis using
-        Dask's auto chunking, and `flox` is used for temporal averaging.
-    repeat : int
-        Number of samples to take for each API call. The minimum runtime is
-        taken as the final runtime.
-
-    Returns
-    -------
-    pd.DataFrame
-        A DataFrame of API runtimes.
-    """
-    process_type = "serial" if parallel is False else "parallel"
-    logger.info(f"Getting xCDAT {process_type} runtimes.")
-
-    chunks, parallel, use_flox = _get_xr_config(parallel)
-
-    api_runtimes = []
-
-    for fsize, finfo in FILES_DICT.items():
-        dir_path = finfo["dir_path"]
-        var_key = finfo["var_key"]
-
-        setup = _get_xr_setup(dir_path, chunks, parallel)
-        api_map = _get_xr_api_map(var_key)
-
-        logger.info(
-            f"Variable: '{var_key}', File Size: {fsize}, Dir Path: `{dir_path}`."
-        )
-        open_stmt = setup.split("\n")[-1]
-        logger.info(f"  * I/O code: `{open_stmt}`")
-
-        for api, call in api_map.items():
-            logger.info(f"  * Getting runtime for {api}: `{call}`.")
-
-            entry: Dict[str, str | float | None] = {
-                "pkg": "xcdat",
-                "gb": fsize.split("_")[0],
-                "api": api,
-            }
-
-            try:
-                runtime = _get_runtime(setup=setup, stmt=use_flox + call, repeat=repeat)
-            except Exception as e:
-                print(e)
-                runtime = None
-
-            entry[f"runtime_{process_type}"] = runtime
-            logger.info(f"  * Runtime: {runtime}")
-
-            api_runtimes.append(entry)
-
-    df_runtimes = pd.DataFrame(api_runtimes)
-
-    return df_runtimes
-
-
-def _get_xr_config(parallel: bool) -> Tuple[None | Dict[str, str], bool, str]:
-    if not parallel:
-        chunks = None
-        parallel = False
-        use_flox = "with xr.set_options(use_flox=False): \n    "
-    elif parallel:
-        chunks = {"time": "auto"}
-        parallel = True
-        use_flox = "with xr.set_options(use_flox=True): \n    "
-
-    return chunks, parallel, use_flox
-
-
-def _get_xr_setup(dir_path: str, chunks: None | Dict[str, str], parallel: bool):
-    return (
-        "import xarray as xr\n"
-        "import xcdat as xc\n"
-        f"ds = xc.open_mfdataset('{dir_path}', chunks={chunks}, add_bounds=['X', 'Y', 'T'], parallel={parallel})"
-    )
-
-
-def _get_xr_api_map(var_key: str):
-    return {
-        "spatial_avg": f"ds.spatial.average('{var_key}', axis=['X', 'Y'])",
-        "temporal_avg": f"ds.temporal.average('{var_key}', weighted=True)",
-        "climatology": f"ds.temporal.climatology('{var_key}', freq='month', weighted=True)",
-        "departures": f"ds.temporal.departures('{var_key}', freq='month', weighted=True)",
-    }
-
-
-def _get_cdat_runtimes(repeat: int) -> pd.DataFrame:
-    """Get the CDAT API runtimes (only supports serial).
-
-    Parameters
-    ----------
-    repeat : int
-        Number of samples to take for each API call.
-
-    Returns
-    -------
-    pd.DataFrame
-        A DataFrame of runtimes for CDAT APIs.
-    """
-    logger.info("Getting CDAT runtimes (serial-only).")
-
-    runtimes = []
-
-    for fsize, finfo in FILES_DICT.items():
-        xml_path = finfo["xml_path"]
-        var_key = finfo["var_key"]
-
-        setup = _get_cdat_setup(var_key, xml_path)
-        api_map = _get_cdat_api_map()
-
-        logger.info(
-            f"Variable: '{var_key}', File Size: {fsize}, XML Path: `{xml_path}`."
-        )
-        for api, call in api_map.items():
-            logger.info(f"  * Getting runtime for {api}: `{call}`.")
-
-            entry: Dict[str, str | float | None] = {
-                "pkg": "cdat",
-                "gb": fsize.split("_")[0],
-                "api": api,
-            }
-            try:
-                runtime = _get_runtime(setup=setup, stmt=call, repeat=repeat)
-            except Exception as e:
-                logger.error(e)
-                runtime = None
-
-            entry["runtime_serial"] = runtime
-            logger.info(f"  * Runtime: {runtime}")
-
-        runtimes.append(entry)
-
-    df_runtimes = pd.DataFrame(runtimes)
-
-    return df_runtimes
-
-
-def _get_cdat_setup(var_key: str, xml_path: str):
-    setup = (
-        "import cdms2\n"
-        "import cdutil\n"
-        "cdms2.setAutoBounds('on')\n"
-        f"ds = cdms2.open('{xml_path}')\n"
-        f"t_var = ds['{var_key}']\n"
-        "tvar.getTime().getBounds()"
-    )
-
-    return setup
-
-
-def _get_cdat_api_map():
-    api_calls = {
-        "spatial_avg": "cdutil.averager(t_var, axis='xy')",
-        "temporal_avg": "cdutil.averager(t_var, axis='t')",
-        "climatology": "cdutil.ANNUALCYCLE.climatology(t_var)",
-        "departures": "cdutil.ANNUALCYCLE.departures(t_var)",
-    }
-
-    return api_calls
-
-
-def _get_runtime(setup: str, stmt: str, repeat: int = 5, number: int = 1) -> float:
-    """Get the minimum runtime for a code statement using timeit.
-
-    Parameters
-    ----------
-    setup : str
-        The setup code (e.g,. imports).
-    stmt : str
-        The statement to measure performance on (e.g., API calls).
-    repeat : int, optional
-        Number of samples to take, by default 5.
-    number : int, optional
-        Number of times to repeat the statement for each sample, by default 1.
-
-    Returns
-    -------
-    float
-        The average minimum runtime out of all of the samples.
-    """
-    runtimes: List[float] = timeit.repeat(
-        setup=setup,
-        stmt=stmt,
-        repeat=repeat,
-        number=number,
-    )
-
-    min = np.around(np.min(runtimes), decimals=6)
-
-    return min
-
-
-def plot_xcdat_runtimes(df_xcdat: pd.DataFrame):
-    apis = df_xcdat.api.unique()
-
-    for api in apis:
-        ax = df_xcdat.plot(**PLOT_CONFIG)
-
-        for cont in ax.containers:
-            ax.bar_label(cont, **BAR_LABEL_CONFIG)
-
-        ax.margins(y=0.1)
-        ax.legend(["Serial", "Parallel"], fontsize="medium", loc="upper center", ncol=2)
-
-        fig = ax.get_figure()
-
-        api_title = api.title().replace("_", " ")
-        fig.suptitle(f"xCDAT {api_title} Runtime")
-        fig.tight_layout()
-        fig.savefig(f"{XC_FILENAME}-{api}.png")
-
-
-def plot_cdat_runtimes(df_cdat: pd.DataFrame):
-    apis = df_cdat.api.unique()
-
-    for api in apis:
-        ax = df_cdat.plot(**PLOT_CONFIG)
-
-        for cont in ax.containers:
-            ax.bar_label(cont, **BAR_LABEL_CONFIG)
-
-        ax.margins(y=0.1)
-        ax.legend(["Serial"], fontsize="medium", loc="upper center", ncol=1)
-
-        fig = ax.get_figure()
-
-        api_title = api.title().replace("_", " ")
-        fig.suptitle(f"CDAT {api_title} Runtime")
-        fig.tight_layout()
-
-        fig.savefig(f"{CD_FILENAME}-{api}.png")
-
-
-if __name__ == "__main__":
-    main(repeat=3)