diff --git a/MANIFEST.in b/MANIFEST.in index 4626e9ff7a..752fcab4db 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,6 +6,7 @@ prune .github prune message_ix_models/data/test/advance prune message_ix_models/data/test/gea prune message_ix_models/data/test/iea +prune message_ix_models/data/test/report prune message_ix_models/data/test/shape prune message_ix_models/data/test/snapshot-* prune message_ix_models/data/test/ssp diff --git a/message_ix_models/cli.py b/message_ix_models/cli.py index 446baf371c..d17d5d547b 100644 --- a/message_ix_models/cli.py +++ b/message_ix_models/cli.py @@ -57,7 +57,7 @@ def main(click_ctx, **kwargs): # Check for a non-trivial execution of the CLI non_trivial = ( - not any(s in sys.argv for s in {"last-log", "--help"}) + not any(s in sys.argv for s in {"config", "last-log", "--help"}) and click_ctx.invoked_subcommand != "_test" and "pytest" not in sys.argv[0] ) diff --git a/message_ix_models/data/report/global.yaml b/message_ix_models/data/report/global.yaml index 996d811604..b274ad8102 100644 --- a/message_ix_models/data/report/global.yaml +++ b/message_ix_models/data/report/global.yaml @@ -685,7 +685,7 @@ general: # All other technologies not in out::h2 - key: out:*:se_0 comp: select - inputs: [out] + inputs: [ out ] args: indexers: t: [h2_coal, h2_coal_ccs, h2_smr, h2_smr_ccs, h2_bio, h2_bio_ccs] @@ -862,12 +862,12 @@ iamc: # <<: *pe_iamc - variable: Primary Energy|Hydro - base: out:nl-t-ya-m-c-l:se + base: out:nl-t-ya-m-c-l:se_1+se select: {l: [secondary], t: [hydro]} <<: *pe_iamc - variable: Primary Energy|Nuclear - base: out:nl-t-ya-m-c-l:se + base: out:nl-t-ya-m-c-l:se_1+se select: {l: [secondary], t: [nuclear]} <<: *pe_iamc @@ -1043,7 +1043,7 @@ iamc: report: - key: pe test members: -# - Primary Energy|Biomass::iamc + # - Primary Energy|Biomass::iamc - Primary Energy|Coal::iamc - Primary Energy|Gas::iamc - Primary Energy|Hydro::iamc diff --git a/message_ix_models/data/report/legacy/default_units.yaml 
b/message_ix_models/data/report/legacy/default_units.yaml index d0fbb9fc60..0cb7250275 100644 --- a/message_ix_models/data/report/legacy/default_units.yaml +++ b/message_ix_models/data/report/legacy/default_units.yaml @@ -41,14 +41,18 @@ conversion_factors: ZJ: .00003154 km3/yr: 1. Index (2005 = 1): 1 + GWyr/yr: + EJ/yr: 0.03154 + GWa: 1. + km3/yr: 1. EJ/yr: ZJ: .001 - y: + "y": years: 1. # New units from unit-revision "Mt C/GWyr/yr": Mt CO2/yr: "float(f\"{mu['conv_c2co2']}\")" - Mt CO2-equiv/yr: "float(f\"{mu['conv_c2co2']}\")" + Mt CO2-equiv/yr: "float(f\"{mu['conv_c2co2']}\")" # Emissions currently have the units ??? -: Mt CO2/yr: "float(f\"{mu['conv_c2co2']}\")" @@ -57,7 +61,7 @@ conversion_factors: # NB this values implies that whatever quantity it is applied to is # internally [Mt C/yr] Mt CO2/yr: "float(f\"{mu['conv_c2co2']}\")" - Mt CO2-equiv/yr: "float(f\"{mu['conv_c2co2']}\")" + Mt CO2-equiv/yr: "float(f\"{mu['conv_c2co2']}\")" # N2O is always left in kt kt N2O/yr: 1. # All other units are in kt @@ -139,7 +143,7 @@ conversion_factors: Mt C/yr: "float(f\"{mu['conv_co22c']}\")" Mt C/yr: Mt CO2eq/yr: "float(f\"{mu['conv_c2co2']}\")" - Mt CO2/yr: "float(f\"{mu['conv_c2co2']}\")" + Mt CO2/yr: "float(f\"{mu['conv_c2co2']}\")" Mt CO2-equiv/yr: "float(f\"{mu['conv_c2co2']}\")" Mt CO2/yr: Mt CO2/yr: 1. 
diff --git a/message_ix_models/data/test/report/snapshot-1.csv.gz b/message_ix_models/data/test/report/snapshot-1.csv.gz new file mode 100644 index 0000000000..480f8c8632 --- /dev/null +++ b/message_ix_models/data/test/report/snapshot-1.csv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b096505d79154852870cba8ebe0404a68ac754cd7022d38602444f21156870fc +size 2884451 diff --git a/message_ix_models/report/__init__.py b/message_ix_models/report/__init__.py index fd01006481..0e5ddb264d 100644 --- a/message_ix_models/report/__init__.py +++ b/message_ix_models/report/__init__.py @@ -3,6 +3,7 @@ from copy import deepcopy from functools import partial from importlib import import_module +from itertools import chain from operator import itemgetter from pathlib import Path from typing import Callable, List, Optional, Tuple, Union @@ -74,6 +75,11 @@ def iamc(c: Reporter, info): # Common base_key = Key(info["base"]) + # First part of the 'Variable' name + name = info.pop("variable", base_key.name) + # Parts (string literals or dimension names) to concatenate into variable name + var_parts = info.pop("var", [name]) + # Use message_ix_models custom collapse() method info.setdefault("collapse", {}) @@ -96,7 +102,7 @@ def iamc(c: Reporter, info): # TODO allow iterable of str dims = dims.split("-") - label = f"{info['variable']} {'-'.join(dims) or 'full'}" + label = f"{name} {'-'.join(dims) or 'full'}" # Modified copy of `info` for this invocation _info = info.copy() @@ -104,9 +110,7 @@ def iamc(c: Reporter, info): _info.update(base=base_key.drop(*dims), variable=label) # Exclude any summed dimensions from the IAMC Variable to be constructed _info["collapse"].update( - callback=partial( - collapse, var=list(filter(lambda v: v not in dims, info.get("var", []))) - ) + callback=partial(collapse, var=[v for v in var_parts if v not in dims]) ) # Invoke the genno built-in handler @@ -115,7 +119,7 @@ def iamc(c: Reporter, info): keys.append(f"{label}::iamc") # 
Concatenate together the multiple tables - c.add("concat", f"{info['variable']}::iamc", *keys) + c.add("concat", f"{name}::iamc", *keys) def register(name_or_callback: Union[Callable, str]) -> Optional[str]: @@ -365,8 +369,11 @@ def prepare_reporter( ) rep.configure(model=deepcopy(context.model)) + # Add a placeholder task to concatenate IAMC-structured data + rep.add("all::iamc", "concat") + # Apply callbacks for other modules which define additional reporting computations - for callback in CALLBACKS: + for callback in chain(CALLBACKS, context.report.iter_callbacks()): callback(rep, context) key = context.report.key diff --git a/message_ix_models/report/compat.py b/message_ix_models/report/compat.py index 9e0d198b1b..8690a5e030 100644 --- a/message_ix_models/report/compat.py +++ b/message_ix_models/report/compat.py @@ -416,6 +416,10 @@ def full(name: str) -> Key: info = dict(variable="transport emissions", base=k1.drop("h", "m", "yv"), var=[var]) iamc(rep, info) + # Append to the "all::iamc" task + # TODO Use a helper function for this + rep.graph["all::iamc"] += ("transport emissions::iamc",) + # TODO use store_ts() to store on scenario log.info(f"Added {len(rep.graph) - N} keys") diff --git a/message_ix_models/report/config.py b/message_ix_models/report/config.py index 46eec60601..641824bd50 100644 --- a/message_ix_models/report/config.py +++ b/message_ix_models/report/config.py @@ -1,7 +1,8 @@ import logging from dataclasses import InitVar, dataclass, field +from importlib import import_module from pathlib import Path -from typing import TYPE_CHECKING, Dict, Optional, Union +from typing import TYPE_CHECKING, Callable, Dict, Generator, List, Optional, Union from message_ix_models.util import local_data_path, package_data_path from message_ix_models.util.config import ConfigHelper @@ -35,6 +36,9 @@ class Config(ConfigHelper): #: Key for the Quantity or computation to report. key: Optional["KeyLike"] = None + #: Modules with reporting callbacks. 
def iter_callbacks(self) -> Generator[Callable, None, None]:
    """Lazily yield the ``callback`` attribute of every module in :attr:`.modules`.

    Each entry of ``self.modules`` is a module name that is imported with
    :func:`importlib.import_module` only when the generator reaches it.

    Raises
    ------
    ImportError
        If a listed module cannot be imported.
    AttributeError
        If an imported module has no ``callback`` attribute.
    """
    for module_name in self.modules:
        # Import on demand, then hand back that module's callback
        yield import_module(module_name).callback
# Filters for comparison. These are regular expression fragments, so the literal
# "|" separator of IAMC variable names must be written as "\|".
PE0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
PE1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
E = (
    r"Emissions\|CO2\|Energy\|Demand\|Transportation\|Road Rail and Domestic "
    "Shipping"
)

#: Patterns for comparison messages that are expected and should not be treated as
#: failures; intended to be passed as the `ignore` argument of compare_iamc().
#:
#: NB every "|" that is part of a variable name or of the literal text "|diff|" is
#:    escaped. An unescaped "|" is regex alternation, which would make e.g.
#:    "variable='Primary Energy|Coal': …" match *any* message that merely starts
#:    with "variable='Primary Energy". With the escapes in place the patterns only
#:    match the specific messages named below, so messages that were previously
#:    masked by accident may now surface.
IGNORE = [
    # Other 'variable' codes are missing from `obs`
    re.compile(f"variable='(?!{PE0}).*': no right data"),
    # 'variable' codes with further parts are missing from `obs`
    re.compile(f"variable='{PE0}.*': no right data"),
    # For `PE1` (NB: not Hydro or Nuclear) units and most values differ
    re.compile(rf"variable='{PE1}.*': units mismatch: .*EJ/yr.*'', nan"),
    re.compile(r"variable='Primary Energy\|Coal': 220 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Gas': 234 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Solar': 191 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Wind': 179 of 240 values with \|diff"),
    # For `E` units and most values differ
    re.compile(f"variable='{E}': units mismatch: .*Mt CO2/yr.*Mt / a"),
    re.compile(rf"variable='{E}': 20 missing right entries"),
    re.compile(rf"variable='{E}': 220 of 240 values with \|diff"),
]
def compare_iamc(
    left: pd.DataFrame,
    right: pd.DataFrame,
    atol: float = 1e-3,
    ignore: Optional[List[re.Pattern]] = None,
) -> List[str]:
    """Compare IAMC-structured data in `left` and `right`; return a list of messages.

    Both data frames must have the columns "model", "scenario", "variable", "region",
    "year", "value" and "unit". They are outer-merged on the first five; the "value"
    and "unit" columns are then compared per (model, scenario) and per variable.

    Parameters
    ----------
    left, right :
        Data to compare.
    atol :
        Absolute tolerance; values whose difference exceeds this are reported.
    ignore :
        Compiled patterns. A message is discarded if any pattern matches it from the
        start of the string (:meth:`re.Pattern.match`). :any:`None` (the default)
        ignores nothing.

    Returns
    -------
    list of str
        One human-readable message per difference found; empty if there are none (or
        all were ignored).
    """
    patterns = list(ignore or ())
    result: List[str] = []

    def record(message: str, condition: object = True) -> None:
        """Store `message` if `condition` is truthy and no pattern ignores it."""
        if not condition or any(p.match(message) for p in patterns):
            return
        result.append(message)

    def checks(df: pd.DataFrame) -> None:
        """Run all checks on the merged rows `df` for a single variable."""
        prefix = f"variable={df.variable.iloc[0]!r}:"

        # One side entirely missing: report only that, further checks are moot
        if df.value_left.isna().all():
            record(f"{prefix} no left data")
            return
        elif df.value_right.isna().all():
            record(f"{prefix} no right data")
            return

        tmp = df.assign(value_diff=df.value_right - df.value_left)

        # Report, then discard, rows where one side is only partly missing, so they
        # do not distort the units and values comparisons below
        na_left = tmp[["unit_left", "value_left"]].isna()
        if na_left.any(axis=None):
            record(f"{prefix} {na_left.sum(axis=0).max()} missing left entries")
            tmp = tmp[~na_left.any(axis=1)]
        na_right = tmp[["unit_right", "value_right"]].isna()
        if na_right.any(axis=None):
            record(f"{prefix} {na_right.sum(axis=0).max()} missing right entries")
            tmp = tmp[~na_right.any(axis=1)]

        units_left = set(tmp.unit_left.unique())
        units_right = set(tmp.unit_right.unique())
        record(
            condition=units_left != units_right,
            message=f"{prefix} units mismatch: {units_left} != {units_right}",
        )

        # NB the total counts all merged rows, including any discarded above
        n_total = len(df)
        n_differ = int((tmp.value_diff.abs() > atol).sum())
        record(
            condition=n_differ,
            message=f"{prefix} {n_differ} of {n_total} values with |diff| > {atol}",
        )

    merged = left.merge(
        right,
        how="outer",
        on=["model", "scenario", "variable", "region", "year"],
        suffixes=("_left", "_right"),
    )

    for (model, scenario), group_0 in merged.groupby(["model", "scenario"]):
        if group_0.value_left.isna().all():
            record(f"No left data for model={model!r}, scenario={scenario!r}")
        elif group_0.value_right.isna().all():
            record(f"No right data for model={model!r}, scenario={scenario!r}")
        else:
            # Iterate instead of GroupBy.apply(): checks() is called only for its
            # side effects and needs the "variable" column present in each group
            for _, group_1 in group_0.groupby("variable"):
                checks(group_1)

    return result