Skip to content

Commit

Permalink
Harmonize logging of ixmp4 and pyam in Jupyter notebooks (#774)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Aug 28, 2023
1 parent 525ede9 commit 53c806c
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 159 deletions.
24 changes: 9 additions & 15 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,23 @@ and methods.
api/timeseries
api/variables

**Notebook logging behaviour**
Logging behaviour in Jupyter notebooks
--------------------------------------

|pyam| wants to provide sensible defaults for users unfamiliar with
`setting up python's logging library <https://realpython.com/python-logging/#basic-configurations>`_,
and therefore will provide a basic configuration by invoking
The |pyam| package wants to provide sensible defaults for users unfamiliar with setting
up python's logging library (`read more`_), and therefore will add a streamhandler if
(and only if) it determines that it is running within a notebook.

.. code-block:: python
import logging
logging.basicConfig(level="INFO", format="%(name)s - %(levelname)s: %(message)s")
if (and only if):

1. it determines that it is running within a notebook, and
2. logging is still *unconfigured by the time the first logging message by |pyam| is to be emitted*.
.. _`read more` : https://realpython.com/python-logging/#basic-configurations
**Intersphinx mapping**
Intersphinx mapping
-------------------

To use sphinx.ext.intersphinx_ for generating automatic links from your project
to the documentation of |pyam| classes and functions, please add the following
to your project's :code:`conf.py`:

.. _sphinx.ext.intersphinx: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
.. _sphinx.ext.intersphinx : https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html

.. code-block:: python
Expand Down
19 changes: 8 additions & 11 deletions pyam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,13 @@
validate,
)
from pyam.statistics import Statistics
from pyam.logging import *
from pyam.iiasa import read_iiasa, lazy_read_iiasa # noqa: F401
from pyam.datareader import read_worldbank # noqa: F401
from pyam.unfccc import read_unfccc # noqa: F401
from pyam.testing import assert_iamframe_equal # noqa: F401
from pyam.run_control import run_control # noqa: F401
from pyam.utils import IAMC_IDX # noqa: F401

from pyam.logging import defer_logging_config

logger = logging.getLogger(__name__)

# get version number either from git (preferred) or metadata
Expand All @@ -44,15 +41,15 @@

shell = get_ipython()
if isinstance(shell, ZMQInteractiveShell):
# set up basic logging if running in a notebook
log_msg = "Running in a notebook, setting up a basic logging at level INFO"
# harmonize formatting of ixmp4 and pyam logging
ixmp4_logger = logging.getLogger("ixmp4")
ixmp4_logger.removeHandler(ixmp4_logger.handlers[0])

defer_logging_config(
logger,
log_msg,
level="INFO",
format="%(name)s - %(levelname)s: %(message)s",
)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(name)s - %(levelname)s: %(message)s"))

for _logger in [logger, ixmp4_logger]:
_logger.addHandler(handler)

# deactivate in-cell scrolling in a Jupyter notebook
shell.run_cell_magic(
Expand Down
75 changes: 35 additions & 40 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
append_index_col,
)
from pyam.time import swap_time_for_year, swap_year_for_time
from pyam.logging import raise_data_error, deprecation_warning
from pyam.logging import raise_data_error, deprecation_warning, format_log_message
from pyam.validation import _exclude_on_fail

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -160,7 +160,6 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):

# read from file
if isinstance(data, Path):
data = Path(data) # casting str or LocalPath to Path
if not data.is_file():
raise FileNotFoundError(f"No such file: '{data}'")
logger.info(f"Reading file {data}")
Expand Down Expand Up @@ -2488,9 +2487,7 @@ def to_datapackage(self, path):
# return the package (needs to reloaded because `tmp` was deleted)
return Package(path)

def load_meta(
self, path, sheet_name="meta", ignore_conflict=False, *args, **kwargs
):
def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs):
"""Load 'meta' indicators from file
Parameters
Expand All @@ -2505,59 +2502,57 @@ def load_meta(
kwargs
Passed to :func:`pandas.read_excel` or :func:`pandas.read_csv`
"""

# load from file
path = path if isinstance(path, pd.ExcelFile) else Path(path)
df = read_pandas(path, sheet_name=sheet_name, **kwargs)

# cast model-scenario column headers to lower-case (if necessary)
df = df.rename(columns=dict([(i.capitalize(), i) for i in META_IDX]))
meta = read_pandas(path, sheet_name=sheet_name, **kwargs)

# check that required index columns exist
missing_cols = [c for c in self.index.names if c not in df.columns]
if missing_cols:
# cast index-column headers to lower-case, check that required index exists
meta = meta.rename(columns=dict([(i.capitalize(), i) for i in META_IDX]))
if missing_cols := [c for c in self.index.names if c not in meta.columns]:
raise ValueError(
f"File {Path(path)} (sheet {sheet_name}) "
f"missing required index columns {missing_cols}!"
f"Missing index columns for meta indicators: {missing_cols}"
)

# set index, filter to relevant scenarios from imported file
n = len(df)
df.set_index(self.index.names, inplace=True)
df = df.loc[self.meta.index.intersection(df.index)]

# skip import of meta indicators if empty
if not n:
logger.info(f"No scenarios found in sheet {sheet_name}")
# skip import of meta indicators if no rows in meta
if not len(meta.index):
logger.warning(f"No scenarios found in sheet {sheet_name}")
return

msg = "Reading meta indicators"
# indicate if not all scenarios are included in the meta file
if len(df) < len(self.meta):
i = len(self.meta)
msg += f" for {len(df)} out of {i} scenario{s(i)}"

# indicate if more scenarios exist in meta file than in self
invalid = n - len(df)
if invalid:
msg += f", ignoring {invalid} scenario{s(invalid)} from file"
logger.warning(msg)
else:
logger.info(msg)
# set index, check consistency between existing index and meta
meta.set_index(self.index.names, inplace=True)

missing = self.index.difference(meta.index)
invalid = meta.index.difference(self.index)

if not missing.empty:
logger.warning(
format_log_message(
"No meta indicators for the following scenarios", missing
)
)
if not invalid.empty:
logger.warning(
format_log_message(
"Ignoring meta indicators for the following scenarios", invalid
)
)
meta = meta.loc[self.meta.index.intersection(meta.index)]

# in pyam < 2.0, an "exclude" columns was part of the `meta` attribute
# this section ensures compatibility with xlsx files created with pyam < 2.0
if "exclude" in df.columns:
if "exclude" in meta.columns:
logger.info(
f"Found column 'exclude' in sheet '{sheet_name}', "
"moved to attribute `IamDataFrame.exclude`."
)
self._exclude = merge_exclude(
df.exclude, self.exclude, ignore_conflict=ignore_conflict
meta.exclude, self.exclude, ignore_conflict=ignore_conflict
)
df.drop(columns="exclude", inplace=True)
meta.drop(columns="exclude", inplace=True)

# merge imported meta indicators
self.meta = merge_meta(df, self.meta, ignore_conflict=ignore_conflict)
self.meta = merge_meta(meta, self.meta, ignore_conflict=ignore_conflict)

def map_regions(
self,
Expand Down Expand Up @@ -2849,7 +2844,7 @@ def filter_by_meta(data, df, join_meta=False, **kwargs):
to nan if `(model, scenario)` not in `df.meta.index`)
"""
if not set(META_IDX).issubset(data.index.names + list(data.columns)):
raise ValueError("Missing required index dimensions or columns!")
raise ValueError("Missing required index dimensions or data columns.")

meta = pd.DataFrame(df.meta[list(set(kwargs) - set(META_IDX))].copy())

Expand Down
66 changes: 11 additions & 55 deletions pyam/logging.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from contextlib import contextmanager
import logging
from logging import *

import pandas as pd
import warnings


logger = logging.getLogger(__name__)
logger = getLogger(__name__)


@contextmanager
def adjust_log_level(logger="pyam", level="ERROR"):
"""Context manager to change log level"""
if isinstance(logger, str):
logger = logging.getLogger(logger)
logger = getLogger(logger)
old_level = logger.getEffectiveLevel()
logger.setLevel(level)
yield
Expand All @@ -26,58 +27,13 @@ def deprecation_warning(msg, item="This method", stacklevel=3):


def raise_data_error(msg, data):
"""Utils function to format error message from data formatting"""
"""Format error message with (head of) data table and raise"""
raise ValueError(format_log_message(msg, data))


def format_log_message(msg, data):
"""Utils function to format message with (head of) data table"""
if isinstance(data, pd.MultiIndex):
data = data.to_frame(index=False)
data = data.drop_duplicates()
msg = f"{msg}:\n{data.head()}" + ("\n..." if len(data) > 5 else "")
raise ValueError(msg)


class ConfigPseudoHandler(logging.Handler):
"""Pseudo logging handler to defer configuring logging until the first message
Registers itself as a handler for the provided logger and temporarily
sets the logger as sensitive to INFO messages. Upon receipt of the first
message (of at least INFO level), it configures logging with the provided
`config_kwargs` and prints `log_msg`
Parameters
----------
logger : logging.Logger
Logger to listen for the first message
log_msg : str, optional
Message to print once logging is configured, by default None
**config_kwargs
Arguments to pass on to logging.basicConfig
"""

def __init__(self, logger, log_msg=None, **config_kwargs):
super().__init__()

self.logger = logger
self.log_msg = log_msg
self.config_kwargs = config_kwargs

self.logger.addHandler(self)

# temporarily set the logging level to a non-standard value,
# slightly below logging.INFO == 20 and use that as a sentinel
# to switch back to logging.NOTSET later
self.logger.setLevel(19)

def emit(self, record):
self.logger.removeHandler(self)

if self.logger.level == 19:
self.logger.setLevel(logging.NOTSET)

if not self.logger.root.hasHandlers():
logging.basicConfig(**self.config_kwargs)

if self.log_msg is not None:
self.logger.info(self.log_msg)


# Give the Handler a function like alias
defer_logging_config = ConfigPseudoHandler
return f"{msg}:\n{data.head()}" + ("\n..." if len(data) > 5 else "")
Loading

0 comments on commit 53c806c

Please sign in to comment.