Skip to content

Commit

Permalink
Merge branch 'main-dev' into pydantic-colocationsetup
Browse files Browse the repository at this point in the history
  • Loading branch information
lewisblake authored May 31, 2024
2 parents 4242cfc + b9c3b6c commit 4691c71
Show file tree
Hide file tree
Showing 24 changed files with 421 additions and 91 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,4 @@ jobs:
- name: Install pyaerocom
run: python -m pip install . --no-deps
- name: Run pytest
run: python -m pytest -ra -q --cov --no-cov-on-fail --cov-report xml
run: python -m pytest -ra -q --cov --no-cov-on-fail --cov-report xml
9 changes: 3 additions & 6 deletions pyaerocom/aeroval/experiment_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def _run_single_entry(self, model_name, obs_name, var_list):
if self.cfg.processing_opts.only_json:
files_to_convert = col.get_available_coldata_files(var_list)
else:
col.run(var_list)
model_read_kwargs = self.cfg.model_cfg[model_name]["kwargs"]
col.run(var_list, model_read_kwargs=model_read_kwargs)
files_to_convert = col.files_written

if self.cfg.processing_opts.only_colocation:
Expand Down Expand Up @@ -122,11 +123,7 @@ def run(self, model_name=None, obs_name=None, var_list=None, update_interface=Tr
if not self.cfg.model_cfg:
logger.info("No model found, will make dummy model data")
self.cfg.webdisp_opts.hide_charts = ["scatterplot"]
self.cfg.webdisp_opts.hide_pages = [
"maps.php",
"intercomp.php",
"overall.php",
]
self.cfg.webdisp_opts.pages = ["evaluation", "infos"]
model_id = make_dummy_model(obs_list, self.cfg)
self.cfg.processing_opts.obs_only = True
use_dummy_model = True
Expand Down
13 changes: 13 additions & 0 deletions pyaerocom/aeroval/modelentry.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import inspect
from copy import deepcopy

from pyaerocom._lowlevel_helpers import BrowseDict, DictStrKeysListVals, DictType, StrType
Expand Down Expand Up @@ -55,6 +56,8 @@ def __init__(self, model_id, **kwargs):
self.model_rename_vars = {}
self.model_read_aux = {}

self.kwargs = kwargs

self.update(**kwargs)

@property
Expand All @@ -64,6 +67,16 @@ def aux_funs_required(self):
"""
return True if bool(self.model_read_aux) else False

def json_repr(self) -> dict:
    """Return the parent's JSON representation with aux functions as source text.

    Every entry found under the ``"model_read_aux"`` key of the parent's
    representation has its ``"fun"`` value replaced by the text returned
    from :func:`inspect.getsource`.

    Returns
    -------
    dict
        the (modified) object returned by ``super().json_repr()``.
    """
    rep = super().json_repr()
    aux_entries = rep["model_read_aux"]

    # NOTE(review): the entries are rewritten in place. If the parent hands
    # back references to this object's own dicts rather than copies, the
    # callables stored on the instance end up replaced by strings -- confirm.
    for var_name in aux_entries:
        entry = aux_entries[var_name]
        entry["fun"] = inspect.getsource(deepcopy(entry["fun"]))

    return rep

def get_vars_to_process(self, obs_vars: list) -> tuple:
"""
Get lists of obs / mod variables to be processed
Expand Down
2 changes: 1 addition & 1 deletion pyaerocom/aeroval/setupclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ class WebDisplaySetup(BaseModel):
hide_charts: tuple[str, ...] = ()
hide_pages: tuple[str, ...] = ()
ts_annotations: dict[str, str] = Field(default_factory=dict)
add_pages: tuple[str, ...] = ()
pages: tuple[str, ...] = ["maps", "evaluation", "intercomp", "overall", "infos"]


class EvalRunOptions(BaseModel):
Expand Down
7 changes: 6 additions & 1 deletion pyaerocom/colocation_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@


class Colocator:

"""High level class for running co-location
Note
Expand All @@ -52,6 +53,7 @@ class Colocator:
}

STATUS_CODES: dict[int, str] = {

1: "SUCCESS",
2: "NOT OK: Missing/invalid model variable",
3: "NOT OK: Missing/invalid obs variable",
Expand Down Expand Up @@ -592,7 +594,10 @@ def _instantiate_gridded_reader(self, what):
data_id = self.colocation_setup.obs_id
data_dir = self.colocation_setup.obs_data_dir
reader_class = self._get_gridded_reader_class(what=what)
reader = reader_class(data_id=data_id, data_dir=data_dir)
if what == "model" and reader_class in self.MODELS_WITH_KWARGS:
reader = reader_class(data_id=data_id, data_dir=data_dir, **self.model_read_kwargs)
else:
reader = reader_class(data_id=data_id, data_dir=data_dir)
return reader

def _get_gridded_reader_class(self, what):
Expand Down
28 changes: 28 additions & 0 deletions pyaerocom/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from configparser import ConfigParser
from pathlib import Path
from typing import Union

import numpy as np

Expand All @@ -18,6 +19,7 @@
from pyaerocom.grid_io import GridIO
from pyaerocom.region_defs import ALL_REGION_NAME, HTAP_REGIONS, OLD_AEROCOM_REGIONS
from pyaerocom.varcollection import VarCollection
from pyaerocom.variable import Variable

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -220,6 +222,9 @@ def __init__(self, config_file=None, try_infer_environment=True):
self._var_param = None
self._coords = None

# Custom variables
self._custom_var_dict = None

# Attributes that are used to store search directories
self.OBSLOCS_UNGRIDDED = {}
self.OBS_UNGRIDDED_POST = {}
Expand Down Expand Up @@ -308,6 +313,25 @@ def infer_basedir_and_config(self):
return (basedir, self._config_files[cfg_id])
raise FileNotFoundError("Could not establish access to any registered database")

def register_custom_variables(
    self, vars: Union[dict[str, Variable], dict[str, dict[str, str]]]
) -> None:
    """Register custom variables, replacing any registered earlier.

    Parameters
    ----------
    vars : dict
        mapping of key to either a ``Variable`` instance (stored as is) or
        a dict of keyword arguments for ``Variable``. Such a dict has to
        contain at least the keys ``"var_name"`` and ``"units"``.

    Raises
    ------
    ValueError
        if a value is neither a ``Variable`` nor a dict, or if a dict value
        lacks ``"var_name"`` or ``"units"``.
    """
    registered = {}
    for key, item in vars.items():
        if isinstance(item, Variable):
            registered[key] = item
            continue
        if not isinstance(item, dict):
            raise ValueError(f"Item {item} must be either dict or Variable")
        if not ("var_name" in item and "units" in item):
            raise ValueError(
                f"Dict item {item} must atleast have the keys 'var_name' and 'units'"
            )
        registered[key] = Variable(**item)

    # Nothing is stored unless every item validated; the whole previous
    # registration is replaced, not merged.
    self._custom_var_dict = dict(registered)
    # Drop the stored variable collection.
    # NOTE(review): presumably so that it is rebuilt, including the custom
    # variables, on next access -- confirm against the VARS property.
    self._var_param = None

@property
def has_access_users_database(self):
chk_dir = self._check_subdirs_cfg["users-db"]
Expand Down Expand Up @@ -482,6 +506,10 @@ def VARS(self):
"""Instance of class VarCollection (for default variable information)"""
if self._var_param is None: # has not been accessed before
self._var_param = VarCollection(self._var_info_file)

if self._custom_var_dict is not None:
for var in self._custom_var_dict:
self._var_param.add_var(self._custom_var_dict[var])
return self._var_param

@property
Expand Down
5 changes: 4 additions & 1 deletion pyaerocom/data/emep_variables.ini
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,7 @@ concCocCoarse = "SURF_ugC_PM_OMCOARSE"
concecFine = "SURF_ug_ECFINE"
concecCoarse = "SURF_ug_ECCOARSE"
concoxn = "SURF_ugN_OXN"
vmrno = "SURF_ppb_NO"
vmrno = "SURF_ppb_NO"

# Variables used for pollen data
concspores = "SURF_ug_FUNGAL_SPORES"
16 changes: 16 additions & 0 deletions pyaerocom/data/variables.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3078,6 +3078,22 @@ unit = ug C m-3
description=Mass concentration of total carbon
unit = ug m-3

[conchoa]
description=Mass concentration hydrocarbon like OA
unit = ug m-3

[concbcbb]
description=Mass concentration elemental carbon, biomass burning
unit = ug m-3

[concspores]
description=Mass concentration fungal spores
unit = ug m-3

[concpolyol]
description=Mass concentration Polyol
unit = ug m-3

[conco3]
description=Mass concentration of ozone
unit = ug m-3
Expand Down
9 changes: 9 additions & 0 deletions pyaerocom/io/mscw_ctm/additional_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,3 +385,12 @@ def calc_concSso2(concso2):
concSso2.attrs["units"] = "ug S m-3"

return concSso2


def calc_concpolyol(concspores):
    """Derive polyol concentration by scaling the fungal spores concentration.

    Parameters
    ----------
    concspores
        spores concentration; must provide ``copy(deep=True)``, support
        multiplication with a float, and yield a result with an ``attrs``
        mapping.

    Returns
    -------
    scaled copy of the input, with ``attrs["units"]`` set to ``"ug m-3"``.
    The input object itself is not modified.
    """
    # Original author's note: polyol is 4.5% of spores, and spores are in
    # ug/cm3 in Gunnar's run even though the unit is marked as ug m-3, so a
    # factor of 1000 is needed for the unit change.
    # NOTE(review): 45 / 1000 equals 0.045, i.e. the 4.5% share alone --
    # confirm how the stated unit conversion is meant to enter this factor.
    polyol_factor = 45.0 / 1000.0

    spores = concspores.copy(deep=True)
    concpolyol = spores * polyol_factor
    concpolyol.attrs["units"] = "ug m-3"
    return concpolyol
3 changes: 2 additions & 1 deletion pyaerocom/io/mscw_ctm/emep_variables.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,5 @@ concCocCoarse = "SURF_ugC_PM_OMCOARSE"
concecFine = "SURF_ug_ECFINE"
concecCoarse = "SURF_ug_ECCOARSE"
concoxn = "SURF_ugN_OXN"
vmrno = "SURF_ppb_NO"
vmrno = "SURF_ppb_NO"
concspores = "SURF_ug_FUNGAL_SPORES"
36 changes: 35 additions & 1 deletion pyaerocom/io/mscw_ctm/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
calc_concno3pm10,
calc_concno3pm25,
calc_concNtnh,
calc_concpolyol,
calc_concso4t,
calc_concSso2,
calc_concsspm25,
Expand Down Expand Up @@ -103,6 +104,8 @@ class ReadMscwCtm:
"concNno2": ["concno2"],
"concSso2": ["concso2"],
"vmro3": ["conco3"],
# For Pollen
# "concpolyol": ["concspores"],
}

# Functions that are used to compute additional variables (i.e. one
Expand Down Expand Up @@ -145,6 +148,7 @@ class ReadMscwCtm:
"concNno2": calc_concNno2,
"concSso2": calc_concSso2,
"vmro3": calc_vmro3,
# "concpolyol": calc_concpolyol,
}

#: supported filename masks, placeholder is for frequencies
Expand All @@ -167,7 +171,7 @@ class ReadMscwCtm:

DEFAULT_FILE_NAME = "Base_day.nc"

def __init__(self, data_id=None, data_dir=None):
def __init__(self, data_id=None, data_dir=None, **kwargs):
self._data_dir = None
# opened dataset (for performance boost), will be reset if data_dir is
# changed
Expand All @@ -180,6 +184,12 @@ def __init__(self, data_id=None, data_dir=None):
self._files = None

self.var_map = emep_variables()
if "emep_vars" in kwargs:
new_map = kwargs["emep_vars"]
if isinstance(new_map, dict):
self.var_map.update(new_map)
else:
logger.warn(f"New map {new_map} is not a dict. Skipping")

if data_dir is not None:
if not isinstance(data_dir, str) or not os.path.exists(data_dir):
Expand Down Expand Up @@ -765,6 +775,30 @@ def preprocess_units(units, prefix):
return "m-1"
return units

def add_aux_compute(self, var_name, vars_required, fun):
    """Register a new variable to be computed from other variables.

    Parameters
    ----------
    var_name : str
        name of the variable to be computed.
    vars_required : str or list
        name(s) of the variables needed to compute `var_name`. A single
        string is wrapped into a one-element list.
    fun : callable
        object stored as the compute function for `var_name`.
        NOTE(review): the call signature expected of `fun` is not visible
        here -- check how the entries of ``AUX_FUNS`` are invoked.

    Raises
    ------
    ValueError
        if `vars_required` is neither str nor list, or if `fun` is not
        callable. Nothing is registered in that case.
    """
    if isinstance(vars_required, str):
        vars_required = [vars_required]
    elif not isinstance(vars_required, list):
        raise ValueError(
            f"Invalid input for vars_required. Need str or list. Got: {vars_required}"
        )

    if not callable(fun):
        raise ValueError("Invalid input for fun. Input is not a callable object")

    # NOTE(review): if AUX_REQUIRES / AUX_FUNS are class-level dicts, this
    # registration is shared by every instance of the reader -- confirm
    # that this is intended.
    self.AUX_REQUIRES[var_name] = vars_required
    self.AUX_FUNS[var_name] = fun


class ReadEMEP(ReadMscwCtm):
"""Old name of :class:`ReadMscwCtm`."""
Expand Down
41 changes: 16 additions & 25 deletions pyaerocom/io/pyaro/read_pyaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ class PyaroToUngriddedData:
_STOPTIMEINDEX = 10 # can be used to store stop time of acq.
_TRASHINDEX = 11 # index where invalid data can be moved to (e.g. when outliers are removed)

# List of keys needed by every station from Pyaro. Used to find extra metadata
STATION_KEYS = (
"station",
"latitude",
"longitude",
"altitude",
"long_name",
"country",
"url",
)

def __init__(self, config: PyaroConfig) -> None:
self.data: UngriddedData = UngriddedData()
self.config = config
Expand Down Expand Up @@ -218,6 +229,9 @@ def _get_metadata_from_pyaro(self, station: Station) -> list[dict[str, str]]:

return metadata

def _get_additional_metadata(self, station: Station) -> list[dict[str, str]]:
    """Return the ``metadata`` attribute of `station` unchanged.

    NOTE(review): the caller passes the result to ``entry.update(...)``,
    so a mapping is presumably what comes back, not a list of dicts as
    the return annotation says -- confirm against pyaro's ``Station``.
    """
    extra_metadata = station.metadata
    return extra_metadata

def _make_single_ungridded_metadata(
self, station: Station, name: str, ts_type: Optional[TsType], units: dict[str, str]
) -> MetadataEntry:
Expand All @@ -233,34 +247,11 @@ def _make_single_ungridded_metadata(
country=station["country"],
ts_type=str(ts_type) if ts_type is not None else "undefined",
)
entry.update(self._get_metadata_from_pyaro(station))
entry.update(self._get_metadata_from_pyaro(station=station))
entry.update(self._get_additional_metadata(station=station))

return MetadataEntry(entry)

def _make_ungridded_metadata(
    self, stations: dict[str, Station], var_idx: dict[str, int], units: dict[str, str]
) -> Metadata:
    """Build one metadata entry per station, keyed by a running integer index.

    Parameters
    ----------
    stations : dict
        mapping of station name to station object; the name is stored as
        ``station_id``, iteration order determines the index.
    var_idx : dict
        not used inside this method.
    units : dict
        stored as ``var_info`` in every entry.

    Returns
    -------
    Metadata
        wraps a dict mapping ``0, 1, 2, ...`` to the per-station entries.
    """
    metadata = {}
    for idx, (name, station) in enumerate(stations.items()):
        entry = dict(
            data_id=self.config.name,
            variables=list(self.get_variables()),
            var_info=units,
            latitude=station["latitude"],
            longitude=station["longitude"],
            altitude=station["altitude"],
            station_name=station["long_name"],
            station_id=name,
            country=station["country"],
            ts_type="undefined",  # TEMP: Changes dynamically below
        )
        # Values from _get_metadata_from_pyaro override the defaults above
        # when keys collide.
        entry.update(self._get_metadata_from_pyaro(station))
        metadata[idx] = entry

    return Metadata(metadata)

def _pyaro_dataline_to_ungriddeddata_dataline(
self, data: np.void, idx: int, var_idx: int
) -> np.ndarray:
Expand Down
7 changes: 2 additions & 5 deletions pyaerocom/scripts/cams2_83/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import date, timedelta
from enum import Enum
Expand Down Expand Up @@ -179,7 +178,7 @@ def runnermos(

logger.info("Running Statistics (MOS)")
ExperimentProcessor(stp).run()
print("Done Running Statistics (MOS)")
logger.info("Done Running Statistics (MOS)")


def runnermedianscores(
Expand All @@ -198,8 +197,6 @@ def runnermedianscores(

stp = EvalSetup(**cfg)

start = time.time()

logger.info(
"Running CAMS2_83 Specific Statistics, cache is not cleared, colocated data is assumed in place, regular statistics are assumed to have been run"
)
Expand All @@ -216,4 +213,4 @@ def runnermedianscores(
logger.info(f"Making median scores plot with pool {pool} and analysis {analysis}")
CAMS2_83_Processer(stp).run(analysis=analysis)

print(f"Long run: {time.time() - start} sec")
logger.info("Median scores run finished")
Loading

0 comments on commit 4691c71

Please sign in to comment.