Skip to content

Commit

Permalink
working on file conventions for cso
Browse files Browse the repository at this point in the history
  • Loading branch information
lewisblake committed Aug 28, 2023
1 parent 56cb1b7 commit a00c129
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 7 deletions.
5 changes: 4 additions & 1 deletion pyaerocom/data/file_conventions.ini
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@ data_id_pos=1

[cso]
file_sep=_
year_pos=-3
year_pos=-3
var_pos=None
ts_pos=None
data_id_pos=None
52 changes: 50 additions & 2 deletions pyaerocom/io/fileconventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,29 @@ def from_file(self, file):
"""

if basename(file).count("_") >= 4:
self.import_default("aerocom3")
if "CSO" in basename(file): # LB: This needs testing
self.import_default("cso")
else:
self.import_default("aerocom3")

elif basename(file).count(".") >= 4:
self.import_default("aerocom2")
else:
raise FileConventionError(
f"Could not identify convention from input file {basename(file)}"
)
breakpoint()
self.check_validity(file)
return self

def from_filepath(self, filepath):
    """Placeholder for identifying the convention from a full file path.

    For the CSO file convention, some of the information is contained in
    the file path rather than in the file name, so that case is meant to
    be dealt with separately here.

    Parameters
    ----------
    filepath
        path of the file; currently unused.

    Returns
    -------
    None
        the method is not implemented yet and does nothing.
    """
    return None

def check_validity(self, file):
"""Check if filename is valid"""
breakpoint()
info = self.get_info_from_file(file)
year = info["year"]
if not TsType.valid(info["ts_type"]):
Expand Down Expand Up @@ -257,10 +268,44 @@ def _info_from_aerocom2(self, file: str) -> dict:
)
return info

def _info_from_cso(self, file: str) -> dict:
"""Extract info from filename CSO convention
Parameters
-----------
file : str
netcdf file name
Returns
-------
dict
dictionary containing infos that were extracted from filename
"""
info = self.info_init
spl = splitext(basename(file))[0].split(self.file_sep)
try:
info["year"] = int(spl[self.year_pos][0:4])
except Exception:
raise FileConventionError(
f"Failed to extract year information from file {basename(file)} "
f"using file convention {self.name}"
)

try:
info["var_name"] = spl[self.var_pos]
except Exception:
raise FileConventionError(
f"Failed to extract variable information from file {basename(file)} "
f"using file convention {self.name}"
)

breakpoint()
return info

def get_info_from_file(self, file: str) -> dict:
"""Identify convention from a file
Currently only two conventions (aerocom2 and aerocom3) exist that are
Currently only three conventions (aerocom2, aerocom3, cso) exist that are
identified by the delimiter used.
Parameters
Expand Down Expand Up @@ -294,6 +339,9 @@ def get_info_from_file(self, file: str) -> dict:
return self._info_from_aerocom3(file)
if self.name == "aerocom2":
return self._info_from_aerocom2(file)
if self.name == "cso": # LB: new file convention for gridded CSO files we will have
breakpoint()
return self._info_from_cso(file)
raise FileConventionError(f"Unknown {self.name}")

def string_mask(self, data_id, var, year, ts_type, vert_which=None):
Expand Down
5 changes: 3 additions & 2 deletions pyaerocom/io/readgridded.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,7 @@ def _update_file_convention(self, files):
if none of the input files matches a registered convention.
"""
for file in files:
breakpoint()
try:
self.file_convention.from_file(os.path.basename(file))
return
Expand Down Expand Up @@ -812,7 +813,7 @@ def search_all_files(self, update_file_convention=True):
----
It is presumed, that naming conventions of files in
the data directory are not mixed but all correspond to either of the
conventions defined in
conventions defined in file_conventions.ini
Parameters
----------
Expand Down Expand Up @@ -847,7 +848,7 @@ def search_all_files(self, update_file_convention=True):
except FileNotFoundError as e:
logger.warning(repr(e))
return

breakpoint() # LB: At what point can we get info from the filepath not the name?
result = self._evaluate_fileinfo(files)
df = self._fileinfo_to_dataframe(result)
self.file_info = df
Expand Down
5 changes: 3 additions & 2 deletions pyaerocom/plugins/tropomi/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
"value",
)

TMP_DATA_DIR = "/lustre/storeB/project/fou/kl/sesam/archive/CSO-gridded/xEMEP__r01x01__qa08/NO2/2022/10/"
TMP_DATA_DIR = (
"/lustre/storeB/project/fou/kl/sesam/archive/CSO-gridded/xEMEP__r01x01__qa08/NO2/2022/10/"
)


class ReadTropomiL3(ReadGridded):
Expand All @@ -47,7 +49,6 @@ class ReadTropomiL3(ReadGridded):
def __init__(self, data_id=None, data_dir=None):
if data_dir is None:
data_dir = TMP_DATA_DIR
breakpoint()
# raise Exception("Need a data_dir")
super().__init__(data_dir=data_dir, file_convention="cso")
breakpoint()

0 comments on commit a00c129

Please sign in to comment.