Skip to content

Commit

Permalink
Merge pull request #28 from AugustinMortier/0.10.0
Browse files Browse the repository at this point in the history
0.10.0
  • Loading branch information
AugustinMortier authored Oct 10, 2024
2 parents 930ccad + 85233f1 commit 49dfffe
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 100 deletions.
2 changes: 1 addition & 1 deletion aprofiles/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .utils import config, json_calendar, json_climatology, json_map, workflow
from .utils import config, calendar, climatology, map, l2b, workflow
61 changes: 43 additions & 18 deletions aprofiles/cli/aprocess.py → aprofiles/cli/apro.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class InstrumentType(str, Enum):
cl61 = "CL61"

@app.command()
def main(
def run(
_dates: List[datetime] = typer.Option(
[], "--date", formats=["%Y-%m-%d"], help="📅 Processing date."
),
Expand All @@ -45,10 +45,10 @@ def main(
workers: int = typer.Option(
2, "--workers", min=1, envvar="NSLOTS", help="👷 Number of workers (NSLOTS, if multiprocessing mode is enabled)."
),
basedir_in: Path = typer.Option(
path_in: Path = typer.Option(
"data/e-profile", exists=True, readable=True, help="📂 Base path for input data."
),
basedir_out: Path = typer.Option(
path_out: Path = typer.Option(
"data/v-profiles",
exists=True,
writable=True,
Expand All @@ -66,9 +66,9 @@ def main(
progress_bar: bool = typer.Option(True, help="⌛ Show progress bar."),
):
"""
Run aprofiles standard workflow for given dates, optionally for specific instruments types.
run aprofiles standard workflow for given dates and specific instruments types
See some examples here: https://a-profiles.readthedocs.io/en/latest/cli.html
see some examples [here](https://augustinmortier.github.io/a-profiles/cli/)
"""

#typer.echo(f"dates: {dates}, today: {today}, yesterday: {yesterday}, from: {_from}, to: {_to}, instruments_types: {instruments_types}, multiprocessing: {multiprocessing}")
Expand Down Expand Up @@ -110,7 +110,7 @@ def main(
dd = str(date.day).zfill(2)

# list all files in in directory
datepath = Path(basedir_in, yyyy, mm, dd)
datepath = Path(path_in, yyyy, mm, dd)
onlyfiles = [str(e) for e in datepath.iterdir() if e.is_file()]

# data processing
Expand All @@ -123,46 +123,46 @@ def main(
utils.workflow.workflow,
path=file,
instruments_types=instruments_types,
base_dir=basedir_out, CFG=CFG, verbose=False
base_dir=path_out, CFG=CFG, verbose=False
)
for file in onlyfiles]
for future in concurrent.futures.as_completed(futures):
progress.update(task, advance=1)
else:
for file in track(onlyfiles, description=f"{date.strftime('%Y-%m-%d')} ", disable=disable_progress_bar):
utils.workflow.workflow(
file, instruments_types, basedir_out, CFG, verbose=False
file, instruments_types, path_out, CFG, verbose=False
)

# list all files in out directory
datepath = Path(basedir_out, yyyy, mm, dd)
datepath = Path(path_out, yyyy, mm, dd)

if update_calendar:
# create calendar
calname = f"{yyyy}-{mm}-cal.json"
path = Path(basedir_out, yyyy, mm, calname)
path = Path(path_out, yyyy, mm, calname)
if not path.is_file():
utils.json_calendar.make_calendar(basedir_out, yyyy, mm, calname)
utils.calendar.make_calendar(path_out, yyyy, mm, calname)

# list all files in out directory
onlyfiles = [str(e) for e in datepath.iterdir() if e.is_file()]
# add to calendar
for file in track(onlyfiles, description="calendar ", disable=disable_progress_bar):
utils.json_calendar.add_to_calendar(file, basedir_out, yyyy, mm, dd, calname)
utils.calendar.add_to_calendar(file, path_out, yyyy, mm, dd, calname)


if update_map:
# create map
mapname = f"{yyyy}-{mm}-map.json"
path = Path(basedir_out, yyyy, mm, mapname)
path = Path(path_out, yyyy, mm, mapname)
if not path.is_file():
utils.json_map.make_map(basedir_out, yyyy, mm, mapname)
utils.map.make_map(path_out, yyyy, mm, mapname)

# list all files in out directory
onlyfiles = [str(e) for e in datepath.iterdir() if e.is_file()]
# add to map
for file in track(onlyfiles, description="map ", disable=disable_progress_bar):
utils.json_map.add_to_map(file, basedir_out, yyyy, mm, dd, mapname)
utils.map.add_to_map(file, path_out, yyyy, mm, dd, mapname)

if update_climatology:
# list all files in out directory
Expand All @@ -177,8 +177,8 @@ def main(
task = progress.add_task(total=len(stations_id), description=f"clim. :rocket:", visible=not disable_progress_bar)
with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
futures = [executor.submit(
utils.json_climatology.compute_climatology,
basedir_out,
utils.climatology.compute_climatology,
path_out,
station_id,
season_variables=["extinction"],
all_variables=["aod", "lidar_ratio"],
Expand All @@ -189,8 +189,33 @@ def main(
progress.update(task, advance=1)
else:
for station_id in track(stations_id, description='clim. ', disable=disable_progress_bar):
utils.json_climatology.compute_climatology(basedir_out, station_id, season_variables=["extinction"], all_variables=["aod", "lidar_ratio"], aerosols_only=True)
utils.climatology.compute_climatology(path_out, station_id, season_variables=["extinction"], all_variables=["aod", "lidar_ratio"], aerosols_only=True)


@app.command()
def l2b(
    path_in: Path = typer.Option(
        "data/v-profiles", exists=True, readable=True, help="📂 Base path for input data."
    ),
    path_out: Path = typer.Option(
        "data/l2b", exists=True, writable=True, help="📂 Base path for output data."
    ),
    time_steps: int = typer.Option(
        12, help="🔂 Number of most recent time steps to be processed."
    ),
    progress_bar: bool = typer.Option(True, help="⌛ Show progress bar.")
):
    """
    make E-PROFILE L2b files out of AP files
    """

    # when the default input base path is used, point at today's subdirectory
    # (<path_in>/YYYY/MM/DD) so the command processes the current day's files
    if path_in == Path("data/v-profiles"):
        now = datetime.today()
        path_in = path_in / now.strftime("%Y") / now.strftime("%m") / now.strftime("%d")

    utils.l2b.make_files(path_in, path_out, time_steps, progress_bar)

# script entry point: dispatch to the Typer CLI application
if __name__ == "__main__":
    app()
2 changes: 1 addition & 1 deletion aprofiles/cli/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from . import json_calendar, json_climatology, json_map, config, workflow
from . import calendar, climatology, map, l2b, config, workflow
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import xarray as xr


def make_calendar(path, yyyy, mm, calname):
    """Create an empty monthly calendar JSON file.

    One calendar is created per month at ``<path>/<yyyy>/<mm>/<calname>``;
    station entries are appended to it later by ``add_to_calendar``.

    Args:
        path: base output directory.
        yyyy: 4-digit year as a string.
        mm: zero-padded 2-digit month as a string.
        calname: calendar file name (e.g. ``"2024-10-cal.json"``).
    """
    # one calendar, per month
    with open(Path(path, yyyy, mm, calname), 'w') as json_file:
        json.dump({}, json_file)

def add_to_calendar(fn, base_dir, yyyy, mm, dd, calname):
def add_to_calendar(fn, path, yyyy, mm, dd, calname):
# calendar collects the number of inversions with no low-level clouds (<6km) at each station
# for each station, write number of each scene class (aer, cloud<6km, cloud>6km, )

Expand All @@ -28,7 +28,7 @@ def add_to_calendar(fn, base_dir, yyyy, mm, dd, calname):
scene_counts['total'] = len(ds.retrieval_scene.data)

# open current calendar
with open(Path(base_dir, yyyy, mm, calname), 'r') as json_file:
with open(Path(path, yyyy, mm, calname), 'r') as json_file:
data = json.load(json_file)
json_file.close()

Expand All @@ -39,5 +39,5 @@ def add_to_calendar(fn, base_dir, yyyy, mm, dd, calname):
data[station_id][dd] = scene_counts

# write new calendar
with open(Path(base_dir, yyyy, mm, calname), 'w') as json_file:
with open(Path(path, yyyy, mm, calname), 'w') as json_file:
json.dump(data, json_file)
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
import xarray as xr


def compute_climatology(basedir, station_id, season_variables, all_variables, aerosols_only):
def compute_climatology(path, station_id, season_variables, all_variables, aerosols_only):
# get all files
station_files = []
for root, dirs, files in os.walk(basedir, followlinks=True):
for root, dirs, files in os.walk(path, followlinks=True):
for file in files:
if station_id in file and file.endswith(".nc"):
station_files.append(os.path.join(root, file))
Expand Down Expand Up @@ -67,7 +67,7 @@ def compute_climatology(basedir, station_id, season_variables, all_variables, ae
multivars_dict["attrs"] = attrs

# define path
clim_path = Path(basedir, "climato")
clim_path = Path(path, "climato")
# create directory if does not exist
clim_path.mkdir(parents=True, exist_ok=True)

Expand Down
32 changes: 32 additions & 0 deletions aprofiles/cli/utils/l2b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python
import os
import sys
from pathlib import Path

from glob import glob

import xarray as xr
import pandas as pd
from rich.progress import track

def make_files(path_in: Path, path_out: Path, time_steps: int, progress_bar: bool) -> None:
    """Convert AP files found in *path_in* into per-timestep E-PROFILE L2b files.

    For each ``AP*nc`` file, the *time_steps* most recent time steps are each
    written to *path_out* as a single-time NetCDF file named
    ``L2B_<wigos_station_id>_<instrument_id><yyyymmdd><hhmm>.nc``.

    Args:
        path_in: directory containing the input ``AP*nc`` files.
        path_out: directory receiving the L2b files.
        time_steps: number of most recent time steps to process per file.
        progress_bar: show a progress bar while reading files.
    """
    # list all AP files in path_in
    files = list(Path(path_in).glob("AP*nc"))

    for f in track(files, description="Reading AP files", disable=not progress_bar):

        ds = xr.open_dataset(f, decode_times=True, chunks=-1)
        # get unique id and extract yyyymmdd from first time step
        unique_id = f"{ds.attrs['wigos_station_id']}_{ds.attrs['instrument_id']}"
        yyyymmdd = str(ds.time.data[0].astype('M8[D]')).replace('-', '')

        # we just work with the n latest time steps
        start_idx = max(0, ds.time.size - time_steps)

        for i in range(start_idx, ds.time.size):
            ds1t = ds.isel(time=i)
            hhmm = pd.to_datetime(ds1t.time.data).strftime('%H%M')
            file_name = Path(path_out, f"L2B_{unique_id}{yyyymmdd}{hhmm}.nc")
            # write to a temp file in the destination directory, then move it
            # into place atomically: same filesystem (os.rename to another
            # device would fail), no clobbering between concurrent runs, and
            # readers never observe a partially-written file
            tmp_name = file_name.with_name(file_name.name + '.tmp')
            ds1t.to_netcdf(tmp_name)
            os.replace(tmp_name, file_name)
10 changes: 5 additions & 5 deletions aprofiles/cli/utils/json_map.py → aprofiles/cli/utils/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
import xarray as xr


def make_map(path, yyyy, mm, mapname):
    """Create an empty daily map JSON file.

    One map is created per day at ``<path>/<yyyy>/<mm>/<mapname>``; it later
    collects, per station, the maximum extinction with no low-level clouds
    (<6km) via ``add_to_map``.

    Args:
        path: base output directory.
        yyyy: 4-digit year as a string.
        mm: zero-padded 2-digit month as a string.
        mapname: map file name (e.g. ``"2024-10-map.json"``).
    """
    # one map, per day, which collects the maximum extinction with no low-level clouds (<6km) at each station
    with open(Path(path) / yyyy / mm / mapname, 'w') as json_file:
        json.dump({}, json_file)

def add_to_map(fn, base_dir, yyyy, mm, dd, mapname):
def add_to_map(fn, path, yyyy, mm, dd, mapname):
# map collects the maximum extinction value with no low-level clouds (<6km) at each station at a hourly resolution
# for each station, write an array with extinction values, and array with scenes for each hour of the day

Expand Down Expand Up @@ -51,7 +51,7 @@ def add_to_map(fn, base_dir, yyyy, mm, dd, mapname):
mean_lidar_ratio = ds.lidar_ratio.resample(time='1h').mean().data

# open current map
with open(Path(base_dir) / yyyy / mm / mapname, 'r') as json_file:
with open(Path(path) / yyyy / mm / mapname, 'r') as json_file:
data = json.load(json_file)
json_file.close()

Expand Down Expand Up @@ -81,5 +81,5 @@ def add_to_map(fn, base_dir, yyyy, mm, dd, mapname):
}

# write new map
with open(Path(base_dir) / yyyy / mm / mapname, 'w') as json_file:
with open(Path(path) / yyyy / mm / mapname, 'w') as json_file:
json.dump(data, json_file)
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

:material-history:{ style="text-align: center; font-size: xx-large; display: block" }

## 0.10.0
Oct 10, 2024

- revisit CLI: two commands
- `apro run` (formerly `aprocess`: run standard workflow)
- `apro l2b` (creates L2B files out of AP files)

## 0.9.7
Oct 3, 2024

Expand Down
Loading

0 comments on commit 49dfffe

Please sign in to comment.