diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..8a817e4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,51 @@ +# This workflow will install dependencies, run tests and the black-formatter + +name: om3_scripts + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +permissions: + contents: read + +jobs: + formatting: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Code formatting + uses: psf/black@stable + with: + options: "--check --verbose --diff" + + test: + needs: formatting + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10', '3.11', '3.12'] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install nco + run: | + sudo apt-get update + sudo apt-get -y install nco + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + pip install -r test/test_requirements.txt + - name: Test with pytest + run: | + python -m pytest -m "not broken" + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v3 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9b92a6c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: local + hooks: + - id: black + name: black + entry: black + language: system + types: [python] \ No newline at end of file diff --git a/README.md b/README.md index 45b79fb..790b574 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,5 @@ When contributing scripts to this repo, please follow these guidelines where it - [x] Include documentation with your script. 
For example, a README explaining what the script is for, how to use it, what it can/can't do etc. - [x] Any outputs from your script should include metadata that allows someone in the future to track down the exact version of the script used to create that output. Any easy way to do this is to include as metadata the url and commit hash of the script on GitHub. See existing scripts for examples of how to do this. - [x] Consider including information about the dependencies or environment required to run your script, e.g. a conda-lock file describing your conda environment. + +Python scripts are formatted using the [black formatter](https://github.com/psf/black). This is enforced using a GitHub Action running on pull requests. You will need to have _pre-commit_ and _black_ installed in your python environment. Both are included in the _conda/analysis_ environments in _hh5_; otherwise they can be installed through pip or conda. Then run `pre-commit install` once to ensure your commits have black run on them before committing. 
\ No newline at end of file diff --git a/data_stream_xml_generation/generate_xml_datm.py b/data_stream_xml_generation/generate_xml_datm.py index 3cd3cb4..698295a 100644 --- a/data_stream_xml_generation/generate_xml_datm.py +++ b/data_stream_xml_generation/generate_xml_datm.py @@ -48,7 +48,9 @@ # Add metadata metadata = SubElement(root, "metadata") SubElement(metadata, "File_type").text = "DATM xml file provides forcing data" -SubElement(metadata, "date_generated").text = datetime.now().strftime("%Y-%m-%d %H:%M:%S") +SubElement(metadata, "date_generated").text = datetime.now().strftime( + "%Y-%m-%d %H:%M:%S" +) SubElement(metadata, "history").text = metadata_info # Define the stream info names and corresponding var names @@ -96,15 +98,17 @@ datafiles = SubElement(stream_info, "datafiles") datavars = SubElement(stream_info, "datavars") - if stream_name in ([ - "CORE_IAF_JRA55do.PRSN", - "CORE_IAF_JRA55do.PRRN", - "CORE_IAF_JRA55do.LWDN", - "CORE_IAF_JRA55do.SWDN", - ]) and (year_first != year_last): - SubElement( - stream_info, "offset" - ).text = "-5400" # shift back 1.5hr to match RYF + if stream_name in ( + [ + "CORE_IAF_JRA55do.PRSN", + "CORE_IAF_JRA55do.PRRN", + "CORE_IAF_JRA55do.LWDN", + "CORE_IAF_JRA55do.SWDN", + ] + ) and (year_first != year_last): + SubElement(stream_info, "offset").text = ( + "-5400" # shift back 1.5hr to match RYF + ) else: SubElement(stream_info, "offset").text = "0" diff --git a/data_stream_xml_generation/generate_xml_drof.py b/data_stream_xml_generation/generate_xml_drof.py index 43c622b..bba287e 100644 --- a/data_stream_xml_generation/generate_xml_drof.py +++ b/data_stream_xml_generation/generate_xml_drof.py @@ -46,7 +46,9 @@ # Add metadata metadata = SubElement(root, "metadata") SubElement(metadata, "File_type").text = "DROF xml file provides river runoff data" -SubElement(metadata, "date_generated").text = datetime.now().strftime("%Y-%m-%d %H:%M:%S") +SubElement(metadata, "date_generated").text = datetime.now().strftime( + "%Y-%m-%d 
%H:%M:%S" +) SubElement(metadata, "history").text = metadata_info # Define the stream info names and corresponding var names @@ -77,11 +79,11 @@ datavars = SubElement(stream_info, "datavars") if year_first == year_last: - SubElement(stream_info, "offset").text = "0" #RYF starts at midnight - else: - SubElement( - stream_info, "offset" - ).text = "-43200" # shift backwards from noon to midnight to match RYF + SubElement(stream_info, "offset").text = "0" # RYF starts at midnight + else: + SubElement(stream_info, "offset").text = ( + "-43200" # shift backwards from noon to midnight to match RYF + ) var_element = SubElement(datavars, "var") var_element.text = f"{var_prefix} {var_suffix}" diff --git a/payu_config/README.md b/payu_config/README.md new file mode 100644 index 0000000..93b50e4 --- /dev/null +++ b/payu_config/README.md @@ -0,0 +1,15 @@ +This directory contains Payu setup and archive userscripts for ACCESS-OM3. + +To use, add these to config.yaml: + +```yaml +userscripts: + setup: /usr/bin/bash /g/data/vk83/apps/om3-scripts/payu_config/setup.sh + archive: /usr/bin/bash /g/data/vk83/apps/om3-scripts/payu_config/archive.sh + +modules: + use: + - /g/data/hh5/public/modules + load: + - conda/analysis +``` diff --git a/payu_config/archive.sh b/payu_config/archive.sh new file mode 100644 index 0000000..b5d3ff4 --- /dev/null +++ b/payu_config/archive.sh @@ -0,0 +1,5 @@ +#!/usr/bin/bash -i + +source "$(dirname "$0")/archive_scripts/archive_cice_restarts.sh" +source "$(dirname "$0")/archive_scripts/concat_ice_daily.sh" +python3 "$(dirname "$0")/archive_scripts/build_intake_ds.py" diff --git a/payu_config/archive_scripts/archive_cice_restarts.sh b/payu_config/archive_scripts/archive_cice_restarts.sh new file mode 100755 index 0000000..c55aa7c --- /dev/null +++ b/payu_config/archive_scripts/archive_cice_restarts.sh @@ -0,0 +1,20 @@ +#!/usr/bin/bash -i +# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
+# SPDX-License-Identifier: Apache-2.0 +# clean up cice_restarts.sh + +if [ -f archive/output*/input/iced.1900-01-01-10800.nc ] +then +rm archive/output*/input/iced.1900-01-01-10800.nc +return 0 +fi + +latest_o=$(ls -drv archive/output*[0-9] | head -1) + +#initial restart was copied from the previous run +ic_restart=$(ls -dv $latest_o/access-om3.cice.r.* | head -1) + +if [ -f $latest_o/access-om3.cice.r.* ] +then +rm $latest_o/access-om3.cice.r.* +fi \ No newline at end of file diff --git a/payu_config/archive_scripts/build_intake_ds.py b/payu_config/archive_scripts/build_intake_ds.py new file mode 100644 index 0000000..1d06513 --- /dev/null +++ b/payu_config/archive_scripts/build_intake_ds.py @@ -0,0 +1,72 @@ +#!python3 +# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 +# modules: +# use: +# - /g/data/hh5/public/modules +# load: +# - conda/analysis + +from access_nri_intake.source import builders +import os +import sys +from pathlib import Path +from warnings import warn +from ruamel.yaml import YAML +from ruamel.yaml.comments import CommentedMap + +path_root = Path(__file__).parents[2] +sys.path.append(str(path_root)) + +from scripts_common import get_provenance_metadata, md5sum + +METADATA_FILENAME = "metadata.yaml" +UUID_FIELD = "experiment_uuid" +ARCHIVE_PATH = "archive" + + +def description(): + + # Get experiment uuid + # follows https://github.com/payu-org/payu/blob/ef55e93fe23fcde19024479c0dc4112dcdf6603f/payu/metadata.py#L90 + metadata_filename = Path(METADATA_FILENAME) + if metadata_filename.exists(): + # an empty metadata file loads as None, so fall back to an empty map + metadata = YAML().load(metadata_filename) or CommentedMap() + uuid = metadata.get(UUID_FIELD, None) + else: + warn(f"{METADATA_FILENAME} not found in archive folder") + uuid = False + + # Check git status of this .py file + this_file = os.path.normpath(__file__) + + runcmd = f"python3 {os.path.basename(this_file)}" + + # Get string "Created using $file: $command" + 
provenance = get_provenance_metadata(this_file, runcmd) + + if uuid: + description = f"intake-esm datastore for experiment {uuid}, in folder {os.getcwd()}. {provenance}. (md5 hash: {md5sum(this_file)})" + else: + description = f"intake-esm datastore for experiment in folder {os.getcwd()}. {provenance}. (md5 hash: {md5sum(this_file)})" + + return description + + +if __name__ == "__main__": + + builder = builders.AccessOm3Builder(path=ARCHIVE_PATH) + + print("LOG: Building intake-esm datastore") + + builder.build() + + # Log invalid assets + builder.invalid_assets + + builder.save( + name="intake_esm_ds", + description=description(), + directory=ARCHIVE_PATH, + ) diff --git a/payu_config/archive_scripts/concat_ice_daily.sh b/payu_config/archive_scripts/concat_ice_daily.sh new file mode 100755 index 0000000..bd02b4d --- /dev/null +++ b/payu_config/archive_scripts/concat_ice_daily.sh @@ -0,0 +1,69 @@ +#!/usr/bin/bash -i +# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Concatenate sea-ice daily output from access-om3 +# this was written assuming it would be used as a payu "userscript" at the "archive" stage, but alternatively a path to an "archive" directory can be provided +# Script inspired from https://github.com/COSIMA/1deg_jra55_ryf/blob/master/sync_data.sh#L87-L108 +# +# This script uses "ncrcat". Load this through either 'module use /g/data/vk83/modules; module load payu' or 'module load nco'. + +shopt -s extglob + +Help() +{ + # Display Help + echo "Concatenate daily history output from the (sea) ice model to a single file" + echo + echo "Syntax: scriptTemplate [-h|d DIRECTORY]" + echo "options:" + echo "h Print this Help." + echo "d Process "name" directory rather than latest output in archive folder." + echo +} + +# Get the options +while getopts ":hd:" option; do + case $option in + h) # display Help + Help + exit;; + d) # Enter a directory + out_dir=$OPTARG + if [ ! 
-d $out_dir ]; then + echo $out_dir Does not exist + exit + fi;; + \?) # Invalid option + echo "Error: Invalid option" + exit;; + esac +done + +#If no directory option provided , then use latest +if [ -z $out_dir ]; then + #latest output dir only + out_dir=$(ls -drv archive/output*[0-9] | head -1) +fi + +if ! command -v -- "ncrcat" > /dev/null 2>&1; then + echo "ncrcat not available, trying module load nco" + module load nco +fi + +for f in $out_dir/access-om3.cice.h.????-??-01.nc ; do + #if the 1st and the 28th of that month exists, then assume its a whole month and concatenate + if [ -f $f ] && [ -f ${f/-01.nc/-28.nc} ]; then + + output_f=${f/-01.nc/.nc} #remove day in date string + + #concat daily files for this month + echo LOG: concatenating daily sea ice files in $out_dir + echo doing ncrcat -O -L 5 -4 ${f/-01.nc/-??.nc} $output_f + ncrcat -O -L 5 -4 ${f/-01.nc/-??.nc} $output_f + + if [[ $? == 0 ]]; then + rm ${f/-01.nc/-??.nc} #delete individual dailys on success + fi + fi +done diff --git a/payu_config/setup.sh b/payu_config/setup.sh new file mode 100644 index 0000000..55f0c3d --- /dev/null +++ b/payu_config/setup.sh @@ -0,0 +1,3 @@ +#!/usr/bin/bash -i + +source "$(dirname "$0")/setup_scripts/setup_cice_restarts.sh" \ No newline at end of file diff --git a/payu_config/setup_scripts/setup_cice_restarts.sh b/payu_config/setup_scripts/setup_cice_restarts.sh new file mode 100755 index 0000000..e800e07 --- /dev/null +++ b/payu_config/setup_scripts/setup_cice_restarts.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# patch for https://github.com/open-mpi/ompi/issues/12141 +if ! 
[ -f work/access-om3.cice.r.* ] +then +# no restart files yet, use initial conditions +IC=$(readlink work/input/iced.1900-01-01-10800.nc) +rm work/input/iced.1900-01-01-10800.nc +cp $IC work/input/iced.1900-01-01-10800.nc +else +# change restart symlink to hardlink +RESTART=$(echo work/access-om3.cice.r.*) +ln -f $(readlink $RESTART) $RESTART +fi diff --git a/test/test_payu_conf/test_concat_ice_daily.py b/test/test_payu_conf/test_concat_ice_daily.py new file mode 100644 index 0000000..1fbf3d7 --- /dev/null +++ b/test/test_payu_conf/test_concat_ice_daily.py @@ -0,0 +1,138 @@ +import pytest +import xarray as xr +import numpy as np +import pandas as pd + +from os import makedirs, chdir +from subprocess import run +from pathlib import Path + +scripts_base = Path(__file__).parents[2] +run_str = f"{scripts_base}/payu_config/archive_scripts/concat_ice_daily.sh" + + +def assert_file_exists(p): + if not Path(p).resolve().is_file(): + raise AssertionError("File does not exist: %s" % str(p)) + + +def assert_f_not_exists(p): + if Path(p).resolve().is_file(): + raise AssertionError("File exists and should not: %s" % str(p)) + + +@pytest.fixture +def daily_files(request, tmp_path): + """ + Make 365 days of fake data, and then write it into 365 files + + request = (path, ndays) + e.g. request = ("archive/output000", "365") + + """ + + if request.param[0] == "Default": + dir_name = "archive/output000" + else: + dir_name = str(request.param[0]) + + ndays = request.param[1] + + nx = 30 + ny = 50 + + da = xr.DataArray( + np.random.rand(ndays, nx, ny), + dims=[ + "time", + "x", + "y", + ], # there is a bug in nco that means time needs to be the first dimension! + coords={"time": pd.date_range("2010-01-01 12:00", freq="D", periods=ndays)}, + ) + ds = da.to_dataset(name="aice") + + # Setting these would be more like the source data, but maybe it doesn't matter! 
+ # ds.time.encoding['units'] = 'Days since 01/01/2000 00:00:00 UTC' + # ds.time.encoding['calendar'] = 'gregorian' + # ds.time.encoding['dtype'] = 'float' + + out_dir = str(tmp_path) + "/" + dir_name + "/" + paths = [f"{out_dir}access-om3.cice.h.{str(t.values)[0:10]}.nc" for t in ds.time] + datasets = [ds.sel(time=slice(t, t)) for t in ds.time] + + makedirs(out_dir) + xr.save_mfdataset(datasets, paths, unlimited_dims=["time"]) + + return paths + + +@pytest.mark.parametrize( + "daily_files, use_dir, nmonths", + [ + (("archive/output000", 365), False, 12), + (("archive/output999", 31), False, 1), + (("archive/output9999", 31), False, 1), + (("archive/output574", 365), True, 12), + ], + indirect=["daily_files"], +) # run this test with several folder names and lengths, provide the directory as an argument sometimes +def test_true_case(daily_files, use_dir, nmonths, tmp_path): + """ + Run the script to convert the daily data into monthly files, and check the monthly files and the daily files dont exist. + """ + + chdir(tmp_path) + output_dir = Path(daily_files[0]).parents[0] + + if not use_dir: # default path + run([run_str]) + expected_months = pd.date_range("2010-01-01", freq="ME", periods=nmonths + 1) + else: # provide path + run( + [ + run_str, + "-d", + output_dir, + ] + ) + expected_months = pd.date_range("2010-01-01", freq="ME", periods=nmonths + 1) + + # valid output filenames + monthly_paths = [ + f"{output_dir}/access-om3.cice.h.{str(t)[0:7]}.nc" for t in expected_months + ] + + for p in monthly_paths[0:nmonths]: + assert_file_exists(p) + + # the month after the last complete one must not have been produced + assert_f_not_exists(monthly_paths[nmonths]) + + for p in daily_files: + assert_f_not_exists(p) + + +@pytest.mark.parametrize( + "daily_files", [("Default", 1), ("Default", 27)], indirect=True +) +def test_no_concat_case(daily_files, tmp_path): + """ + Run the script to convert the daily data into monthly files, with less than 28 days data, and check no things happen. 
+ """ + + chdir(tmp_path) + output_dir = Path(daily_files[0]).parents[0] + + run([run_str]) + expected_months = pd.date_range("2010-01-01", freq="ME", periods=1) + + monthly_paths = [ + f"{output_dir}/access-om3.cice.h.{str(t)[0:7]}.nc" for t in expected_months + ] + + for p in daily_files: + assert_file_exists(p) + + for p in monthly_paths: + assert_f_not_exists(p) diff --git a/test/test_payu_conf/test_intake_desc.py b/test/test_payu_conf/test_intake_desc.py new file mode 100644 index 0000000..aaffe4e --- /dev/null +++ b/test/test_payu_conf/test_intake_desc.py @@ -0,0 +1,31 @@ +from os import chdir + +import sys +from pathlib import Path + +path_root = Path(__file__).parents[2] +sys.path.append(str(path_root)) + +from payu_config.archive_scripts.build_intake_ds import description + + +def test_no_metadata_file(tmp_path): + chdir(tmp_path) + desc = description() + assert desc.startswith("intake-esm datastore for experiment in folder") + + +def test_empty_metadata_file(tmp_path): + chdir(tmp_path) + open("metadata.yaml", "a").close() + desc = description() + assert desc.startswith("intake-esm datastore for experiment in folder") + + +def test_metadata_file(tmp_path): + chdir(tmp_path) + f = open(f"{tmp_path}/metadata.yaml", "w") + print("experiment_uuid: ccedea3c-b42a-4d98-82a1-6a3255549fc6", file=f) + f.close() + desc = description() + assert desc.startswith("intake-esm datastore for experiment ccedea3c") diff --git a/test/test_requirements.txt b/test/test_requirements.txt new file mode 100644 index 0000000..3c032aa --- /dev/null +++ b/test/test_requirements.txt @@ -0,0 +1,8 @@ +numpy +pytest +xarray +pandas +netCDF4 +access-nri-intake +setuptools +ruamel.yaml \ No newline at end of file