From fac8382406db337c15166585a75b28f79bf53adb Mon Sep 17 00:00:00 2001 From: Naser Mahfouz Date: Mon, 23 Sep 2024 12:01:53 -0400 Subject: [PATCH 1/7] add gh/ci eamxx standalone testing --- .github/workflows/eamxx-gh-ci-standalone.yml | 69 +++++++++++++++++++ components/eamxx/cmake/BuildCprnc.cmake | 62 ++++++++++------- .../eamxx/cmake/machine-files/ghci-oci.cmake | 13 ++++ components/eamxx/scripts/machines_specs.py | 4 ++ 4 files changed, 123 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/eamxx-gh-ci-standalone.yml diff --git a/.github/workflows/eamxx-gh-ci-standalone.yml b/.github/workflows/eamxx-gh-ci-standalone.yml new file mode 100644 index 000000000000..fbbdf813581a --- /dev/null +++ b/.github/workflows/eamxx-gh-ci-standalone.yml @@ -0,0 +1,69 @@ +name: gh-standalone + +on: + pull_request: + branches: [ master ] + paths: + # first, yes to these + - '.github/workflows/eamxx-gh-ci-standalone.yml' + - 'cime_config/**' + - 'components/eam/**' + - 'components/eamxx/**' + - 'components/elm/**' + - 'driver-moab/**' + - 'driver-mct/**' + - 'components/homme/**' + # second, no to these + - '!components/eam/docs/**' + - '!components/eam/mkdocs.yml' + - '!components/eamxx/docs/**' + - '!components/eamxx/mkdocs.yml' + - '!components/elm/docs/**' + - '!components/elm/mkdocs.yml' + + workflow_dispatch: + +jobs: + + ci: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + test: + # TODO: add opt, dbg, etc. here once we stabilize testing + # TODO: note that currently, there is a fail in atm_proc test + # TODO: components/eamxx/src/share/tests/atm_process_tests.cpp + # TODO: REQUIRE (dag.has_unmet_dependencies()); + # TODO: but only on some machines... + - sp + container: + image: ghcr.io/e3sm-project/containers-ghci:ghci-0.1.2 + + steps: + - + name: Checkout + uses: actions/checkout@v4 + with: + show-progress: false + submodules: recursive + - + name: standalone + env: + SHELL: sh + run: | + # TODO: get rid of this extra line if we can? + git config --global safe.directory '*' + ./components/eamxx/scripts/test-all-scream -m ghci-oci -t ${{ matrix.test }} + # TODO: add logging... + # - + # name: Artifacts + # uses: actions/upload-artifact@v4 + # if: ${{ always() }} + # with: + # name: ${{ matrix.test }} + # path: | + # /projects/e3sm/scratch/${{ matrix.test }}*/TestStatus.log + # /projects/e3sm/scratch/${{ matrix.test }}*/bld/*.bldlog.* + # /projects/e3sm/scratch/${{ matrix.test }}*/run/*.log.* + # /projects/e3sm/scratch/${{ matrix.test }}*/run/*.cprnc.out diff --git a/components/eamxx/cmake/BuildCprnc.cmake b/components/eamxx/cmake/BuildCprnc.cmake index 2f4f1f00a362..287956c5a9dd 100644 --- a/components/eamxx/cmake/BuildCprnc.cmake +++ b/components/eamxx/cmake/BuildCprnc.cmake @@ -8,32 +8,44 @@ include (EkatUtils) macro(BuildCprnc) - # Make sure this is built only once - if (NOT TARGET cprnc) - if (SCREAM_CIME_BUILD) - string (CONCAT MSG - "WARNING! By default, scream should not build tests in a CIME build,\n" - "and cprnc should only be built by scream in case tests are enabled.\n" - "If you explicitly requested tests to be on in a CIME build,\n" - "then you can discard this warning. Otherwise, please, contact developers.\n") - message("${MSG}") - endif() - set(BLDROOT ${PROJECT_BINARY_DIR}/externals/cprnc) - file(WRITE ${BLDROOT}/Macros.cmake - " - set(SCC ${CMAKE_C_COMPILER}) - set(SFC ${CMAKE_Fortran_COMPILER}) - set(FFLAGS \"${CMAKE_Fortran_FLAGS}\") - set(NETCDF_PATH ${NetCDF_Fortran_PATH}) - " - ) - set(SRC_ROOT ${SCREAM_BASE_DIR}/../..) - add_subdirectory(${SRC_ROOT}/cime/CIME/non_py/cprnc ${BLDROOT}) - EkatDisableAllWarning(cprnc) - - set(CPRNC_BINARY ${BLDROOT}/cprnc CACHE INTERNAL "") - + # TODO: handle this more carefully and more gracefully in the future + # TODO: For now, it is just a hack to get going... + # find cprnc defined in machine entries + set(CCSM_CPRNC $ENV{CCSM_CPRNC}) + if(EXISTS "${CCSM_CPRNC}") + message(STATUS "Path ${CCSM_CPRNC} exists, so we will use it") + set(CPRNC_BINARY ${CCSM_CPRNC} CACHE INTERNAL "") configure_file (${SCREAM_BASE_DIR}/cmake/CprncTest.cmake.in ${CMAKE_BINARY_DIR}/bin/CprncTest.cmake @ONLY) + else() + message(WARNING "Path ${CCSM_CPRNC} does not exist, so we will try to build it") + # Make sure this is built only once + if (NOT TARGET cprnc) + if (SCREAM_CIME_BUILD) + string (CONCAT MSG + "WARNING! By default, scream should not build tests in a CIME build,\n" + "and cprnc should only be built by scream in case tests are enabled.\n" + "If you explicitly requested tests to be on in a CIME build,\n" + "then you can discard this warning. Otherwise, please, contact developers.\n") + message("${MSG}") + endif() + set(BLDROOT ${PROJECT_BINARY_DIR}/externals/cprnc) + file(WRITE ${BLDROOT}/Macros.cmake + " + set(SCC ${CMAKE_C_COMPILER}) + set(SFC ${CMAKE_Fortran_COMPILER}) + set(FFLAGS \"${CMAKE_Fortran_FLAGS}\") + set(NETCDF_PATH ${NetCDF_Fortran_PATH}) + " + ) + set(SRC_ROOT ${SCREAM_BASE_DIR}/../..) + add_subdirectory(${SRC_ROOT}/cime/CIME/non_py/cprnc ${BLDROOT}) + EkatDisableAllWarning(cprnc) + + set(CPRNC_BINARY ${BLDROOT}/cprnc CACHE INTERNAL "") + + configure_file (${SCREAM_BASE_DIR}/cmake/CprncTest.cmake.in + ${CMAKE_BINARY_DIR}/bin/CprncTest.cmake @ONLY) + endif() endif() endmacro() diff --git a/components/eamxx/cmake/machine-files/ghci-oci.cmake b/components/eamxx/cmake/machine-files/ghci-oci.cmake index 86a2fb1d5302..85eabbaa848a 100644 --- a/components/eamxx/cmake/machine-files/ghci-oci.cmake +++ b/components/eamxx/cmake/machine-files/ghci-oci.cmake @@ -1,2 +1,15 @@ include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) common_setup() + +set(CMAKE_Fortran_FLAGS "-Wno-maybe-uninitialized -Wno-unused-dummy-argument -fallow-argument-mismatch" CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS "-fvisibility-inlines-hidden -fmessage-length=0 -Wno-use-after-free -Wno-unused-variable -Wno-maybe-uninitialized" CACHE STRING "" FORCE) + +# TODO: figure out a better way to handle this, e.g., +# TODO: --map-by ppr:1:node:pe=1 doesn't work with mpich, +# TODO: but -map-by core:1:numa:hwthread=1 may work well? +# TODO: this will need to be handled in EKAT at some point +set(EKAT_MPI_NP_FLAG "-np" CACHE STRING "-np") + +# TODO: hack in place to get eamxx to recognize CPRNC +# TODO: See note in BuildCprnc.cmake... +set(ENV{CCSM_CPRNC} "/usr/local/packages/bin/cprnc") diff --git a/components/eamxx/scripts/machines_specs.py b/components/eamxx/scripts/machines_specs.py index 9536d415c0de..0b0567c3cce2 100644 --- a/components/eamxx/scripts/machines_specs.py +++ b/components/eamxx/scripts/machines_specs.py @@ -82,6 +82,10 @@ ["mpicxx","mpifort","mpicc"], "", ""), + "ghci-oci" : ([f"eval $({CIMEROOT}/CIME/Tools/get_case_env -c SMS.ne4pg2_ne4pg2.F2010-SCREAMv1.ghci-oci_gnu)"], + ["mpicxx","mpifort","mpicc"], + "", + ""), "linux-generic" : ([],["mpicxx","mpifort","mpicc"],"", ""), "linux-generic-debug" : ([],["mpicxx","mpifort","mpicc"],"", ""), "linux-generic-serial" : ([],["mpicxx","mpifort","mpicc"],"", ""), From 89b7ade03312aac5aa8ece869cebc356919741c6 Mon Sep 17 00:00:00 2001 From: Naser Mahfouz Date: Tue, 24 Sep 2024 11:42:17 -0400 Subject: [PATCH 2/7] fix pl semanitics --- components/eamxx/src/share/tests/atm_process_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/src/share/tests/atm_process_tests.cpp b/components/eamxx/src/share/tests/atm_process_tests.cpp index 01985c33c6d0..fce25cd28e21 100644 --- a/components/eamxx/src/share/tests/atm_process_tests.cpp +++ b/components/eamxx/src/share/tests/atm_process_tests.cpp @@ -443,7 +443,7 @@ TEST_CASE("atm_proc_dag", "") { using strvec_t = std::vector; auto params = create_test_params(); - auto p1 = params.sublist("BarBaz"); + auto& p1 = params.sublist("BarBaz"); // Make sure there's a missing piece (whatever Baz computes); p1.set("atm_procs_list",{"Bar"}); From cdaa5c4b8c4af6eeae2f11a487a185e96d7dbae6 Mon Sep 17 00:00:00 2001 From: Naser Mahfouz Date: Tue, 24 Sep 2024 18:09:01 -0400 Subject: [PATCH 3/7] only test for eamxx, save logs --- .github/workflows/eamxx-gh-ci-standalone.yml | 36 ++++++-------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/.github/workflows/eamxx-gh-ci-standalone.yml b/.github/workflows/eamxx-gh-ci-standalone.yml index fbbdf813581a..4a194a891062 100644 --- a/.github/workflows/eamxx-gh-ci-standalone.yml +++ b/.github/workflows/eamxx-gh-ci-standalone.yml @@ -6,20 +6,12 @@ on: paths: # first, yes to these - '.github/workflows/eamxx-gh-ci-standalone.yml' - - 'cime_config/**' - - 'components/eam/**' + - 'cime_config/machine/config_machines.xml' - 'components/eamxx/**' - - 'components/elm/**' - - 'driver-moab/**' - - 'driver-mct/**' - 'components/homme/**' # second, no to these - - '!components/eam/docs/**' - - '!components/eam/mkdocs.yml' - '!components/eamxx/docs/**' - '!components/eamxx/mkdocs.yml' - - '!components/elm/docs/**' - - '!components/elm/mkdocs.yml' workflow_dispatch: @@ -31,12 +23,8 @@ jobs: fail-fast: false matrix: test: - # TODO: add opt, dbg, etc. here once we stabilize testing - # TODO: note that currently, there is a fail in atm_proc test - # TODO: components/eamxx/src/share/tests/atm_process_tests.cpp - # TODO: REQUIRE (dag.has_unmet_dependencies()); - # TODO: but only on some machines... - sp + - opt container: image: ghcr.io/e3sm-project/containers-ghci:ghci-0.1.2 @@ -55,15 +43,11 @@ jobs: # TODO: get rid of this extra line if we can? git config --global safe.directory '*' ./components/eamxx/scripts/test-all-scream -m ghci-oci -t ${{ matrix.test }} - # TODO: add logging... - # - - # name: Artifacts - # uses: actions/upload-artifact@v4 - # if: ${{ always() }} - # with: - # name: ${{ matrix.test }} - # path: | - # /projects/e3sm/scratch/${{ matrix.test }}*/TestStatus.log - # /projects/e3sm/scratch/${{ matrix.test }}*/bld/*.bldlog.* - # /projects/e3sm/scratch/${{ matrix.test }}*/run/*.log.* - # /projects/e3sm/scratch/${{ matrix.test }}*/run/*.cprnc.out + - + name: Artifacts + uses: actions/upload-artifact@v4 + if: ${{ always() }} + with: + name: ${{ matrix.test }} + path: | + components/eamxx/ctest-build/*/Testing/Temporary/Last*.log From d6bf9943ca314a693e2db46d5235e4c024ded28e Mon Sep 17 00:00:00 2001 From: Naser Mahfouz Date: Sat, 28 Sep 2024 15:38:48 -0400 Subject: [PATCH 4/7] try new slimmed container --- .github/workflows/eamxx-gh-ci-standalone.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/eamxx-gh-ci-standalone.yml b/.github/workflows/eamxx-gh-ci-standalone.yml index 4a194a891062..f916384476aa 100644 --- a/.github/workflows/eamxx-gh-ci-standalone.yml +++ b/.github/workflows/eamxx-gh-ci-standalone.yml @@ -25,8 +25,10 @@ jobs: test: - sp - opt + - dbg + - cov container: - image: ghcr.io/e3sm-project/containers-ghci:ghci-0.1.2 + image: ghcr.io/e3sm-project/containers-standalone-ghci:standalone-ghci-0.1.0 steps: - From da9951ac22463f0443b47daf3445d4a723a797e1 Mon Sep 17 00:00:00 2001 From: Naser Mahfouz Date: Sat, 28 Sep 2024 16:15:11 -0400 Subject: [PATCH 5/7] don't run coverage test --- .github/workflows/eamxx-gh-ci-standalone.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/eamxx-gh-ci-standalone.yml b/.github/workflows/eamxx-gh-ci-standalone.yml index f916384476aa..ffb9afb02fe7 100644 --- a/.github/workflows/eamxx-gh-ci-standalone.yml +++ b/.github/workflows/eamxx-gh-ci-standalone.yml @@ -26,7 +26,6 @@ jobs: - sp - opt - dbg - - cov container: image: ghcr.io/e3sm-project/containers-standalone-ghci:standalone-ghci-0.1.0 From 0012b49af6326a8213b2f8b55d586e69856b1b5a Mon Sep 17 00:00:00 2001 From: mahf708 Date: Sat, 28 Sep 2024 18:42:11 -0500 Subject: [PATCH 6/7] add fpe test --- .github/workflows/eamxx-gh-ci-standalone.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/eamxx-gh-ci-standalone.yml b/.github/workflows/eamxx-gh-ci-standalone.yml index ffb9afb02fe7..9fde89ed35bf 100644 --- a/.github/workflows/eamxx-gh-ci-standalone.yml +++ b/.github/workflows/eamxx-gh-ci-standalone.yml @@ -26,6 +26,7 @@ jobs: - sp - opt - dbg + - fpe container: image: ghcr.io/e3sm-project/containers-standalone-ghci:standalone-ghci-0.1.0 From b33da8c83090b4c7a468f5f04caa8c63e165e55c Mon Sep 17 00:00:00 2001 From: mahf708 Date: Sat, 28 Sep 2024 18:45:44 -0500 Subject: [PATCH 7/7] update cosp2 submod --- .gitmodules | 4 ++-- components/eam/src/physics/cosp2/external | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 69d56e140361..b36d90a00a42 100644 --- a/.gitmodules +++ b/.gitmodules @@ -22,8 +22,8 @@ branch = scorpio_classic [submodule "cosp2"] path = components/eam/src/physics/cosp2/external - url = git@github.com:CFMIP/COSPv2.0.git - branch = CESM_v2.1.4 + url = git@github.com:bartgol/COSPv2.0.git + branch = bartgol/fix-cosp_optical_inputs [submodule "cime"] path = cime url = git@github.com:ESMCI/cime.git diff --git a/components/eam/src/physics/cosp2/external b/components/eam/src/physics/cosp2/external index 9d910acba3e3..2deb41975faa 160000 --- a/components/eam/src/physics/cosp2/external +++ b/components/eam/src/physics/cosp2/external @@ -1 +1 @@ -Subproject commit 9d910acba3e3a3151de231184d4b109f65e28aee +Subproject commit 2deb41975faa4f5eacfc8b8f12b85acf6583d407