From 2e6e019374d52ddc2d7734b8fd1832d5bd8ce91a Mon Sep 17 00:00:00 2001 From: Benjamin Auer Date: Mon, 17 Jul 2023 15:14:14 -0400 Subject: [PATCH 1/7] fixes #2232 --- Apps/time_ave_util.F90 | 16 +++++++++------- CHANGELOG.md | 2 ++ base/MAPL_TripolarGridFactory.F90 | 4 ++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Apps/time_ave_util.F90 b/Apps/time_ave_util.F90 index 70bf28ec4199..a275e9c4bc3e 100644 --- a/Apps/time_ave_util.F90 +++ b/Apps/time_ave_util.F90 @@ -1166,12 +1166,14 @@ subroutine get_file_levels(filename,vertical_data,rc) basic_metadata=formatter%read(_RC) call metadata%create(basic_metadata,trim(filename)) lev_name = metadata%get_level_name(_RC) - call metadata%get_coordinate_info(lev_name,coords=levs,coordUnits=lev_units,long_name=long_name,& - standard_name=standard_name,coordinate_attr=vcoord,_RC) - plevs => levs - vertical_data = VerticalData(levels=plevs,vunit=lev_units,vcoord=vcoord,standard_name=standard_name,long_name=long_name, & - force_no_regrid=.true.,_RC) - nullify(plevs) + if (lev_name /= '') then + call metadata%get_coordinate_info(lev_name,coords=levs,coordUnits=lev_units,long_name=long_name,& + standard_name=standard_name,coordinate_attr=vcoord,_RC) + plevs => levs + vertical_data = VerticalData(levels=plevs,vunit=lev_units,vcoord=vcoord,standard_name=standard_name,long_name=long_name, & + force_no_regrid=.true.,_RC) + nullify(plevs) + end if if (present(rc)) then rc=_SUCCESS @@ -1185,7 +1187,7 @@ function has_level(grid,rc) result(grid_has_level) integer, intent(out), optional :: rc integer :: status, global_dims(3) call MAPL_GridGet(grid,globalCellCountPerDim=global_dims,_RC) - grid_has_level = (global_dims(3)/=1) + grid_has_level = (global_dims(3)>1) if (present(rc)) then RC=_SUCCESS end if diff --git a/CHANGELOG.md b/CHANGELOG.md index 364de51d9909..bf23a59eab34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changed +- Relaxed restriction in the tripolar grid factory so that grids can be made even when the decomposition deos not evenly divide the grid dimension so that the factory can be used in utilities where the core count makes such a condition impossible to satisfiy ### Fixed +- Fix a bug in time\_ave\_util.x so that it can work with files with no veritcal coordinate ### Removed diff --git a/base/MAPL_TripolarGridFactory.F90 b/base/MAPL_TripolarGridFactory.F90 index 0989cd3d4ed0..ef052d0019c1 100644 --- a/base/MAPL_TripolarGridFactory.F90 +++ b/base/MAPL_TripolarGridFactory.F90 @@ -489,8 +489,8 @@ subroutine check_and_fill_consistency(this, unusable, rc) ! Check decomposition/bounds ! Tripolar requires even divisibility - _ASSERT(mod(this%im_world, this%nx) == 0,"needs message") - _ASSERT(mod(this%jm_world, this%ny) == 0,"needs message") + !_ASSERT(mod(this%im_world, this%nx) == 0,"needs message") + !_ASSERT(mod(this%jm_world, this%ny) == 0,"needs message") ! local extents call verify(this%nx, this%im_world, this%ims, rc=status) From 33ca452a79fc993286bf371377b6d628e8945b78 Mon Sep 17 00:00:00 2001 From: Benjamin Auer Date: Mon, 17 Jul 2023 16:15:04 -0400 Subject: [PATCH 2/7] remove commented out code --- base/MAPL_TripolarGridFactory.F90 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/base/MAPL_TripolarGridFactory.F90 b/base/MAPL_TripolarGridFactory.F90 index ef052d0019c1..d44867b06121 100644 --- a/base/MAPL_TripolarGridFactory.F90 +++ b/base/MAPL_TripolarGridFactory.F90 @@ -488,10 +488,6 @@ subroutine check_and_fill_consistency(this, unusable, rc) end if ! Check decomposition/bounds - ! Tripolar requires even divisibility - !_ASSERT(mod(this%im_world, this%nx) == 0,"needs message") - !_ASSERT(mod(this%jm_world, this%ny) == 0,"needs message") - ! local extents call verify(this%nx, this%im_world, this%ims, rc=status) call verify(this%ny, this%jm_world, this%jms, rc=status) From 3bb00e853dea50429f8978c81af9f5ca400f7b59 Mon Sep 17 00:00:00 2001 From: Weiyuan Jiang Date: Tue, 18 Jul 2023 11:44:34 -0400 Subject: [PATCH 3/7] Fix a bug so that MultigroupServer does not allow a file written by multiple processes at the same time --- CHANGELOG.md | 2 ++ pfio/MultiGroupServer.F90 | 48 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf23a59eab34..1700e92b750a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Relaxed restriction in the tripolar grid factory so that grids can be made even when the decomposition deos not evenly divide the grid dimension so that the factory can be used in utilities where the core count makes such a condition impossible to satisfiy ### Fixed + +- Fix a bug so that MultigroupServer does not allow a file written by multiple processes at the same time. - Fix a bug in time\_ave\_util.x so that it can work with files with no veritcal coordinate ### Removed diff --git a/pfio/MultiGroupServer.F90 b/pfio/MultiGroupServer.F90 index 32798edbe21c..bc2220dccaf0 100644 --- a/pfio/MultiGroupServer.F90 +++ b/pfio/MultiGroupServer.F90 @@ -46,6 +46,7 @@ module pFIO_MultiGroupServerMod use pFIO_AbstractRequestHandleMod use pFIO_FileMetadataMod use pFIO_IntegerMessageMapMod + use gFTL2_StringSet, StringSetIterator =>SetIterator use mpi use pFlogger, only: logging, Logger @@ -87,6 +88,8 @@ module pFIO_MultiGroupServerMod module procedure new_MultiGroupServer end interface MultiGroupServer + integer, parameter :: FNAME_LEN = 512 + contains function new_MultiGroupServer(server_comm, port_name, nwriter_per_node, with_profiler, rc) result(s) @@ -335,6 +338,7 @@ subroutine receive_output_data(this, rc) type (HistoryCollection), pointer :: hist_collection integer, pointer :: i_ptr(:) class (AbstractRequestHandle), pointer :: handle + character(len=FNAME_LEN) :: FileName if (associated(ioserver_profiler)) call ioserver_profiler%start("receive_data") client_num = this%threads%size() @@ -395,6 +399,15 @@ subroutine receive_output_data(this, rc) if (this%I_am_front_root) then collection_id = collection_ids%at(collection_counter) call Mpi_Send(collection_id, 1, MPI_INTEGER, this%back_ranks(1), this%back_ranks(1), this%server_comm, ierror) + msg =>f_d_ms(collection_counter)%msg_vec%at(1) ! just pick first one. All messages should have the same filename + select type (q=>msg) + class is (AbstractCollectiveDataMessage) + Filename = q%file_name + call Mpi_Send(FileName, FNAME_LEN, MPI_CHARACTER, this%back_ranks(1), this%back_ranks(1), this%server_comm, ierror) + class default + _FAIL( "yet to implemented") + end select + ! here thread_ptr can point to any thread hist_collection => thread_ptr%hist_collections%at(collection_id) call hist_collection%fmd%serialize(buffer) @@ -438,6 +451,7 @@ subroutine start_back(this, rc) integer, parameter :: stag = 6782 integer :: status + type (StringSet) :: FilesBeingWritten allocate(this%serverthread_done_msgs(1)) this%serverthread_done_msgs(:) = .false. @@ -462,6 +476,7 @@ subroutine start_back_captain(rc) integer :: i, no_job, local_rank, node_rank, nth_writer integer :: terminate, idle_writer, ierr integer :: MPI_STAT(MPI_STATUS_SIZE) + character(len=FNAME_LEN) :: FileName nwriter_per_node = this%nwriter/this%Node_Num allocate(num_idlePEs(0:this%Node_Num-1)) @@ -482,8 +497,12 @@ subroutine start_back_captain(rc) this%front_ranks(1), this%back_ranks(1), this%server_comm, & MPI_STAT, ierr) if (collection_id == -1) exit + + call MPI_recv( FileName, FNAME_LEN , MPI_CHARACTER, & + this%front_ranks(1), this%back_ranks(1), this%server_comm, & + MPI_STAT, ierr) ! 2) get an idle processor and notify front root - call dispatch_work(collection_id, idleRank, num_idlePEs, rc=status) + call dispatch_work(collection_id, idleRank, num_idlePEs, FileName, rc=status) _VERIFY(status) enddo ! while .true. @@ -498,16 +517,19 @@ subroutine start_back_captain(rc) _RETURN(_SUCCESS) end subroutine start_back_captain - subroutine dispatch_work(collection_id, idleRank, num_idlePEs, rc) + subroutine dispatch_work(collection_id, idleRank, num_idlePEs, FileName, rc) integer, intent(in) :: collection_id integer, intent(inout) :: idleRank(0:,0:) integer, intent(inout) :: num_idlePEs(0:) + character(*), intent(in) :: FileName integer, optional, intent(out) :: rc integer :: MPI_STAT(MPI_STATUS_SIZE) integer :: local_rank, idle_writer, nth_writer, node_rank integer :: i, ierr, nwriter_per_node logical :: flag + character(len=FNAME_LEN) :: FileDone + type (StringSetIterator) :: iter ! 2.1) try to retrieve idle writers ! keep looping (waiting) until there are idle processors @@ -526,10 +548,21 @@ subroutine dispatch_work(collection_id, idleRank, num_idlePEs, rc) num_idlePEs(node_rank) = num_idlePEs(node_rank) + 1 nth_writer = mod(local_rank, nwriter_per_node) idleRank(node_rank, nth_writer) = local_rank + + call MPI_recv(FileDone, FNAME_LEN, MPI_CHARACTER, & + local_rank, stag+1, this%back_comm, & + MPI_STAT, ierr) + + iter = FilesBeingWritten%find(FileDone) + _ASSERT( iter /= FilesBeingWritten%end(), "FileDone should be in the set") + iter = FilesBeingWritten%erase(iter) endif enddo ! if there is no idle processor, get back to probe if (all(num_idlePEs == 0)) cycle + ! if this file is still being written, get back to probe + iter = FilesBeingWritten%find(FileName) + if (iter /= FilesBeingWritten%end()) cycle ! get the node with the most idle processors node_rank = maxloc(num_idlePEs, dim=1) - 1 @@ -541,7 +574,8 @@ subroutine dispatch_work(collection_id, idleRank, num_idlePEs, rc) exit enddo _ASSERT(1<= idle_writer .and. idle_writer <= this%nwriter-1, "wrong local rank of writer") - exit ! exit while loop after get one idle processor + call FilesBeingWritten%insert(FileName) + exit ! exit the loop after get one idle processor and the file is done enddo ! while, get one idle writer ! 2.2) tell front comm which idel_worker is ready @@ -559,6 +593,7 @@ subroutine terminate_back_writers(idleRank, rc) integer :: MPI_STAT(MPI_STATUS_SIZE) integer :: node_rank, local_rank, nth_writer integer :: ierr, no_job, nwriter_per_node, idle_writer + character(len=FNAME_LEN) :: FileDone no_job = -1 nwriter_per_node = size(idleRank, 2) @@ -574,6 +609,9 @@ subroutine terminate_back_writers(idleRank, rc) call MPI_recv( idle_writer, 1, MPI_INTEGER, & local_rank, stag, this%back_comm, & MPI_STAT, ierr) + call MPI_recv( FileDone, FNAME_LEN, MPI_CHARACTER, & + local_rank, stag+1, this%back_comm, & + MPI_STAT, ierr) _ASSERT(local_rank == idle_writer, "local_rank and idle_writer should match") call MPI_send(no_job, 1, MPI_INTEGER, local_rank, local_rank, this%back_comm, ierr) endif @@ -612,6 +650,7 @@ subroutine start_back_writers(rc) type(AdvancedMeter) :: file_timer real(kind=REAL64) :: time character(len=:), allocatable :: filename + character(len=FNAME_LEN) :: FileDone real(kind=REAL64) :: file_size, speed class(Logger), pointer :: lgr @@ -828,7 +867,10 @@ subroutine start_back_writers(rc) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! telling captain it is idle by sending its own rank !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + call MPI_send(back_local_rank, 1, MPI_INTEGER, 0, stag, this%back_comm , ierr) + FileDone = Filename + call MPI_send(FileDone, FNAME_LEN, MPI_CHARACTER, 0, stag+1, this%back_comm , ierr) enddo _RETURN(_SUCCESS) end subroutine start_back_writers From b70c31c7554dd84a02923e2c168df5aabf5f0efe Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 18 Jul 2023 12:29:02 -0400 Subject: [PATCH 4/7] Fixes for 2.39.6 Release --- CHANGELOG.md | 12 ++++++++++-- CMakeLists.txt | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf23a59eab34..b245e3081d38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,15 +10,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changed -- Relaxed restriction in the tripolar grid factory so that grids can be made even when the decomposition deos not evenly divide the grid dimension so that the factory can be used in utilities where the core count makes such a condition impossible to satisfiy ### Fixed -- Fix a bug in time\_ave\_util.x so that it can work with files with no veritcal coordinate ### Removed ### Deprecated +## [2.39.6] - 2023-07-18 + +### Changed + +- Relaxed restriction in the tripolar grid factory so that grids can be made even when the decomposition deos not evenly divide the grid dimension so that the factory can be used in utilities where the core count makes such a condition impossible to satisfiy + +### Fixed + +- Fix a bug in `time_ave_util.x` so that it can work with files with no vertical coordinate + ## [2.39.5] - 2023-07-10 ### Fixed diff --git a/CMakeLists.txt b/CMakeLists.txt index 4503c00eb6fd..68c38c821d3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ endif () project ( MAPL - VERSION 2.39.5 + VERSION 2.39.6 LANGUAGES Fortran CXX C) # Note - CXX is required for ESMF # Set the possible values of build type for cmake-gui From 9ce1e9d0c352a2485c4d64b138591916404ff3e8 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 18 Jul 2023 13:27:52 -0400 Subject: [PATCH 5/7] Update for 2.39.7 release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68c38c821d3c..566a48858a98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ endif () project ( MAPL - VERSION 2.39.6 + VERSION 2.39.7 LANGUAGES Fortran CXX C) # Note - CXX is required for ESMF # Set the possible values of build type for cmake-gui From 9eeeed3465b1364ea93ffefa3982992db300040c Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 18 Jul 2023 13:33:04 -0400 Subject: [PATCH 6/7] Add space --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e3f9bf669e3..c69effd0fff5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [2.39.5] - 2023-07-10 ### Fixed + - Fixed logic in generating the names of the split fields. If the alias field in the History.rc has separators (;), each substring is used to name the resulting fields. If there are no separators, this will be the exact name of the first split field ## [2.39.4] - 2023-06-23 From 54391aa231b048a7a47801bf875afa21ec398799 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 18 Jul 2023 14:09:49 -0400 Subject: [PATCH 7/7] Add explicit gFTL-v2 dependence --- pfio/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pfio/CMakeLists.txt b/pfio/CMakeLists.txt index c56e3722d7ec..9bb4fbac29fa 100644 --- a/pfio/CMakeLists.txt +++ b/pfio/CMakeLists.txt @@ -117,7 +117,7 @@ if (BUILD_WITH_PFLOGGER) endif () esma_add_library (${this} SRCS ${srcs} DEPENDENCIES MAPL.shared MAPL.profiler NetCDF::NetCDF_Fortran NetCDF::NetCDF_C TYPE ${MAPL_LIBRARY_TYPE}) -target_link_libraries (${this} PUBLIC GFTL_SHARED::gftl-shared PFLOGGER::pflogger PRIVATE MPI::MPI_Fortran) +target_link_libraries (${this} PUBLIC GFTL::gftl-v2 GFTL_SHARED::gftl-shared-v2 PFLOGGER::pflogger PRIVATE MPI::MPI_Fortran) # CMake has an OpenMP issue with NAG Fortran: https://gitlab.kitware.com/cmake/cmake/-/issues/21280 if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "NAG") target_link_libraries(${this} PRIVATE OpenMP::OpenMP_Fortran)