Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remap_restarts uses SLURM for lake/landice/saltwater (Resolution for issue #85) #95

Merged
merged 3 commits into from
Sep 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 216 additions & 45 deletions pre/remap_restart/remap_lake_landice_saltwater.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import ruamel.yaml
import shlex
from remap_base import remap_base
from remap_utils import get_label
from remap_utils import *
from remap_utils import get_geomdir
from remap_bin2nc import bin2nc

Expand Down Expand Up @@ -47,13 +47,9 @@ def remap(self):
ogrid = config['input']['shared']['ogrid']
omodel = config['input']['shared']['omodel']
stretch = config['input']['shared']['stretch']

in_geomdir = get_geomdir(in_bc_base, in_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch)
in_tile_file = glob.glob(in_geomdir+ '/*-Pfafstetter.til')[0]

agrid = config['output']['shared']['agrid']
ogrid = config['output']['shared']['ogrid']
omodel = config['output']['shared']['omodel']
stretch = config['output']['shared']['stretch']
out_geomdir = get_geomdir(out_bc_base, out_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch)
out_tile_file = glob.glob(out_geomdir+ '/*-Pfafstetter.til')[0]

Expand Down Expand Up @@ -170,6 +166,180 @@ def remap(self):
print('cd ' + cwdir)
os.chdir(cwdir)


agrid = config['output']['shared']['agrid']
if agrid[0].upper() == 'C':
imout = int(agrid[1:])
else:
exit("Only support cs grid so far")

if (imout <90):
NPE = 12; nwrit = 1
elif (imout<=180):
NPE = 24; nwrit = 1
elif (imout<=540):
NPE = 96; nwrit = 1
elif (imout<=720):
NPE = 192; nwrit = 2
elif (imout<=1080):
NPE = 384; nwrit = 2
elif (imout<=1440):
NPE = 576; nwrit = 2
elif (imout< 2880):
NPE = 768; nwrit = 2
elif (imout>=2880):
NPE = 5400; nwrit = 6

PARTITION =''
QOS = config['slurm_pbs']['qos']
TIME = "1:00:00"
if NPE > 532:
assert config['slurm_pbs']['qos'] != 'debug', "qos should be 'allnccs' for NCCS or 'normal' for NAS"
TIME = "12:00:00"
NNODE = ''
job=''
if GEOS_SITE == "NAS":
CONSTRAINT = 'cas_ait'
NNODE = (NPE-1)//40 + 1
job='PBS'
else:
job='SLURM'
partition = config['slurm_pbs']['partition']
if (partition != ''):
PARTITION = "#SBATCH --partition=" + partition

CONSTRAINT = '"[cas|sky]"'
if BUILT_ON_SLES15:
CONSTRAINT = 'mil'


remap_template = job_directive[job] + \
"""
source {Bin}/g5_modules
limit stacksize unlimited

cd {out_dir}/upper_data
/bin/touch input.nml

# The MERRA fvcore_internal_restarts don't include W or DZ, but we can add them by setting
# HYDROSTATIC = 0 which means HYDROSTATIC = FALSE

if ($?I_MPI_ROOT) then
# intel scaling suggestions
#--------------------------
setenv I_MPI_ADJUST_ALLREDUCE 12
setenv I_MPI_ADJUST_GATHERV 3

setenv I_MPI_SHM_HEAP_VSIZE 512
setenv PSM2_MEMORY large
setenv I_MPI_EXTRA_FILESYSTEM 1
setenv I_MPI_EXTRA_FILESYSTEM_FORCE gpfs
setenv ROMIO_FSTYPE_FORCE "gpfs:"
endif
set infiles = ()
set outfils = ()
foreach infile ( *_restart_in )
if ( $infile == fvcore_internal_restart_in ) continue
if ( $infile == moist_internal_restart_in ) continue

set infiles = ( $infiles $infile )
set outfil = `echo $infile | sed "s/restart_in/rst_out/"`
set outfils = ($outfils $outfil)
end

set interp_restartsX = {Bin}/interp_restarts.x
if ( $#infiles ) then
set ioflag = "-input_files $infiles -output_files $outfils"
set ftype = `file -Lb --mime-type fvcore_internal_restart_in`
if ($ftype =~ *stream*) then
set interp_restartsX = {Bin}/interp_restarts_bin.x
endif
else
set ioflag = ""
endif

set drymassFLG = {drymassFLG}
if ($drymassFLG) then
set dmflag = ""
else
set dmflag = "-scalers F"
endif

{Bin}/esma_mpirun -np {NPE} $interp_restartsX -im {imout} -lm {nlevel} \\
-do_hydro {hydrostatic} $ioflag $dmflag -nwriter {nwrit} {stretch_str}

"""
account = config['slurm_pbs']['account']
drymassFLG = config['input']['air']['drymass']
hydrostatic = config['input']['air']['hydrostatic']
nlevel = config['output']['air']['nlevel']
log_name = out_dir+'/remap_lake_landice_saltwater_log'
job_name = 'remap_lake_landice_saltwater'
stretch_str = ''
remap_script = remap_template.format(Bin=bindir, account = account, \
out_dir = out_dir, log_name = log_name, job_name= job_name, drymassFLG = drymassFLG, \
imout = imout, nwrit = nwrit, NPE = NPE, NNODE = NNODE, \
QOS = QOS, TIME = TIME, CONSTRAINT = CONSTRAINT, PARTITION = PARTITION, nlevel = nlevel, hydrostatic = hydrostatic,
stretch_str = stretch_str)

script_name = './remap_lake_landice_saltwater.j'

lake_landice_saltwater = open(script_name, 'wt')
lake_landice_saltwater.write(remap_script)
lake_landice_saltwater.close()

in_bc_base = config['input']['shared']['bc_base']
if "gmao_SIteam/ModelData" in in_bc_base:
assert GEOS_SITE == "NAS", "wrong site to run the package"

interactive = None
if GEOS_SITE == 'NAS':
interactive = os.getenv('PBS_JOBID', default = None)
else:
interactive = os.getenv('SLURM_JOB_ID', default = None)

if (interactive) :
print('interactive mode\n')
if GEOS_SITE != 'NAS':
ntasks = os.getenv('SLURM_NTASKS', default = None)
if ( not ntasks):
nnodes = int(os.getenv('SLURM_NNODES', default = '1'))
ncpus = int(os.getenv('SLURM_CPUS_ON_NODE', default = '28'))
ntasks = nnodes * ncpus
ntasks = int(ntasks)
if (ntasks < NPE ):
print("\nYou should have at least {NPE} cores. Now you only have {ntasks} cores ".format(NPE=NPE, ntasks=ntasks))

subprocess.call(['chmod', '755', script_name])
print(script_name+ ' 1>' + log_name + ' 2>&1')
os.system(script_name + ' 1>' + log_name+ ' 2>&1')
elif GEOS_SITE == "NAS" :
print('qsub -W block=true '+ script_name +'\n')
subprocess.call(['qsub', '-W','block=true', script_name])
else:
print('sbatch -W '+ script_name +'\n')
subprocess.call(['sbatch', '-W', script_name])

#
# post process
#
suffix = '_rst.' + suffix

for out_rst in glob.glob("*_rst*"):
filename = expid + os.path.basename(out_rst).split('_rst')[0].split('.')[-1]+suffix
print('\n Move ' + out_rst + ' to ' + out_dir+"/"+filename)
shutil.move(out_rst, out_dir+"/"+filename)

print('\n Move remap_lake_landice_saltwater.j to ' + out_dir)
shutil.move('remap_lake_landice_saltwater.j', out_dir+"/remap_lake_landice_saltwater.j")
with open(out_dir+'/cap_restart', 'w') as f:
yyyymmddhh_ = str(config['input']['shared']['yyyymmddhh'])
time = yyyymmddhh_[0:8]+' '+yyyymmddhh_[8:10]+'0000'
print('Create cap_restart')
f.write(time)
print('cd ' + cwdir)
os.chdir(cwdir)

self.remove_merra2()

def run_and_log(self, cmd, log_name):
Expand All @@ -185,14 +355,15 @@ def run_and_log(self, cmd, log_name):
log_.write(stdout)
log_.write(stderr)


def find_rst(self):
surf_restarts =[
"route_internal_rst" ,
"lake_internal_rst" ,
"landice_internal_rst" ,
"openwater_internal_rst" ,
"saltwater_internal_rst" ,
"seaicethermo_internal_rst"]
"route_internal_rst" ,
"lake_internal_rst" ,
"landice_internal_rst" ,
"openwater_internal_rst" ,
"saltwater_internal_rst" ,
"seaicethermo_internal_rst"]

rst_dir = self.config['input']['shared']['rst_dir']
yyyymmddhh_ = str(self.config['input']['shared']['yyyymmddhh'])
Expand All @@ -201,48 +372,48 @@ def find_rst(self):
for f in surf_restarts :
files = glob.glob(rst_dir+ '/*'+f+'*'+time+'*')
if len(files) >0:
restarts_in.append(files[0])
restarts_in.append(files[0])
if (len(restarts_in) == 0) :
print("\n try restart file names without time stamp\n")
for f in surf_restarts :
fname = rst_dir+ '/'+f
if os.path.exists(fname):
restarts_in.append(fname)
restarts_in.append(fname)

return restarts_in

def copy_merra2(self):
    """Copy the MERRA-2 surface restarts into ``rst_dir`` and convert them to nc4.

    Returns immediately (``None``) unless
    ``self.config['input']['shared']['MERRA-2']`` is truthy.

    Otherwise the lake, landice and saltwater binary restarts for the
    configured ``expid`` and ``yyyymmddhh`` are copied from the MERRA-2
    archive directory into ``rst_dir`` (created if missing). Each copy is
    converted with ``bin2nc`` using the yaml file of the same category that
    sits next to this script, and the binary copy is then removed.
    """
    shared = self.config['input']['shared']
    if not shared['MERRA-2']:
        return

    expid = shared['expid']
    yyyymmddhh_ = str(shared['yyyymmddhh'])
    yyyy_ = yyyymmddhh_[0:4]
    mm_ = yyyymmddhh_[4:6]
    hh_ = yyyymmddhh_[8:10]

    suffix = yyyymmddhh_[0:8]+'_'+ hh_ + 'z.bin'
    merra_2_rst_dir = '/archive/users/gmao_ops/MERRA2/gmao_ops/GEOSadas-5_12_4/'+expid +'/rs/Y'+yyyy_ +'/M'+mm_+'/'
    rst_dir = shared['rst_dir'] + '/'
    os.makedirs(rst_dir, exist_ok=True)
    print(' Copy MERRA-2 surface restarts \n from \n ' + merra_2_rst_dir + '\n to\n '+ rst_dir +'\n')

    # Restart category paired with the yaml that describes its binary layout.
    surface_restarts = [
        ('lake_internal_rst', 'bin2nc_merra2_lake.yaml'),
        ('landice_internal_rst', 'bin2nc_merra2_landice.yaml'),
        ('saltwater_internal_rst', 'bin2nc_merra2_salt.yaml'),
    ]
    bin_path = os.path.dirname(os.path.realpath(__file__))
    for rst_name, yaml_name in surface_restarts:
        f = merra_2_rst_dir + expid + '.' + rst_name + '.' + suffix
        # rst_dir already ends with '/', so join instead of adding another one.
        dest = os.path.join(rst_dir, os.path.basename(f))
        print("Copy file "+f +" to " + rst_dir)
        shutil.copy(f, dest)
        # Swap only the file extension; a plain str.replace would also
        # rewrite any 'z.bin' that happens to occur in the directory part.
        ncdest = os.path.splitext(dest)[0] + '.nc4'
        yaml_file = os.path.join(bin_path, yaml_name)
        print('Convert bin to nc4:' + dest + ' to \n' + ncdest + '\n')
        bin2nc(dest, ncdest, yaml_file)
        os.remove(dest)

if __name__ == '__main__' :
lls = lake_landice_saltwater(params_file='remap_params.yaml')
Expand Down
Loading